diff --git a/configure.ac b/configure.ac
index fb719e480b548e5be486efc3caa43c783b9a7362..282707e4809cbfaf658abef7b7dbe2ba7e1f341e 100755
--- a/configure.ac
+++ b/configure.ac
@@ -113,6 +113,12 @@ AM_CONDITIONAL(BUILDCUDA, test ! x$NVCC = x"no")
 
 ###########################
 
+# Define that there is MPI
+
+AC_DEFINE([HAVE_MPI],[],[MPI Enabled])
+
+#####
+
 no_avx=no
 no_sse42=no
 no_sse41=no
diff --git a/src/CartesianGraphFactory_unit_test.hpp b/src/CartesianGraphFactory_unit_test.hpp
deleted file mode 100644
index b1a84c43a28c83f97cef5812760af6aa5f0e173f..0000000000000000000000000000000000000000
--- a/src/CartesianGraphFactory_unit_test.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef CARTESIAN_GRAPH_UNIT_TEST_HPP
-#define CARTESIAN_GRAPH_UNIT_TEST_HPP
-
-#include "Graph/CartesianGraphFactory.hpp"
-#include "map_graph.hpp"
-
-#define GS_SIZE 8
-
-/*!
- *
- * Test node
- *
- */
-
-struct node_cp
-{
-	//! The node contain 3 unsigned long integer for comunication computation and memory
-	typedef boost::fusion::vector<size_t,size_t,size_t> type;
-
-	//! Attributes name
-	struct attributes
-	{
-		static const std::string name[];
-	};
-
-	//! The data
-	type data;
-
-	//! communication property id in boost::fusion::vector
-	static const unsigned int communication = 0;
-	//! computation property id in boost::fusion::vector
-	static const unsigned int computation = 1;
-	//! memory property id in boost::fusion::vector
-	static const unsigned int memory = 2;
-	//! total number of properties boost::fusion::vector
-	static const unsigned int max_prop = 3;
-};
-
-const std::string node_cp::attributes::name[] = {"communication","computation","memory"};
-
-BOOST_AUTO_TEST_SUITE( CartesianGraphFactory_test )
-
-BOOST_AUTO_TEST_CASE( CartesianGraphFactory_use)
-{
-	typedef node_cp node;
-
-	CartesianGraphFactory<3,Graph_CSR<Point_test<float>,Point_test<float>>> g_factory;
-
-	// Cartesian grid
-	size_t sz[3] = {GS_SIZE,GS_SIZE,GS_SIZE};
-
-	// Box
-	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
-
-	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.construct<node::communication,float,2>(sz,box);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif
diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp
index 5f5d75e91355405b006b0841da330be7c20cd0f7..c2636bfd72341e68dc47374faec1449521d130cd 100644
--- a/src/Decomposition/CartDecomposition.hpp
+++ b/src/Decomposition/CartDecomposition.hpp
@@ -10,11 +10,10 @@
 
 #include "config.h"
 #include "Decomposition.hpp"
-#include "map_vector.hpp"
+#include "Vector/map_vector.hpp"
 #include <vector>
 #include "global_const.hpp"
 #include <initializer_list>
-#include "map_vector.hpp"
 #include "SubdomainGraphNodes.hpp"
 #include "metis_util.hpp"
 #include "dec_optimizer.hpp"
@@ -106,11 +105,11 @@ private:
 		// Here we use METIS
 
 		// Create a cartesian grid graph
-		CartesianGraphFactory<dim,Graph_CSR<nm_v,nm_e>> g_factory_part;
+		CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
 
 		// Processor graph
 
-		Graph_CSR<nm_v,nm_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1,0,1>(div,domain);
+		Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(div,domain);
 
 		// Get the number of processing units
 		size_t Np = v_cl.getProcessingUnits();
@@ -119,33 +118,22 @@ private:
 		long int p_id = v_cl.getProcessUnitID();
 
 		// Convert the graph to metis
-		Metis<Graph_CSR<nm_v,nm_e>> met(gp,Np);
+		Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);
 
 		// decompose
 
-		met.decompose<nm_v::id>();
+		met.decompose<nm_part_v::id>();
 
 		// Optimize the decomposition creating bigger spaces
 		// And reducing Ghost over-stress
 
-		dec_optimizer<dim,Graph_CSR<nm_v,nm_e>> d_o(gp,div);
+		dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,div);
 
 		// set of Boxes produced by the decomposition optimizer
 		openfpm::vector<::Box<dim,size_t>> loc_box;
 
-		// a grig key poiting to the origin
-		grid_key_dx<dim> keyZero;
-		keyZero.zero();
-
 		// optimize the decomposition
-		d_o.template optimize<nm_v::sub_id,nm_v::id>(keyZero,gp,p_id,loc_box);
-
-		//-------------------DEBUG---------
-		VTKWriter<decltype(gp)> vtk(gp);
-		vtk.write("out_graph.vtk");
-		//---------------------------------
-
-		exit(1);
+		d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box);
 
 		// convert into sub-domain
 		for (size_t s = 0 ; s < loc_box.size() ; s++)
diff --git a/src/Graph/CartesianGraphFactory.hpp b/src/Graph/CartesianGraphFactory.hpp
index 500ad2c201b703656aa5e7cee998436627c326e8..610c497614eadf068168cb99b72ceddf56b33cf3 100644
--- a/src/Graph/CartesianGraphFactory.hpp
+++ b/src/Graph/CartesianGraphFactory.hpp
@@ -8,8 +8,8 @@
 #ifndef CARTESIANGRAPHFACTORY_HPP_
 #define CARTESIANGRAPHFACTORY_HPP_
 
-#include "map_vector.hpp"
-#include "map_graph.hpp"
+#include "Vector/map_vector.hpp"
+#include "Graph/map_graph.hpp"
 #include "Grid/grid.hpp"
 #include "Space/Shape/Box.hpp"
 #include "Space/Shape/HyperCube.hpp"
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 27cf22d1fcf0aa6c6eb13e2b4378397b71384e5e..d23dc5c69d39de74e938782d82eeb5927b3d58df 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -223,6 +223,17 @@ public:
 	~grid_dist_id()
 	{
 	}
+
+	/*! \brief Get the Virtual Cluster machine
+	 *
+	 * \return the Virtual cluster machine
+	 *
+	 */
+
+	Vcluster & getVC()
+	{
+		return v_cl;
+	}
 };
 
 #endif
diff --git a/src/Grid/grid_dist_id_unit_test.hpp b/src/Grid/grid_dist_id_unit_test.hpp
index d4bc120890122230e9f49c2bb655cae738104665..4f373a746aeb54e6b4824b1c1149fe34af6602c6 100644
--- a/src/Grid/grid_dist_id_unit_test.hpp
+++ b/src/Grid/grid_dist_id_unit_test.hpp
@@ -55,6 +55,14 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use)
 		++dom;
 	}
 
+	// Get the virtual cluster machine
+	Vcluster & vcl = g_dist.getVC();
+
+	// reduce
+	vcl.reduce(count);
+	vcl.execute();
+
+	// Check
 	BOOST_REQUIRE_EQUAL(count,1024*1024);
 
 /*	auto g_it = g_dist.getIteratorBulk();
diff --git a/src/dec_optimizer.hpp b/src/dec_optimizer.hpp
index 383253bdd836953d00d8e0b3d81c768553d66526..baf34e0b5d7e5ec53aa6aa89468be7c644f21d0d 100644
--- a/src/dec_optimizer.hpp
+++ b/src/dec_optimizer.hpp
@@ -181,7 +181,8 @@ private:
 
 		for (size_t j = 0 ; j < domains.size() ; j++)
 		{
-			if (graph.vertex(domains.get(j)).template get<p_sub>() < 0)
+			long int gs = graph.vertex(domains.get(j)).template get<p_sub>();
+			if (gs < 0)
 			{
 				// not assigned push it
 
@@ -217,13 +218,17 @@ private:
 
 				// get the vertex and if does not have a sub-id and is assigned ...
 
-				if (graph.vertex(gh.LinId(gk)).template get<p_sub>() < 0)
+				long int pid = graph.vertex(gh.LinId(gk)).template get<p_sub>();
+
+				if (pid < 0)
 				{
 					// ... and the p_id different from -1
 					if (pr_id != -1)
 					{
 						// ... and the processor id of the sub-domain match p_id, add to the queue
-						if ( pr_id == graph.vertex(gh.LinId(gk)).template get<p_id>() )
+
+						long int pp_id = graph.vertex(gh.LinId(gk)).template get<p_id>();
+						if ( pr_id == pp_id)
 							domains_new.add(gh.LinId(gk));
 					}
 					else
@@ -296,8 +301,11 @@ private:
 					// we get the processor id of the neighborhood sub-domain on direction d
 					size_t exp_p = graph.vertex(sub_w_e).template get<p_id>();
 
-					// we check if it is the same processor id
-					w_can_expand &= exp_p == domain_id;
+					// Check if already assigned
+					long int ass = graph.vertex(sub_w_e).template get<p_sub>();
+
+					// we check if it is the same processor id ans is not assigned
+					w_can_expand &= ((exp_p == domain_id) & (ass < 0));
 
 					// next domain
 					++it;
@@ -376,8 +384,9 @@ private:
 		}
 	}
 
-	/*! \brief Initialize the wavefront
+	/*! \brief Initialize the wavefronts
 	 *
+	 * \param starting point of the wavefront set
 	 * \param v_w Wavefront to initialize
 	 *
 	 */
@@ -395,6 +404,55 @@ private:
 		}
 	}
 
+	/*! \brief Get the first seed
+	 *
+	 * search in the graph for one sub-domain labelled with processor id
+	 * to use as seed
+	 *
+	 * \tparam p_id property in the graph storing the sub-domain id
+	 *
+	 * \param Graph graph
+	 * \param id processor id
+	 *
+	 */
+
+	template<unsigned int p_id> grid_key_dx<dim> search_first_seed(Graph & graph, long int id)
+	{
+		// if no processor is selected return the first point
+		if (id < -1)
+		{
+			grid_key_dx<dim> key;
+			key.zero();
+
+			return key;
+		}
+
+		// Create a grid iterator
+		grid_key_dx_iterator<dim> g_sub(gh);
+
+		// iterate through all grid points
+
+		while (g_sub.isNext())
+		{
+			// get the actual key
+			const grid_key_dx<dim> & gk = g_sub.get();
+
+			// if the subdomain has the id we are searching stop
+			if (graph.vertex(gh.LinId(gk)).template get<p_id>() == id)
+			{
+				return gk;
+			}
+
+			++g_sub;
+		}
+
+		// If not found return an invalid key
+		grid_key_dx<dim> key;
+		key.invalid();
+
+		return key;
+	}
+
 public:
 
 	/*! \brief Constructor
@@ -421,7 +479,7 @@ public:
 	 * \tparam j property containing the decomposition
 	 * \tparam i property to fill with the sub-decomposition
 	 *
-	 * \param Seed point
+	 * \param start_p seed point
 	 * \param graph we are processing
 	 *
 	 */
@@ -445,7 +503,32 @@ public:
 	 * \tparam j property containing the decomposition
 	 * \tparam i property to fill with the sub-decomposition
 	 *
-	 * \param Seed point
+	 * \param graph we are processing
+	 * \param p_id Processor id (if p_id == -1 the optimization is done for all the processors)
+	 * \param list of sub-domain boxes
+	 *
+	 */
+
+	template <unsigned int p_sub, unsigned int p_id> void optimize(Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb)
+	{
+		// search for the first seed
+		grid_key_dx<dim> key_seed = search_first_seed<p_id>(graph,pr_id);
+
+		// optimize
+		optimize<p_sub,p_id>(key_seed,graph,pr_id,lb);
+	}
+
+	/*! \brief optimize the graph
+	 *
+	 * Starting from a domain (hyper-cubic), it create wavefront at the boundary and expand
+	 * the boundary until the wavefronts cannot expand any more.
+	 * To the domains inside the hyper-cube one sub-id is assigned. This procedure continue until
+	 * all the domain of one p_id has a sub-id
+	 *
+	 * \tparam j property containing the decomposition
+	 * \tparam i property to fill with the sub-decomposition
+	 *
+	 * \param start_p seed point
 	 * \param graph we are processing
 	 * \param p_id Processor id (if p_id == -1 the optimization is done for all the processors)
 	 * \param list of sub-domain boxes
diff --git a/src/dec_optimizer_unit_test.hpp b/src/dec_optimizer_unit_test.hpp
index ec777e356bfd1111c2da90adeab0bae9d388c388..e17f6f7a523e97a27c8fddf70a570e3c30f00e02 100644
--- a/src/dec_optimizer_unit_test.hpp
+++ b/src/dec_optimizer_unit_test.hpp
@@ -10,7 +10,7 @@
 
 
 #include "Graph/CartesianGraphFactory.hpp"
-#include "map_graph.hpp"
+#include "Graph/map_graph.hpp"
 #include "metis_util.hpp"
 #include "dec_optimizer.hpp"
 
diff --git a/src/main.cpp b/src/main.cpp
index f8e4edea184feffd8c4cfd6ecf710089595a6d0e..1de016b806338b131b552addb777895789a6b55f 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,4 +1,5 @@
 #include <iostream>
+#include "config.h"
 #include "Graph/CartesianGraphFactory.hpp"
 
 #define BOOST_DISABLE_ASSERTS
@@ -14,7 +15,7 @@
 #include "Space/Shape/Box.hpp"
 #include "util.hpp"
 
-#include "CartesianGraphFactory_unit_test.hpp"
+#include "Graph/CartesianGraphFactory_unit_test.hpp"
 #include "metis_util_unit_test.hpp"
 #include "dec_optimizer_unit_test.hpp"
 #include "Grid/grid_dist_id_unit_test.hpp"
diff --git a/src/metis_util_unit_test.hpp b/src/metis_util_unit_test.hpp
index 961220ac30a09c7bef8e15ca43169d8cfcbb2d77..c09dd5667ce5457b912c2aa583ba738e9abdb317 100644
--- a/src/metis_util_unit_test.hpp
+++ b/src/metis_util_unit_test.hpp
@@ -9,7 +9,7 @@
 #define METIS_UTIL_UNIT_TEST_HPP_
 
 #include "Graph/CartesianGraphFactory.hpp"
-#include "map_graph.hpp"
+#include "Graph/map_graph.hpp"
 #include "metis_util.hpp"
 
 #undef GS_SIZE