diff --git a/src/Decomposition/BasicDecomposition.hpp b/src/Decomposition/BasicDecomposition.hpp
index e2f421cb89162f5a8c3ad1b7f26b7d1109849a64..462751f210122ac6137c6e0d5af2a9663ddd8149 100644
--- a/src/Decomposition/BasicDecomposition.hpp
+++ b/src/Decomposition/BasicDecomposition.hpp
@@ -129,6 +129,24 @@ private:
 	//! Cell-list that store the geometrical information of the local internal ghost boxes
 	CellList<dim, T, FAST> lgeo_cell;
 
+	//! Wrapper that converts the graph into the ParMETIS format
+	Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
+
+	//! Processor sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> sub_g;
+
+	//! Global sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> gp;
+
+	//! vtxdist vector needed by ParMETIS
+	openfpm::vector<idx_t> vtxdist;
+
+	//! Partition vectors, one per processor
+	openfpm::vector<openfpm::vector<idx_t>> partitions;
+
+	//! Data structure to keep track of the new vertex distribution across processors (needed to update the main graph)
+	openfpm::vector<openfpm::vector<size_t>> v_per_proc;
+
 	static void * message_receive(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
 	{
 		openfpm::vector<openfpm::vector<idx_t>> * v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);
@@ -161,7 +179,7 @@ private:
 			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
 		}
 
-		//! MPI initialization
+		//! Get the processor id
 		size_t p_id = v_cl.getProcessUnitID();
 
 		// Here we use PARMETIS
@@ -169,7 +187,7 @@ private:
 		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
 
 		// Processor graph
-		Graph_CSR<nm_v, nm_e> gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
+		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
 
 		//! Add computation information to each vertex (shape can change)
 		addWeights(gp, HYPERBOLOID);
@@ -186,9 +204,6 @@ private:
 		size_t mod_v = gp.getNVertex() % Np;
 		size_t div_v = gp.getNVertex() / Np;
 
-		//! Init vtxdist needed for Parmetis
-		openfpm::vector<idx_t> vtxdist(Np + 1);
-
 		for (int i = 0; i <= Np; i++)
 		{
 			if (i < mod_v)
@@ -204,24 +219,21 @@ private:
 		}
 
 		//TODO transform in factory
-		//! Create subgraph
-		Graph_CSR<nm_v, nm_e> sub_g;
+
 		//! Put vertices into processor graph (different for each processor)
 		fillSubGraph(sub_g, gp, vtxdist, p_id, Np);
 
-		// Convert the graph to parmetis format
-		Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph(sub_g, Np);
-
+		parmetis_graph.initSubGraph(sub_g);
+
 		// Decompose
-		parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
+		parmetis_graph.decompose<nm_v::proc_id>(vtxdist,sub_g);
 
 		// Get result partition for this processors
 		idx_t *partition = parmetis_graph.getPartition();
 
 		// Prepare vector of arrays to contain all partitions
-		openfpm::vector<openfpm::vector<idx_t>> partitions(Np);
 		partitions.get(p_id).resize(sub_g.getNVertex());
 		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
 
@@ -240,9 +252,6 @@ private:
 		}
 
 		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
-
-		// Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		openfpm::vector<openfpm::vector<size_t>> v_per_proc(Np);
 
 		// Update graphs with the new distributions
 		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
@@ -270,60 +279,13 @@ private:
 			}
 		}
 
-		if (p_id == 0) {
+		if (p_id == 0)
+		{
 			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
 			gv2.write("test_graph_3.vtk");
 		}
-
-		// Reset parmetis graph and reconstruct it
-		parmetis_graph.reset(gp);
-
-		// Refine
-		parmetis_graph.refine<nm_v::proc_id>(vtxdist);
-
-		// Get result partition for this processor
-		partition = parmetis_graph.getPartition();
-
-		partitions.get(p_id).resize(sub_g.getNVertex());
-		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
-
-		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		for (int i = 0; i < Np; ++i)
-		{
-			v_per_proc.get(i).clear();
-		}
-
-		prc.clear();
-		sz.clear();
-		ptr.clear();
-
-		for (size_t i = 0 ; i < Np ; i++)
-		{
-			if (i != v_cl.getProcessUnitID())
-			{
-				partitions.get(i).clear();
-				prc.add(i);
-				sz.add(sub_g.getNVertex() * sizeof(idx_t));
-
-//				std::cout << "sub_g: " << sub_g.getNVertex() * sizeof(idx_t) << "\n";
-
-				ptr.add(partitions.get(p_id).getPointer());
-			}
-		}
-
-		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
-
-		// Update graphs with the new distributions
-		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
-		//
-
-		if (p_id == 1) {
-			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
-			gv2.write("test_graph_4.vtk");
-			bool test = compare("test_graph_4.vtk","test_graph_test.vtk");
-			BOOST_REQUIRE_EQUAL(test,true);
-		}
+		refine();
 
 		/*
 		// fill the structure that store the processor id for each sub-domain
@@ -445,6 +407,64 @@ private:
 	// Save the ghost boundaries
 	Ghost<dim, T> ghost;
 
+	void refine()
+	{
+		size_t Np = v_cl.getProcessingUnits();
+		size_t p_id = v_cl.getProcessUnitID();
+
+		// Reset the ParMETIS graph and reconstruct it
+		parmetis_graph.reset(gp,sub_g);
+
+		// Refine
+		parmetis_graph.refine<nm_v::proc_id>(vtxdist,sub_g);
+
+		// Get result partition for this processor
+		idx_t * partition = parmetis_graph.getPartition();
+
+		partitions.get(p_id).resize(sub_g.getNVertex());
+		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
+
+		// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
+		for (int i = 0; i < Np; ++i)
+		{
+			v_per_proc.get(i).clear();
+		}
+
+
+		openfpm::vector<size_t> prc;
+		openfpm::vector<size_t> sz;
+		openfpm::vector<void *> ptr;
+
+		for (size_t i = 0 ; i < Np ; i++)
+		{
+			if (i != v_cl.getProcessUnitID())
+			{
+				partitions.get(i).clear();
+				prc.add(i);
+				sz.add(sub_g.getNVertex() * sizeof(idx_t));
+
+//				std::cout << "sub_g: " << sub_g.getNVertex() * sizeof(idx_t) << "\n";
+
+				ptr.add(partitions.get(p_id).getPointer());
+			}
+		}
+
+		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
+
+		// Update graphs with the new distributions
+		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
+
+		//
+
+		if (p_id == 1)
+		{
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_4.vtk");
+			bool test = compare("test_graph_4.vtk","test_graph_test.vtk");
+			BOOST_REQUIRE_EQUAL(test,true);
+		}
+	}
+
 	/*! \brief Create the subspaces that decompose your domain
 	 *
 	 */
@@ -715,7 +735,8 @@ public:
 	 *
 	 */
 	BasicDecomposition(Vcluster & v_cl) :
-			nn_prcs<dim, T>(v_cl), v_cl(v_cl) {
+			nn_prcs<dim, T>(v_cl), v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
+	{
 		// Reset the box to zero
 		bbox.zero();
 	}
diff --git a/src/parmetis_util.hpp b/src/parmetis_util.hpp
index b9362cadd11da45c1bdf621840c26e0aca3564c4..d37b309e892e468d442f6fed2f069ee41f95eabd 100644
--- a/src/parmetis_util.hpp
+++ b/src/parmetis_util.hpp
@@ -11,6 +11,7 @@
 #include <iostream>
 #include "parmetis.h"
 #include "VTKWriter.hpp"
+#include "VCluster.hpp"
 
 /*! \brief Metis graph structure
  *
@@ -86,6 +87,8 @@ struct Parmetis_graph {
 
 /*! \brief Helper class to define Metis graph
  *
+ * TODO Transform pointer to openfpm vector
+ *
  * \tparam graph structure that store the graph
  *
 */
@@ -95,20 +98,27 @@ class Parmetis {
 	Parmetis_graph Mg;
 
 	// Original graph
-	Graph & g;
+//	Graph & g;
 
 	// Communticator for OpenMPI
 	MPI_Comm comm = NULL;
 
+	// VCluster
+	Vcluster & v_cl;
+
 	// Process rank information
-	int MPI_PROC_ID = 0;
+	int p_id = 0;
+	// Number of partitions
+	size_t nc = 0;
+
 	/*! \brief Construct Adjacency list
 	 *
 	 * \param g Reference graph to get informations
 	 *
 	 */
-	void constructAdjList(Graph &refGraph) {
+	void constructAdjList(Graph &refGraph, Graph & g)
+	{
 		// create xadj and adjlist
 		Mg.vwgt = new idx_t[g.getNVertex()];
 		Mg.xadj = new idx_t[g.getNVertex() + 1];
@@ -158,8 +168,8 @@ class Parmetis {
 		// Fill the last point
 		Mg.xadj[id] = prev;
 
-		/*
-		std::cout << MPI_PROC_ID << "\n";
+
+/*		std::cout << p_id << "\n";
 		for(int i=0; i<= g.getNVertex();i++){
 			std::cout << Mg.xadj[i] << " ";
 		}
@@ -167,8 +177,8 @@ class Parmetis {
 		for(int i=0; i< g.getNEdge();i++){
 			std::cout << Mg.adjncy[i] << " ";
 		}
-		std::cout << "\n\n";
-		*/
+		std::cout << "\n\n";*/
+
 	}
 
@@ -205,7 +215,7 @@ class Parmetis {
 				size_t child = refGraph.getChild(i, s);
 
 				// Check if child is not in this processor
-				if(child > old_vtxdist[MPI_PROC_ID+1] || child < old_vtxdist[MPI_PROC_ID])
+				if(child > old_vtxdist[p_id+1] || child < old_vtxdist[p_id])
 					Mg.adjncy[prev + s] = child;
 
 			}
@@ -220,7 +230,7 @@ class Parmetis {
 
 		Mg.xadj[id] = prev;
 
-		std::cout << MPI_PROC_ID << "\n";
+		std::cout << p_id << "\n";
 		for(int i=0; i<= g.getNVertex();i++){
 			std::cout << Mg.xadj[i] << " ";
 		}
@@ -243,84 +253,13 @@ public:
 	 * \param nc number of partitions
 	 *
 	 */
-	Parmetis(Graph & g, size_t nc) :
-			g(g) {
-		// Init OpenMPI structures
-
+	Parmetis(Vcluster & v_cl, size_t nc)
+	:v_cl(v_cl), nc(nc)
+	{
+		// TODO Move into VCluster
 		MPI_Comm_dup(MPI_COMM_WORLD, &comm);
-		MPI_Comm_rank(MPI_COMM_WORLD, &MPI_PROC_ID);
-
-		// Get the number of vertex
-
-		Mg.nvtxs = new idx_t[1];
-		Mg.nvtxs[0] = g.getNVertex();
-
-		// Set the number of constrains
-
-		Mg.ncon = new idx_t[1];
-		Mg.ncon[0] = 1;
-
-		// Set to null the weight of the vertex
-
-		Mg.vwgt = NULL;
-
-		// construct the adjacency list
-
-		constructAdjList(g);
-
-		// Put the total communication size to NULL
-
-		Mg.vsize = NULL;
-
-		// Set to null the weight of the edge
-
-		Mg.adjwgt = NULL;
-
-		// Set the total number of partitions
-
-		Mg.nparts = new idx_t[1];
-		Mg.nparts[0] = nc;
-
-		//! Set option for the graph partitioning (set as default)
-
-		Mg.options = new idx_t[4];
-		Mg.options[0] = 1;
-		Mg.options[1] = 3;
-		Mg.options[2] = 0;
-		Mg.options[3] = 0;
-
-		//! is an output vector containing the partition for each vertex
-
-		Mg.part = new idx_t[g.getNVertex()];
-		for (int i = 0; i < g.getNVertex(); i++)
-			Mg.part[i] = MPI_PROC_ID;
-
-		//! adaptiveRepart itr value
-		Mg.itr = new real_t[1];
-		Mg.itr[0] = 1000.0;
-
-		//! init tpwgts to have balanced vertices and ubvec
-
-		Mg.tpwgts = new real_t[Mg.nparts[0]];
-		Mg.ubvec = new real_t[Mg.nparts[0]];
-
-		for (int s = 0; s < Mg.nparts[0]; s++) {
-			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
-			Mg.ubvec[s] = 1.05;
-		}
-
-		Mg.edgecut = new idx_t[1];
-		Mg.edgecut[0] = 0;
-
-		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
-		Mg.numflag = new idx_t[1];
-		Mg.numflag[0] = 0;
-
-		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
-		Mg.wgtflag = new idx_t[1];
-		Mg.wgtflag[0] = 2;
 	}
-
+	// TODO Deallocate the new member structures in the destructor
 	/*! \brief destructor
 	 *
@@ -386,14 +325,96 @@
 	}
 	}
 
+	/*! \brief Set the Sub-graph
+	 *
+	 * \param sub_g Sub-graph to set
+	 *
+	 */
+	void initSubGraph(Graph & sub_g)
+	{
+		p_id = v_cl.getProcessUnitID();
+
+		// Get the number of vertices
+
+		Mg.nvtxs = new idx_t[1];
+		Mg.nvtxs[0] = sub_g.getNVertex();
+
+		// Set the number of constraints
+
+		Mg.ncon = new idx_t[1];
+		Mg.ncon[0] = 1;
+
+		// Set to null the weight of the vertex
+
+		Mg.vwgt = NULL;
+
+		// construct the adjacency list
+
+		constructAdjList(sub_g,sub_g);
+
+		// Put the total communication size to NULL
+
+		Mg.vsize = NULL;
+
+		// Set to null the weight of the edge
+
+		Mg.adjwgt = NULL;
+
+		// Set the total number of partitions
+
+		Mg.nparts = new idx_t[1];
+		Mg.nparts[0] = nc;
+
+		//! Set option for the graph partitioning (set as default)
+
+		Mg.options = new idx_t[4];
+		Mg.options[0] = 1;
+		Mg.options[1] = 3;
+		Mg.options[2] = 0;
+		Mg.options[3] = 0;
+
+		//! Output vector containing the partition for each vertex
+
+		Mg.part = new idx_t[sub_g.getNVertex()];
+		for (int i = 0; i < sub_g.getNVertex(); i++)
+			Mg.part[i] = p_id;
+
+		//! adaptiveRepart itr value
+		Mg.itr = new real_t[1];
+		Mg.itr[0] = 1000.0;
+
+		//! init tpwgts to have balanced vertices and ubvec
+
+		Mg.tpwgts = new real_t[Mg.nparts[0]];
+		Mg.ubvec = new real_t[Mg.nparts[0]];
+
+		for (int s = 0; s < Mg.nparts[0]; s++) {
+			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
+			Mg.ubvec[s] = 1.05;
+		}
+
+		Mg.edgecut = new idx_t[1];
+		Mg.edgecut[0] = 0;
+
+		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
+		Mg.numflag = new idx_t[1];
+		Mg.numflag[0] = 0;
+
+		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
+		Mg.wgtflag = new idx_t[1];
+		Mg.wgtflag[0] = 2;
+	}
+
 	/*! \brief Decompose the graph
 	 *
 	 * \tparam i which property store the decomposition
 	 *
+	 *
+	 *
 	 */
-
 	template<unsigned int i>
-	void decompose(openfpm::vector<idx_t> & vtxdist) {
+	void decompose(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
+	{
 		// Decompose
 		ParMETIS_V3_PartKway((idx_t *)vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
@@ -405,9 +426,9 @@
 		*/
 
 		// For each vertex store the processor that contain the data
-		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+		for (size_t j = 0, id = 0; j < sub_g.getNVertex() ; j++, id++) {
 
-			g.vertex(j).template get<i>() = Mg.part[id];
+			sub_g.vertex(j).template get<i>() = Mg.part[id];
 
 		}
 
@@ -420,18 +441,18 @@
 	 */
 	template<unsigned int i>
-	void refine(openfpm::vector<idx_t> & vtxdist)
+	void refine(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
 	{
 		// Refine
-
+
 		ParMETIS_V3_RefineKway((idx_t *)vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
 				Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm);
 
 		// For each vertex store the processor that contain the data
-		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+		for (size_t j = 0, id = 0; j < sub_g.getNVertex(); j++, id++) {
 
-			g.vertex(j).template get<i>() = Mg.part[id];
+			sub_g.vertex(j).template get<i>() = Mg.part[id];
 
 		}
 	}
 
@@ -446,7 +467,8 @@
 	/*! \brief Reset graph and reconstruct it
 	 *
 	 */
-	void reset(Graph & mainGraph) {
+	void reset(Graph & mainGraph, Graph & sub_g)
+	{
 		// Deallocate the graph structures
 
 		if (Mg.xadj != NULL) {
@@ -469,16 +491,15 @@
 			delete[] Mg.part;
 		}
 
-		Mg.nvtxs[0] = g.getNVertex();
+		Mg.nvtxs[0] = sub_g.getNVertex();
 
-		Mg.part = new idx_t[g.getNVertex()];
+		Mg.part = new idx_t[sub_g.getNVertex()];
 
-		for (int i = 0; i < g.getNVertex(); i++)
-			Mg.part[i] = MPI_PROC_ID;
+		for (int i = 0; i < sub_g.getNVertex(); i++)
+			Mg.part[i] = p_id;
 
 		// construct the adjacency list
-		constructAdjList(mainGraph);
-
+		constructAdjList(mainGraph,sub_g);
 	}
 
 };
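
Note for reviewers: the sketch below summarizes the call sequence this patch moves the decomposition to. It only uses names that appear in the diff (Parmetis, initSubGraph, decompose, refine, reset, fillSubGraph, updateGraphs, nm_v::proc_id); the surrounding setup that builds gp, vtxdist and sub_g is elided, so treat it as an illustration of the intended flow rather than compilable test code.

    // One Parmetis wrapper per process, now constructed from the VCluster
    // instead of from a graph (as done in the BasicDecomposition constructor)
    Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph(v_cl, v_cl.getProcessingUnits());

    // Build the local sub-graph and hand it to the wrapper; this replaces the
    // old constructor-time conversion
    fillSubGraph(sub_g, gp, vtxdist, p_id, Np);
    parmetis_graph.initSubGraph(sub_g);

    // Initial k-way decomposition: writes the target processor of each vertex
    // into the nm_v::proc_id property of sub_g
    parmetis_graph.decompose<nm_v::proc_id>(vtxdist, sub_g);

    // ... exchange the partitions with sendrecvMultipleMessagesNBX and call
    // updateGraphs(...) as in decompose() ...

    // Later passes reuse the same wrapper: rebuild the adjacency from the
    // updated graphs, then refine the existing partition instead of recomputing it
    parmetis_graph.reset(gp, sub_g);
    parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);

The main design point is that parmetis_graph, sub_g, gp, vtxdist, partitions and v_per_proc are now class members initialized in the BasicDecomposition constructor, so the refinement step can be factored into refine() and repeated without rebuilding the ParMETIS data structures from scratch.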