diff --git a/example/Vector/7_SPH_dlb/main.cpp b/example/Vector/7_SPH_dlb/main.cpp
index c8c2040803c585e0af4f256264dbd7e85bf58a92..83c5a0a065e193a6d27a433f8dc49761ba899989 100644
--- a/example/Vector/7_SPH_dlb/main.cpp
+++ b/example/Vector/7_SPH_dlb/main.cpp
@@ -22,6 +22,9 @@
  *
  */
 
+//#define SE_CLASS1
+//#define STOP_ON_ERROR
+
 //! \cond [inclusion] \endcond
 #include "Vector/vector_dist.hpp"
 #include <math.h>
@@ -1096,7 +1099,7 @@ int main(int argc, char* argv[])
 
 	//! \cond [load balancing] \endcond
 
-//	vd.addComputationCosts(md);
+	vd.addComputationCosts(md);
 //	vd.getDecomposition().getDistribution().write("AFTER_DECOMPOSE1");
 
 //	vd.getDecomposition().rebalance(1);
@@ -1132,6 +1135,9 @@ int main(int argc, char* argv[])
 
 	//! \cond [main loop] \endcond
 
+	double time_mean = 0.0;
+	double time_min = 1000.0;
+	double time_max = 0.0;
 	size_t write = 0;
 	size_t it = 0;
 	size_t it_reb = 0;
@@ -1147,16 +1153,31 @@ int main(int argc, char* argv[])
 		{
 			vd.map();
 
-			it_reb = 0;
+			vd.getDecomposition().write("DLB_BEFORE_");
+//			it_reb = 0;
 			ModelCustom md;
 			vd.addComputationCosts(md);
-			vd.getDecomposition().rebalance(1);
+			vd.getDecomposition().decompose();
 
 			if (v_cl.getProcessUnitID() == 0)
 				std::cout << "REBALANCED " << std::endl;
 		}
 
 		vd.map();
+
+
+		if (it_reb == 200)
+		{
+			vd.getDecomposition().write("DLB_AFTER_");
+
+			it_reb = 0;
+			vd.addComputationCosts(md);
+			std::cout << "PROCESSOR LOAD: " << vd.getDecomposition().getDistribution().getProcessorLoad() << "   MEAN: " << time_mean / 200 << "     MIN: " << time_min << "      MAX: " << time_max  << std::endl;
+			time_mean = 0.0;
+			time_min = 1000.0;
+			time_max = 0.0;
+		}
+
 		vd.ghost_get<type,rho,Pressure,velocity>();
 
 		// Calculate pressure from the density
@@ -1173,6 +1194,9 @@ int main(int argc, char* argv[])
 		// Get the maximum viscosity term across processors
 		v_cl.max(max_visc);
 		v_cl.execute();
+		time_mean += it_time.getwct();
+		time_min = std::min(time_min,it_time.getwct());
+		time_max = std::max(time_max,it_time.getwct());
 
 		// Calculate delta t integration
 		double dt = calc_deltaT(vd,max_visc);
diff --git a/src/Decomposition/Distribution/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp
index 0c3c09182c5087035fb92fb3bb2ff2b2413bbc54..d0b9b0c24dd569ac56dada1530c379ade65c0d52 100644
--- a/src/Decomposition/Distribution/ParMetisDistribution.hpp
+++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp
@@ -34,6 +34,9 @@
 template<unsigned int dim, typename T>
 class ParMetisDistribution
 {
+	//! Is distributed
+	bool is_distributed = false;
+
 	//! Vcluster
 	Vcluster & v_cl;
 
@@ -207,6 +210,100 @@ class ParMetisDistribution
 		return &(v->get(i).get(0));
 	}
 
+	/*! \brief It update the full decomposition
+	 *
+	 *
+	 */
+	void postDecomposition()
+	{
+		//! Get the processor id
+		size_t p_id = v_cl.getProcessUnitID();
+
+		//! Get the number of processing units
+		size_t Np = v_cl.getProcessingUnits();
+
+		// Number of local vertex
+		size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
+
+		//! Get result partition for this processors
+		idx_t * partition = parmetis_graph.getPartition();
+
+		//! Prepare vector of arrays to contain all partitions
+		partitions.get(p_id).resize(nl_vertex);
+		std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));
+
+		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
+		for (size_t i = 0; i < Np; ++i)
+		{
+			v_per_proc.get(i).clear();
+		}
+
+		// Communicate the local distribution to the other processors
+		// to reconstruct individually the global graph
+		openfpm::vector<size_t> prc;
+		openfpm::vector<size_t> sz;
+		openfpm::vector<void *> ptr;
+
+		for (size_t i = 0; i < Np; i++)
+		{
+			if (i != v_cl.getProcessUnitID())
+			{
+				partitions.get(i).clear();
+				prc.add(i);
+				sz.add(nl_vertex * sizeof(idx_t));
+				ptr.add(partitions.get(p_id).getPointer());
+			}
+		}
+
+		if (prc.size() == 0)
+			v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
+		else
+			v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
+
+		// Update graphs with the received data
+		updateGraphs();
+
+
+		/////////////////////////////////////////
+
+
+		// Get result partition for this processor
+/*		idx_t * partition = parmetis_graph.getPartition();
+
+		//! Prepare vector of arrays to contain all partitions
+		partitions.get(p_id).resize(nl_vertex.id);
+		std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0));
+
+		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
+		for (size_t i = 0; i < Np; ++i)
+		{
+			v_per_proc.get(i).clear();
+		}
+
+		openfpm::vector<size_t> prc;
+		openfpm::vector<size_t> sz;
+		openfpm::vector<void *> ptr;
+
+		for (size_t i = 0; i < Np; i++)
+		{
+			if (i != v_cl.getProcessUnitID())
+			{
+				partitions.get(i).clear();
+				prc.add(i);
+				sz.add(nl_vertex.id * sizeof(idx_t));
+				ptr.add(partitions.get(p_id).getPointer());
+			}
+		}
+
+		// Exchange informations through processors
+		v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
+		NONE);
+
+		// Update graphs with the new distributions
+		updateGraphs();*/
+	}
+
+
 public:
 
 	/*! Constructor for the ParMetis class
@@ -214,7 +311,7 @@ public:
 	 * \param v_cl Vcluster to use as communication object in this class
 	 */
 	ParMetisDistribution(Vcluster & v_cl)
-	:v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
+	:is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
 	{
 	}
 
@@ -305,51 +402,18 @@ public:
 	 */
 	void decompose()
 	{
-
-		//! Get the processor id
-		size_t p_id = v_cl.getProcessUnitID();
-
-		//! Get the number of processing units
-		size_t Np = v_cl.getProcessingUnits();
-
-		// Number of local vertex
-		size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
-
-		parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
+		if (is_distributed == false)
+			parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
+		else
+			parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);
 
 		//! Decompose
 		parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
 
-		//! Get result partition for this processors
-		idx_t *partition = parmetis_graph.getPartition();
-
-		//! Prepare vector of arrays to contain all partitions
-		partitions.get(p_id).resize(nl_vertex);
-		std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));
-
-		// Communicate the local distribution to the other processors
-		// to reconstruct individually the global graph
-		openfpm::vector<size_t> prc;
-		openfpm::vector<size_t> sz;
-		openfpm::vector<void *> ptr;
+		// update after decomposition
+		postDecomposition();
 
-		for (size_t i = 0; i < Np; i++)
-		{
-			if (i != v_cl.getProcessUnitID())
-			{
-				prc.add(i);
-				sz.add(nl_vertex * sizeof(idx_t));
-				ptr.add(partitions.get(p_id).getPointer());
-			}
-		}
-
-		if (prc.size() == 0)
-			v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
-		else
-			v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
-
-		// Update graphs with the received data
-		updateGraphs();
+		is_distributed = true;
 	}
 
 	/*! \brief Refine current decomposition
@@ -360,51 +424,13 @@ public:
 	 */
 	void refine()
 	{
-		size_t Np = v_cl.getProcessingUnits();
-		size_t p_id = v_cl.getProcessUnitID();
-
-		// Number of local vertex
-		rid nl_vertex = vtxdist.get(p_id+1) - vtxdist.get(p_id);
-
 		// Reset parmetis graph and reconstruct it
 		parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);
 
 		// Refine
 		parmetis_graph.refine<nm_v::proc_id>(vtxdist);
 
-		// Get result partition for this processor
-		idx_t * partition = parmetis_graph.getPartition();
-
-		partitions.get(p_id).resize(nl_vertex.id);
-		std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0));
-
-		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		for (size_t i = 0; i < Np; ++i)
-		{
-			v_per_proc.get(i).clear();
-		}
-
-		openfpm::vector<size_t> prc;
-		openfpm::vector<size_t> sz;
-		openfpm::vector<void *> ptr;
-
-		for (size_t i = 0; i < Np; i++)
-		{
-			if (i != v_cl.getProcessUnitID())
-			{
-				partitions.get(i).clear();
-				prc.add(i);
-				sz.add(nl_vertex.id * sizeof(idx_t));
-				ptr.add(partitions.get(p_id).getPointer());
-			}
-		}
-
-		// Exchange informations through processors
-		v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
-		NONE);
-
-		// Update graphs with the new distributions
-		updateGraphs();
+		postDecomposition();
 	}
 
 	/*! \brief Compute the unbalance of the processor compared to the optimal balance
@@ -574,6 +600,7 @@ public:
 
 	const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
 	{
+		is_distributed = dist.is_distributed;
 		gr = dist.gr;
 		domain = dist.domain;
 		gp = dist.gp;
@@ -587,6 +614,7 @@ public:
 
 	const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
 	{
+		is_distributed = dist.is_distributed;
 		v_cl = dist.v_cl;
 		gr = dist.gr;
 		domain = dist.domain;
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index 4e2132e1245ba9ea219283f2b693dbed0e44d79d..9bcffed3b93ee60a449b458f70e766b2223e7e40 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -45,6 +45,44 @@
 #define NO_GHOST 0
 #define WITH_GHOST 2
 
+//! General function t get a cell-list
+template<unsigned int dim, typename St, typename CellL, typename Vector>
+struct gcl
+{
+	/*! \brief Get the Cell list based on the type
+	 *
+	 * \param vd Distributed vector
+	 * \param r_cut Cut-off radius
+	 * \param g Ghost
+	 *
+	 * \return the constructed cell-list
+	 *
+	 */
+	static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
+	{
+		return vd.getCellList(r_cut);
+	}
+};
+
+//! General function t get a cell-list
+template<unsigned int dim, typename St, typename Vector>
+struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector>
+{
+	/*! \brief Get the Cell list based on the type
+	 *
+	 * \param vd Distributed vector
+	 * \param r_cut Cut-off radius
+	 * \param g Ghost
+	 *
+	 * \return the constructed cell-list
+	 *
+	 */
+	static inline CellList_hilb<dim, St, FAST, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
+	{
+		return vd.getCellList_hilb(r_cut,g);
+	}
+};
+
 /*! \brief Distributed vector
  *
  * This class reppresent a distributed vector, the distribution of the structure
@@ -129,6 +167,32 @@ private:
 
 	}
 
+	/*! \brief Check if the cell-list must be reconstructed because domain has changed
+	 *         because of load balancing
+	 *
+	 * \param bx Box to reconstruct
+	 *
+	 * \return true if the cell-list must be reconstructed
+	 *
+	 */
+	bool check_cl_reconstruct(const Box<dim,St> & bx, St r_cut)
+	{
+		// Get ghost and anlarge by 1%
+		Ghost<dim,St> g = getDecomposition().getGhost();
+		g.magnify(1.013);
+
+		// Division array
+		size_t div[dim];
+
+		// get the processor bounding box
+		Box<dim, St> pbox = getDecomposition().getProcessorBounds();
+
+		// Processor bounding box
+		cl_param_calculate(pbox, div, r_cut, g);
+
+		return !bx.isContained(pbox);
+	}
+
 public:
 
 	//! space type
@@ -137,6 +201,9 @@ public:
 	//! dimensions of space
 	static const unsigned int dims = dim;
 
+	//! Self type
+	typedef vector_dist<dim,St,prop,Decomposition,Memory> self;
+
 	/*! \brief Operator= for distributed vector
 	 *
 	 * \param v vector to copy
@@ -446,9 +513,28 @@ public:
 	 */
 	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellList(CellL & cell_list)
 	{
-		populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC);
+		// This function assume equal spacing in all directions
+		// but in the worst case we take the maximum
+		St r_cut = 0;
+		for (size_t i = 0 ; i < dim ; i++)
+			r_cut = std::max(r_cut,cell_list.getCellBox().getHigh(i));
 
-		cell_list.set_gm(g_m);
+		// Here we have to check that the Box defined by the Cell-list is the same as the domain box of this
+		// processor. if it is not like that we have to completely reconstruct from stratch
+		bool to_reconstruct = check_cl_reconstruct(cell_list.getDomain(),r_cut);
+
+		if (to_reconstruct == false)
+		{
+			populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC);
+
+			cell_list.set_gm(g_m);
+		}
+		else
+		{
+			CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost());
+
+			cell_list.swap(cli_tmp);
+		}
 	}
 
 	/*! \brief Update a cell list using the stored particles