diff --git a/example/Vector/3_molecular_dynamic/Makefile b/example/Vector/3_molecular_dynamic/Makefile
index e9ce585fd6c8b94c615662f222b82fddd65f2a79..cc252770e8128e156b313910f200a693a552f837 100644
--- a/example/Vector/3_molecular_dynamic/Makefile
+++ b/example/Vector/3_molecular_dynamic/Makefile
@@ -6,11 +6,13 @@ LDIR =
 
 OBJ = main.o
 OBJ_EXPR = main_expr.o
+OBJ_VL = main_vl.o
+OBJ_VL_SYM = main_vl_sym.o
 
-all: md_dyn md_dyn_expr
+all: md_dyn md_dyn_expr md_dyn_vl md_dyn_vl_sym
 
 %.o: %.cpp
-	$(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
 md_dyn: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
@@ -18,11 +20,17 @@ md_dyn: $(OBJ)
 md_dyn_expr: $(OBJ_EXPR)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
 
+md_dyn_vl: $(OBJ_VL)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+md_dyn_vl_sym: $(OBJ_VL_SYM)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
 run: all
-	source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr
+	source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr; mpirun -np 3 ./md_dyn_vl; mpirun -np 3 ./md_dyn_vl_sym
 
 .PHONY: clean all run
 
 clean:
-	rm -f *.o *~ core md_dyn md_dyn_expr
+	rm -f *.o *~ core md_dyn md_dyn_expr md_dyn_vl md_dyn_vl_sym
 
diff --git a/example/Vector/3_molecular_dynamic/main.cpp b/example/Vector/3_molecular_dynamic/main.cpp
index 81188e97d753597ca1bc9cfeec51cb8fbd127e85..223e63adbbb078bc6ea2cf20f82a334aa216fc51 100644
--- a/example/Vector/3_molecular_dynamic/main.cpp
+++ b/example/Vector/3_molecular_dynamic/main.cpp
@@ -1,9 +1,9 @@
-
 #include "Vector/vector_dist.hpp"
 #include "Decomposition/CartDecomposition.hpp"
 #include "data_type/aggregate.hpp"
 #include "Plot/GoogleChart.hpp"
 #include "Plot/util.hpp"
+#include "timer.hpp"
 
 /*!
  * \page Vector_3_md Vector 3 molecular dynamic
@@ -48,7 +48,7 @@ constexpr int force = 1;
 
 //! \cond [calc forces] \endcond
 
-void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6)
+void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut2)
 {
 
 //! \cond [calc forces] \endcond
@@ -127,6 +127,9 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce
 			// take the norm of this vector
 			double rn = norm2(r);
 
+			if (rn > r_cut2)
+			{++Np; continue;};
+
 			// Calculate the force, using pow is slower
 			Point<3,double> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) -  sigma6 / (rn*rn*rn*rn)) * r;
 
@@ -165,8 +168,10 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce
 
 //! \cond [calc energy] \endcond
 
-double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6)
+double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut2)
 {
+        double rc = r_cut2;
+        double shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
 
 //! \cond [calc energy] \endcond
 
@@ -185,7 +190,7 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd,
 	//! \cond [up cell ene] \endcond
 
 	double E = 0.0;
-	vd.updateCellList(NN);
+//	vd.updateCellList(NN);
 
 	//! \cond [up cell ene] \endcond
 
@@ -236,8 +241,11 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd,
 			// take the normalized direction
 			double rn = norm2(xp - xq);
 
+			if (rn > r_cut2)
+			{++Np;continue;}
+
 			// potential energy (using pow is slower)
-			E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) );
+			E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift;
 
 			// Next neighborhood
 			++Np;
@@ -414,13 +422,16 @@ int main(int argc, char* argv[])
 	 *
 	 */
 
+	timer tsim;
+	tsim.start();
+
 	//! \cond [md steps] \endcond
 
 	// Get the Cell list structure
 	auto NN = vd.getCellList(r_cut);
 
 	// calculate forces
-	calc_forces(vd,NN,sigma12,sigma6);
+	calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
 	unsigned long int f = 0;
 
 	// MD time stepping
@@ -452,7 +463,7 @@ int main(int argc, char* argv[])
 		vd.template ghost_get<>();
 
 		// calculate forces or a(tn + 1) Step 2
-		calc_forces(vd,NN,sigma12,sigma6);
+		calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
 
 
 		// Integrate the velocity Step 3
@@ -481,7 +492,7 @@ int main(int argc, char* argv[])
 			vd.ghost_get<>();
 
 			// We calculate the energy
-			double energy = calc_energy(vd,NN,sigma12,sigma6);
+			double energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);
 			auto & vcl = create_vcluster();
 			vcl.sum(energy);
 			vcl.execute();
@@ -501,6 +512,9 @@ int main(int argc, char* argv[])
 
 	//! \cond [md steps] \endcond
 
+	tsim.stop();
+	std::cout << "Time: " << tsim.getwct() << std::endl;
+
 	/*!
 	 * \page Vector_3_md Vector 3 molecular dynamic
 	 *
diff --git a/example/Vector/3_molecular_dynamic/main_expr.cpp b/example/Vector/3_molecular_dynamic/main_expr.cpp
index 57f474efe8f2c12ffd35f2f253e8d4afab28f348..5e214636672e0866c109717a775c9fd444f9f1bf 100644
--- a/example/Vector/3_molecular_dynamic/main_expr.cpp
+++ b/example/Vector/3_molecular_dynamic/main_expr.cpp
@@ -1,22 +1,23 @@
 #include "Vector/vector_dist.hpp"
 #include "Plot/GoogleChart.hpp"
 #include "Operators/Vector/vector_dist_operators.hpp"
+#include "timer.hpp"
 
 constexpr int velocity = 0;
 constexpr int force = 1;
 
 struct ln_potential
 {
-	double sigma12,sigma6;
+	double sigma12,sigma6,r_cut2,shift;
 
-	ln_potential(double sigma12_, double sigma6_) {sigma12 = sigma12_, sigma6 = sigma6_;}
+	ln_potential(double sigma12_, double sigma6_, double r_cut2_, double shift_) {sigma12 = sigma12_; sigma6 = sigma6_; r_cut2 = r_cut2_;shift = shift_;}
 
 	Point<2,double> value(const Point<3,double> & xp, const Point<3,double> xq)
 	{
 		double rn = norm2(xp - xq);
+		if (rn >= r_cut2)	return 0.0;
 
-		Point<2,double> E({2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ),
-						 0.0});
+		Point<2,double> E({2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift,0.0});
 
 		return E;
 	}
@@ -24,24 +25,28 @@ struct ln_potential
 
 struct ln_force
 {
-	double sigma12,sigma6;
+	double sigma12,sigma6,r_cut2;
 
-	ln_force(double sigma12_, double sigma6_) {sigma12 = sigma12_; sigma6 = sigma6_;}
+	ln_force(double sigma12_, double sigma6_, double r_cut2_) {sigma12 = sigma12_; sigma6 = sigma6_;r_cut2 = r_cut2_;}
 
 	Point<3,double> value(const Point<3,double> & xp, const Point<3,double> xq)
 	{
 		Point<3,double> r = xp - xq;
 		double rn = norm2(r);
 
+		if (rn > r_cut2)	return 0.0;
+
 		return 24.0*(2.0 * sigma12 / (rn*rn*rn*rn*rn*rn*rn) -  sigma6 / (rn*rn*rn*rn)) * r;
 	}
 };
 
 int main(int argc, char* argv[])
 {
-	double dt = 0.0005, sigma = 0.1, r_cut = 0.3;
+	double dt = 0.0005, sigma = 0.1, r_cut = 3.0*sigma;
 
 	double sigma6 = pow(sigma,6), sigma12 = pow(sigma,12);
+	double rc2 = r_cut * r_cut;
+	double shift = 2.0 * ( sigma12 / (rc2*rc2*rc2*rc2*rc2*rc2) - sigma6 / ( rc2*rc2*rc2) );
 
 	openfpm::vector<double> x;
 	openfpm::vector<openfpm::vector<double>> y;
@@ -55,8 +60,8 @@ int main(int argc, char* argv[])
 	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 	Ghost<3,float> ghost(r_cut);
 
-	ln_force lf(sigma12,sigma6);
-	ln_potential lp(sigma12,sigma6);
+	ln_force lf(sigma12,sigma6,r_cut*r_cut);
+	ln_potential lp(sigma12,sigma6,r_cut*r_cut,shift);
 
 	vector_dist<3,double, aggregate<Point<3,double>,Point<3,double>> > vd(0,box,bc,ghost);
 
@@ -82,6 +87,9 @@ int main(int argc, char* argv[])
 	v_force = 0;
 	v_velocity = 0;
 
+	timer tsim;
+	tsim.start();
+
 	auto NN = vd.getCellList(r_cut);
 
 	vd.updateCellList(NN);
@@ -109,6 +117,7 @@ int main(int argc, char* argv[])
 			vd.write("particles_",f);
 			vd.ghost_get<>();
 
+			vd.updateCellList(NN);
 			Point<2,double> E = rsum(applyKernel_in_sim(vd,NN,lp) + (v_velocity * v_velocity)/2.0,vd).get();
 
 			vcl.sum(E.get(0));vcl.sum(E.get(1));
@@ -125,6 +134,9 @@ int main(int argc, char* argv[])
 		}
 	}
 
+	tsim.stop();
+	std::cout << "Time: " << tsim.getwct() << std::endl;
+
 	GCoptions options;
 	options.title = std::string("Energy with time");
 	options.yAxis = std::string("Energy");
diff --git a/openfpm_data b/openfpm_data
index f075dd5da60f5170e01a3265a88881e98fc3b7e4..9301fe459ca2ef81115c642ee7a54c983ddfd6f1 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit f075dd5da60f5170e01a3265a88881e98fc3b7e4
+Subproject commit 9301fe459ca2ef81115c642ee7a54c983ddfd6f1
diff --git a/src/Makefile.am b/src/Makefile.am
index d61146d9da87b3f2187f8199ff4aa44fbf81ae53..143ccee42292fdbd051b5aa7dda3223dad5f18b2 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -9,7 +9,7 @@ nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/CartD
          Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/ORB.hpp \
          Graph/CartesianGraphFactory.hpp \
          Grid/grid_dist_id.hpp Grid/grid_dist_id_iterator_dec.hpp Grid/grid_dist_util.hpp  Grid/grid_dist_id_iterator_sub.hpp Grid/grid_dist_id_iterator.hpp Grid/grid_dist_key.hpp Grid/staggered_dist_grid.hpp Grid/staggered_dist_grid_util.hpp Grid/staggered_dist_grid_copy.hpp \
-         Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \
+         Vector/vector_dist_comm.hpp Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \
          config/config.h \
          example.mk \
          Decomposition/Distribution/metis_util.hpp Decomposition/Distribution/parmetis_dist_util.hpp  Decomposition/Distribution/parmetis_util.hpp Decomposition/Distribution/MetisDistribution.hpp Decomposition/Distribution/ParMetisDistribution.hpp Decomposition/Distribution/DistParMetisDistribution.hpp  dec_optimizer.hpp SubdomainGraphNodes.hpp \
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index f5348417c3882d42b06ff68b1443fae8d1017143..0dc710bf845b8bb8c0fc51aced5dbca3c52c64f5 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -30,8 +30,7 @@
 #include "Decomposition/CartDecomposition.hpp"
 #include "data_type/aggregate.hpp"
 #include "NN/VerletList/VerletList.hpp"
-
-#define V_SUB_UNIT_FACTOR 64
+#include "vector_dist_comm.hpp"
 
 #define NO_ID false
 #define ID true
@@ -40,9 +39,6 @@
 #define GET	1
 #define PUT 2
 
-#define NO_POSITION 1
-#define WITH_POSITION 2
-
 // Write the particles with ghost
 #define NO_GHOST 0
 #define WITH_GHOST 2
@@ -73,16 +69,13 @@
  */
 
 template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory>
-class vector_dist
+class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory>
 {
 private:
 
 	//! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle
 	size_t g_m = 0;
 
-	//! Space Decomposition
-	Decomposition dec;
-
 	//! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor
 	//! the second element contain unassigned particles
 	openfpm::vector<Point<dim, St>> v_pos;
@@ -94,708 +87,6 @@ private:
 	//! Virtual cluster
 	Vcluster & v_cl;
 
-	//! definition of the send vector for position
-	typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> send_pos_vector;
-
-	//////////////////////////////
-	// COMMUNICATION variables
-	//////////////////////////////
-
-	//! It map the processor id with the communication request into map procedure
-	openfpm::vector<size_t> p_map_req;
-
-	//! For each near processor, outgoing particle id and shift vector
-	openfpm::vector<openfpm::vector<size_t>> opart;
-
-	//! For each near processor, particle shift vector
-	openfpm::vector<openfpm::vector<size_t>> oshift;
-
-	//! For each adjacent processor the size of the ghost sending buffer
-	openfpm::vector<size_t> ghost_prc_sz;
-
-	//! Sending buffer for the ghost particles properties
-	BHeapMemory g_prp_mem;
-
-	//! Sending buffer for the ghost particles position
-	BHeapMemory g_pos_mem;
-
-	//! For each adjacent processor it store the size of the receiving message in byte
-	openfpm::vector<size_t> recv_sz;
-
-	//! For each adjacent processor it store the received message for ghost get
-	openfpm::vector<BHeapMemory> recv_mem_gg;
-
-	//! For each processor it store the received message for global map
-	openfpm::vector<BHeapMemory> recv_mem_gm;
-
-	/*! \brief It store for each processor the position and properties vector of the particles
-	 *
-	 * This structure is used in the map function
-	 *
-	 */
-	struct pos_prop
-	{
-		//! position vector
-		openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos;
-		//! properties vector
-		openfpm::vector<prop, PreAllocHeapMemory<2>, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp;
-	};
-
-	/*! \brief for each processor store 2 vector containing the sending buffers
-	 *
-	 * This structure is used in the map_list function
-	 *
-	 */
-	template <typename sel_prop>
-	struct pos_prop_sel
-	{
-		//! position vector
-		openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos;
-		//! properties vector
-		openfpm::vector<sel_prop, PreAllocHeapMemory<2>, typename memory_traits_lin<sel_prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp;
-	};
-
-	/*! \brief Label particles for mappings
-	 *
-	 * \param lbl_p Particle labeled
-	 * \param prc_sz For each processor the number of particles to send
-	 * \param opart id of the particles to send
-	 *
-	 */
-	template<typename obp> void labelParticleProcessor(openfpm::vector<openfpm::vector<size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz, openfpm::vector<size_t> & opart)
-	{
-		// reset lbl_p
-		lbl_p.resize(v_cl.getProcessingUnits());
-		for (size_t i = 0; i < lbl_p.size(); i++)
-			lbl_p.get(i).clear();
-
-		// resize the label buffer
-		prc_sz.resize(v_cl.getProcessingUnits());
-
-		auto it = v_pos.getIterator();
-
-		// Label all the particles with the processor id where they should go
-		while (it.isNext())
-		{
-			auto key = it.get();
-
-			// Apply the boundary conditions
-			dec.applyPointBC(v_pos.get(key));
-
-			size_t p_id = 0;
-
-			// Check if the particle is inside the domain
-			if (dec.getDomain().isInside(v_pos.get(key)) == true)
-				p_id = dec.processorIDBC(v_pos.get(key));
-			else
-				p_id = obp::out(key, v_cl.getProcessUnitID());
-
-			// Particle to move
-			if (p_id != v_cl.getProcessUnitID())
-			{
-				if ((long int) p_id != -1)
-				{
-					prc_sz.get(p_id)++;lbl_p
-					.get(p_id).add(key);
-					opart.add(key);
-				}
-				else
-				{
-					opart.add(key);
-				}
-			}
-
-			// Add processors and add size
-
-			++it;
-		}
-	}
-
-	/*! \brief Label the particles
-	 *
-	 * It count the number of particle to send to each processors and save its ids
-	 *
-	 * \see nn_prcs::getShiftvectors()
-	 *
-	 */
-	void labelParticlesGhost()
-	{
-		// Buffer that contain the number of elements to send for each processor
-		ghost_prc_sz.clear();
-		ghost_prc_sz.resize(dec.getNNProcessors());
-
-		// Buffer that contain for each processor the id of the particle to send
-		opart.clear();
-		opart.resize(dec.getNNProcessors());
-
-		// Buffer that contain for each processor the id of the shift vector
-		oshift.clear();
-		oshift.resize(dec.getNNProcessors());
-
-		// Iterate over all particles
-		auto it = v_pos.getIteratorTo(g_m);
-		while (it.isNext())
-		{
-			auto key = it.get();
-
-			// Given a particle, it return which processor require it (first id) and shift id, second id
-			// For an explanation about shifts vectors please consult getShiftVector in ie_ghost
-			const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE);
-
-			for (size_t i = 0; i < vp_id.size(); i++)
-			{
-				// processor id
-				size_t p_id = vp_id.get(i).first;
-
-				// add particle to communicate
-				ghost_prc_sz.get(p_id)++;
-				opart.get(p_id).add(key);
-				oshift.get(p_id).add(vp_id.get(i).second);
-			}
-
-			++it;
-		}
-	}
-
-	/*! \brief Add local particles based on the boundary conditions
-	 *
-	 * In order to understand what this function use the following
-	 *
-	 \verbatim
-
-	 [1,1]
-	 +---------+------------------------+---------+
-	 | (1,-1)  |                        | (1,1)   |
-	 |   |     |    (1,0) --> 7         |   |     |
-	 |   v     |                        |   v     |
-	 |   6     |                        |   8     |
-	 +--------------------------------------------+
-	 |         |                        |         |
-	 |         |                        |         |
-	 |         |                        |         |
-	 | (-1,0)  |                        | (1,0)   |
-	 |    |    |                        |   |     |
-	 |    v    |      (0,0) --> 4       |   v     |
-	 |    3    |                        |   5     |
-	 |         |                        |         |
- B	 |         |                        |     A   |
- *	 |         |                        |    *    |
-	 |         |                        |         |
-	 |         |                        |         |
-	 |         |                        |         |
-	 +--------------------------------------------+
-	 | (-1,-1) |                        | (-1,1)  |
-	 |    |    |   (-1,0) --> 1         |    |    |
-	 |    v    |                        |    v    |
-	 |    0    |                        |    2    |
-	 +---------+------------------------+---------+
-
-
-	 \endverbatim
-
-	 *
-	 *  The box is the domain, while all boxes at the border (so not (0,0) ) are the
-	 *  ghost part at the border of the domain. If a particle A is in the position in figure
-	 *  a particle B must be created. This function duplicate the particle A, if A and B are
-	 *  local
-	 *
-	 */
-	void add_loc_particles_bc()
-	{
-		// get the shift vectors
-		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
-
-		// this map is used to check if a combination is already present
-		std::unordered_map<size_t, size_t> map_cmb;
-
-		// Add local particles coming from periodic boundary, the only boxes that count are the one
-		// touching the border, filter them
-
-		// The boxes touching the border of the domain are divided in groups (first vector)
-		// each group contain internal ghost coming from sub-domain of the same section
-		openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f;
-		openfpm::vector_std<comb<dim>> box_cmb;
-
-		for (size_t i = 0; i < dec.getNLocalSub(); i++)
-		{
-			size_t Nl = dec.getLocalNIGhost(i);
-
-			for (size_t j = 0; j < Nl; j++)
-			{
-				// If the ghost does not come from the intersection with an out of
-				// border sub-domain the combination is all zero and n_zero return dim
-				if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
-					continue;
-
-				// Check if we already have boxes with such combination
-				auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
-				if (it == map_cmb.end())
-				{
-					// we do not have it
-					box_f.add();
-					box_f.last().add(dec.getLocalIGhostBox(i, j));
-					box_cmb.add(dec.getLocalIGhostPos(i, j));
-					map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
-				}
-				else
-				{
-					// we have it
-					box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
-				}
-
-			}
-		}
-
-		if (box_f.size() == 0)
-			return;
-		else
-		{
-			// Label the internal (assigned) particles
-			auto it = v_pos.getIteratorTo(g_m);
-
-			while (it.isNext())
-			{
-				auto key = it.get();
-
-				// If particles are inside these boxes
-				for (size_t i = 0; i < box_f.size(); i++)
-				{
-					for (size_t j = 0; j < box_f.get(i).size(); j++)
-					{
-						if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true)
-						{
-							Point<dim, St> p = v_pos.get(key);
-							// shift
-							p -= shifts.get(box_cmb.get(i).lin());
-
-							// add this particle shifting its position
-							v_pos.add(p);
-							v_prp.add();
-							v_prp.last() = v_prp.get(key);
-
-							// boxes in one group can be overlapping
-							// we do not have to search for the other
-							// boxes otherwise we will have duplicate particles
-							//
-							// A small note overlap of boxes across groups is fine
-							// (and needed) because each group has different shift
-							// producing non overlapping particles
-							//
-							break;
-						}
-					}
-				}
-
-				++it;
-			}
-		}
-	}
-
-	/*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles
-	 *
-	 * \param g_pos_send Send buffer to fill
-	 * \param prAlloc_pos Memory object for the send buffer
-	 *
-	 */
-	void fill_send_ghost_pos_buf(openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos)
-	{
-		// get the shift vectors
-		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
-
-		// create a number of send buffers equal to the near processors
-		g_pos_send.resize(ghost_prc_sz.size());
-		for (size_t i = 0; i < g_pos_send.size(); i++)
-		{
-			// set the preallocated memory to ensure contiguity
-			g_pos_send.get(i).setMemory(*prAlloc_pos);
-
-			// resize the sending vector (No allocation is produced)
-			g_pos_send.get(i).resize(ghost_prc_sz.get(i));
-		}
-
-		// Fill the send buffer
-		for (size_t i = 0; i < opart.size(); i++)
-		{
-			for (size_t j = 0; j < opart.get(i).size(); j++)
-			{
-				Point<dim, St> s = v_pos.get(opart.get(i).get(j));
-				s -= shifts.get(oshift.get(i).get(j));
-				g_pos_send.get(i).set(j, s);
-			}
-		}
-	}
-
-	/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
-	 *
-	 * \tparam send_vector type used to send data
-	 * \tparam prp_object object containing only the properties to send
-	 * \tparam prp set of properties to send
-	 *
-	 * \param g_send_prp Send buffer to fill
-	 * \param prAlloc_prp Memory object for the send buffer
-	 *
-	 */
-	template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp)
-	{
-		// create a number of send buffers equal to the near processors
-		g_send_prp.resize(ghost_prc_sz.size());
-		for (size_t i = 0; i < g_send_prp.size(); i++)
-		{
-			// set the preallocated memory to ensure contiguity
-			g_send_prp.get(i).setMemory(*prAlloc_prp);
-
-			// resize the sending vector (No allocation is produced)
-			g_send_prp.get(i).resize(ghost_prc_sz.get(i));
-		}
-
-		// Fill the send buffer
-		for (size_t i = 0; i < opart.size(); i++)
-		{
-			for (size_t j = 0; j < opart.get(i).size(); j++)
-			{
-				// source object type
-				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
-				// destination object type
-				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
-
-				// Copy only the selected properties
-				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(opart.get(i).get(j)), g_send_prp.get(i).get(j));
-			}
-		}
-	}
-
-	/*! \brief allocate and fill the send buffer for the map function
-	 *
-	 * \param prc_r List of processor rank involved in the send
-	 * \param prc_sz_r For each processor in the list the size of the message to send
-	 * \param pb send buffer
-	 *
-	 */
-	void fill_send_map_buf(openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop> & pb)
-	{
-		pb.resize(prc_r.size());
-
-		for (size_t i = 0; i < prc_r.size(); i++)
-		{
-			// Create the size required to store the particles position and properties to communicate
-			size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
-			size_t s2 = openfpm::vector<prop, HeapMemory, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
-
-			// Preallocate the memory
-			size_t sz[2] = { s1, s2 };
-			PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz);
-
-			// Set the memory allocator
-			pb.get(i).pos.setMemory(*mem);
-			pb.get(i).prp.setMemory(*mem);
-
-			// set the size and allocate, using mem warant that pos and prp is contiguous
-			pb.get(i).pos.resize(prc_sz_r.get(i));
-			pb.get(i).prp.resize(prc_sz_r.get(i));
-		}
-
-		// Run through all the particles and fill the sending buffer
-
-		for (size_t i = 0; i < opart.size(); i++)
-		{
-			auto it = opart.get(i).getIterator();
-			size_t lbl = p_map_req.get(i);
-
-			while (it.isNext())
-			{
-				size_t key = it.get();
-				size_t id = opart.get(i).get(key);
-
-				pb.get(lbl).pos.set(key, v_pos.get(id));
-				pb.get(lbl).prp.set(key, v_prp.get(id));
-
-				++it;
-			}
-		}
-	}
-
-	/*! \brief allocate and fill the send buffer for the map function
-	 *
-	 * \param prc_r List of processor rank involved in the send
-	 * \param prc_sz_r For each processor in the list the size of the message to send
-	 * \param pb send buffer
-	 *
-	 */
-	template<typename prp_object,int ... prp> void fill_send_map_buf_list(openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop_sel<prp_object>> & pb)
-	{
-		pb.resize(prc_r.size());
-
-		for (size_t i = 0; i < prc_r.size(); i++)
-		{
-			// Create the size required to store the particles position and properties to communicate
-			size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
-			size_t s2 = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
-
-			// Preallocate the memory
-			size_t sz[2] = { s1, s2 };
-			PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz);
-
-			// Set the memory allocator
-			pb.get(i).pos.setMemory(*mem);
-			pb.get(i).prp.setMemory(*mem);
-
-			// set the size and allocate, using mem warant that pos and prp is contiguous
-			pb.get(i).pos.resize(prc_sz_r.get(i));
-			pb.get(i).prp.resize(prc_sz_r.get(i));
-		}
-
-		// Run through all the particles and fill the sending buffer
-
-		for (size_t i = 0; i < opart.size(); i++)
-		{
-			auto it = opart.get(i).getIterator();
-			size_t lbl = p_map_req.get(i);
-
-			while (it.isNext())
-			{
-				size_t key = it.get();
-				size_t id = opart.get(i).get(key);
-
-				pb.get(lbl).pos.set(key, v_pos.get(id));
-
-				// source object type
-				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
-				// destination object type
-				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
-
-				// Copy only the selected properties
-				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), pb.get(lbl).prp.get(key));
-
-				++it;
-			}
-		}
-	}
-
-	/*! \brief This function process the receiced data for the properties and populate the ghost
-	 *
-	 * \tparam send_vector type used to send data
-	 * \tparam prp_object object containing only the properties to send
-	 * \tparam prp set of properties to send
-	 *
-	 */
-	template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp()
-	{
-		// Mark the ghost part
-		g_m = v_prp.size();
-
-		// Process the received data (recv_mem_gg != 0 if you have data)
-		for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++)
-		{
-			// calculate the number of received elements
-			size_t n_ele = recv_sz.get(i) / sizeof(prp_object);
-
-			// add the received particles to the vector
-			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i));
-
-			// create vector representation to a piece of memory already allocated
-			openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity> v2;
-
-			v2.setMemory(*ptr1);
-
-			// resize with the number of elements
-			v2.resize(n_ele);
-
-			// Add the ghost particle
-			v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2);
-		}
-	}
-
-	/*! \brief This function process the received data for the properties and populate the ghost
-	 *
-	 */
-	void process_received_ghost_pos()
-	{
-		// Process the received data (recv_mem_gg != 0 if you have data)
-		for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++)
-		{
-			// calculate the number of received elements
-			size_t n_ele = recv_sz.get(i) / sizeof(Point<dim, St> );
-
-			// add the received particles to the vector
-			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i));
-
-			// create vector representation to a piece of memory already allocated
-
-			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> v2;
-
-			v2.setMemory(*ptr1);
-
-			// resize with the number of elements
-			v2.resize(n_ele);
-
-			// Add the ghost particle
-			v_pos.template add<PtrMemory, openfpm::grow_policy_identity>(v2);
-		}
-	}
-
-	/*! \brief Process the received particles
-	 *
-	 * \param out_part list of the out-going particles
-	 *
-	 */
-	void process_received_map(openfpm::vector<size_t> & out_part)
-	{
-		size_t o_p_id = 0;
-
-		for (size_t i = 0; i < recv_mem_gm.size(); i++)
-		{
-			// Get the number of elements
-
-			size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prop));
-
-			// Pointer of the received positions for each near processor
-			void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer();
-			// Pointer of the received properties for each near processor
-			void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> );
-
-			PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> ));
-			PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prop));
-
-			// create vector representation to a piece of memory already allocated
-
-			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos;
-			openfpm::vector<prop, PtrMemory, typename memory_traits_lin<prop>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp;
-
-			vpos.setMemory(*ptr1);
-			vprp.setMemory(*ptr2);
-
-			vpos.resize(n_ele);
-			vprp.resize(n_ele);
-
-			// Add the received particles to v_pos and v_prp
-
-			size_t j = 0;
-			for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++)
-			{
-				v_pos.set(out_part.get(o_p_id), vpos.get(j));
-				v_prp.set(out_part.get(o_p_id), vprp.get(j));
-			}
-
-			for (; j < vpos.size(); j++)
-			{
-				v_pos.add();
-				v_pos.set(v_pos.size() - 1, vpos.get(j));
-				v_prp.add();
-				v_prp.set(v_prp.size() - 1, vprp.get(j));
-			}
-		}
-
-		// remove the (out-going particles) in the vector
-
-		v_pos.remove(out_part, o_p_id);
-		v_prp.remove(out_part, o_p_id);
-	}
-
-	/*! \brief Process the received particles
-	 *
-	 * \param out_part list of the out-going particles
-	 *
-	 */
-	template<typename prp_object , int ... prp> void process_received_map_list(openfpm::vector<size_t> & out_part)
-	{
-		size_t o_p_id = 0;
-
-		for (size_t i = 0; i < recv_mem_gm.size(); i++)
-		{
-			// Get the number of elements
-
-			size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prp_object));
-
-			// Pointer of the received positions for each near processor
-			void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer();
-			// Pointer of the received properties for each near processor
-			void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> );
-
-			PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> ));
-			PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prp_object));
-
-			// create vector representation to a piece of memory already allocated
-
-			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos;
-			openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp;
-
-			vpos.setMemory(*ptr1);
-			vprp.setMemory(*ptr2);
-
-			vpos.resize(n_ele);
-			vprp.resize(n_ele);
-
-			// Add the received particles to v_pos and v_prp
-
-			size_t j = 0;
-			for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++)
-			{
-				v_pos.set(out_part.get(o_p_id), vpos.get(j));
-				v_prp.template set_o<decltype(vprp.get(j)), prp... >(out_part.get(o_p_id), vprp.get(j));
-			}
-
-			for (; j < vpos.size(); j++)
-			{
-				v_pos.add();
-				v_pos.set(v_pos.size() - 1, vpos.get(j));
-				v_prp.template set_o<decltype(vprp.get(j)), prp... >(v_prp.size() - 1, vprp.get(j));
-			}
-		}
-
-		// remove the (out-going particles) in the vector
-
-		v_pos.remove(out_part, o_p_id);
-		v_prp.remove(out_part, o_p_id);
-	}
-
-	/*! \brief Calculate send buffers total size and allocation
-	 *
-	 * \tparam prp_object object containing only the properties to send
-	 *
-	 * \param size_byte_prp total size for the property buffer
-	 * \param size_byte_pos total size for the position buffer
-	 *
-	 */
-	template<typename prp_object> void calc_send_ghost_buf(size_t & size_byte_prp, size_t & size_byte_pos)
-	{
-		// Calculate the total size required for the sending buffer
-		for (size_t i = 0; i < ghost_prc_sz.size(); i++)
-		{
-			size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0);
-			size_byte_prp += alloc_ele;
-
-			alloc_ele = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0);
-			size_byte_pos += alloc_ele;
-		}
-	}
-
-	/*! \brief Calculate parameters for the cell list
-	 *
-	 * \param div Division array
-	 * \param r_cut interation radius or size of each cell
-	 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost. This parameter says how much must be enlarged
-	 *
-	 * \return the processor bounding box
-	 */
-/*	inline Box<dim, St> cl_param_calculate(size_t (&div)[dim], St r_cut, const Ghost<dim, St> & enlarge)
-	{
-		// calculate the parameters of the cell list
-
-		// get the processor bounding box
-		Box<dim, St> pbox = dec.getProcessorBounds();
-
-		// extend by the ghost
-		pbox.enlarge(enlarge);
-
-		// Calculate the division array and the cell box
-		for (size_t i = 0; i < dim; i++)
-		{
-			div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut);
-			div[i]++;
-			pbox.setHigh(i,pbox.getLow(i) + div[i]*r_cut);
-		}
-		return pbox;
-	}*/
 
 	/*! \brief Initialize the structures
 	 *
@@ -823,34 +114,6 @@ private:
 		g_m = p_np;
 	}
 
-	/*! \brief Initialize the decomposition
-	 *
-	 * \param box domain
-	 * \param bc boundary conditions
-	 * \param g ghost extension
-	 *
-	 */
-	void init_decomposition(Box<dim,St> & box, const size_t (& bc)[dim],const Ghost<dim,St> & g)
-	{
-		// Create a valid decomposition of the space
-		// Get the number of processor and calculate the number of sub-domain
-		// for decomposition
-		size_t n_proc = v_cl.getProcessingUnits();
-		size_t n_sub = n_proc * getDefaultNsubsub();
-
-		// Calculate the maximum number (before merging) of sub-domain on
-		// each dimension
-		size_t div[dim];
-		for (size_t i = 0; i < dim; i++)
-		{
-			div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim));
-		}
-
-		// Create the sub-domains
-		dec.setParameters(div, box, bc, g);
-		dec.decompose();
-	}
-
 public:
 
 	//! space type
@@ -868,7 +131,7 @@ public:
 	 */
 	vector_dist<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist<dim,St,prop,Decomposition,Memory> & v)
 	{
-		dec = v.dec;
+		static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory>>(v));
 
 		g_m = v.g_m;
 		v_pos = v.v_pos;
@@ -886,7 +149,7 @@ public:
 	 */
 	vector_dist<dim,St,prop,Decomposition,Memory> & operator=(vector_dist<dim,St,prop,Decomposition,Memory> && v)
 	{
-		dec.swap(v.dec);
+		static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> >(v));
 
 		g_m = v.g_m;
 		v_pos.swap(v.v_pos);
@@ -901,7 +164,7 @@ public:
 	 *
 	 */
 	vector_dist(const vector_dist<dim,St,prop,Decomposition,Memory> & v)
-	:dec(v.v_cl),v_cl(v.v_cl)
+	:vector_dist_comm<dim,St,prop,Decomposition,Memory>(v.dec),v_cl(v.v_cl)
 	{
 #ifdef SE_CLASS2
 		check_new(this,8,VECTOR_DIST_EVENT,4);
@@ -932,7 +195,7 @@ public:
 	 *
 	 */
 	vector_dist(const Decomposition & dec, size_t np) :
-			dec(dec), v_cl(create_vcluster())
+	vector_dist_comm<dim,St,prop,Decomposition,Memory>(dec), v_cl(create_vcluster())
 	{
 #ifdef SE_CLASS2
 		check_new(this,8,VECTOR_DIST_EVENT,4);
@@ -950,15 +213,15 @@ public:
 	 * \param g Ghost margins
 	 *
 	 */
-	vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g) :
-			dec(create_vcluster()), v_cl(create_vcluster())
+	vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g)
+	:v_cl(create_vcluster())
 	{
 #ifdef SE_CLASS2
 		check_new(this,8,VECTOR_DIST_EVENT,4);
 #endif
 
 		init_structures(np);
-		init_decomposition(box,bc,g);
+		this->init_decomposition(box,bc,g);
 	}
 
 	~vector_dist()
@@ -968,14 +231,14 @@ public:
 #endif
 	}
 
-	/*! \brief Get the number of minimum sub-domain
+	/*! \brief return the local size of the vector
 	 *
-	 * \return minimum number
+	 * \return local size
 	 *
 	 */
-	static size_t getDefaultNsubsub()
+	size_t size_local()
 	{
-		return V_SUB_UNIT_FACTOR;
+		return g_m;
 	}
 
 	/*! \brief return the local size of the vector
@@ -983,9 +246,9 @@ public:
 	 * \return local size
 	 *
 	 */
-	size_t size_local()
+	size_t size_local_with_ghost()
 	{
-		return g_m;
+		return v_pos.size();
 	}
 
 	/*! \brief Get the position of an element
@@ -1046,292 +309,6 @@ public:
 		return v_prp.template get<id>(vec_key.getKey());
 	}
 
-	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
-	 *
-	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
-	 *
-	 * In general this function is called after moving the particles to move the
-	 * elements out the local processor. Or just after initialization if each processor
-	 * contain non local particles
-	 *
-	 */
-	template<typename obp = KillParticle> void map()
-	{
-		// outgoing particles-id
-		openfpm::vector<size_t> out_part;
-
-		// Processor communication size
-		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
-
-		// It contain the list of the processors this processor should to communicate with
-		openfpm::vector<size_t> p_list;
-
-		// map completely reset the ghost part
-		v_pos.resize(g_m);
-		v_prp.resize(g_m);
-
-		// Contain the processor id of each particle (basically where they have to go)
-		labelParticleProcessor<obp>(opart, prc_sz, out_part);
-
-		// Calculate the sending buffer size for each processor, put this information in
-		// a contiguous buffer
-		p_map_req.resize(v_cl.getProcessingUnits());
-		openfpm::vector<size_t> prc_sz_r;
-		openfpm::vector<size_t> prc_r;
-
-		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
-		{
-			if (prc_sz.get(i) != 0)
-			{
-				p_map_req.get(i) = prc_r.size();
-				prc_r.add(i);
-				prc_sz_r.add(prc_sz.get(i));
-			}
-		}
-
-		// Allocate the send buffers
-
-		openfpm::vector<pos_prop> pb;
-
-		// fill the send buffers
-		fill_send_map_buf(prc_r, prc_sz_r, pb);
-
-		// Create the set of pointers
-		openfpm::vector<void *> ptr(prc_r.size());
-		for (size_t i = 0; i < prc_r.size(); i++)
-		{
-			ptr.get(i) = pb.get(i).pos.getPointer();
-		}
-
-		// convert the particle number to buffer size
-		for (size_t i = 0; i < prc_sz_r.size(); i++)
-		{
-			prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prop) + sizeof(Point<dim, St> ));
-		}
-
-		// Send and receive the particles
-
-		recv_mem_gm.clear();
-		v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist::message_alloc_map, this, NEED_ALL_SIZE);
-
-		// Process the incoming particles
-
-		process_received_map(out_part);
-
-		// mark the ghost part
-
-		g_m = v_pos.size();
-	}
-
-	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
-	 *
-	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
-	 *
-	 * In general this function is called after moving the particles to move the
-	 * elements out the local processor. Or just after initialization if each processor
-	 * contain non local particles
-	 *
-	 */
-	template<unsigned int ... prp> void map_list()
-	{
-		typedef KillParticle obp;
-
-		// outgoing particles-id
-		openfpm::vector<size_t> out_part;
-
-		// Processor communication size
-		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
-
-		// It contain the list of the processors this processor should to communicate with
-		openfpm::vector<size_t> p_list;
-
-		// map completely reset the ghost part
-		v_pos.resize(g_m);
-		v_prp.resize(g_m);
-
-		// Contain the processor id of each particle (basically where they have to go)
-		labelParticleProcessor<obp>(opart, prc_sz, out_part);
-
-		// Calculate the sending buffer size for each processor, put this information in
-		// a contiguous buffer
-		p_map_req.resize(v_cl.getProcessingUnits());
-		openfpm::vector<size_t> prc_sz_r;
-		openfpm::vector<size_t> prc_r;
-
-		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
-		{
-			if (prc_sz.get(i) != 0)
-			{
-				p_map_req.get(i) = prc_r.size();
-				prc_r.add(i);
-				prc_sz_r.add(prc_sz.get(i));
-			}
-		}
-
-		// Sending property object
-		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
-
-		// Allocate the send buffers
-
-		openfpm::vector<pos_prop_sel<prp_object>> pb;
-
-		// fill the send buffers
-		fill_send_map_buf_list<prp_object,prp...>(prc_r, prc_sz_r, pb);
-
-		// Create the set of pointers
-		openfpm::vector<void *> ptr(prc_r.size());
-		for (size_t i = 0; i < prc_r.size(); i++)
-		{
-			ptr.get(i) = pb.get(i).pos.getPointer();
-		}
-
-		// convert the particle number to buffer size
-		for (size_t i = 0; i < prc_sz_r.size(); i++)
-		{
-			prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prp_object) + sizeof(Point<dim, St> ));
-		}
-
-		// Send and receive the particles
-
-		recv_mem_gm.clear();
-		v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist::message_alloc_map, this, NEED_ALL_SIZE);
-
-		// Process the incoming particles
-
-		process_received_map_list<prp_object, prp...>(out_part);
-
-		// mark the ghost part
-
-		g_m = v_pos.size();
-	}
-
-	/*! \brief It synchronize the properties and position of the ghost particles
-	 *
-	 * \tparam prp list of properties to get synchronize
-	 * \param opt options WITH_POSITION, it send also the positional information of the particles
-	 *
-	 */
-	template<int ... prp> void ghost_get(size_t opt = WITH_POSITION)
-	{
-		// Unload receive buffer
-		for (size_t i = 0 ; i < recv_mem_gg.size() ; i++)
-			recv_sz.get(i) = 0;
-
-		// Sending property object
-		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
-
-		// send vector for each processor
-		typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity> send_vector;
-
-		// reset the ghost part
-		v_pos.resize(g_m);
-		v_prp.resize(g_m);
-
-		// Label all the particles
-		labelParticlesGhost();
-
-		// Calculate memory and allocation for the send buffers
-
-		// Total size
-		size_t size_byte_prp = 0;
-		size_t size_byte_pos = 0;
-
-		calc_send_ghost_buf<prp_object>(size_byte_prp, size_byte_pos);
-
-		// Create memory for the send buffer
-
-		g_prp_mem.resize(size_byte_prp);
-		if (opt != NO_POSITION)
-			g_pos_mem.resize(size_byte_pos);
-
-		// Create and fill send buffer for particle properties
-
-		ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(size_byte_prp, g_prp_mem);
-
-		openfpm::vector<send_vector> g_send_prp;
-		fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(g_send_prp, prAlloc_prp);
-
-		// Create and fill the send buffer for the particle position
-
-		ExtPreAlloc<Memory> * prAlloc_pos;
-		openfpm::vector<send_pos_vector> g_pos_send;
-		if (opt != NO_POSITION)
-		{
-			prAlloc_pos = new ExtPreAlloc<Memory>(size_byte_pos, g_pos_mem);
-			fill_send_ghost_pos_buf(g_pos_send, prAlloc_pos);
-		}
-
-		// Create processor list
-		openfpm::vector<size_t> prc;
-		for (size_t i = 0; i < opart.size(); i++)
-			prc.add(dec.IDtoProc(i));
-
-		// Send/receive the particle properties information
-		v_cl.sendrecvMultipleMessagesNBX(prc, g_send_prp, msg_alloc_ghost_get, this);
-		process_received_ghost_prp<send_vector, prp_object, prp...>();
-
-		if (opt != NO_POSITION)
-		{
-			// Send/receive the particle properties information
-			v_cl.sendrecvMultipleMessagesNBX(prc, g_pos_send, msg_alloc_ghost_get, this);
-			process_received_ghost_pos();
-		}
-
-		add_loc_particles_bc();
-	}
-
-	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
-	 *
-	 * \param msg_i message size required to receive from i
-	 * \param total_msg message size to receive from all the processors
-	 * \param total_p the total number of processor want to communicate with you
-	 * \param i processor id
-	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
-	 *           every time message_alloc is called)
-	 * \param ptr void pointer parameter for additional data to pass to the call-back
-	 *
-	 */
-	static void * msg_alloc_ghost_get(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
-	{
-		vector_dist<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr);
-
-		v->recv_sz.resize(v->dec.getNNProcessors());
-		v->recv_mem_gg.resize(v->dec.getNNProcessors());
-
-		// Get the local processor id
-		size_t lc_id = v->dec.ProctoID(i);
-
-		// resize the receive buffer
-		v->recv_mem_gg.get(lc_id).resize(msg_i);
-		v->recv_sz.get(lc_id) = msg_i;
-
-		return v->recv_mem_gg.get(lc_id).getPointer();
-	}
-
-	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
-	 *
-	 * \param msg_i size required to receive the message from i
-	 * \param total_msg total size to receive from all the processors
-	 * \param total_p the total number of processor that want to communicate with you
-	 * \param i processor id
-	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
-	 *           every time message_alloc is called)
-	 * \param ptr a pointer to the vector_dist structure
-	 *
-	 * \return the pointer where to store the message for the processor i
-	 *
-	 */
-	static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
-	{
-		// cast the pointer
-		vector_dist<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr);
-
-		vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits());
-		vd->recv_mem_gm.get(i).resize(msg_i);
-
-		return vd->recv_mem_gm.get(i).getPointer();
-	}
-
 	/*! \brief Add local particle
 	 *
 	 * It add a local particle, with "local" we mean in this processor
@@ -1382,7 +359,7 @@ public:
 	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut)
 	{
 		// Get ghost and anlarge by 1%
-		Ghost<dim,St> g = dec.getGhost();
+		Ghost<dim,St> g = getDecomposition().getGhost();
 		g.magnify(1.013);
 
 		return getCellList(r_cut, g);
@@ -1400,7 +377,7 @@ public:
 	template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut)
 	{
 		// Get ghost and anlarge by 1%
-		Ghost<dim,St> g = dec.getGhost();
+		Ghost<dim,St> g = getDecomposition().getGhost();
 		g.magnify(1.013);
 
 		return getCellList_hilb(r_cut, g);
@@ -1457,7 +434,7 @@ public:
 		size_t div[dim];
 
 		// get the processor bounding box
-		Box<dim, St> pbox = dec.getProcessorBounds();
+		Box<dim, St> pbox = getDecomposition().getProcessorBounds();
 
 		// Processor bounding box
 		cl_param_calculate(pbox, div, r_cut, enlarge);
@@ -1492,7 +469,7 @@ public:
 		size_t div[dim];
 
 		// get the processor bounding box
-		Box<dim, St> pbox = dec.getProcessorBounds();
+		Box<dim, St> pbox = getDecomposition().getProcessorBounds();
 
 		// Processor bounding box
 		cl_param_calculate(pbox,div, r_cut, enlarge);
@@ -1505,29 +482,45 @@ public:
 	}
 
 
+
 	/*! \brief for each particle get the verlet list
 	 *
 	 * \param r_cut cut-off radius
+	 * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC
 	 *
 	 */
-	VerletList<dim,St,FAST,shift<dim,St> > getVerlet(St r_cut)
+	VerletList<dim,St,FAST,shift<dim,St> > getVerlet(St r_cut, size_t opt = VL_NON_SYMMETRIC)
 	{
 		VerletList<dim,St,FAST,shift<dim,St>> ver;
 
-		// Division array
-		size_t div[dim];
-
 		// get the processor bounding box
-		Box<dim, St> bt = dec.getProcessorBounds();
+		Box<dim, St> bt = getDecomposition().getProcessorBounds();
+
+		// Get the ghost
+		Ghost<dim,St> g = getDecomposition().getGhost();
+		g.magnify(1.013);
 
-		// Calculate the divisions for the Cell-lists
-		cl_param_calculate(bt,div,r_cut,Ghost<dim,St>(0.0));
+		// enlarge the box where the Verlet is defined
+		bt.enlarge(g);
 
-		ver.Initialize(bt,r_cut,v_pos,g_m);
+		ver.Initialize(bt,getDecomposition().getProcessorBounds(),r_cut,v_pos,g_m,opt);
 
 		return ver;
 	}
 
+	/*! \brief for each particle get the verlet list
+	 *
+	 * \param r_cut cut-off radius
+	 * \param ver Verlet to update
+	 * \param r_cut cutoff radius
+	 * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC
+	 *
+	 */
+	void updateVerlet(VerletList<dim,St,FAST,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC)
+	{
+		ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt);
+	}
+
 	/*! \brief for each particle get the verlet list
 	 *
 	 * \param verlet output verlet list for each particle
@@ -1598,7 +591,7 @@ public:
 	 */
 	template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder (int32_t m)
 	{
-		reorder(m,dec.getGhost());
+		reorder(m,getDecomposition().getGhost());
 	}
 
 
@@ -1626,7 +619,7 @@ public:
 		// calculate the parameters of the cell list
 
 		// get the processor bounding box
-		Box<dim,St> pbox = dec.getProcessorBounds();
+		Box<dim,St> pbox = getDecomposition().getProcessorBounds();
 		// extend by the ghost
 		pbox.enlarge(enlarge);
 
@@ -1762,7 +755,7 @@ public:
 			stop.set_d(i, sz[i] - 1);
 		}
 
-		grid_dist_id_iterator_dec<Decomposition> it_dec(dec, sz, start, stop);
+		grid_dist_id_iterator_dec<Decomposition> it_dec(getDecomposition(), sz, start, stop);
 		return it_dec;
 	}
 
@@ -1786,14 +779,69 @@ public:
 		return vector_dist_iterator(0, g_m);
 	}
 
+	/*! \brief Get an iterator that traverse the particles in the domain
+	 *
+	 * \return an iterator
+	 *
+	 */
+	vector_dist_iterator getDomainAndGhostIterator() const
+	{
+		return vector_dist_iterator(0, v_pos.size());
+	}
+
 	/*! \brief Get the decomposition
 	 *
 	 * \return
 	 *
 	 */
-	Decomposition & getDecomposition()
+	inline Decomposition & getDecomposition()
+	{
+		return vector_dist_comm<dim,St,prop,Decomposition,Memory>::getDecomposition();
+	}
+
+	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
+	 *
+	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
+	 *
+	 * In general this function is called after moving the particles to move the
+	 * elements out the local processor. Or just after initialization if each processor
+	 * contain non local particles
+	 *
+	 * \tparam prp properties to communicate
+	 *
+	 *
+	 */
+	template<unsigned int ... prp> void map_list()
 	{
-		return dec;
+		this->template map_list_<prp...>(v_pos,v_prp,g_m);
+	}
+
+
+	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
+	 *
+	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
+	 *
+	 * In general this function is called after moving the particles to move the
+	 * elements out the local processor. Or just after initialization if each processor
+	 * contain non local particles
+	 *
+	 *
+	 */
+	template<typename obp = KillParticle> void map()
+	{
+		this->template map_<obp>(v_pos,v_prp,g_m);
+	}
+
+	/*! \brief It synchronize the properties and position of the ghost particles
+	 *
+	 * \tparam prp list of properties to get synchronize
+	 *
+	 * \param opt options WITH_POSITION, it send also the positional information of the particles
+	 *
+	 */
+	template<int ... prp> inline void ghost_get(size_t opt = WITH_POSITION)
+	{
+		this->template ghost_get_<prp...>(v_pos,v_prp,g_m,opt);
 	}
 
 	/*! \brief Remove a set of elements from the distributed vector
@@ -1832,9 +880,11 @@ public:
 	{
 		CellDecomposer_sm<dim, St> cdsm;
 
+		Decomposition & dec = getDecomposition();
+
 		cdsm.setDimensions(dec.getDomain(), dec.getGrid().getSize(), 0);
 
-		for (size_t i = 0; i < dec.getNSubSubDomains(); i++)
+		for (size_t i = 0; i < getDecomposition().getNSubSubDomains(); i++)
 		{
 			dec.setSubSubDomainComputationCost(i, 1);
 		}
diff --git a/src/Vector/vector_dist_cell_list_tests.hpp b/src/Vector/vector_dist_cell_list_tests.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ca3a1b49f96ae8848debaf94fd7c03099635b0d7
--- /dev/null
+++ b/src/Vector/vector_dist_cell_list_tests.hpp
@@ -0,0 +1,925 @@
+/*
+ * vector_dist_cell_list_tests.hpp
+ *
+ *  Created on: Aug 16, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_
+#define SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_
+
+
+///////////////////////// test hilb ///////////////////////////////
+
+BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessingUnits() > 48)
+		return;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+    long int k = 524288 * v_cl.getProcessingUnits();
+
+	long int big_step = k / 4;
+	big_step = (big_step == 0)?1:big_step;
+
+	print_test_v( "Testing 2D vector with hilbert curve reordering k<=",k);
+
+	// 2D test
+	for ( ; k >= 2 ; k-= decrement(k,big_step) )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 2D vector with hilbert curve reordering k=" << k );
+
+		Box<2,float> box({0.0,0.0},{1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[2]={NON_PERIODIC,NON_PERIODIC};
+
+		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box,bc,Ghost<2,float>(0.0));
+
+		auto it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.getPos(key)[0] = ud(eg);
+			vd.getPos(key)[1] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// Create first cell list
+
+		auto NN1 = vd.getCellList(0.01);
+
+		//An order of a curve
+		int32_t m = 6;
+
+		//Reorder a vector
+		vd.reorder(m);
+
+		// Create second cell list
+		auto NN2 = vd.getCellList(0.01);
+
+		//Check equality of cell sizes
+		for (size_t i = 0 ; i < NN1.getGrid().size() ; i++)
+		{
+			size_t n1 = NN1.getNelements(i);
+			size_t n2 = NN2.getNelements(i);
+
+			BOOST_REQUIRE_EQUAL(n1,n2);
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessingUnits() > 48)
+		return;
+
+	///////////////////// INPUT DATA //////////////////////
+
+	// Dimensionality of the space
+	const size_t dim = 3;
+	// Cut-off radiuses. Can be put different number of values
+	openfpm::vector<float> cl_r_cutoff {0.05};
+	// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
+	size_t cl_k_start = 10000;
+	// The lower threshold for number of particles
+	size_t cl_k_min = 1000;
+	// Ghost part of distributed vector
+	double ghost_part = 0.05;
+
+	///////////////////////////////////////////////////////
+
+	//For different r_cut
+	for (size_t r = 0; r < cl_r_cutoff.size(); r++ )
+	{
+		//Cut-off radius
+		float r_cut = cl_r_cutoff.get(r);
+
+		//Number of particles
+		size_t k = cl_k_start * v_cl.getProcessingUnits();
+
+		std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs hilb celllist) k<=");
+
+		vector_dist_test::print_test_v(str,k);
+
+		//For different number of particles
+		for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 )
+		{
+			BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces (random vs hilb celllist) k<=" << k_int );
+
+			Box<dim,float> box;
+
+			for (size_t i = 0; i < dim; i++)
+			{
+				box.setLow(i,0.0);
+				box.setHigh(i,1.0);
+			}
+
+			// Boundary conditions
+			size_t bc[dim];
+
+			for (size_t i = 0; i < dim; i++)
+				bc[i] = PERIODIC;
+
+			vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+
+			vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part));
+
+			// Initialize dist vectors
+			vd_initialize_double<dim>(vd, vd2, v_cl, k_int);
+
+			vd.template ghost_get<0>();
+			vd2.template ghost_get<0>();
+
+			//Get a cell list
+
+			auto NN = vd.getCellList(r_cut);
+
+			//Calculate forces
+
+			calc_forces<dim>(NN,vd,r_cut);
+
+			//Get a cell list hilb
+
+			auto NN_hilb = vd2.getCellList_hilb(r_cut);
+
+			//Calculate forces
+			calc_forces_hilb<dim>(NN_hilb,vd2,r_cut);
+
+			// Calculate average
+			size_t count = 1;
+			Point<dim,float> avg;
+			for (size_t i = 0 ; i < dim ; i++)	{avg.get(i) = 0.0;}
+
+			auto it_v2 = vd.getIterator();
+			while (it_v2.isNext())
+			{
+				//key
+				vect_dist_key_dx key = it_v2.get();
+
+				for (size_t i = 0; i < dim; i++)
+					avg.get(i) += fabs(vd.template getProp<0>(key)[i]);
+
+				++count;
+				++it_v2;
+			}
+
+			for (size_t i = 0 ; i < dim ; i++)	{avg.get(i) /= count;}
+
+			auto it_v = vd.getIterator();
+			while (it_v.isNext())
+			{
+				//key
+				vect_dist_key_dx key = it_v.get();
+
+				for (size_t i = 0; i < dim; i++)
+				{
+					auto a1 = vd.template getProp<0>(key)[i];
+					auto a2 = vd2.template getProp<0>(key)[i];
+
+					//Check that the forces are (almost) equal
+					float per = 0.1;
+					if (a1 != 0.0)
+						per = fabs(0.1*avg.get(i)/a1);
+
+					BOOST_REQUIRE_CLOSE(a1,a2,per);
+				}
+
+				++it_v;
+			}
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessingUnits() > 48)
+		return;
+
+	///////////////////// INPUT DATA //////////////////////
+
+	// Dimensionality of the space
+	const size_t dim = 3;
+	// Cut-off radiuses. Can be put different number of values
+	openfpm::vector<float> cl_r_cutoff {0.01};
+	// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
+	size_t cl_k_start = 10000;
+	// The lower threshold for number of particles
+	size_t cl_k_min = 1000;
+	// Ghost part of distributed vector
+	double ghost_part = 0.01;
+
+	///////////////////////////////////////////////////////
+
+	//For different r_cut
+	for (size_t r = 0; r < cl_r_cutoff.size(); r++ )
+	{
+		//Cut-off radius
+		float r_cut = cl_r_cutoff.get(r);
+
+		//Number of particles
+		size_t k = cl_k_start * v_cl.getProcessingUnits();
+
+		std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs reorder) k<=");
+
+		vector_dist_test::print_test_v(str,k);
+
+		//For different number of particles
+		for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 )
+		{
+			BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces (random vs reorder) k<=" << k_int );
+
+			Box<dim,float> box;
+
+			for (size_t i = 0; i < dim; i++)
+			{
+				box.setLow(i,0.0);
+				box.setHigh(i,1.0);
+			}
+
+			// Boundary conditions
+			size_t bc[dim];
+
+			for (size_t i = 0; i < dim; i++)
+				bc[i] = PERIODIC;
+
+			vector_dist<dim,float, aggregate<float[dim], float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+
+			// Initialize vd
+			vd_initialize<dim,decltype(vd)>(vd, v_cl, k_int);
+
+			vd.template ghost_get<0>();
+
+			//Get a cell list
+
+			auto NN1 = vd.getCellList(r_cut);
+
+			//Calculate forces '0'
+
+			calc_forces<dim>(NN1,vd,r_cut);
+
+			//Reorder and get a cell list again
+
+			vd.reorder(4);
+
+			vd.template ghost_get<0>();
+
+			auto NN2 = vd.getCellList(r_cut);
+
+			//Calculate forces '1'
+			calc_forces<dim,1>(NN2,vd,r_cut);
+
+			// Calculate average (For Coverty scan we start from 1)
+			size_t count = 1;
+			Point<dim,float> avg;
+			for (size_t i = 0 ; i < dim ; i++)	{avg.get(i) = 0.0;}
+
+			auto it_v2 = vd.getIterator();
+			while (it_v2.isNext())
+			{
+				//key
+				vect_dist_key_dx key = it_v2.get();
+
+				for (size_t i = 0; i < dim; i++)
+					avg.get(i) += fabs(vd.template getProp<0>(key)[i]);
+
+				++count;
+				++it_v2;
+			}
+
+			for (size_t i = 0 ; i < dim ; i++)	{avg.get(i) /= count;}
+
+			//Test for equality of forces
+			auto it_v = vd.getDomainIterator();
+
+			while (it_v.isNext())
+			{
+				//key
+				vect_dist_key_dx key = it_v.get();
+
+				for (size_t i = 0; i < dim; i++)
+				{
+					auto a1 = vd.template getProp<0>(key)[i];
+					auto a2 = vd.template getProp<1>(key)[i];
+
+					//Check that the forces are (almost) equal
+					float per = 0.1;
+					if (a1 != 0.0)
+						per = fabs(0.1*avg.get(i)/a1);
+
+					BOOST_REQUIRE_CLOSE(a1,a2,per);
+				}
+
+				++it_v;
+			}
+		}
+	}
+}
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_sym_cell_list_test )
+{
+	long int k = 4096*create_vcluster().getProcessingUnits();
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	print_test( "Testing vector symmetric cell-list k<=",k);
+
+	// 3D test
+	for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		double r_cut = 0.1;
+
+		// domain
+		Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// ghost, big enough to contain the interaction radius
+		Ghost<3,float> ghost(r_cut);
+
+		vector_dist<3,double, aggregate<double> > vd(k,box,bc,ghost);
+
+		{
+		auto it = vd.getDomainIterator();
+
+		while (it.isNext())
+		{
+			auto p = it.get();
+
+			vd.getPos(p)[0] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[1] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[2] = (double)rand()/RAND_MAX;
+
+			++it;
+		}
+		}
+
+		vd.map();
+		vd.ghost_get<>();
+
+		// Get the Cell list structure
+		auto NN = vd.getCellList(r_cut);
+
+		// Get an iterator over particles
+		auto it2 = vd.getDomainAndGhostIterator();
+
+		openfpm::vector<openfpm::vector<size_t>> idx;
+		idx.resize(vd.size_local_with_ghost());
+
+		/////////// SYMMETRIC CASE CELL-LIST ////////
+
+		// For each particle ...
+		while (it2.isNext())
+		{
+			// ... p
+			auto p = it2.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p symmetric
+			auto NpSym = NN.template getNNIteratorSym<NO_CHECK>(NN.getCell(vd.getPos(p)),p.getKey());
+
+			// For each neighborhood of the particle p
+			while (NpSym.isNext())
+			{
+				// Neighborhood particle q
+				auto q = NpSym.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++NpSym; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				// potential energy (using pow is slower)
+				vd.getProp<0>(p) += rn;
+				vd.getProp<0>(q) += rn;
+
+				idx.get(p.getKey()).add(q);
+				idx.get(q).add(p.getKey());
+
+
+				// Next neighborhood
+				++NpSym;
+			}
+
+			// Next Particle
+			++it2;
+		}
+
+		/////////////// NON SYMMETRIC CASE ////////////////////////
+
+		openfpm::vector<openfpm::vector<size_t>> idx2;
+		idx2.resize(vd.size_local());
+
+		auto it = vd.getDomainIterator();
+
+		// For each particle ...
+		while (it.isNext())
+		{
+			// ... p
+			auto p = it.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p
+			auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(vd.getPos(p)));
+
+			double Ep = 0.0;
+
+			// For each neighborhood of the particle p
+			while (Np.isNext())
+			{
+				// Neighborhood particle q
+				auto q = Np.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++Np; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				idx2.get(p.getKey()).add(q);
+
+				// potential energy (using pow is slower)
+				Ep += rn;
+
+				// Next neighborhood
+				++Np;
+			}
+
+			idx.get(p.getKey()).sort();
+			idx2.get(p.getKey()).sort();
+
+			bool ret = true;
+
+			for (size_t i = 0 ; i < idx.get(p.getKey()).size() ; i++)
+				ret &= idx.get(p.getKey()).get(i) == idx2.get(p.getKey()).get(i);
+
+			BOOST_REQUIRE_EQUAL(ret,true);
+
+			BOOST_REQUIRE_CLOSE(Ep,vd.getProp<0>(p),0.01);
+
+			// Next Particle
+			++it;
+		}
+	}
+}
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_sym_verlet_list_test )
+{
+	if (create_vcluster().getProcessingUnits() > 12)
+		return;
+
+	long int k = 4096*create_vcluster().getProcessingUnits();
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	print_test( "Testing vector symmetric verlet-list k<=",k);
+
+	// 3D test
+	for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		double r_cut = 0.1;
+
+		// domain
+		Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// ghost, big enough to contain the interaction radius
+		Ghost<3,float> ghost(r_cut);
+
+		vector_dist<3,double, aggregate<double> > vd(k,box,bc,ghost);
+
+		{
+		auto it = vd.getDomainIterator();
+
+		while (it.isNext())
+		{
+			auto p = it.get();
+
+			vd.getPos(p)[0] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[1] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[2] = (double)rand()/RAND_MAX;
+
+			++it;
+		}
+		}
+
+		vd.map();
+		vd.ghost_get<>();
+
+		// Get the Cell list structure
+		auto NN = vd.getVerlet(r_cut,VL_SYMMETRIC);
+		auto NNc = vd.getVerlet(r_cut);
+
+		// Get an iterator over particles
+		auto it2 = vd.getDomainAndGhostIterator();
+
+		openfpm::vector<openfpm::vector<size_t>> idx;
+		idx.resize(vd.size_local_with_ghost());
+
+		/////////// SYMMETRIC CASE VERLET-LIST ////////
+
+		// For each particle ...
+		while (it2.isNext())
+		{
+			// ... p
+			auto p = it2.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p symmetric
+			auto NpSym = NN.template getNNIterator<NO_CHECK>(p.getKey());
+
+			// For each neighborhood of the particle p
+			while (NpSym.isNext())
+			{
+				// Neighborhood particle q
+				auto q = NpSym.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++NpSym; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				// potential energy (using pow is slower)
+				vd.getProp<0>(p) += rn;
+				vd.getProp<0>(q) += rn;
+
+				idx.get(p.getKey()).add(q);
+				idx.get(q).add(p.getKey());
+
+
+				// Next neighborhood
+				++NpSym;
+			}
+
+			// Next Particle
+			++it2;
+		}
+
+		/////////////// NON SYMMETRIC CASE VERLET-LIST ////////////////////////
+
+		openfpm::vector<openfpm::vector<size_t>> idx2;
+		idx2.resize(vd.size_local());
+
+		auto it = vd.getDomainIterator();
+
+		// For each particle ...
+		while (it.isNext())
+		{
+			// ... p
+			auto p = it.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p
+			auto Np = NNc.template getNNIterator<NO_CHECK>(p.getKey());
+
+			double Ep = 0.0;
+
+			// For each neighborhood of the particle p
+			while (Np.isNext())
+			{
+				// Neighborhood particle q
+				auto q = Np.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++Np; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				idx2.get(p.getKey()).add(q);
+
+				// potential energy (using pow is slower)
+				Ep += rn;
+
+				// Next neighborhood
+				++Np;
+			}
+
+			idx.get(p.getKey()).sort();
+			idx2.get(p.getKey()).sort();
+
+			bool ret = true;
+
+			BOOST_REQUIRE_EQUAL(idx.get(p.getKey()).size(),idx2.get(p.getKey()).size());
+
+			for (size_t i = 0 ; i < idx.get(p.getKey()).size() ; i++)
+			{
+				ret &= idx.get(p.getKey()).get(i) == idx2.get(p.getKey()).get(i);
+			}
+
+			BOOST_REQUIRE_EQUAL(ret,true);
+
+			BOOST_REQUIRE_CLOSE(Ep,vd.getProp<0>(p),0.01);
+
+			// Next Particle
+			++it;
+		}
+	}
+}
+
+struct couple_contrib
+{
+	size_t i;
+	size_t j;
+	size_t ir;
+	size_t jr;
+	double E;
+
+	bool operator<(couple_contrib & ct)
+	{
+		return E < ct.E;
+	}
+};
+
+BOOST_AUTO_TEST_CASE( vector_dist_sym_tot_red_computation )
+{
+	if (create_vcluster().getProcessingUnits() > 12)
+		return;
+
+	long int k = 4096*create_vcluster().getProcessingUnits();
+
+	size_t base = create_vcluster().getProcessUnitID() * 4096;
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	print_test( "Testing vector symmetric verlet-list k<=",k);
+
+	openfpm::vector<couple_contrib> cct;
+	openfpm::vector<couple_contrib> cct2;
+
+	// 3D test
+/*	for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step )
+	{*/
+		double r_cut = 0.05;
+
+		// domain
+		Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// ghost, big enough to contain the interaction radius
+		Ghost<3,float> ghost(r_cut);
+
+		vector_dist<3,double, aggregate<size_t> > vd(k,box,bc,ghost);
+
+		{
+		auto it = vd.getDomainIterator();
+
+		while (it.isNext())
+		{
+			auto p = it.get();
+
+			vd.getPos(p)[0] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[1] = (double)rand()/RAND_MAX;
+			vd.getPos(p)[2] = (double)rand()/RAND_MAX;
+
+			vd.getProp<0>(p) = p.getKey() + base;
+
+			++it;
+		}
+		}
+
+		vd.map();
+		vd.ghost_get<0>();
+
+		// Get the Cell list structure
+		auto NN = vd.getVerlet(r_cut,VL_SYMMETRIC_RED);
+		auto NNc = vd.getVerlet(r_cut);
+
+		// Get an iterator over particles
+		auto it2 = vd.getDomainAndGhostIterator();
+
+		/////////// SYMMETRIC CASE VERLET-LIST REDUCTION ////////
+
+		{
+			auto it2 = vd.getDomainAndGhostIterator();
+
+			// DEBUG
+
+			// For each particle ...
+			while (it2.isNext())
+			{
+				// ... p
+				auto p = it2.get();
+
+				if( vd.getProp<0>(p) == 2524 || vd.getProp<0>(p) == 4135 )
+				{
+					Vcluster & v_cl = create_vcluster();
+					Point<3,double> xp = vd.getPos(p);
+
+					std::cout << "Particle " << p.getKey() << "   " << xp.toString() << "    " << v_cl.getProcessUnitID() << std::endl;
+				}
+
+				++it2;
+			}
+		}
+
+
+		openfpm::vector<couple_contrib> ctt;
+
+		double tot_sym = 0.0;
+		size_t tot_n_sym = 0;
+
+		// For each particle ...
+		while (it2.isNext())
+		{
+			// ... p
+			auto p = it2.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p symmetric
+			auto NpSym = NN.template getNNIterator<NO_CHECK>(p.getKey());
+
+			// For each neighborhood of the particle p
+			while (NpSym.isNext())
+			{
+				// Neighborhood particle q
+				auto q = NpSym.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++NpSym; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				////// debug ////////////
+
+				couple_contrib cc;
+				cc.i = vd.getProp<0>(p);
+				cc.j = vd.getProp<0>(q);
+				cc.ir = p.getKey();
+				cc.jr = q;
+				cc.E = rn;
+
+				cct.add(cc);
+
+				/////////////////////////
+
+				// potential energy (using pow is slower)
+				tot_sym += rn;
+				tot_n_sym++;
+
+				// Next neighborhood
+				++NpSym;
+			}
+
+			// Next Particle
+			++it2;
+		}
+
+		/////////////// NON SYMMETRIC CASE VERLET-LIST ////////////////////////
+
+		double tot = 0.0;
+		size_t tot_n = 0;
+
+		auto it = vd.getDomainIterator();
+
+		// For each particle ...
+		while (it.isNext())
+		{
+			// ... p
+			auto p = it.get();
+
+			// Get the position of the particle p
+			Point<3,double> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood of the particle p
+			auto Np = NNc.template getNNIterator<NO_CHECK>(p.getKey());
+
+			// For each neighborhood of the particle p
+			while (Np.isNext())
+			{
+				// Neighborhood particle q
+				auto q = Np.get();
+
+				// if p == q skip this particle
+				if (q == p.getKey())	{++Np; continue;};
+
+				// Get position of the particle q
+				Point<3,double> xq = vd.getPos(q);
+
+				// take the normalized direction
+				double rn = norm2(xp - xq);
+
+				// potential energy (using pow is slower)
+				tot += rn/2.0;
+				tot_n++;
+
+				////// debug ////////////
+
+				couple_contrib cc;
+				cc.i = vd.getProp<0>(p);
+				cc.j = vd.getProp<0>(q);
+				cc.E = rn;
+
+				cct2.add(cc);
+
+				/////////////////////////
+
+				// Next neighborhood
+				++Np;
+			}
+
+			// Next Particle
+			++it;
+		}
+
+		Vcluster & v_cl = create_vcluster();
+
+		v_cl.sum(tot_n);
+		v_cl.sum(tot_n_sym);
+		v_cl.sum(tot);
+		v_cl.sum(tot_sym);
+		v_cl.execute();
+
+		openfpm::vector<couple_contrib> cc_collect;
+		openfpm::vector<couple_contrib> cc_collect2;
+
+		v_cl.SGather(cct,cc_collect,0);
+		v_cl.SGather(cct2,cc_collect2,0);
+
+		if (v_cl.getProcessUnitID() == 0)
+		{
+			cc_collect.sort();
+			cc_collect2.sort();
+
+			for (size_t i = 0 ; i < cc_collect2.size() ; i++)
+			{
+				if (i % 10000 == 0)
+					std::cout << i << "  " << cc_collect2.size() << std::endl;
+
+				for (size_t k = i+1 ; k < i+2 && k < cc_collect2.size() ; k++)
+				{
+					if (cc_collect2.get(i).i == cc_collect2.get(k).j && cc_collect2.get(i).j == cc_collect2.get(k).i)
+					{
+						cc_collect2.remove(k);
+					}
+				}
+			}
+
+			for (size_t i = 0 ; i < cc_collect.size() && i < cc_collect2.size() ; i++)
+				std::cout << "E1: " << cc_collect.get(i).E << "   " << cc_collect.get(i).i << "(" << cc_collect.get(i).ir << ")" << "  " << cc_collect.get(i).j << "(" << cc_collect.get(i).jr << ")" << "          E2: " << cc_collect2.get(i).E << "   " << cc_collect2.get(i).i << "  " << cc_collect2.get(i).j << std::endl;
+		}
+
+//		BOOST_REQUIRE_EQUAL(2*tot_n_sym,tot_n);
+//		BOOST_REQUIRE_CLOSE(tot,tot_sym,0.00001);
+
+/*	}*/
+}
+
+#endif /* SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ */
diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa8dd9c417058b187173ec5c86545f6c2b4d9895
--- /dev/null
+++ b/src/Vector/vector_dist_comm.hpp
@@ -0,0 +1,1217 @@
+/*
+ * vector_dist_comm.hpp
+ *
+ *  Created on: Aug 18, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_
+#define SRC_VECTOR_VECTOR_DIST_COMM_HPP_
+
+#define V_SUB_UNIT_FACTOR 64
+
+#define SKIP_LABELLING 512
+
+#define NO_POSITION 1
+#define WITH_POSITION 2
+
+/*! \brief This class is an helper for the communication of vector_dist
+ *
+ * \tparam dim Dimensionality of the space where the elements lives
+ * \tparam St type of space float, double ...
+ * \tparam prop properties the vector element store in OpenFPM data structure format
+ * \tparam Decomposition Decomposition strategy to use CartDecomposition ...
+ * \tparam Memory Memory pool where store the information HeapMemory ...
+ *
+ * \see vector_dist
+ *
+ */
+
+template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory>
+class vector_dist_comm
+{
+	//! VCluster
+	Vcluster & v_cl;
+
+	//! Domain decomposition
+	Decomposition dec;
+
+	//! It map the processor id with the communication request into map procedure
+	openfpm::vector<size_t> p_map_req;
+
+	//! For each near processor, outgoing particle id and shift vector
+	openfpm::vector<openfpm::vector<size_t>> opart;
+
+	//! For each near processor, particle shift vector
+	openfpm::vector<openfpm::vector<size_t>> oshift;
+
+	//! For each adjacent processor the size of the ghost sending buffer
+	openfpm::vector<size_t> ghost_prc_sz;
+
+	//! Sending buffer for the ghost particles properties
+	BHeapMemory g_prp_mem;
+
+	//! Sending buffer for the ghost particles position
+	BHeapMemory g_pos_mem;
+
+	//! For each adjacent processor it store the size of the receiving message in byte
+	openfpm::vector<size_t> recv_sz;
+
+	//! For each adjacent processor it store the received message for ghost get
+	openfpm::vector<BHeapMemory> recv_mem_gg;
+
+	//! For each processor it store the received message for global map
+	openfpm::vector<BHeapMemory> recv_mem_gm;
+
+	/*! \brief It store for each processor the position and properties vector of the particles
+	 *
+	 * This structure is used in the map function
+	 *
+	 */
+	struct pos_prop
+	{
+		//! position vector
+		openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos;
+		//! properties vector
+		openfpm::vector<prop, PreAllocHeapMemory<2>, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp;
+	};
+
+	/*! \brief for each processor store 2 vector containing the sending buffers
+	 *
+	 * This structure is used in the map_list function
+	 *
+	 */
+	template <typename sel_prop>
+	struct pos_prop_sel
+	{
+		//! position vector
+		openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos;
+		//! properties vector
+		openfpm::vector<sel_prop, PreAllocHeapMemory<2>, typename memory_traits_lin<sel_prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp;
+	};
+
+	//! definition of the send vector for position
+	typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> send_pos_vector;
+
+	bool is_shift_box_created = false;
+
+	// this map is used to check if a combination is already present
+	std::unordered_map<size_t, size_t> map_cmb;
+
+	// The boxes touching the border of the domain are divided in groups (first vector)
+	// each group contain internal ghost coming from sub-domains of the same section
+	openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f;
+
+	// Store the sector for each group (previous vector)
+	openfpm::vector_std<comb<dim>> box_cmb;
+
+	//
+	openfpm::vector<aggregate<size_t,size_t>> o_part_loc;
+
+	/*! \brief For every internal ghost box we create a structure that order such internal local ghost box in
+	 *         shift vectors
+	 *
+	 * \param shifts vectors
+	 *
+	 */
+	void createShiftBox()
+	{
+		if (is_shift_box_created == true)
+			return;
+
+		// Add local particles coming from periodic boundary, the only boxes that count are the one
+		// touching the border, filter them
+		for (size_t i = 0; i < dec.getNLocalSub(); i++)
+		{
+			size_t Nl = dec.getLocalNIGhost(i);
+
+			for (size_t j = 0; j < Nl; j++)
+			{
+				// If the ghost does not come from the intersection with an out of
+				// border sub-domain the combination is all zero and n_zero return dim
+				if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
+					continue;
+
+				// Check if we already have boxes with such combination
+				auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
+				if (it == map_cmb.end())
+				{
+					// we do not have it
+					box_f.add();
+					box_f.last().add(dec.getLocalIGhostBox(i, j));
+					box_cmb.add(dec.getLocalIGhostPos(i, j));
+					map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
+				}
+				else
+				{
+					// we have it
+					box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
+				}
+
+			}
+		}
+
+		is_shift_box_created = true;
+	}
+
+	/*! \brief Local ghost from labeled particles
+	 *
+	 *
+	 */
+	void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp)
+	{
+		// get the shift vectors
+		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
+
+		for (size_t i = 0 ; i < o_part_loc.size() ; i++)
+		{
+			size_t lin_id = o_part_loc.get<1>(i);
+			size_t key = o_part_loc.template get<0>(i);
+
+			Point<dim, St> p = v_pos.get(key);
+			// shift
+			p -= shifts.get(lin_id);
+
+			// add this particle shifting its position
+			v_pos.add(p);
+			v_prp.add();
+			v_prp.last() = v_prp.get(key);
+		}
+	}
+
+	/*! \brief Local ghost from decomposition
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \g_m ghost marker
+	 *
+	 */
+	void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t g_m)
+	{
+		o_part_loc.clear();
+
+		// get the shift vectors
+		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
+
+		// Label the internal (assigned) particles
+		auto it = v_pos.getIteratorTo(g_m);
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// If particles are inside these boxes
+			for (size_t i = 0; i < box_f.size(); i++)
+			{
+				for (size_t j = 0; j < box_f.get(i).size(); j++)
+				{
+					if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true)
+					{
+						size_t lin_id = box_cmb.get(i).lin();
+
+						o_part_loc.add();
+						o_part_loc.template get<0>(o_part_loc.size()-1) = key;
+						o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id;
+
+						Point<dim, St> p = v_pos.get(key);
+						// shift
+						p -= shifts.get(lin_id);
+
+						// add this particle shifting its position
+						v_pos.add(p);
+						v_prp.add();
+						v_prp.last() = v_prp.get(key);
+
+						// boxes in one group can be overlapping
+						// we do not have to search for the other
+						// boxes otherwise we will have duplicate particles
+						//
+						// A small note overlap of boxes across groups is fine
+						// (and needed) because each group has different shift
+						// producing non overlapping particles
+						//
+						break;
+					}
+				}
+			}
+
+			++it;
+		}
+	}
+
+	/*! \brief Add local particles based on the boundary conditions
+	 *
+	 * In order to understand what this function use the following
+	 *
+	 \verbatim
+
+	 [1,1]
+	 +---------+------------------------+---------+
+	 | (1,-1)  |                        | (1,1)   |
+	 |   |     |    (1,0) --> 7         |   |     |
+	 |   v     |                        |   v     |
+	 |   6     |                        |   8     |
+	 +--------------------------------------------+
+	 |         |                        |         |
+	 |         |                        |         |
+	 |         |                        |         |
+	 | (-1,0)  |                        | (1,0)   |
+	 |    |    |                        |   |     |
+	 |    v    |      (0,0) --> 4       |   v     |
+	 |    3    |                        |   5     |
+	 |         |                        |         |
+ B	 |         |                        |     A   |
+ *	 |         |                        |    *    |
+	 |         |                        |         |
+	 |         |                        |         |
+	 |         |                        |         |
+	 +--------------------------------------------+
+	 | (-1,-1) |                        | (-1,1)  |
+	 |    |    |   (-1,0) --> 1         |    |    |
+	 |    v    |                        |    v    |
+	 |    0    |                        |    2    |
+	 +---------+------------------------+---------+
+
+
+	 \endverbatim
+
+	 *
+	 *  The box is the domain, while all boxes at the border (so not (0,0) ) are the
+	 *  ghost part at the border of the domain. If a particle A is in the position in figure
+	 *  a particle B must be created. This function duplicate the particle A, if A and B are
+	 *  local
+	 *
+	 * \param v_pos vector of particle of positions
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 * \param opt options
+	 *
+	 */
+	void add_loc_particles_bc(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp ,size_t & g_m, size_t opt)
+	{
+		// Create the shift boxes
+		createShiftBox();
+
+		if (box_f.size() == 0)
+			return;
+		else
+		{
+			if (opt & SKIP_LABELLING)
+				local_ghost_from_opart(v_pos,v_prp);
+			else
+				local_ghost_from_dec(v_pos,v_prp,g_m);
+		}
+	}
+
+	/*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param g_pos_send Send buffer to fill
+	 * \param prAlloc_pos Memory object for the send buffer
+	 *
+	 */
+	void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>> & v_pos,openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos)
+	{
+		// get the shift vectors
+		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
+
+		// create a number of send buffers equal to the near processors
+		g_pos_send.resize(ghost_prc_sz.size());
+		for (size_t i = 0; i < g_pos_send.size(); i++)
+		{
+			// set the preallocated memory to ensure contiguity
+			g_pos_send.get(i).setMemory(*prAlloc_pos);
+
+			// resize the sending vector (No allocation is produced)
+			g_pos_send.get(i).resize(ghost_prc_sz.get(i));
+		}
+
+		// Fill the send buffer
+		for (size_t i = 0; i < opart.size(); i++)
+		{
+			for (size_t j = 0; j < opart.get(i).size(); j++)
+			{
+				Point<dim, St> s = v_pos.get(opart.get(i).get(j));
+				s -= shifts.get(oshift.get(i).get(j));
+				g_pos_send.get(i).set(j, s);
+			}
+		}
+	}
+
+	/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
+	 *
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 * \param v_prp vector of particle properties
+	 * \param g_send_prp Send buffer to fill
+	 * \param prAlloc_prp Memory object for the send buffer
+	 *
+	 */
+	template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp)
+	{
+		// create a number of send buffers equal to the near processors
+		g_send_prp.resize(ghost_prc_sz.size());
+		for (size_t i = 0; i < g_send_prp.size(); i++)
+		{
+			// set the preallocated memory to ensure contiguity
+			g_send_prp.get(i).setMemory(*prAlloc_prp);
+
+			// resize the sending vector (No allocation is produced)
+			g_send_prp.get(i).resize(ghost_prc_sz.get(i));
+		}
+
+		// Fill the send buffer
+		for (size_t i = 0; i < opart.size(); i++)
+		{
+			for (size_t j = 0; j < opart.get(i).size(); j++)
+			{
+				// source object type
+				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
+				// destination object type
+				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
+
+				// Copy only the selected properties
+				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(opart.get(i).get(j)), g_send_prp.get(i).get(j));
+			}
+		}
+	}
+
+	/*! \brief allocate and fill the send buffer for the map function
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particles properties
+	 * \param prc_r List of processor rank involved in the send
+	 * \param prc_sz_r For each processor in the list the size of the message to send
+	 * \param pb send buffer
+	 *
+	 */
+	void fill_send_map_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp,openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop> & pb)
+	{
+		pb.resize(prc_r.size());
+
+		for (size_t i = 0; i < prc_r.size(); i++)
+		{
+			// Create the size required to store the particles position and properties to communicate
+			size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
+			size_t s2 = openfpm::vector<prop, HeapMemory, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
+
+			// Preallocate the memory
+			size_t sz[2] = { s1, s2 };
+			PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz);
+
+			// Set the memory allocator
+			pb.get(i).pos.setMemory(*mem);
+			pb.get(i).prp.setMemory(*mem);
+
+			// set the size and allocate, using mem warant that pos and prp is contiguous
+			pb.get(i).pos.resize(prc_sz_r.get(i));
+			pb.get(i).prp.resize(prc_sz_r.get(i));
+		}
+
+		// Run through all the particles and fill the sending buffer
+
+		for (size_t i = 0; i < opart.size(); i++)
+		{
+			auto it = opart.get(i).getIterator();
+			size_t lbl = p_map_req.get(i);
+
+			while (it.isNext())
+			{
+				size_t key = it.get();
+				size_t id = opart.get(i).get(key);
+
+				pb.get(lbl).pos.set(key, v_pos.get(id));
+				pb.get(lbl).prp.set(key, v_prp.get(id));
+
+				++it;
+			}
+		}
+	}
+
+	/*! \brief allocate and fill the send buffer for the map function
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param prc_r List of processor rank involved in the send
+	 * \param prc_sz_r For each processor in the list the size of the message to send
+	 * \param pb send buffer
+	 *
+	 */
+	template<typename prp_object,int ... prp> void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop_sel<prp_object>> & pb)
+	{
+		pb.resize(prc_r.size());
+
+		for (size_t i = 0; i < prc_r.size(); i++)
+		{
+			// Create the size required to store the particles position and properties to communicate
+			size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
+			size_t s2 = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0);
+
+			// Preallocate the memory
+			size_t sz[2] = { s1, s2 };
+			PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz);
+
+			// Set the memory allocator
+			pb.get(i).pos.setMemory(*mem);
+			pb.get(i).prp.setMemory(*mem);
+
+			// set the size and allocate, using mem warant that pos and prp is contiguous
+			pb.get(i).pos.resize(prc_sz_r.get(i));
+			pb.get(i).prp.resize(prc_sz_r.get(i));
+		}
+
+		// Run through all the particles and fill the sending buffer
+
+		for (size_t i = 0; i < opart.size(); i++)
+		{
+			auto it = opart.get(i).getIterator();
+			size_t lbl = p_map_req.get(i);
+
+			while (it.isNext())
+			{
+				size_t key = it.get();
+				size_t id = opart.get(i).get(key);
+
+				pb.get(lbl).pos.set(key, v_pos.get(id));
+
+				// source object type
+				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
+				// destination object type
+				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
+
+				// Copy only the selected properties
+				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), pb.get(lbl).prp.get(key));
+
+				++it;
+			}
+		}
+	}
+
+	/*! \brief Label particles for mappings
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param lbl_p Particle labeled
+	 * \param prc_sz For each processor the number of particles to send
+	 * \param opart id of the particles to send
+	 *
+	 */
+	template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>> & v_pos,openfpm::vector<openfpm::vector<size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz, openfpm::vector<size_t> & opart)
+	{
+		// reset lbl_p
+		lbl_p.resize(v_cl.getProcessingUnits());
+		for (size_t i = 0; i < lbl_p.size(); i++)
+			lbl_p.get(i).clear();
+
+		// resize the label buffer
+		prc_sz.resize(v_cl.getProcessingUnits());
+
+		auto it = v_pos.getIterator();
+
+		// Label all the particles with the processor id where they should go
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Apply the boundary conditions
+			dec.applyPointBC(v_pos.get(key));
+
+			size_t p_id = 0;
+
+			// Check if the particle is inside the domain
+			if (dec.getDomain().isInside(v_pos.get(key)) == true)
+				p_id = dec.processorIDBC(v_pos.get(key));
+			else
+				p_id = obp::out(key, v_cl.getProcessUnitID());
+
+			// Particle to move
+			if (p_id != v_cl.getProcessUnitID())
+			{
+				if ((long int) p_id != -1)
+				{
+					prc_sz.get(p_id)++;lbl_p
+					.get(p_id).add(key);
+					opart.add(key);
+				}
+				else
+				{
+					opart.add(key);
+				}
+			}
+
+			// Add processors and add size
+
+			++it;
+		}
+	}
+
+	/*! \brief This function process the receiced data for the properties and populate the ghost
+	 *
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp(openfpm::vector<prop> & v_prp, size_t & g_m)
+	{
+		// Mark the ghost part
+		g_m = v_prp.size();
+
+		// Process the received data (recv_mem_gg != 0 if you have data)
+		for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++)
+		{
+			// calculate the number of received elements
+			size_t n_ele = recv_sz.get(i) / sizeof(prp_object);
+
+			// add the received particles to the vector
+			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i));
+
+			// create vector representation to a piece of memory already allocated
+			openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity> v2;
+
+			v2.setMemory(*ptr1);
+
+			// resize with the number of elements
+			v2.resize(n_ele);
+
+			// Add the ghost particle
+			v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2);
+		}
+	}
+
+	/*! \brief This function process the received data for the properties and populate the ghost
+	 *
+	 * \param v_pos vector of particle positions
+	 *
+	 */
+	void process_received_ghost_pos(openfpm::vector<Point<dim, St>> & v_pos)
+	{
+		// Process the received data (recv_mem_gg != 0 if you have data)
+		for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++)
+		{
+			// calculate the number of received elements
+			size_t n_ele = recv_sz.get(i) / sizeof(Point<dim, St> );
+
+			// add the received particles to the vector
+			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i));
+
+			// create vector representation to a piece of memory already allocated
+
+			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> v2;
+
+			v2.setMemory(*ptr1);
+
+			// resize with the number of elements
+			v2.resize(n_ele);
+
+			// Add the ghost particle
+			v_pos.template add<PtrMemory, openfpm::grow_policy_identity>(v2);
+		}
+	}
+
+	/*! \brief Process the received particles
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param out_part list of the out-going particles
+	 *
+	 */
+	void process_received_map(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp,openfpm::vector<size_t> & out_part)
+	{
+		size_t o_p_id = 0;
+
+		for (size_t i = 0; i < recv_mem_gm.size(); i++)
+		{
+			// Get the number of elements
+
+			size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prop));
+
+			// Pointer of the received positions for each near processor
+			void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer();
+			// Pointer of the received properties for each near processor
+			void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> );
+
+			PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> ));
+			PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prop));
+
+			// create vector representation to a piece of memory already allocated
+
+			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos;
+			openfpm::vector<prop, PtrMemory, typename memory_traits_lin<prop>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp;
+
+			vpos.setMemory(*ptr1);
+			vprp.setMemory(*ptr2);
+
+			vpos.resize(n_ele);
+			vprp.resize(n_ele);
+
+			// Add the received particles to v_pos and v_prp
+
+			size_t j = 0;
+			for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++)
+			{
+				v_pos.set(out_part.get(o_p_id), vpos.get(j));
+				v_prp.set(out_part.get(o_p_id), vprp.get(j));
+			}
+
+			for (; j < vpos.size(); j++)
+			{
+				v_pos.add();
+				v_pos.set(v_pos.size() - 1, vpos.get(j));
+				v_prp.add();
+				v_prp.set(v_prp.size() - 1, vprp.get(j));
+			}
+		}
+
+		// remove the (out-going particles) in the vector
+
+		v_pos.remove(out_part, o_p_id);
+		v_prp.remove(out_part, o_p_id);
+	}
+
+	/*! \brief Process the received particles
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param out_part list of the out-going particles
+	 *
+	 */
+	template<typename prp_object , int ... prp> void process_received_map_list(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & out_part)
+	{
+		size_t o_p_id = 0;
+
+		for (size_t i = 0; i < recv_mem_gm.size(); i++)
+		{
+			// Get the number of elements
+
+			size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prp_object));
+
+			// Pointer of the received positions for each near processor
+			void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer();
+			// Pointer of the received properties for each near processor
+			void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> );
+
+			PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> ));
+			PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prp_object));
+
+			// create vector representation to a piece of memory already allocated
+
+			openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos;
+			openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp;
+
+			vpos.setMemory(*ptr1);
+			vprp.setMemory(*ptr2);
+
+			vpos.resize(n_ele);
+			vprp.resize(n_ele);
+
+			// Add the received particles to v_pos and v_prp
+
+			size_t j = 0;
+			for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++)
+			{
+				v_pos.set(out_part.get(o_p_id), vpos.get(j));
+				v_prp.template set_o<decltype(vprp.get(j)), prp... >(out_part.get(o_p_id), vprp.get(j));
+			}
+
+			for (; j < vpos.size(); j++)
+			{
+				v_pos.add();
+				v_pos.set(v_pos.size() - 1, vpos.get(j));
+				v_prp.template set_o<decltype(vprp.get(j)), prp... >(v_prp.size() - 1, vprp.get(j));
+			}
+		}
+
+		// remove the (out-going particles) in the vector
+
+		v_pos.remove(out_part, o_p_id);
+		v_prp.remove(out_part, o_p_id);
+	}
+
+	/*! \brief Calculate send buffers total size and allocation
+	 *
+	 * \tparam prp_object object containing only the properties to send
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param size_byte_prp total size for the property buffer
+	 * \param size_byte_pos total size for the position buffer
+	 *
+	 */
+	template<typename prp_object> void calc_send_ghost_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & size_byte_prp, size_t & size_byte_pos)
+	{
+		// Calculate the total size required for the sending buffer
+		for (size_t i = 0; i < ghost_prc_sz.size(); i++)
+		{
+			size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0);
+			size_byte_prp += alloc_ele;
+
+			alloc_ele = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0);
+			size_byte_pos += alloc_ele;
+		}
+	}
+
+	/*! \brief Label the particles
+	 *
+	 * It count the number of particle to send to each processors and save its ids
+	 *
+	 * \see nn_prcs::getShiftvectors()
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	void labelParticlesGhost(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m)
+	{
+		// Buffer that contain the number of elements to send for each processor
+		ghost_prc_sz.clear();
+		ghost_prc_sz.resize(dec.getNNProcessors());
+
+		// Buffer that contain for each processor the id of the particle to send
+		opart.clear();
+		opart.resize(dec.getNNProcessors());
+
+		// Buffer that contain for each processor the id of the shift vector
+		oshift.clear();
+		oshift.resize(dec.getNNProcessors());
+
+		// Iterate over all particles
+		auto it = v_pos.getIteratorTo(g_m);
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Given a particle, it return which processor require it (first id) and shift id, second id
+			// For an explanation about shifts vectors please consult getShiftVector in ie_ghost
+			const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE);
+
+			for (size_t i = 0; i < vp_id.size(); i++)
+			{
+				// processor id
+				size_t p_id = vp_id.get(i).first;
+
+				// add particle to communicate
+				ghost_prc_sz.get(p_id)++;
+				opart.get(p_id).add(key);
+				oshift.get(p_id).add(vp_id.get(i).second);
+			}
+
+			++it;
+		}
+	}
+
+	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
+	 *
+	 * \param msg_i message size required to receive from i
+	 * \param total_msg message size to receive from all the processors
+	 * \param total_p the total number of processor want to communicate with you
+	 * \param i processor id
+	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
+	 *           every time message_alloc is called)
+	 * \param ptr void pointer parameter for additional data to pass to the call-back
+	 *
+	 */
+	static void * msg_alloc_ghost_get(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
+	{
+		vector_dist_comm<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr);
+
+		v->recv_sz.resize(v->dec.getNNProcessors());
+		v->recv_mem_gg.resize(v->dec.getNNProcessors());
+
+		// Get the local processor id
+		size_t lc_id = v->dec.ProctoID(i);
+
+		// resize the receive buffer
+		v->recv_mem_gg.get(lc_id).resize(msg_i);
+		v->recv_sz.get(lc_id) = msg_i;
+
+		return v->recv_mem_gg.get(lc_id).getPointer();
+	}
+
+
+	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
+	 *
+	 * \param msg_i size required to receive the message from i
+	 * \param total_msg total size to receive from all the processors
+	 * \param total_p the total number of processor that want to communicate with you
+	 * \param i processor id
+	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
+	 *           every time message_alloc is called)
+	 * \param ptr a pointer to the vector_dist structure
+	 *
+	 * \return the pointer where to store the message for the processor i
+	 *
+	 */
+	static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
+	{
+		// cast the pointer
+		vector_dist_comm<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr);
+
+		vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits());
+		vd->recv_mem_gm.get(i).resize(msg_i);
+
+		return vd->recv_mem_gm.get(i).getPointer();
+	}
+
+public:
+
+	/*! \brief Constructor
+	 *
+	 * \param dec Domain decompositon
+	 *
+	 */
+	vector_dist_comm(const Decomposition & dec)
+	:v_cl(create_vcluster()),dec(dec)
+	{
+
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param dec Domain decompositon
+	 *
+	 */
+	vector_dist_comm(Decomposition && dec)
+	:v_cl(create_vcluster),dec(dec)
+	{
+
+	}
+
+	/*! \brief Constructor
+	 *
+	 */
+	vector_dist_comm()
+	:v_cl(create_vcluster()),dec(create_vcluster())
+	{
+	}
+
+	/*! \brief Get the number of minimum sub-domain
+	 *
+	 * \return minimum number
+	 *
+	 */
+	static size_t getDefaultNsubsub()
+	{
+		return V_SUB_UNIT_FACTOR;
+	}
+
+	/*! \brief Initialize the decomposition
+	 *
+	 * \param box domain
+	 * \param bc boundary conditions
+	 * \param g ghost extension
+	 *
+	 */
+	void init_decomposition(Box<dim,St> & box, const size_t (& bc)[dim],const Ghost<dim,St> & g)
+	{
+		// Create a valid decomposition of the space
+		// Get the number of processor and calculate the number of sub-domain
+		// for decomposition
+		size_t n_proc = v_cl.getProcessingUnits();
+		size_t n_sub = n_proc * getDefaultNsubsub();
+
+		// Calculate the maximum number (before merging) of sub-domain on
+		// each dimension
+		size_t div[dim];
+		for (size_t i = 0; i < dim; i++)
+		{
+			div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim));
+		}
+
+		// Create the sub-domains
+		dec.setParameters(div, box, bc, g);
+		dec.decompose();
+	}
+
+	/*! \brief It synchronize the properties and position of the ghost particles
+	 *
+	 * \tparam prp list of properties to get synchronize
+	 *
+	 * \param opt options WITH_POSITION, it send also the positional information of the particles
+	 * \param v_pos vector of position to update
+	 * \param v_prp vector of properties to update
+	 * \param g_m marker between real and ghost particles
+	 *
+	 */
+	template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = WITH_POSITION)
+	{
+		// Unload receive buffer
+		for (size_t i = 0 ; i < recv_mem_gg.size() ; i++)
+			recv_sz.get(i) = 0;
+
+		// Sending property object
+		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
+
+		// send vector for each processor
+		typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity> send_vector;
+
+		// reset the ghost part
+		v_pos.resize(g_m);
+		v_prp.resize(g_m);
+
+		// Label all the particles
+		if ((opt & SKIP_LABELLING) == false)
+			labelParticlesGhost(v_pos,v_prp,g_m);
+
+		// Calculate memory and allocation for the send buffers
+
+		// Total size
+		size_t size_byte_prp = 0;
+		size_t size_byte_pos = 0;
+
+		calc_send_ghost_buf<prp_object>(v_pos,v_prp,size_byte_prp, size_byte_pos);
+
+		// Create memory for the send buffer
+
+		g_prp_mem.resize(size_byte_prp);
+		if (opt != NO_POSITION)
+			g_pos_mem.resize(size_byte_pos);
+
+		// Create and fill send buffer for particle properties
+
+		ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(size_byte_prp, g_prp_mem);
+
+		openfpm::vector<send_vector> g_send_prp;
+		fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(v_prp,g_send_prp, prAlloc_prp);
+
+		// Create and fill the send buffer for the particle position
+
+		ExtPreAlloc<Memory> * prAlloc_pos;
+		openfpm::vector<send_pos_vector> g_pos_send;
+		if (opt != NO_POSITION)
+		{
+			prAlloc_pos = new ExtPreAlloc<Memory>(size_byte_pos, g_pos_mem);
+			fill_send_ghost_pos_buf(v_pos,g_pos_send, prAlloc_pos);
+		}
+
+		// Create processor list
+		openfpm::vector<size_t> prc;
+		for (size_t i = 0; i < opart.size(); i++)
+			prc.add(dec.IDtoProc(i));
+
+		// Send/receive the particle properties information
+		v_cl.sendrecvMultipleMessagesNBX(prc, g_send_prp, msg_alloc_ghost_get, this);
+		process_received_ghost_prp<send_vector, prp_object, prp...>(v_prp,g_m);
+
+		if (opt != NO_POSITION)
+		{
+			// Send/receive the particle properties information
+			v_cl.sendrecvMultipleMessagesNBX(prc, g_pos_send, msg_alloc_ghost_get, this);
+			process_received_ghost_pos(v_pos);
+		}
+
+		add_loc_particles_bc(v_pos,v_prp,g_m,opt);
+	}
+
+
+	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
+	 *
+	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
+	 *
+	 * In general this function is called after moving the particles to move the
+	 * elements out the local processor. Or just after initialization if each processor
+	 * contain non local particles
+	 *
+	 * \tparam prp properties to communicate
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m)
+	{
+		typedef KillParticle obp;
+
+		// outgoing particles-id
+		openfpm::vector<size_t> out_part;
+
+		// Processor communication size
+		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
+
+		// It contain the list of the processors this processor should to communicate with
+		openfpm::vector<size_t> p_list;
+
+		// map completely reset the ghost part
+		v_pos.resize(g_m);
+		v_prp.resize(g_m);
+
+		// Contain the processor id of each particle (basically where they have to go)
+		labelParticleProcessor<obp>(v_pos,opart, prc_sz, out_part);
+
+		// Calculate the sending buffer size for each processor, put this information in
+		// a contiguous buffer
+		p_map_req.resize(v_cl.getProcessingUnits());
+		openfpm::vector<size_t> prc_sz_r;
+		openfpm::vector<size_t> prc_r;
+
+		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+		{
+			if (prc_sz.get(i) != 0)
+			{
+				p_map_req.get(i) = prc_r.size();
+				prc_r.add(i);
+				prc_sz_r.add(prc_sz.get(i));
+			}
+		}
+
+		// Sending property object
+		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
+
+		// Allocate the send buffers
+
+		openfpm::vector<pos_prop_sel<prp_object>> pb;
+
+		// fill the send buffers
+		fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_r, prc_sz_r, pb);
+
+		// Create the set of pointers
+		openfpm::vector<void *> ptr(prc_r.size());
+		for (size_t i = 0; i < prc_r.size(); i++)
+		{
+			ptr.get(i) = pb.get(i).pos.getPointer();
+		}
+
+		// convert the particle number to buffer size
+		for (size_t i = 0; i < prc_sz_r.size(); i++)
+		{
+			prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prp_object) + sizeof(Point<dim, St> ));
+		}
+
+		// Send and receive the particles
+
+		recv_mem_gm.clear();
+		v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist_comm::message_alloc_map, this, NEED_ALL_SIZE);
+
+		// Process the incoming particles
+
+		process_received_map_list<prp_object, prp...>(v_pos,v_prp,out_part);
+
+		// mark the ghost part
+
+		g_m = v_pos.size();
+	}
+
+	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
+	 *
+	 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
+	 *
+	 * In general this function is called after moving the particles to move the
+	 * elements out the local processor. Or just after initialization if each processor
+	 * contain non local particles
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<typename obp = KillParticle> void map_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m)
+	{
+		// outgoing particles-id
+		openfpm::vector<size_t> out_part;
+
+		// Processor communication size
+		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
+
+		// It contain the list of the processors this processor should to communicate with
+		openfpm::vector<size_t> p_list;
+
+		// map completely reset the ghost part
+		v_pos.resize(g_m);
+		v_prp.resize(g_m);
+
+		// Contain the processor id of each particle (basically where they have to go)
+		labelParticleProcessor<obp>(v_pos,opart, prc_sz, out_part);
+
+		// Calculate the sending buffer size for each processor, put this information in
+		// a contiguous buffer
+		p_map_req.resize(v_cl.getProcessingUnits());
+		openfpm::vector<size_t> prc_sz_r;
+		openfpm::vector<size_t> prc_r;
+
+		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+		{
+			if (prc_sz.get(i) != 0)
+			{
+				p_map_req.get(i) = prc_r.size();
+				prc_r.add(i);
+				prc_sz_r.add(prc_sz.get(i));
+			}
+		}
+
+		// Allocate the send buffers
+
+		openfpm::vector<pos_prop> pb;
+
+		// fill the send buffers
+		fill_send_map_buf(v_pos,v_prp,prc_r, prc_sz_r, pb);
+
+		// Create the set of pointers
+		openfpm::vector<void *> ptr(prc_r.size());
+		for (size_t i = 0; i < prc_r.size(); i++)
+		{
+			ptr.get(i) = pb.get(i).pos.getPointer();
+		}
+
+		// convert the particle number to buffer size
+		for (size_t i = 0; i < prc_sz_r.size(); i++)
+		{
+			prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prop) + sizeof(Point<dim, St> ));
+		}
+
+		// Send and receive the particles
+
+		recv_mem_gm.clear();
+		v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist_comm::message_alloc_map, this, NEED_ALL_SIZE);
+
+		// Process the incoming particles
+
+		process_received_map(v_pos,v_prp,out_part);
+
+		// mark the ghost part
+
+		g_m = v_pos.size();
+	}
+
+	/*! \brief Get the decomposition
+	 *
+	 * \return
+	 *
+	 */
+	inline Decomposition & getDecomposition()
+	{
+		return dec;
+	}
+
+	/*! \brief Copy a vector
+	 *
+	 * \param vc vector to copy
+	 *
+	 */
+	vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory> & vc)
+	{
+		dec = vc.dec;
+
+		return *this;
+	}
+
+	/*! \brief Copy a vector
+	 *
+	 * \param vc vector to copy
+	 *
+	 */
+	vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory> && vc)
+	{
+		dec.swap(vc.dec);
+
+		return *this;
+	}
+};
+
+
+#endif /* SRC_VECTOR_VECTOR_DIST_COMM_HPP_ */
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index 6c300d2643b599a2b4d47dedccf9136e41b549a6..f3bb94365c6d065ea84ed48e2c395767c3975b98 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -1389,7 +1389,192 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_map_list )
 	}
 }
 
+
+BOOST_AUTO_TEST_CASE( vector_dist_ghost_with_ghost_buffering )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessingUnits() > 3)
+		return;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+    long int k = 524288 * v_cl.getProcessingUnits();
+
+	long int big_step = k / 4;
+	big_step = (big_step == 0)?1:big_step;
+
+	print_test("Testing 3D periodic vector with ghost buffering k=",k);
+	BOOST_TEST_CHECKPOINT( "Testing 3D periodic with ghost buffering k=" << k );
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+	// ghost
+	Ghost<3,float> ghost(0.1);
+
+	typedef  aggregate<float> part_prop;
+
+	// Distributed vector
+	vector_dist<3,float, part_prop > vd(k,box,bc,ghost);
+
+	auto it = vd.getIterator();
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+
+		vd.getPos(key)[0] = ud(eg);
+		vd.getPos(key)[1] = ud(eg);
+		vd.getPos(key)[2] = ud(eg);
+
+		// Fill some properties randomly
+
+		vd.getProp<0>(key) = 0.0;
+
+		++it;
+	}
+
+	vd.map();
+
+	// sync the ghost
+	vd.ghost_get<0>();
+
+	openfpm::vector<size_t> list_idx;
+	openfpm::vector<size_t> list_idx2;
+
+	auto it3 = vd.getGhostIterator();
+	while (it3.isNext())
+	{
+		auto key = it3.get();
+
+		list_idx.add(key.getKey());
+
+		++it3;
+	}
+
+	list_idx.sort();
+
+	for (size_t i = 0 ; i < 10 ; i++)
+	{
+		auto it = vd.getDomainIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.getPos(key)[0] = ud(eg);
+			vd.getPos(key)[1] = ud(eg);
+			vd.getPos(key)[2] = ud(eg);
+
+			// Fill some properties randomly
+
+			vd.getProp<0>(key) = i;
+
+			++it;
+		}
+
+		vd.ghost_get<0>(SKIP_LABELLING);
+
+		list_idx2.clear();
+		auto it2 = vd.getGhostIterator();
+		bool ret = true;
+
+		while (it2.isNext())
+		{
+			auto key = it2.get();
+
+			list_idx2.add(key.getKey());
+			ret &= vd.getProp<0>(key) == i;
+
+			++it2;
+		}
+
+		BOOST_REQUIRE_EQUAL(ret,true);
+		BOOST_REQUIRE_EQUAL(list_idx.size(),list_idx2.size());
+
+		list_idx2.sort();
+
+		ret = true;
+		for (size_t i = 0 ; i < list_idx.size() ; i++)
+			ret &= list_idx.get(i) == list_idx2.get(i);
+
+		BOOST_REQUIRE_EQUAL(ret,true);
+	}
+
+	vd.map();
+	vd.ghost_get<0>();
+
+	list_idx.clear();
+
+	auto it4 = vd.getGhostIterator();
+	while (it4.isNext())
+	{
+		auto key = it4.get();
+
+		list_idx.add(key.getKey());
+
+		++it4;
+	}
+
+	list_idx.sort();
+
+	for (size_t i = 0 ; i < 10 ; i++)
+	{
+		auto it = vd.getDomainIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.getPos(key)[0] = ud(eg);
+			vd.getPos(key)[1] = ud(eg);
+			vd.getPos(key)[2] = ud(eg);
+
+			// Fill some properties randomly
+
+			vd.getProp<0>(key) = i;
+
+			++it;
+		}
+
+		vd.ghost_get<0>(SKIP_LABELLING);
+
+		list_idx2.clear();
+		auto it2 = vd.getGhostIterator();
+		bool ret = true;
+
+		while (it2.isNext())
+		{
+			auto key = it2.get();
+
+			list_idx2.add(key.getKey());
+			ret &= vd.getProp<0>(key) == i;
+
+			++it2;
+		}
+
+		BOOST_REQUIRE_EQUAL(ret,true);
+		BOOST_REQUIRE_EQUAL(list_idx.size(),list_idx2.size());
+
+		list_idx2.sort();
+
+		ret = true;
+		for (size_t i = 0 ; i < list_idx.size() ; i++)
+			ret &= list_idx.get(i) == list_idx2.get(i);
+
+		BOOST_REQUIRE_EQUAL(ret,true);
+	}
+}
+
 #include "vector_dist_cell_list_tests.hpp"
+#include "vector_dist_NN_tests.hpp"
 
 BOOST_AUTO_TEST_SUITE_END()
 
diff --git a/src/pdata_performance.hpp b/src/pdata_performance.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d8c74c90fbee1027354f5058672cef96d41d2a5d
--- /dev/null
+++ b/src/pdata_performance.hpp
@@ -0,0 +1,20 @@
+/*
+ * performance.hpp
+ *
+ *  Created on: Mar 9, 2016
+ *      Author: yaroslav
+ */
+
+#ifndef SRC_PDATA_PERFORMANCE_HPP_
+#define SRC_PDATA_PERFORMANCE_HPP_
+
+BOOST_AUTO_TEST_SUITE( performance )
+
+#include "Vector/vector_dist_verlet_performance_tests.hpp"
+#include "Vector/vector_dist_cl_performance_tests.hpp"
+#include "Vector/vector_dist_cl_hilb_performance_tests.hpp"
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_PDATA_PERFORMANCE_HPP_ */