From c3556b53ac976fc5683a963ecf80e523061cd23a Mon Sep 17 00:00:00 2001
From: Pietro Incardona <i-bird@localhost.localdomain>
Date: Thu, 1 Sep 2016 03:50:46 +0200
Subject: [PATCH] Ghost put working

---
 example/VCluster/0_simple/main.cpp          |   3 +-
 example/Vector/0_simple/main.cpp            |   4 +-
 example/Vector/3_molecular_dynamic/Makefile |   4 +-
 example/Vector/3_molecular_dynamic/main.cpp |  63 ++---
 openfpm_data                                |   2 +-
 src/Vector/vector_dist.hpp                  |  12 +
 src/Vector/vector_dist_cell_list_tests.hpp  | 259 +------------------
 src/Vector/vector_dist_comm.hpp             | 263 ++++++++++++++++++--
 src/Vector/vector_dist_unit_test.hpp        | 118 +++++++++
 9 files changed, 420 insertions(+), 308 deletions(-)

diff --git a/example/VCluster/0_simple/main.cpp b/example/VCluster/0_simple/main.cpp
index 7e0ac8b5..3056cade 100644
--- a/example/VCluster/0_simple/main.cpp
+++ b/example/VCluster/0_simple/main.cpp
@@ -29,7 +29,6 @@ int main(int argc, char* argv[])
 	//
 	// Get the vcluster object and the number of processor
 	//
-
 	Vcluster & v_cl = create_vcluster();
 	long int N_prc = v_cl.getProcessingUnits();
 
@@ -37,7 +36,7 @@ int main(int argc, char* argv[])
 	// ### WIKI 3 ###
 	//
 	// We find the maximum of the processors rank, that should be the Number of
-	// processora minus one, only processor 0 print on terminal
+	// processor minus one, only processor 0 print on terminal
 	//
 
 	long int id = v_cl.getProcessUnitID();
diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp
index 45d205a7..8a19b9e1 100644
--- a/example/Vector/0_simple/main.cpp
+++ b/example/Vector/0_simple/main.cpp
@@ -344,7 +344,7 @@ int main(int argc, char* argv[])
 	/*!
 	 * \page Vector_0_simple Vector 0 simple
 	 *
-	 * ## Finalize ## {#finalize}
+	 * ## Finalize ## {#finalize_e0_sim}
 	 *
 	 *  At the very end of the program we have always to de-initialize the library
 	 *
@@ -361,7 +361,7 @@ int main(int argc, char* argv[])
 	/*!
 	 * \page Vector_0_simple Vector 0 simple
 	 *
-	 * # Full code # {#code}
+	 * ## Full code ## {#code_e0_sim}
 	 *
 	 * \include Vector/0_simple/main.cpp
 	 *
diff --git a/example/Vector/3_molecular_dynamic/Makefile b/example/Vector/3_molecular_dynamic/Makefile
index cc252770..9c467c81 100644
--- a/example/Vector/3_molecular_dynamic/Makefile
+++ b/example/Vector/3_molecular_dynamic/Makefile
@@ -12,7 +12,7 @@ OBJ_VL_SYM = main_vl_sym.o
 all: md_dyn md_dyn_expr md_dyn_vl md_dyn_vl_sym
 
 %.o: %.cpp
-	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+	$(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
 md_dyn: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
@@ -27,7 +27,7 @@ md_dyn_vl_sym: $(OBJ_VL_SYM)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
 
 run: all
-	source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr; mpirun -np 3 ./md_dyn_vl; mpirun -np 3 ./md_dyn_vl_sym
+	source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr; mpirun -np 3 ./md_dyn_vl; mpirun -np 3 ./md_dyn_vl_sym;
 
 .PHONY: clean all run
 
diff --git a/example/Vector/3_molecular_dynamic/main.cpp b/example/Vector/3_molecular_dynamic/main.cpp
index 223e63ad..8d70b8c9 100644
--- a/example/Vector/3_molecular_dynamic/main.cpp
+++ b/example/Vector/3_molecular_dynamic/main.cpp
@@ -1,3 +1,10 @@
+/*! \page Vector_3_md Vector 3 molecular dynamic
+ *
+ * \subpage Vector_3_md_dyn
+ * \subpage Vector_3_md_vl
+ *
+ */
+
 #include "Vector/vector_dist.hpp"
 #include "Decomposition/CartDecomposition.hpp"
 #include "data_type/aggregate.hpp"
@@ -6,7 +13,7 @@
 #include "timer.hpp"
 
 /*!
- * \page Vector_3_md Vector 3 molecular dynamic
+ * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
  *
  * [TOC]
  *
@@ -31,7 +38,7 @@ constexpr int force = 1;
 
 /*!
  *
- * \page Vector_3_md Vector 3 molecular dynamic
+ * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
  *
  * ## Calculate forces ## {#e3_md_cf}
  *
@@ -54,7 +61,7 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce
 //! \cond [calc forces] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * This function in called several time and require neighborhood of each particle. In order to speed-up the
 	 * Cell-list construction we can use updateCellList function to reuse the memory of the previous cell-list.
@@ -74,7 +81,7 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce
 
 	/*!
 	 *
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md Vector 3 molecular dynamic with cell-list
 	 *
 	 * Get an iterator over the particles and get its position. For each particle p iterate in its neighborhood q
 	 * and calculate the force based on the Lennard-Jhones potential given by
@@ -155,7 +162,7 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce
 //! \cond [calc forces2] \endcond
 
 /*!
- * \page Vector_3_md Vector 3 molecular dynamic
+ * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
  *
  * ## Calculate energy ## {#e3_md_ce}
  *
@@ -176,7 +183,7 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd,
 //! \cond [calc energy] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * Reset the counter for the energy counter and
 	 * update the cell list from the actual particle configuration
@@ -196,7 +203,7 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd,
 
 	/*!
 	 *
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * First we get an iterator over the particles and get its position. For each particle p iterate in its neighborhood q
 	 * and calculate the energy based on the Lennard-Jhones potential given by
@@ -273,7 +280,7 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd,
 int main(int argc, char* argv[])
 {
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * ## Initialization ## {#e3_md_init}
 	 *
@@ -282,11 +289,11 @@ int main(int argc, char* argv[])
 	 * size of the box, and cut-off radius of the interaction. We also define 2 vectors
 	 * x and y (they are like std::vector) used for statistic
 	 *
-	 * \snippet Vector/3_molecular_dynamic/main.cpp constants
+	 * \snippet Vector/3_molecular_dynamic/main.cpp constants run
 	 *
 	 */
 
-	//! \cond [constants] \endcond
+	//! \cond [constants run] \endcond
 
 	double dt = 0.0005;
 	double sigma = 0.1;
@@ -297,10 +304,10 @@ int main(int argc, char* argv[])
 	openfpm::vector<double> x;
 	openfpm::vector<openfpm::vector<double>> y;
 
-	//! \cond [constants] \endcond
+	//! \cond [constants run] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * Here we Initialize the library, we create a Box that define our domain, boundary conditions and ghost
 	 *
@@ -330,7 +337,7 @@ int main(int argc, char* argv[])
 	//! \cond [init] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * Than we define a distributed vector in 3D, containing 2 vectorial properties the
 	 * first is the actual velocity of the particle the other is the force
@@ -348,7 +355,7 @@ int main(int argc, char* argv[])
 	//! \cond [vect create] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * ## Particles on a grid like position ## {#e3_md_gl}
 	 *
@@ -389,7 +396,7 @@ int main(int argc, char* argv[])
 	//! \cond [vect grid] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * ## Molecular dynamic steps ## {#e3_md_vi}
 	 *
@@ -397,16 +404,16 @@ int main(int argc, char* argv[])
 	 *
 	 * The verlet integration stepping look like this
 	 *
-	 * $$ \vec{v}(t_{n+1/2}) = \vec{v}_p(t_n) + \frac{1}{2} \delta t \vec{a}(t_n) $$ // Step 1
-	 * $$ \vec{x}(t_{n}) = \vec{x}_p(t_n) + \delta t \vec{v}(t_n+1/2) $$             // Step 1
+	 * \f[ \vec{v}(t_{n+1/2}) = \vec{v}_p(t_n) + \frac{1}{2} \delta t \vec{a}(t_n) \f]
+	 * \f[ \vec{x}(t_{n}) = \vec{x}_p(t_n) + \delta t \vec{v}(t_n+1/2) \f]
 	 *
-	 * calculate the forces $$ \vec{a} (t_{n}) $$ from $$ \vec{x} (t_{n}) $$         // Step 2
+	 * calculate the forces from \f$ \vec{a} (t_{n}) \f$ finally
 	 *
-	 * $$ \vec{v}(t_{n+1}) = \vec{v}_p(t_n+1/2) + \frac{1}{2} \delta t \vec{a}(t_n+1) $$ // Step 3
+	 * \f[ \vec{v}(t_{n+1}) = \vec{v}_p(t_n+1/2) + \frac{1}{2} \delta t \vec{a}(t_n+1) \f]
 	 *
 	 * The cell-list structure is required to calculate forces
 	 *
-	 * Inside this cycle we are using several features that has been explained before in particuilar
+	 * Inside this cycle we are using several features that has been explained before in particular
 	 *
 	 * \see \ref e0_s_assign_pos
 	 *
@@ -516,7 +523,7 @@ int main(int argc, char* argv[])
 	std::cout << "Time: " << tsim.getwct() << std::endl;
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
 	 * ## Plotting graphs ## {#e3_md_pg}
 	 *
@@ -560,13 +567,13 @@ int main(int argc, char* argv[])
 	//! \cond [google chart] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
-	 * ## Finalize ## {#finalize}
+	 * ## Finalize ## {#finalize_v_e3_md}
 	 *
 	 *  At the very end of the program we have always to de-initialize the library
 	 *
-	 * \snippet Vector/1_celllist/main.cpp finalize
+	 * \snippet Vector/3_molecular_dynamic/main.cpp finalize
 	 *
 	 */
 
@@ -577,18 +584,18 @@ int main(int argc, char* argv[])
 	//! \cond [finalize] \endcond
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
-	 * # Full code # {#code}
+	 * ## Full code ## {#code_v_e3_md}
 	 *
 	 * \include Vector/3_molecular_dynamic/main.cpp
 	 *
 	 */
 
 	/*!
-	 * \page Vector_3_md Vector 3 molecular dynamic
+	 * \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
 	 *
-	 * # Code with expression # {#code}
+	 * ## Code with expression ## {#code_v_e3_md_expr}
 	 *
 	 * Here we also show how we can simplify the example using expressions
 	 *
diff --git a/openfpm_data b/openfpm_data
index 9301fe45..36003173 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 9301fe459ca2ef81115c642ee7a54c983ddfd6f1
+Subproject commit 360031739584423b20497b1364d2c3bcf26c5143
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index 0dc710bf..ef47a83d 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -844,6 +844,18 @@ public:
 		this->template ghost_get_<prp...>(v_pos,v_prp,g_m,opt);
 	}
 
+	/*! \brief It synchronize the properties and position of the ghost particles
+	 *
+	 * \tparam op which kind of operation to apply
+	 * \tparam prp list of properties to get synchronize
+	 *
+	 *
+	 */
+	template<template<typename,typename> class op, int ... prp> inline void ghost_put()
+	{
+		this->template ghost_put_<op,prp...>(v_pos,v_prp,g_m);
+	}
+
 	/*! \brief Remove a set of elements from the distributed vector
 	 *
 	 * \warning keys must be sorted
diff --git a/src/Vector/vector_dist_cell_list_tests.hpp b/src/Vector/vector_dist_cell_list_tests.hpp
index ca3a1b49..66747b43 100644
--- a/src/Vector/vector_dist_cell_list_tests.hpp
+++ b/src/Vector/vector_dist_cell_list_tests.hpp
@@ -407,7 +407,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_sym_cell_list_test )
 				auto q = NpSym.get();
 
 				// if p == q skip this particle
-				if (q == p.getKey())	{++NpSym; continue;};
+				if (q == p.getKey() )	{++NpSym; continue;};
 
 				// Get position of the particle q
 				Point<3,double> xq = vd.getPos(q);
@@ -664,262 +664,5 @@ BOOST_AUTO_TEST_CASE( vector_dist_sym_verlet_list_test )
 	}
 }
 
-struct couple_contrib
-{
-	size_t i;
-	size_t j;
-	size_t ir;
-	size_t jr;
-	double E;
-
-	bool operator<(couple_contrib & ct)
-	{
-		return E < ct.E;
-	}
-};
-
-BOOST_AUTO_TEST_CASE( vector_dist_sym_tot_red_computation )
-{
-	if (create_vcluster().getProcessingUnits() > 12)
-		return;
-
-	long int k = 4096*create_vcluster().getProcessingUnits();
-
-	size_t base = create_vcluster().getProcessUnitID() * 4096;
-
-	long int big_step = k / 30;
-	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 21;
-
-	print_test( "Testing vector symmetric verlet-list k<=",k);
-
-	openfpm::vector<couple_contrib> cct;
-	openfpm::vector<couple_contrib> cct2;
-
-	// 3D test
-/*	for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step )
-	{*/
-		double r_cut = 0.05;
-
-		// domain
-		Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0});
-
-		// Boundary conditions
-		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
-
-		// ghost, big enough to contain the interaction radius
-		Ghost<3,float> ghost(r_cut);
-
-		vector_dist<3,double, aggregate<size_t> > vd(k,box,bc,ghost);
-
-		{
-		auto it = vd.getDomainIterator();
-
-		while (it.isNext())
-		{
-			auto p = it.get();
-
-			vd.getPos(p)[0] = (double)rand()/RAND_MAX;
-			vd.getPos(p)[1] = (double)rand()/RAND_MAX;
-			vd.getPos(p)[2] = (double)rand()/RAND_MAX;
-
-			vd.getProp<0>(p) = p.getKey() + base;
-
-			++it;
-		}
-		}
-
-		vd.map();
-		vd.ghost_get<0>();
-
-		// Get the Cell list structure
-		auto NN = vd.getVerlet(r_cut,VL_SYMMETRIC_RED);
-		auto NNc = vd.getVerlet(r_cut);
-
-		// Get an iterator over particles
-		auto it2 = vd.getDomainAndGhostIterator();
-
-		/////////// SYMMETRIC CASE VERLET-LIST REDUCTION ////////
-
-		{
-			auto it2 = vd.getDomainAndGhostIterator();
-
-			// DEBUG
-
-			// For each particle ...
-			while (it2.isNext())
-			{
-				// ... p
-				auto p = it2.get();
-
-				if( vd.getProp<0>(p) == 2524 || vd.getProp<0>(p) == 4135 )
-				{
-					Vcluster & v_cl = create_vcluster();
-					Point<3,double> xp = vd.getPos(p);
-
-					std::cout << "Particle " << p.getKey() << "   " << xp.toString() << "    " << v_cl.getProcessUnitID() << std::endl;
-				}
-
-				++it2;
-			}
-		}
-
-
-		openfpm::vector<couple_contrib> ctt;
-
-		double tot_sym = 0.0;
-		size_t tot_n_sym = 0;
-
-		// For each particle ...
-		while (it2.isNext())
-		{
-			// ... p
-			auto p = it2.get();
-
-			// Get the position of the particle p
-			Point<3,double> xp = vd.getPos(p);
-
-			// Get an iterator over the neighborhood of the particle p symmetric
-			auto NpSym = NN.template getNNIterator<NO_CHECK>(p.getKey());
-
-			// For each neighborhood of the particle p
-			while (NpSym.isNext())
-			{
-				// Neighborhood particle q
-				auto q = NpSym.get();
-
-				// if p == q skip this particle
-				if (q == p.getKey())	{++NpSym; continue;};
-
-				// Get position of the particle q
-				Point<3,double> xq = vd.getPos(q);
-
-				// take the normalized direction
-				double rn = norm2(xp - xq);
-
-				////// debug ////////////
-
-				couple_contrib cc;
-				cc.i = vd.getProp<0>(p);
-				cc.j = vd.getProp<0>(q);
-				cc.ir = p.getKey();
-				cc.jr = q;
-				cc.E = rn;
-
-				cct.add(cc);
-
-				/////////////////////////
-
-				// potential energy (using pow is slower)
-				tot_sym += rn;
-				tot_n_sym++;
-
-				// Next neighborhood
-				++NpSym;
-			}
-
-			// Next Particle
-			++it2;
-		}
-
-		/////////////// NON SYMMETRIC CASE VERLET-LIST ////////////////////////
-
-		double tot = 0.0;
-		size_t tot_n = 0;
-
-		auto it = vd.getDomainIterator();
-
-		// For each particle ...
-		while (it.isNext())
-		{
-			// ... p
-			auto p = it.get();
-
-			// Get the position of the particle p
-			Point<3,double> xp = vd.getPos(p);
-
-			// Get an iterator over the neighborhood of the particle p
-			auto Np = NNc.template getNNIterator<NO_CHECK>(p.getKey());
-
-			// For each neighborhood of the particle p
-			while (Np.isNext())
-			{
-				// Neighborhood particle q
-				auto q = Np.get();
-
-				// if p == q skip this particle
-				if (q == p.getKey())	{++Np; continue;};
-
-				// Get position of the particle q
-				Point<3,double> xq = vd.getPos(q);
-
-				// take the normalized direction
-				double rn = norm2(xp - xq);
-
-				// potential energy (using pow is slower)
-				tot += rn/2.0;
-				tot_n++;
-
-				////// debug ////////////
-
-				couple_contrib cc;
-				cc.i = vd.getProp<0>(p);
-				cc.j = vd.getProp<0>(q);
-				cc.E = rn;
-
-				cct2.add(cc);
-
-				/////////////////////////
-
-				// Next neighborhood
-				++Np;
-			}
-
-			// Next Particle
-			++it;
-		}
-
-		Vcluster & v_cl = create_vcluster();
-
-		v_cl.sum(tot_n);
-		v_cl.sum(tot_n_sym);
-		v_cl.sum(tot);
-		v_cl.sum(tot_sym);
-		v_cl.execute();
-
-		openfpm::vector<couple_contrib> cc_collect;
-		openfpm::vector<couple_contrib> cc_collect2;
-
-		v_cl.SGather(cct,cc_collect,0);
-		v_cl.SGather(cct2,cc_collect2,0);
-
-		if (v_cl.getProcessUnitID() == 0)
-		{
-			cc_collect.sort();
-			cc_collect2.sort();
-
-			for (size_t i = 0 ; i < cc_collect2.size() ; i++)
-			{
-				if (i % 10000 == 0)
-					std::cout << i << "  " << cc_collect2.size() << std::endl;
-
-				for (size_t k = i+1 ; k < i+2 && k < cc_collect2.size() ; k++)
-				{
-					if (cc_collect2.get(i).i == cc_collect2.get(k).j && cc_collect2.get(i).j == cc_collect2.get(k).i)
-					{
-						cc_collect2.remove(k);
-					}
-				}
-			}
-
-			for (size_t i = 0 ; i < cc_collect.size() && i < cc_collect2.size() ; i++)
-				std::cout << "E1: " << cc_collect.get(i).E << "   " << cc_collect.get(i).i << "(" << cc_collect.get(i).ir << ")" << "  " << cc_collect.get(i).j << "(" << cc_collect.get(i).jr << ")" << "          E2: " << cc_collect2.get(i).E << "   " << cc_collect2.get(i).i << "  " << cc_collect2.get(i).j << std::endl;
-		}
-
-//		BOOST_REQUIRE_EQUAL(2*tot_n_sym,tot_n);
-//		BOOST_REQUIRE_CLOSE(tot,tot_sym,0.00001);
-
-/*	}*/
-}
 
 #endif /* SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ */
diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp
index fa8dd9c4..8dad03af 100644
--- a/src/Vector/vector_dist_comm.hpp
+++ b/src/Vector/vector_dist_comm.hpp
@@ -39,7 +39,7 @@ class vector_dist_comm
 	//! It map the processor id with the communication request into map procedure
 	openfpm::vector<size_t> p_map_req;
 
-	//! For each near processor, outgoing particle id and shift vector
+	//! For each near processor, outgoing particle id
 	openfpm::vector<openfpm::vector<size_t>> opart;
 
 	//! For each near processor, particle shift vector
@@ -54,15 +54,31 @@ class vector_dist_comm
 	//! Sending buffer for the ghost particles position
 	BHeapMemory g_pos_mem;
 
+	//! For each adjacent processor it store from which processor come from
+	openfpm::vector<size_t> prc_recv;
+
+	//! the same as prc_recv but for put
+	openfpm::vector<size_t> prc_recv_put;
+
+	//! Number of received elements
+	openfpm::vector<size_t> n_recv_ele;
+
 	//! For each adjacent processor it store the size of the receiving message in byte
 	openfpm::vector<size_t> recv_sz;
 
+	//! The same as recv_sz but for put
+	openfpm::vector<size_t> recv_sz_put;
+
 	//! For each adjacent processor it store the received message for ghost get
 	openfpm::vector<BHeapMemory> recv_mem_gg;
 
 	//! For each processor it store the received message for global map
 	openfpm::vector<BHeapMemory> recv_mem_gm;
 
+	//! Local ghost marker (across the ghost particles it mark from where we have the)
+	//! replicated ghost particles that are local
+	size_t lg_m;
+
 	/*! \brief It store for each processor the position and properties vector of the particles
 	 *
 	 * This structure is used in the map function
@@ -93,26 +109,25 @@ class vector_dist_comm
 	//! definition of the send vector for position
 	typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> send_pos_vector;
 
+	//! Flags that indicate that the function createShiftBox() has been called
 	bool is_shift_box_created = false;
 
-	// this map is used to check if a combination is already present
+	//! this map is used to check if a combination is already present
 	std::unordered_map<size_t, size_t> map_cmb;
 
-	// The boxes touching the border of the domain are divided in groups (first vector)
-	// each group contain internal ghost coming from sub-domains of the same section
+	//! The boxes touching the border of the domain are divided in groups (first vector)
+	//! each group contain internal ghost coming from sub-domains of the same section
 	openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f;
 
-	// Store the sector for each group (previous vector)
+	//! Store the sector for each group (previous vector)
 	openfpm::vector_std<comb<dim>> box_cmb;
 
-	//
+	//! Id of the local particle to replicate for ghost_get
 	openfpm::vector<aggregate<size_t,size_t>> o_part_loc;
 
 	/*! \brief For every internal ghost box we create a structure that order such internal local ghost box in
 	 *         shift vectors
 	 *
-	 * \param shifts vectors
-	 *
 	 */
 	void createShiftBox()
 	{
@@ -156,10 +171,14 @@ class vector_dist_comm
 
 	/*! \brief Local ghost from labeled particles
 	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particles properties
 	 *
 	 */
 	void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp)
 	{
+		lg_m = v_prp.size();
+
 		// get the shift vectors
 		const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
 
@@ -183,7 +202,7 @@ class vector_dist_comm
 	 *
 	 * \param v_pos vector of particle positions
 	 * \param v_prp vector of particle properties
-	 * \g_m ghost marker
+	 * \param g_m ghost marker
 	 *
 	 */
 	void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t g_m)
@@ -292,6 +311,8 @@ class vector_dist_comm
 		// Create the shift boxes
 		createShiftBox();
 
+		lg_m = v_prp.size();
+
 		if (box_f.size() == 0)
 			return;
 		else
@@ -338,6 +359,55 @@ class vector_dist_comm
 		}
 	}
 
+	/*! \brief This function fill the send buffer for ghost_put
+	 *
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 * \param v_prp vector of particle properties
+	 * \param g_send_prp Send buffer to fill
+	 * \param prAlloc_prp Memory object for the send buffer
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_put_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp, size_t & g_m)
+	{
+		// create a number of send buffers equal to the near processors
+		// from which we received
+		g_send_prp.resize(prc_recv.size());
+		for (size_t i = 0; i < g_send_prp.size(); i++)
+		{
+			// set the preallocated memory to ensure contiguity
+			g_send_prp.get(i).setMemory(*prAlloc_prp);
+
+			// resize the sending vector (No allocation is produced)
+			g_send_prp.get(i).resize(n_recv_ele.get(i));
+		}
+
+		size_t accum = g_m;
+
+		// Fill the send buffer
+		for (size_t i = 0; i < prc_recv.size(); i++)
+		{
+			size_t j2 = 0;
+			for (size_t j = accum; j < accum + n_recv_ele.get(i); j++)
+			{
+				// source object type
+				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
+				// destination object type
+				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
+
+				// Copy only the selected properties
+				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(j), g_send_prp.get(i).get(j2));
+
+				j2++;
+			}
+
+			accum = accum + n_recv_ele.get(i);
+		}
+	}
+
 	/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
 	 *
 	 * \tparam send_vector type used to send data
@@ -530,8 +600,8 @@ class vector_dist_comm
 			{
 				if ((long int) p_id != -1)
 				{
-					prc_sz.get(p_id)++;lbl_p
-					.get(p_id).add(key);
+					prc_sz.get(p_id)++;
+					lbl_p.get(p_id).add(key);
 					opart.add(key);
 				}
 				else
@@ -546,7 +616,7 @@ class vector_dist_comm
 		}
 	}
 
-	/*! \brief This function process the receiced data for the properties and populate the ghost
+	/*! \brief This function process the received data for the properties and populate the ghost
 	 *
 	 * \tparam send_vector type used to send data
 	 * \tparam prp_object object containing only the properties to send
@@ -558,6 +628,8 @@ class vector_dist_comm
 	 */
 	template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp(openfpm::vector<prop> & v_prp, size_t & g_m)
 	{
+		n_recv_ele.resize(recv_mem_gg.size());
+
 		// Mark the ghost part
 		g_m = v_prp.size();
 
@@ -575,14 +647,74 @@ class vector_dist_comm
 
 			v2.setMemory(*ptr1);
 
-			// resize with the number of elements
+			// resize with the number of elements and store the number
+			// or received elements
 			v2.resize(n_ele);
+			n_recv_ele.get(i) = n_ele;
 
 			// Add the ghost particle
 			v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2);
 		}
 	}
 
+
+	/*! \brief This function process the received data from ghost put
+	 *
+	 * \tparam op operation to do
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 * \param v_prp vector of particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<template<typename,typename> class op, typename send_vector, typename prp_object, int ... prp> void process_received_put_ghost_prp(openfpm::vector<prop> & v_prp, size_t g_m)
+	{
+		// Process the received data (recv_mem_gg != 0 if you have data)
+		for (size_t i = 0; i <  recv_sz_put.size(); i++)
+		{
+			// calculate the number of received elements
+			size_t n_ele = recv_sz_put.get(i) / sizeof(prp_object);
+
+			// add the received particles to the vector
+			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz_put.get(i));
+
+			// create vector representation to a piece of memory already allocated
+			openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity> v2;
+
+			v2.setMemory(*ptr1);
+
+			// resize with the number of elements
+			v2.resize(n_ele);
+
+			// Add the ghost particle
+			v_prp.template merge_prp<op,prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2,opart.get(i));
+		}
+
+		// process also the local replicated particles
+
+		size_t i2 = 0;
+
+#ifdef SE_CLASS1
+
+		if (v_prp.size() - lg_m != o_part_loc.size())
+			std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " v_prp.size() - lg_m = " << v_prp.size() - lg_m << " != " << o_part_loc.size() << std::endl;
+
+#endif
+
+		for (size_t i = lg_m ; i < v_prp.size() ; i++)
+		{
+			auto dst = v_prp.get(o_part_loc.template get<0>(i2));
+			auto src = v_prp.get(i);
+			copy_cpu_encap_encap_op_prp<op,decltype(v_prp.get(0)),decltype(v_prp.get(0)),prp...> cp(src,dst);
+
+			boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(cp);
+
+			i2++;
+		}
+	}
+
 	/*! \brief This function process the received data for the properties and populate the ghost
 	 *
 	 * \param v_pos vector of particle positions
@@ -755,6 +887,26 @@ class vector_dist_comm
 		}
 	}
 
+	/*! \brief Calculate send buffers total size and allocation
+	 *
+	 * \tparam prp_object object containing only the properties to send
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector of particle properties
+	 * \param size_byte_prp total size for the property buffer
+	 * \param size_byte_pos total size for the position buffer
+	 *
+	 */
+	template<typename prp_object> void calc_send_ghost_put_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & size_byte_prp, size_t & size_byte_pos)
+	{
+		// Calculate the total size required for the sending buffer
+		for (size_t i = 0; i < recv_sz.size(); i++)
+		{
+			size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity>::calculateMem(n_recv_ele.get(i), 0);
+			size_byte_prp += alloc_ele;
+		}
+	}
+
 	/*! \brief Label the particles
 	 *
 	 * It count the number of particle to send to each processors and save its ids
@@ -805,6 +957,38 @@ class vector_dist_comm
 		}
 	}
 
+	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
+	 *
+	 * \param msg_i message size required to receive from i
+	 * \param total_msg message size to receive from all the processors
+	 * \param total_p the total number of processor want to communicate with you
+	 * \param i processor id
+	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
+	 *           every time message_alloc is called)
+	 * \param ptr void pointer parameter for additional data to pass to the call-back
+	 *
+	 */
+	static void * msg_alloc_ghost_put(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
+	{
+		vector_dist_comm<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr);
+
+		v->recv_sz_put.resize(v->dec.getNNProcessors());
+		v->recv_mem_gg.resize(v->dec.getNNProcessors());
+		v->prc_recv_put.resize(v->dec.getNNProcessors());
+
+		// Get the local processor id
+		size_t lc_id = v->dec.ProctoID(i);
+
+		// resize the receive buffer
+		v->recv_mem_gg.get(lc_id).resize(msg_i);
+		v->recv_sz_put.get(lc_id) = msg_i;
+
+		// save the processor id
+		v->prc_recv_put.get(lc_id) = i;
+
+		return v->recv_mem_gg.get(lc_id).getPointer();
+	}
+
 	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
 	 *
 	 * \param msg_i message size required to receive from i
@@ -822,6 +1006,7 @@ class vector_dist_comm
 
 		v->recv_sz.resize(v->dec.getNNProcessors());
 		v->recv_mem_gg.resize(v->dec.getNNProcessors());
+		v->prc_recv.resize(v->dec.getNNProcessors());
 
 		// Get the local processor id
 		size_t lc_id = v->dec.ProctoID(i);
@@ -830,10 +1015,12 @@ class vector_dist_comm
 		v->recv_mem_gg.get(lc_id).resize(msg_i);
 		v->recv_sz.get(lc_id) = msg_i;
 
+		// save the processor id
+		v->prc_recv.get(lc_id) = i;
+
 		return v->recv_mem_gg.get(lc_id).getPointer();
 	}
 
-
 	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
 	 *
 	 * \param msg_i size required to receive the message from i
@@ -941,7 +1128,7 @@ public:
 	template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = WITH_POSITION)
 	{
 		// Unload receive buffer
-		for (size_t i = 0 ; i < recv_mem_gg.size() ; i++)
+		for (size_t i = 0 ; i < recv_sz.size() ; i++)
 			recv_sz.get(i) = 0;
 
 		// Sending property object
@@ -1192,6 +1379,8 @@ public:
 	 *
 	 * \param vc vector to copy
 	 *
+	 * \return iteself
+	 *
 	 */
 	vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory> & vc)
 	{
@@ -1204,6 +1393,8 @@ public:
 	 *
 	 * \param vc vector to copy
 	 *
+	 * \return itself
+	 *
 	 */
 	vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory> && vc)
 	{
@@ -1211,6 +1402,48 @@ public:
 
 		return *this;
 	}
+
+	/*! \brief Ghost put
+	 *
+	 * \tparam op operation to apply
+	 * \tparam prp set of properties
+	 *
+	 * \param v_pos vector of particle positions
+	 * \param v_prp vector od particle properties
+	 * \param g_m ghost marker
+	 *
+	 */
+	template<template<typename,typename> class op, int ... prp> void ghost_put_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m)
+	{
+		// Sending property object
+		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
+
+		// send vector for each processor
+		typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity> send_vector;
+
+		// Calculate memory and allocation for the send buffers
+
+		// Total size
+		size_t size_byte_prp = 0;
+		size_t size_byte_pos = 0;
+
+		calc_send_ghost_put_buf<prp_object>(v_pos,v_prp,size_byte_prp, size_byte_pos);
+
+		// Create memory for the send buffer
+
+		g_prp_mem.resize(size_byte_prp);
+
+		// Create and fill send buffer for particle properties
+
+		ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(size_byte_prp, g_prp_mem);
+
+		openfpm::vector<send_vector> g_send_prp;
+		fill_send_ghost_put_prp_buf<send_vector, prp_object, prp...>(v_prp,g_send_prp, prAlloc_prp,g_m);
+
+		// Send/receive the particle properties information
+		v_cl.sendrecvMultipleMessagesNBX(prc_recv, g_send_prp, msg_alloc_ghost_put, this);
+		process_received_put_ghost_prp<op,send_vector, prp_object, prp...>(v_prp,g_m);
+	}
 };
 
 
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index f3bb9436..c9448216 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -1573,6 +1573,124 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_with_ghost_buffering )
 	}
 }
 
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_ghost_put )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	long int k = 25*25*25*create_vcluster().getProcessingUnits();
+	k = std::pow(k, 1/3.);
+
+	if (v_cl.getProcessingUnits() > 48)
+		return;
+
+	print_test("Testing 3D periodic vector with ghost buffering k=",k);
+	BOOST_TEST_CHECKPOINT( "Testing 3D periodic with ghost buffering k=" << k );
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	// 3D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// ghost
+		Ghost<3,float> ghost(1.3/(k));
+
+		typedef  aggregate<float> part_prop;
+
+		// Distributed vector
+		vector_dist<3,float, part_prop > vd(0,box,bc,ghost);
+
+		auto it = vd.getGridIterator({k,k,k});
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.add();
+
+			vd.getLastPos()[0] = key.get(0)*it.getSpacing(0);
+			vd.getLastPos()[1] = key.get(1)*it.getSpacing(1);
+			vd.getLastPos()[2] = key.get(2)*it.getSpacing(2);
+
+			// Fill some properties randomly
+
+			vd.getLastProp<0>() = 0.0;
+
+			++it;
+		}
+
+		vd.map();
+
+		// sync the ghost
+		vd.ghost_get<0>();
+
+		auto NN = vd.getCellList(1.3/k);
+		float a = 1.0f*k*k;
+
+		// run trough all the particles + ghost
+
+		auto it2 = vd.getDomainIterator();
+
+		while (it2.isNext())
+		{
+			// particle p
+			auto p = it2.get();
+			Point<3,float> xp = vd.getPos(p);
+
+			// Get an iterator over the neighborhood particles of p
+			auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(vd.getPos(p)));
+
+			// For each neighborhood particle ...
+			while (Np.isNext())
+			{
+				auto q = Np.get();
+				Point<3,float> xq = vd.getPos(q);
+
+				float dist = xp.distance(xq);
+
+				if (dist < 0.05)
+					vd.getProp<0>(q) += a*(-dist*dist+1.0/k/k);
+
+				++Np;
+			}
+
+			++it2;
+		}
+
+		vd.ghost_put<add_,0>();
+
+		bool ret = true;
+		auto it3 = vd.getDomainIterator();
+
+		float constant = vd.getProp<0>(it3.get());
+		float eps = 0.001;
+
+		while (it3.isNext())
+		{
+			float constant2 = vd.getProp<0>(it3.get());
+			if (fabs((constant - constant2)/constant > eps))
+			{
+				std::cout << Point<3,float>(vd.getPos(it3.get())).toString() << "    " <<  constant2 << "    " << v_cl.getProcessUnitID() << std::endl;
+				ret = false;
+				break;
+			}
+
+			++it3;
+		}
+
+
+		BOOST_REQUIRE_EQUAL(ret,true);
+	}
+}
+
 #include "vector_dist_cell_list_tests.hpp"
 #include "vector_dist_NN_tests.hpp"
 
-- 
GitLab