From 03378f4dc59bfda145fdac64610b1bbfb95924e5 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Tue, 26 Dec 2017 17:05:40 +0100
Subject: [PATCH] Verlet-list with smaller memory

---
 CHANGELOG.md                               |   7 +
 openfpm_data                               |   2 +-
 openfpm_numerics                           |   2 +-
 src/Vector/vector_dist.hpp                 | 152 +++++++++++------
 src/Vector/vector_dist_NN_tests.hpp        |  34 ++--
 src/Vector/vector_dist_cell_list_tests.hpp | 188 +++++++++++++--------
 src/Vector/vector_dist_comm.hpp            |   6 +-
 7 files changed, 251 insertions(+), 140 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62f1c56e6..63163df78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ All notable changes to this project will be documented in this file.
 ### Added
 
 - Interface for Multi-vector dynamic load balancing
+- Added Verlet List with balanced-memory (Mem_bal) and memory-wise (Mem_mw) forms
+- Increased performance for grid ghost get
+- Introduced forms to increase the performance of the grid iterator in case of stencil code (see example 5_GrayScott)
 
 ### Fixed
 
@@ -13,6 +16,10 @@ All notable changes to this project will be documented in this file.
 - 2D Fixing IO in binary for vector
 - 1D Fixing grid writer in ASCII mode
 
+### Changed
+
+- VerletList<3, double, FAST, shift<3, double> > is now VerletList<3, double, Mem_fast<>, shift<3, double> >
+
 ## [1.0.0] 13 September 2017
 
 ### Added
diff --git a/openfpm_data b/openfpm_data
index c0ea16143..fe9c2ec10 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit c0ea161438f849f70cab9c8763012b515613ac83
+Subproject commit fe9c2ec101e72556238a6749c8e677be2d3d901a
diff --git a/openfpm_numerics b/openfpm_numerics
index 7b9aff6de..9910a6ed2 160000
--- a/openfpm_numerics
+++ b/openfpm_numerics
@@ -1 +1 @@
-Subproject commit 7b9aff6deec78f5db1be1035a7c253927ea477bf
+Subproject commit 9910a6ed26129214a85cab8b74d40caf900827e9
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index f2517621c..283d2f53a 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -123,13 +123,27 @@ struct gcl<dim,St,CellL,Vector,GCL_SYMMETRIC>
 };
 
 #define CELL_MEMFAST(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> >
-#define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal, shift<dim, St> >
-#define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw, shift<dim, St> >
+#define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal<>, shift<dim, St> >
+#define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw<>, shift<dim, St> >
 
 #define CELL_MEMFAST_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> >
-#define CELL_MEMBAL_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_bal, shift<dim, St> >
-#define CELL_MEMMW_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_mw, shift<dim, St> >
+#define CELL_MEMBAL_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_bal<>, shift<dim, St> >
+#define CELL_MEMMW_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_mw<>, shift<dim, St> >
 
+#define VERLET_MEMFAST(dim,St) VerletList<dim,St,Mem_fast<>,shift<dim,St> >
+#define VERLET_MEMBAL(dim,St)  VerletList<dim,St,Mem_bal<>,shift<dim,St> >
+#define VERLET_MEMMW(dim,St)   VerletList<dim,St,Mem_mw<>,shift<dim,St> >
+
+#define VERLET_MEMFAST_INT(dim,St) VerletList<dim,St,Mem_fast<unsigned int>,shift<dim,St> >
+#define VERLET_MEMBAL_INT(dim,St)  VerletList<dim,St,Mem_bal<unsigned int>,shift<dim,St> >
+#define VERLET_MEMMW_INT(dim,St)   VerletList<dim,St,Mem_mw<unsigned int>,shift<dim,St> >
+
+enum reorder_opt // cell visiting order selected in vector_dist::reorder()
+{
+	NO_REORDER = 0, //!< reorder() is meant to leave the particle order untouched
+	HILBERT = 1, //!< cells are visited with grid_key_dx_iterator_hilbert (default of reorder())
+	LINEAR = 2 //!< cells are visited with grid_key_dx_iterator
+};
 
 /*! \brief Distributed vector
  *
@@ -260,6 +274,51 @@ private:
 		}
 	}
 
+	/*! \brief Copy the particles into new vectors following the cell order given by a space filling curve iterator
+	 *
+	 * \param v_pos_dest reordered vector of position (resized to v_pos.size() and filled)
+	 * \param v_prp_dest reordered vector of properties (resized to v_prp.size() and filled)
+	 * \param h_it iterator producing the cell keys in the order the cells must be visited
+	 * \param cell_list cell-list from where we take the particles of each cell
+	 *
+	 */
+	template<typename CellL, typename sfc_it>
+	void reorder_sfc(openfpm::vector<Point<dim,St>> & v_pos_dest,
+						 openfpm::vector<prop> & v_prp_dest,
+						 sfc_it & h_it,
+						 CellL & cell_list)
+	{
+		v_pos_dest.resize(v_pos.size());
+		v_prp_dest.resize(v_prp.size());
+
+		// Next free position in v_pos_dest and v_prp_dest
+		size_t count = 0;
+
+		grid_key_dx<dim> ksum;
+
+		for (size_t i = 0; i < dim ; i++)
+		{ksum.set_d(i,cell_list.getPadding(i));}
+
+		while (h_it.isNext())
+		{
+		  auto key = h_it.get();
+		  key += ksum; // shift the key by the cell-list padding
+
+		  size_t lin = cell_list.getGrid().LinId(key);
+
+		  // for each particle in the Cell "lin"
+		  for (size_t i = 0; i < cell_list.getNelements(lin); i++)
+		  {
+			  // copy particle v into the next free position of the destination vectors
+			  auto v = cell_list.get(lin,i);
+			  v_pos_dest.get(count) = v_pos.get(v);
+			  v_prp_dest.get(count) = v_prp.get(v);
+
+			  count++;
+		  }
+		  ++h_it;
+		}
+	}
 
 public:
 
@@ -1159,13 +1218,14 @@ public:
 	 * \return the verlet list
 	 *
 	 */
-	VerletList<dim,St,Mem_fast<>,shift<dim,St> > getVerletSym(St r_cut)
+	template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >>
+	VerletL getVerletSym(St r_cut)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
 #endif
 
-		VerletList<dim,St,Mem_fast<>,shift<dim,St>> ver;
+		VerletL ver;
 
 		// Processor bounding box
 		Box<dim, St> pbox = getDecomposition().getProcessorBounds();
@@ -1184,7 +1244,8 @@ public:
 	 * \return the verlet list
 	 *
 	 */
-	VerletList<dim,St,Mem_fast<>,shift<dim,St> > getVerletCrs(St r_cut)
+	template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >>
+	VerletL getVerletCrs(St r_cut)
 	{
 #ifdef SE_CLASS1
 		if (!(opt & BIND_DEC_TO_GHOST))
@@ -1198,7 +1259,7 @@ public:
 		se3.getNN();
 #endif
 
-		VerletList<dim,St,Mem_fast<>,shift<dim,St>> ver;
+		VerletL ver;
 
 		// Processor bounding box
 		Box<dim, St> pbox = getDecomposition().getProcessorBounds();
@@ -1236,13 +1297,14 @@ public:
 	 * \return a VerletList object
 	 *
 	 */
-	VerletList<dim,St,Mem_fast<>,shift<dim,St> > getVerlet(St r_cut)
+	template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >>
+	VerletL getVerlet(St r_cut)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
 #endif
 
-		VerletList<dim,St,Mem_fast<>,shift<dim,St>> ver;
+		VerletL ver;
 
 		// get the processor bounding box
 		Box<dim, St> bt = getDecomposition().getProcessorBounds();
@@ -1269,7 +1331,7 @@ public:
 	 * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC or VL_CRS_SYMMETRIC
 	 *
 	 */
-	void updateVerlet(VerletList<dim,St,Mem_fast<>,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC)
+	template<typename Mem_type> void updateVerlet(VerletList<dim,St,Mem_type,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -1287,9 +1349,9 @@ public:
 				ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt);
 			else
 			{
-				VerletList<dim,St,Mem_fast<>,shift<dim,St> > ver_tmp;
+				VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
 
-				ver_tmp = getVerlet(r_cut);
+				ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
 				ver.swap(ver);
 			}
 		}
@@ -1328,9 +1390,9 @@ public:
 			}
 			else
 			{
-				VerletList<dim,St,Mem_fast<>,shift<dim,St> > ver_tmp;
+				VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
 
-				ver_tmp = getVerletCrs(r_cut);
+				ver_tmp = getVerletCrs<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
 				ver.swap(ver_tmp);
 			}
 		}
@@ -1346,9 +1408,9 @@ public:
 				ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt);
 			else
 			{
-				VerletList<dim,St,Mem_fast<>,shift<dim,St> > ver_tmp;
+				VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
 
-				ver_tmp = getVerlet(r_cut);
+				ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
 				ver.swap(ver_tmp);
 			}
 		}
@@ -1362,9 +1424,10 @@ public:
 	 * \param m an order of a hilbert curve
 	 *
 	 */
-	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<>,shift<dim,St> > > void reorder (int32_t m)
+	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > >
+	void reorder (int32_t m, reorder_opt opt = reorder_opt::HILBERT)
 	{
-		reorder(m,getDecomposition().getGhost());
+		reorder<CellL>(m,getDecomposition().getGhost(),opt);
 	}
 
 
@@ -1380,7 +1443,8 @@ public:
 	 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
 	 *
 	 */
-	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<>,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge)
+	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > >
+	void reorder(int32_t m, const Ghost<dim,St> & enlarge, reorder_opt opt = reorder_opt::HILBERT)
 	{
 		// reset the ghost part
 		v_pos.resize(g_m);
@@ -1428,38 +1492,24 @@ public:
 		openfpm::vector<Point<dim,St>> v_pos_dest;
 		openfpm::vector<prop> v_prp_dest;
 
-		v_pos_dest.resize(v_pos.size());
-		v_prp_dest.resize(v_prp.size());
-
-		//hilberts curve iterator
-		grid_key_dx_iterator_hilbert<dim> h_it(m);
-
-		//Index for v_pos_dest
-		size_t count = 0;
-
-		grid_key_dx<dim> ksum;
-
-		for (size_t i = 0; i < dim ; i++)
-			ksum.set_d(i,cell_list.getPadding(i));
-
-		while (h_it.isNext())
+		if (opt == reorder_opt::HILBERT)
 		{
-		  auto key = h_it.get();
-		  key += ksum;
+			grid_key_dx_iterator_hilbert<dim> h_it(m);
 
-		  size_t lin = cell_list.getGrid().LinId(key);
-
-		  // for each particle in the Cell "lin"
-		  for (size_t i = 0; i < cell_list.getNelements(lin); i++)
-		  {
-			  //reorder
-			  auto v = cell_list.get(lin,i);
-			  v_pos_dest.get(count) = v_pos.get(v);
-			  v_prp_dest.get(count) = v_prp.get(v);
+			reorder_sfc<CellL,grid_key_dx_iterator_hilbert<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list);
+		}
+		else if (opt == reorder_opt::LINEAR)
+		{
+			grid_sm<dim,void> gs(div);
+			grid_key_dx_iterator<dim> h_it(gs);
 
-			  count++;
-		  }
-		  ++h_it;
+			reorder_sfc<CellL,grid_key_dx_iterator<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list);
+		}
+		else
+		{
+			// We do nothing: the second swap below nullifies this one
+			v_pos.swap(v_pos_dest);
+			v_prp.swap(v_prp_dest);
 		}
 
 		v_pos.swap(v_pos_dest);
@@ -2166,7 +2216,7 @@ public:
 	 * \return Particle iterator
 	 *
 	 */
-	template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::local_index_t> getParticleIteratorCRS(vrl & NN)
+	template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::loc_index> getParticleIteratorCRS(vrl & NN)
 	{
 #ifdef SE_CLASS1
 		if (!(opt & BIND_DEC_TO_GHOST))
@@ -2177,7 +2227,7 @@ public:
 #endif
 
 		// First we check that
-		return openfpm::vector_key_iterator_seq<typename vrl::local_index_t>(NN.getParticleSeq());
+		return openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::loc_index>(NN.getParticleSeq());
 	}
 
 	/*! \brief Return from which cell we have to start in case of CRS interation
diff --git a/src/Vector/vector_dist_NN_tests.hpp b/src/Vector/vector_dist_NN_tests.hpp
index 0b38a0e7a..fcf6f3b75 100644
--- a/src/Vector/vector_dist_NN_tests.hpp
+++ b/src/Vector/vector_dist_NN_tests.hpp
@@ -8,8 +8,8 @@
 #ifndef SRC_VECTOR_VECTOR_DIST_NN_TESTS_HPP_
 #define SRC_VECTOR_VECTOR_DIST_NN_TESTS_HPP_
 
-
-BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
+template<typename VerletList>
+void test_full_nn(long int k)
 {
 	Vcluster & v_cl = create_vcluster();
 
@@ -22,12 +22,6 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
     std::default_random_engine eg;
     std::uniform_real_distribution<float> ud(0.0f, 1.0f);
 
-#ifdef TEST_COVERAGE_MODE
-    long int k = 50 * v_cl.getProcessingUnits();
-#else
-    long int k = 750 * v_cl.getProcessingUnits();
-#endif
-
 	long int big_step = k / 4;
 	big_step = (big_step == 0)?1:big_step;
 
@@ -134,14 +128,14 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
 
 		///////////////////////////////////
 
-		auto NNv = vd.getVerlet(r_cut*1.0001);
+		auto NNv = vd.template getVerlet<VerletList>(r_cut*1.0001);
 
 		it = vd.getDomainIterator();
 
 		while (it.isNext())
 		{
 			Point<3,float> xp = vd.getPos(it.get());
-			auto Np = NNv.getNNIterator<NO_CHECK>(it.get().getKey());
+			auto Np = NNv.template getNNIterator<NO_CHECK>(it.get().getKey());
 
 			list_idx2.get(it.get().getKey()).clear();
 
@@ -185,7 +179,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
 		while (it.isNext())
 		{
 			Point<3,float> xp = vd.getPos(it.get());
-			auto Np = NNv.getNNIterator<NO_CHECK>(it.get().getKey());
+			auto Np = NNv.template getNNIterator<NO_CHECK>(it.get().getKey());
 
 			list_idx2.get(it.get().getKey()).clear();
 
@@ -221,6 +215,24 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
 	}
 }
 
+BOOST_AUTO_TEST_CASE( vector_dist_full_NN )
+{
+	auto & v_cl = create_vcluster();
+
+#ifdef TEST_COVERAGE_MODE
+    long int k = 50 * v_cl.getProcessingUnits();
+#else
+    long int k = 750 * v_cl.getProcessingUnits();
+#endif
+
+	test_full_nn<VERLET_MEMFAST(3,float)>(k);
+
+	k /= 2;
+	test_full_nn<VERLET_MEMBAL(3,float)>(k);
+	k /= 2;
+	test_full_nn<VERLET_MEMMW(3,float)>(k);
+}
+
 BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration )
 {
 	Vcluster & v_cl = create_vcluster();
diff --git a/src/Vector/vector_dist_cell_list_tests.hpp b/src/Vector/vector_dist_cell_list_tests.hpp
index 389660c11..1d643803e 100644
--- a/src/Vector/vector_dist_cell_list_tests.hpp
+++ b/src/Vector/vector_dist_cell_list_tests.hpp
@@ -13,7 +13,7 @@
 
 ///////////////////////// test hilb ///////////////////////////////
 
-BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
+void test_reorder_sfc(reorder_opt opt)
 {
 	Vcluster & v_cl = create_vcluster();
 
@@ -35,12 +35,12 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
 	long int big_step = k / 4;
 	big_step = (big_step == 0)?1:big_step;
 
-	print_test_v( "Testing 2D vector with hilbert curve reordering k<=",k);
+	print_test_v( "Testing 2D vector with sfc curve reordering k<=",k);
 
 	// 2D test
 	for ( ; k >= 2 ; k-= decrement(k,big_step) )
 	{
-		BOOST_TEST_CHECKPOINT( "Testing 2D vector with hilbert curve reordering k=" << k );
+		BOOST_TEST_CHECKPOINT( "Testing 2D vector with sfc curve reordering k=" << k );
 
 		Box<2,float> box({0.0,0.0},{1.0,1.0});
 
@@ -73,7 +73,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
 		int32_t m = 6;
 
 		//Reorder a vector
-		vd.reorder(m);
+		vd.reorder(m,opt);
 
 		// Create second cell list
 		auto NN2 = vd.getCellList(0.01,true);
@@ -89,6 +89,12 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
 	}
 }
 
+BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test )
+{
+	test_reorder_sfc(reorder_opt::HILBERT);
+	test_reorder_sfc(reorder_opt::LINEAR);
+}
+
 BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test )
 {
 	Vcluster & v_cl = create_vcluster();
@@ -776,7 +782,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_cell_list )
 	BOOST_REQUIRE_EQUAL(ret,true);
 }
 
-BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
+template<typename VerletList>
+void test_vd_symmetric_verlet_list()
 {
 	Vcluster & v_cl = create_vcluster();
 
@@ -839,9 +846,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 
 		// Fill some properties randomly
 
-		vd.getPropWrite<0>(key) = 0;
-		vd.getPropWrite<1>(key) = 0;
-		vd.getPropWrite<2>(key) = key.getKey() + start;
+		vd.template getPropWrite<0>(key) = 0;
+		vd.template getPropWrite<1>(key) = 0;
+		vd.template getPropWrite<2>(key) = key.getKey() + start;
 
 		++it;
 	}
@@ -849,9 +856,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 	vd.map();
 
 	// sync the ghost
-	vd.ghost_get<0,2>();
+	vd.template ghost_get<0,2>();
 
-	auto NN = vd.getVerlet(r_cut);
+	auto NN = vd.template getVerlet<VerletList>(r_cut);
 	auto p_it = vd.getDomainIterator();
 
 	while (p_it.isNext())
@@ -883,10 +890,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 
 			if (distance < r_cut )
 			{
-				vd.getPropWrite<0>(p)++;
-				vd.getPropWrite<3>(p).add();
-				vd.getPropWrite<3>(p).last().xq = xq;
-				vd.getPropWrite<3>(p).last().id = vd.getPropRead<2>(q);
+				vd.template getPropWrite<0>(p)++;
+				vd.template getPropWrite<3>(p).add();
+				vd.template getPropWrite<3>(p).last().xq = xq;
+				vd.template getPropWrite<3>(p).last().id = vd.template getPropRead<2>(q);
 			}
 
 			++Np;
@@ -897,7 +904,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 
 	// We now try symmetric  Cell-list
 
-	auto NN2 = vd.getVerletSym(r_cut);
+	auto NN2 = vd.template getVerletSym<VerletList>(r_cut);
 
 	auto p_it2 = vd.getDomainIterator();
 
@@ -907,7 +914,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 
 		Point<3,float> xp = vd.getPosRead(p);
 
-		auto Np = NN2.getNNIterator<NO_CHECK>(p.getKey());
+		auto Np = NN2.template getNNIterator<NO_CHECK>(p.getKey());
 
 		while (Np.isNext())
 		{
@@ -930,16 +937,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 
 			if (distance < r_cut )
 			{
-				vd.getPropWrite<1>(p)++;
-				vd.getPropWrite<1>(q)++;
+				vd.template getPropWrite<1>(p)++;
+				vd.template getPropWrite<1>(q)++;
 
-				vd.getPropWrite<4>(p).add();
-				vd.getPropWrite<4>(q).add();
+				vd.template getPropWrite<4>(p).add();
+				vd.template getPropWrite<4>(q).add();
 
-				vd.getPropWrite<4>(p).last().xq = xq;
-				vd.getPropWrite<4>(q).last().xq = xp;
-				vd.getPropWrite<4>(p).last().id = vd.getPropRead<2>(q);
-				vd.getPropWrite<4>(q).last().id = vd.getPropRead<2>(p);
+				vd.template getPropWrite<4>(p).last().xq = xq;
+				vd.template getPropWrite<4>(q).last().xq = xp;
+				vd.template getPropWrite<4>(p).last().id = vd.template getPropRead<2>(q);
+				vd.template getPropWrite<4>(q).last().id = vd.template getPropRead<2>(p);
 			}
 
 			++Np;
@@ -948,8 +955,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 		++p_it2;
 	}
 
-	vd.ghost_put<add_,1>();
-	vd.ghost_put<merge_,4>();
+	vd.template ghost_put<add_,1>();
+	vd.template ghost_put<merge_,4>();
 
 	auto p_it3 = vd.getDomainIterator();
 
@@ -958,15 +965,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 	{
 		auto p = p_it3.get();
 
-		ret &= vd.getPropRead<1>(p) == vd.getPropRead<0>(p);
+		ret &= vd.template getPropRead<1>(p) == vd.template getPropRead<0>(p);
 
-		vd.getPropWrite<3>(p).sort();
-		vd.getPropWrite<4>(p).sort();
+		vd.template getPropWrite<3>(p).sort();
+		vd.template getPropWrite<4>(p).sort();
 
-		ret &= vd.getPropRead<3>(p).size() == vd.getPropRead<4>(p).size();
+		ret &= vd.template getPropRead<3>(p).size() == vd.template getPropRead<4>(p).size();
 
-		for (size_t i = 0 ; i < vd.getPropRead<3>(p).size() ; i++)
-			ret &= vd.getPropRead<3>(p).get(i).id == vd.getPropRead<4>(p).get(i).id;
+		for (size_t i = 0 ; i < vd.template getPropRead<3>(p).size() ; i++)
+			ret &= vd.template getPropRead<3>(p).get(i).id == vd.template getPropRead<4>(p).get(i).id;
 
 		if (ret == false)
 			break;
@@ -977,7 +984,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
 	BOOST_REQUIRE_EQUAL(ret,true);
 }
 
-BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
+BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list )
+{
+	test_vd_symmetric_verlet_list<VERLET_MEMFAST(3,float)>();
+	test_vd_symmetric_verlet_list<VERLET_MEMBAL(3,float)>();
+	test_vd_symmetric_verlet_list<VERLET_MEMMW(3,float)>();
+}
+
+template<typename VerletList>
+void vector_sym_verlet_list_nb()
 {
 	Vcluster & v_cl = create_vcluster();
 
@@ -1051,13 +1066,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 
 			// Fill some properties randomly
 
-			vd.getPropWrite<0>(key) = 0;
-			vd.getPropWrite<1>(key) = 0;
-			vd.getPropWrite<2>(key) = key.getKey() + start;
+			vd.template getPropWrite<0>(key) = 0;
+			vd.template getPropWrite<1>(key) = 0;
+			vd.template getPropWrite<2>(key) = key.getKey() + start;
 
-			vd2.getPropWrite<0>(key) = 0;
-			vd2.getPropWrite<1>(key) = 0;
-			vd2.getPropWrite<2>(key) = key.getKey() + start;
+			vd2.template getPropWrite<0>(key) = 0;
+			vd2.template getPropWrite<1>(key) = 0;
+			vd2.template getPropWrite<2>(key) = key.getKey() + start;
 
 			++it;
 		}
@@ -1066,10 +1081,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 		vd2.map();
 
 		// sync the ghost
-		vd.ghost_get<0,2>();
-		vd2.ghost_get<0,2>();
+		vd.template ghost_get<0,2>();
+		vd2.template ghost_get<0,2>();
 
-		auto NN = vd.getVerlet(r_cut);
+		auto NN = vd.template getVerlet<VerletList>(r_cut);
 		auto p_it = vd.getDomainIterator();
 
 		while (p_it.isNext())
@@ -1101,10 +1116,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 
 				if (distance < r_cut )
 				{
-					vd.getPropWrite<0>(p)++;
-					vd.getPropWrite<3>(p).add();
-					vd.getPropWrite<3>(p).last().xq = xq;
-					vd.getPropWrite<3>(p).last().id = vd.getPropRead<2>(q);
+					vd.template getPropWrite<0>(p)++;
+					vd.template getPropWrite<3>(p).add();
+					vd.template getPropWrite<3>(p).last().xq = xq;
+					vd.template getPropWrite<3>(p).last().id = vd.template getPropRead<2>(q);
 				}
 
 				++Np;
@@ -1115,7 +1130,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 
 		// We now try symmetric  Cell-list
 
-		auto NN2 = vd2.getVerletSym(r_cut);
+		auto NN2 = vd2.template getVerletSym<VerletList>(r_cut);
 
 		auto p_it2 = vd2.getDomainIterator();
 
@@ -1125,7 +1140,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 
 			Point<3,float> xp = vd2.getPosRead(p);
 
-			auto Np = NN2.getNNIterator<NO_CHECK>(p.getKey());
+			auto Np = NN2.template getNNIterator<NO_CHECK>(p.getKey());
 
 			while (Np.isNext())
 			{
@@ -1148,16 +1163,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 
 				if (distance < r_cut )
 				{
-					vd2.getPropWrite<1>(p)++;
-					vd2.getPropWrite<1>(q)++;
+					vd2.template getPropWrite<1>(p)++;
+					vd2.template getPropWrite<1>(q)++;
 
-					vd2.getPropWrite<4>(p).add();
-					vd2.getPropWrite<4>(q).add();
+					vd2.template getPropWrite<4>(p).add();
+					vd2.template getPropWrite<4>(q).add();
 
-					vd2.getPropWrite<4>(p).last().xq = xq;
-					vd2.getPropWrite<4>(q).last().xq = xp;
-					vd2.getPropWrite<4>(p).last().id = vd2.getPropRead<2>(q);
-					vd2.getPropWrite<4>(q).last().id = vd2.getPropRead<2>(p);
+					vd2.template getPropWrite<4>(p).last().xq = xq;
+					vd2.template getPropWrite<4>(q).last().xq = xp;
+					vd2.template getPropWrite<4>(p).last().id = vd2.template getPropRead<2>(q);
+					vd2.template getPropWrite<4>(q).last().id = vd2.template getPropRead<2>(p);
 				}
 
 				++Np;
@@ -1167,8 +1182,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 			++p_it2;
 		}
 
-		vd2.ghost_put<add_,1>();
-		vd2.ghost_put<merge_,4>();
+		vd2.template ghost_put<add_,1>();
+		vd2.template ghost_put<merge_,4>();
 
 #ifdef SE_CLASS3
 		vd2.getDomainIterator();
@@ -1181,16 +1196,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 		{
 			auto p = p_it3.get();
 
-			ret &= vd2.getPropRead<1>(p) == vd.getPropRead<0>(p);
+			ret &= vd2.template getPropRead<1>(p) == vd.template getPropRead<0>(p);
 
+			vd.template getPropWrite<3>(p).sort();
+			vd2.template getPropWrite<4>(p).sort();
 
-			vd.getPropWrite<3>(p).sort();
-			vd2.getPropWrite<4>(p).sort();
+			ret &= vd.template getPropRead<3>(p).size() == vd2.template getPropRead<4>(p).size();
 
-			ret &= vd.getPropRead<3>(p).size() == vd2.getPropRead<4>(p).size();
-
-			for (size_t i = 0 ; i < vd.getPropRead<3>(p).size() ; i++)
-				ret &= vd.getPropRead<3>(p).get(i).id == vd2.getPropRead<4>(p).get(i).id;
+			for (size_t i = 0 ; i < vd.template getPropRead<3>(p).size() ; i++)
+				ret &= vd.template getPropRead<3>(p).get(i).id == vd2.template getPropRead<4>(p).get(i).id;
 
 			if (ret == false)
 				break;
@@ -1202,7 +1216,18 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
 	}
 }
 
-template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > & vd,
+BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom )
+{
+	vector_sym_verlet_list_nb<VERLET_MEMFAST(3,float)>();
+	vector_sym_verlet_list_nb<VERLET_MEMBAL(3,float)>();
+	vector_sym_verlet_list_nb<VERLET_MEMMW(3,float)>();
+
+	vector_sym_verlet_list_nb<VERLET_MEMFAST_INT(3,float)>();
+	vector_sym_verlet_list_nb<VERLET_MEMBAL_INT(3,float)>();
+	vector_sym_verlet_list_nb<VERLET_MEMMW_INT(3,float)>();
+}
+
+template<typename VerletList, typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > & vd,
 		                                        vector_dist<3,float, part_prop > & vd2,
 												std::default_random_engine & eg,
 												std::uniform_real_distribution<float> & ud,
@@ -1243,7 +1268,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop >
 	vd.template ghost_get<0,2>();
 	vd2.template ghost_get<0,2>();
 
-	auto NN = vd.getVerlet(r_cut);
+	auto NN = vd.template getVerlet<VerletList>(r_cut);
 	auto p_it = vd.getDomainIterator();
 
 	while (p_it.isNext())
@@ -1289,7 +1314,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop >
 
 	// We now try symmetric Verlet-list Crs scheme
 
-	auto NN2 = vd2.getVerletCrs(r_cut);
+	auto NN2 = vd2.template getVerletCrs<VerletList>(r_cut);
 
 	// Because iterating across particles in the CSR scheme require a Cell-list
 	auto p_it2 = vd2.getParticleIteratorCRS_Cell(NN2.getInternalCellList());
@@ -1378,7 +1403,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop >
 	BOOST_REQUIRE_EQUAL(ret,true);
 }
 
-
+template<typename VerletList>
 void test_csr_verlet_list()
 {
 	Vcluster & v_cl = create_vcluster();
@@ -1435,9 +1460,10 @@ void test_csr_verlet_list()
 	vector_dist<3,float, part_prop > vd2(k,box,bc,ghost2,BIND_DEC_TO_GHOST);
 	size_t start = vd.init_size_accum(k);
 
-	test_crs_full(vd,vd2,eg,ud,start,r_cut);
+	test_crs_full<VerletList>(vd,vd2,eg,ud,start,r_cut);
 }
 
+template<typename VerletList>
 void test_csr_verlet_list_override()
 {
 	Vcluster & v_cl = create_vcluster();
@@ -1508,20 +1534,25 @@ void test_csr_verlet_list_override()
 	vector_dist<3,float, part_prop > vd2(k,box,bc,ghost2,BIND_DEC_TO_GHOST,gdist2_d);
 	size_t start = vd.init_size_accum(k);
 
-	test_crs_full(vd,vd2,eg,ud,start,r_cut);
+	test_crs_full<VerletList>(vd,vd2,eg,ud,start,r_cut);
 }
 
 BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list )
 {
-	test_csr_verlet_list();
+	test_csr_verlet_list<VERLET_MEMFAST(3,float)>();
+	test_csr_verlet_list<VERLET_MEMBAL(3,float)>();
+	test_csr_verlet_list<VERLET_MEMMW(3,float)>();
 }
 
 BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_dec_override )
 {
-	test_csr_verlet_list_override();
+	test_csr_verlet_list_override<VERLET_MEMFAST(3,float)>();
+	test_csr_verlet_list_override<VERLET_MEMBAL(3,float)>();
+	test_csr_verlet_list_override<VERLET_MEMMW(3,float)>();
 }
 
-BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit )
+template <typename VerletList>
+void test_vd_symmetric_crs_verlet()
 {
 	Vcluster & v_cl = create_vcluster();
 
@@ -1590,7 +1621,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit )
 
 	// We now try symmetric Verlet-list Crs scheme
 
-	auto NN2 = vd.getVerletCrs(r_cut);
+	auto NN2 = vd.template getVerletCrs<VerletList>(r_cut);
 
 	// Because iterating across particles in the CSR scheme require a Cell-list
 	auto p_it2 = vd.getParticleIteratorCRS_Cell(NN2.getInternalCellList());
@@ -1613,6 +1644,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit )
 	BOOST_REQUIRE_EQUAL(ret,true);
 }
 
+BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit )
+{
+	test_vd_symmetric_crs_verlet<VERLET_MEMFAST(3,float)>();
+	test_vd_symmetric_crs_verlet<VERLET_MEMBAL(3,float)>();
+	test_vd_symmetric_crs_verlet<VERLET_MEMMW(3,float)>();
+}
+
 BOOST_AUTO_TEST_CASE( vector_dist_checking_unloaded_processors )
 {
 	Vcluster & v_cl = create_vcluster();
diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp
index 5a666ca24..a9ae0a12b 100644
--- a/src/Vector/vector_dist_comm.hpp
+++ b/src/Vector/vector_dist_comm.hpp
@@ -1231,7 +1231,11 @@ public:
 	 * \param opt options
 	 *
 	 */
-	template<template<typename,typename> class op, int ... prp> void ghost_put_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt)
+	template<template<typename,typename> class op, int ... prp>
+	void ghost_put_(openfpm::vector<Point<dim, St>> & v_pos,
+					openfpm::vector<prop> & v_prp,
+					size_t & g_m,
+					size_t opt)
 	{
 		// Sending property object
 		typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
-- 
GitLab