From 2cbc4947bb12ddc9cbc37dbbfe1434f403c2c51a Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Tue, 22 Sep 2015 17:45:20 +0200
Subject: [PATCH] General Fixing + improve vector test

---
 m4/ax_cuda.m4                           |   2 +-
 openfpm_data                            |   2 +-
 openfpm_vcluster                        |   2 +-
 src/Decomposition/CartDecomposition.hpp |  14 ++
 src/Decomposition/Decomposition.cpp     |   0
 src/Decomposition/ORB.hpp               |   4 +-
 src/Decomposition/common.hpp            |   3 +
 src/Decomposition/ie_ghost.hpp          |   3 -
 src/Grid/grid_dist_id_unit_test.hpp     |  10 +-
 src/Vector/vector_dist.hpp              | 100 +++++++-----
 src/Vector/vector_dist_key.hpp          |  12 ++
 src/Vector/vector_dist_unit_test.hpp    | 209 ++++++++++++++++++------
 12 files changed, 259 insertions(+), 102 deletions(-)
 delete mode 100644 src/Decomposition/Decomposition.cpp

diff --git a/m4/ax_cuda.m4 b/m4/ax_cuda.m4
index 3188f19c..17e2f068 100644
--- a/m4/ax_cuda.m4
+++ b/m4/ax_cuda.m4
@@ -60,7 +60,7 @@ AS_IF([test "x$NVCC_EXIST" = "xno"],[],[
                 [
                  AS_IF([ test -d $CUDA_PATH/lib64 ], [ CUDA_LIBS+="64" ], [])
                  # Be carefull the return code 0 mean true return code 1 mean false
-                 AS_IF([ command -v bumblebee >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ "  ],
+                 AS_IF([ command -v bumblebeed >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ "  ],
                                                              [
                                                                echo "bumblebee, NVIDIA optimus,  not found"
                                                              ])
diff --git a/openfpm_data b/openfpm_data
index 69b3ebb0..2e36a553 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 69b3ebb03b2d5ccffba0d4c9240b472b1ab9d7e2
+Subproject commit 2e36a553e379e6712b62abeffce479b005774c91
diff --git a/openfpm_vcluster b/openfpm_vcluster
index 8f4764f8..54fcd2ae 160000
--- a/openfpm_vcluster
+++ b/openfpm_vcluster
@@ -1 +1 @@
-Subproject commit 8f4764f868276bcbb2b8f88b3221b9a47b845d73
+Subproject commit 54fcd2aeb78744c3b4f085d8f58b86cb8e325e52
diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp
index 5dae8049..7a95df61 100644
--- a/src/Decomposition/CartDecomposition.hpp
+++ b/src/Decomposition/CartDecomposition.hpp
@@ -544,6 +544,20 @@ p1[0]<-----+         +----> p2[0]
 		}
 	}
 
+	/*! \brief The default grid size
+	 *
+	 *  The default grid is always an isotropic grid that adapt with the number of processors,
+	 *  it define in how many cell it will be divided the space for a particular required minimum
+	 *  number of sub-domain
+	 *
+	 */
+	static size_t getDefaultGrid(size_t n_sub)
+	{
+		// Calculate the number of sub-sub-domain on
+		// each dimension
+		return openfpm::math::round_big_2(pow(n_sub,1.0/dim));
+	}
+
 	/*! \brief Given a point return in which processor the particle should go
 	 *
 	 * \return processorID
diff --git a/src/Decomposition/Decomposition.cpp b/src/Decomposition/Decomposition.cpp
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/Decomposition/ORB.hpp b/src/Decomposition/ORB.hpp
index fa524ab3..5fe3a732 100644
--- a/src/Decomposition/ORB.hpp
+++ b/src/Decomposition/ORB.hpp
@@ -225,8 +225,8 @@ class ORB
 		local_cm<dir>(start - n_node);
 
 		// reduce the local cm and cm_cnt
-		v_cl.reduce(cm);
-		v_cl.reduce(cm_cnt);
+		v_cl.sum(cm);
+		v_cl.sum(cm_cnt);
 		v_cl.execute();
 
 		// set the CM for the previous leaf (they are not anymore leaf)
diff --git a/src/Decomposition/common.hpp b/src/Decomposition/common.hpp
index 403820e7..94f8b5f5 100644
--- a/src/Decomposition/common.hpp
+++ b/src/Decomposition/common.hpp
@@ -8,6 +8,9 @@
 #ifndef SRC_DECOMPOSITION_COMMON_HPP_
 #define SRC_DECOMPOSITION_COMMON_HPP_
 
+#define UNIQUE 1
+#define MULTIPLE 2
+
 #include "Vector/map_vector.hpp"
 
 
diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp
index deced3bb..b92ddbc6 100644
--- a/src/Decomposition/ie_ghost.hpp
+++ b/src/Decomposition/ie_ghost.hpp
@@ -11,9 +11,6 @@
 #include "common.hpp"
 #include "nn_processor.hpp"
 
-#define UNIQUE 1
-#define MULTIPLE 2
-
 /*! \brief structure that store and compute the internal and external local ghost box
  *
  * \tparam dim is the dimensionality of the physical domain we are going to decompose.
diff --git a/src/Grid/grid_dist_id_unit_test.hpp b/src/Grid/grid_dist_id_unit_test.hpp
index fa8c5971..8f2ccbb7 100644
--- a/src/Grid/grid_dist_id_unit_test.hpp
+++ b/src/Grid/grid_dist_id_unit_test.hpp
@@ -67,7 +67,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter_test)
 			vol += g_box.getVolumeKey();
 		}
 
-		v_cl.reduce(vol);
+		v_cl.sum(vol);
 		v_cl.execute();
 
 		BOOST_REQUIRE_EQUAL(vol,sz[0]*sz[1]);
@@ -133,7 +133,7 @@ void Test2D(const Box<2,float> & domain, long int k)
 		Vcluster & vcl = g_dist.getVC();
 
 		// reduce
-		vcl.reduce(count);
+		vcl.sum(count);
 		vcl.execute();
 
 		// Check
@@ -235,7 +235,7 @@ void Test3D(const Box<3,float> & domain, long int k)
 		Vcluster & vcl = g_dist.getVC();
 
 		// reduce
-		vcl.reduce(count);
+		vcl.sum(count);
 		vcl.execute();
 
 		// Check
@@ -364,7 +364,7 @@ void Test2D_complex(const Box<2,float> & domain, long int k)
 		Vcluster & vcl = g_dist.getVC();
 
 		// reduce
-		vcl.reduce(count);
+		vcl.sum(count);
 		vcl.execute();
 
 		// Check
@@ -525,7 +525,7 @@ void Test3D_complex(const Box<3,float> & domain, long int k)
 		Vcluster & vcl = g_dist.getVC();
 
 		// reduce
-		vcl.reduce(count);
+		vcl.sum(count);
 		vcl.execute();
 
 		// Check
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index 6546f4a0..68111862 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -20,6 +20,9 @@
 #include "util/object_util.hpp"
 #include "memory/ExtPreAlloc.hpp"
 #include "CSVWriter.hpp"
+#include "Decomposition/common.hpp"
+
+#define V_SUB_UNIT_FACTOR 64
 
 #define NO_ID false
 #define ID true
@@ -38,10 +41,14 @@
 #define WITH_GHOST 2
 
 /*! \brief Distributed vector
+ *
+ * \tparam dim Dimensionality of the space where the object live
+ * \tparam St type of space
+ * \tparam prop properties the object store
  *
  */
 
-template<typename point, typename prop, typename Box, typename Decomposition , typename Memory=HeapMemory, bool with_id=false>
+template<unsigned int dim, typename St, typename prop, typename Decomposition , typename Memory=HeapMemory, bool with_id=false>
 class vector_dist
 {
 private:
@@ -56,7 +63,7 @@ private:
 	Decomposition dec;
 
 	// Particle position vector for each sub-domain the last one is the unassigned particles vector
-	Vcluster_object_array<openfpm::vector<point>> v_pos;
+	Vcluster_object_array<openfpm::vector<Point<dim,St>>> v_pos;
 
 	// Particle properties vector for each sub-domain the last one is the unassigned particles vector
 	Vcluster_object_array<openfpm::vector<prop>> v_prp;
@@ -65,7 +72,7 @@ private:
 	Vcluster & v_cl;
 
 	// Geometrical cell list
-	CellList<point::dims,typename point::coord_type,FAST> geo_cell;
+	CellList<dim,St,FAST> geo_cell;
 
 	// Label particles
 
@@ -77,55 +84,72 @@ public:
 	 * \param Global number of elements
 	 *
 	 */
-	vector_dist(size_t np, Box box, Ghost<point::dims,typename point::coord_type> g = Ghost<point::dims,typename point::coord_type>())
-	:dec(Decomposition(*global_v_cluster)),v_cl(*global_v_cluster)
+	vector_dist(size_t np, Box<dim,St> box, Ghost<dim,St> g = Ghost<dim,St>())
+	:dec(*global_v_cluster),v_cl(*global_v_cluster)
 	{
 		// Allocate unassigned particles vectors
-		v_pos = v_cl.template allocate<openfpm::vector<point>>(1);
+		v_pos = v_cl.template allocate<openfpm::vector<Point<dim,St>>>(1);
 		v_prp = v_cl.template allocate<openfpm::vector<prop>>(1);
 
 		// convert to a local number of elements
-		np /= v_cl.getProcessingUnits();
+		size_t p_np = np / v_cl.getProcessingUnits();
+
+		// Get non divisible part
+		size_t r = np % v_cl.getProcessingUnits();
+
+		// Distribute the remain particles
+		if (v_cl.getProcessUnitID() < r)
+			p_np++;
 
 		// resize the position vector
-		v_pos.get(0).resize(np);
+		v_pos.get(0).resize(p_np);
 
 		// resize the properties vector
-		v_prp.get(0).resize(np);
+		v_prp.get(0).resize(p_np);
 
 		// Create a valid decomposition of the space
 		// Get the number of processor and calculate the number of sub-domain
 		// for decomposition
 		size_t n_proc = v_cl.getProcessingUnits();
-		size_t n_sub = n_proc * SUB_UNIT_FACTOR;
+		size_t n_sub = n_proc * getDefaultNsubsub();
 
 		// Calculate the maximum number (before merging) of sub-domain on
 		// each dimension
-		size_t div[point::dims];
-		for (size_t i = 0 ; i < point::dims ; i++)
-		{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/point::dims));}
+		size_t div[dim];
+		for (size_t i = 0 ; i < dim ; i++)
+		{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/dim));}
 
 		// Create the sub-domains
 		dec.setParameters(div,box,g);
 
 		// Get the bounding box containing the processor domain
-		const ::Box<point::dims,typename point::coord_type> & bbound = dec.getProcessorBounds();
+		const ::Box<dim,St> & bbound = dec.getProcessorBounds();
 
-		const ::Box<point::dims,typename point::coord_type> & smallest_unit = dec.getSmallestSubdivision();
+		const ::Box<dim,St> & smallest_unit = dec.getSmallestSubdivision();
 
 		// convert spacing divisions
-		size_t n_g[point::dims];
+		size_t n_g[dim];
 
-		for (size_t i = 0 ; i < point::dims ; i++)
+		for (size_t i = 0 ; i < dim ; i++)
 			n_g[i] = (bbound.getHigh(i) - bbound.getLow(i)) / smallest_unit.getHigh(i);
 
-		point p;
+		Point<dim,St> p;
 		p.zero();
 
 		// Initialize the geo cell list
 		geo_cell.Initialize(box,n_g,p,8);
 	}
 
+	/*! \brief Get the number of minimum sub-domain
+	 *
+	 * \return minimum number
+	 *
+	 */
+	static size_t getDefaultNsubsub()
+	{
+		return  V_SUB_UNIT_FACTOR;
+	}
+
 	/*! \brief return the local size of the vector
 	 *
 	 * \return local size
@@ -163,7 +187,7 @@ public:
 	struct pos_prop
 	{
 		//! position vector
-		openfpm::vector<point,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> pos;
+		openfpm::vector<Point<dim,St>,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> pos;
 		//! properties vector
 		openfpm::vector<prop,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> prp;
 	};
@@ -254,7 +278,7 @@ public:
 		for (size_t i = 0 ;  i < prc_r.size() ; i++)
 		{
 			// Create the size required to store the particles position and properties to communicate
-			size_t s1 = openfpm::vector<point,HeapMemory,openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i),0);
+			size_t s1 = openfpm::vector<Point<dim,St>,HeapMemory,openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i),0);
 			size_t s2 = openfpm::vector<prop,HeapMemory,openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i),0);
 
 			// Preallocate the memory
@@ -310,13 +334,13 @@ public:
 		// convert the particle number to buffer size
 		for (size_t i = 0 ; i < prc_sz_r.size() ; i++)
 		{
-			prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(point));
+			prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(Point<dim,St>));
 		}
 
 		// Send and receive the particles
 
 		recv_cnt = 0;
-		v_cl.sendrecvMultipleMessagesPCX(prc_sz_r.size(),&p_map.get(0), &prc_sz_r.get(0), &prc_r.get(0) , &ptr.get(0) , vector_dist::message_alloc_map, this ,NEED_ALL_SIZE);
+		v_cl.sendrecvMultipleMessagesPCX(prc_sz_r.size(),&p_map.get(0), (size_t *)prc_sz_r.getPointer(), (size_t *)prc_r.getPointer() , (void **)ptr.getPointer() , vector_dist::message_alloc_map, this ,NEED_ALL_SIZE);
 
 		// overwrite the outcoming particle with the incoming particle and resize the vectors
 
@@ -327,19 +351,19 @@ public:
 		{
 			// Get the number of elements
 
-			size_t n_ele = v_proc.get(i) / (sizeof(point) + sizeof(prop));
+			size_t n_ele = v_proc.get(i) / (sizeof(Point<dim,St>) + sizeof(prop));
 
 			// Pointer of the received positions for each near processor
-			void * ptr_pos = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(point) + sizeof(prop)));
+			void * ptr_pos = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(Point<dim,St>) + sizeof(prop)));
 			// Pointer of the received properties for each near processor
-			void * ptr_prp = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(point) + sizeof(prop))) + n_ele * sizeof(point);
+			void * ptr_prp = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(Point<dim,St>) + sizeof(prop))) + n_ele * sizeof(Point<dim,St>);
 
-			PtrMemory * ptr1 = new PtrMemory(ptr_pos,n_ele * sizeof(point));
+			PtrMemory * ptr1 = new PtrMemory(ptr_pos,n_ele * sizeof(Point<dim,St>));
 			PtrMemory * ptr2 = new PtrMemory(ptr_prp,n_ele * sizeof(prop));
 
 			// create vector representation to a piece of memory already allocated
 
-			openfpm::vector<point,PtrMemory,openfpm::grow_policy_identity> vpos;
+			openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> vpos;
 			openfpm::vector<prop,PtrMemory,openfpm::grow_policy_identity> vprp;
 
 			vpos.setMemory(*ptr1);
@@ -450,7 +474,7 @@ public:
 			pap_prp.push_back(alloc_ele);
 			size_byte_prp += alloc_ele;
 
-			alloc_ele = openfpm::vector<point>::calculateMem(ghost_prc_sz.get(i),0);
+			alloc_ele = openfpm::vector<Point<dim,St>>::calculateMem(ghost_prc_sz.get(i),0);
 			pap_pos.push_back(alloc_ele);
 			size_byte_pos += alloc_ele;
 		}
@@ -499,7 +523,7 @@ public:
 		// Create the buffer for particle position
 
 		// definition of the send vector for position for each processor
-		typedef  openfpm::vector<point,ExtPreAlloc<Memory>> send_pos_vector;
+		typedef  openfpm::vector<Point<dim,St>,ExtPreAlloc<Memory>> send_pos_vector;
 
 		openfpm::vector<send_pos_vector> g_pos_send;
 		if (opt != NO_POSITION)
@@ -569,14 +593,14 @@ public:
 			for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++)
 			{
 				// calculate the number of received elements
-				size_t n_ele = recv_sz.get(i) / sizeof(point);
+				size_t n_ele = recv_sz.get(i) / sizeof(Point<dim,St>);
 
 				// add the received particles to the vector
 				PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i));
 
 				// create vector representation to a piece of memory already allocated
 
-				openfpm::vector<point,PtrMemory,openfpm::grow_policy_identity> v2;
+				openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> v2;
 
 				v2.setMemory(*ptr1);
 
@@ -608,7 +632,7 @@ public:
 	 */
 	static void * msg_alloc_ghost_get(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
 	{
-		vector_dist<point,prop,Box,Decomposition,Memory,with_id> * v = static_cast<vector_dist<point,prop,Box,Decomposition,Memory,with_id> *>(ptr);
+		vector_dist<dim,St,prop,Decomposition,Memory,with_id> * v = static_cast<vector_dist<dim,St,prop,Decomposition,Memory,with_id> *>(ptr);
 
 		v->recv_sz.resize(v->dec.getNNProcessors());
 		v->recv_mem_gg.resize(v->dec.getNNProcessors());
@@ -648,7 +672,7 @@ public:
 	static void * message_alloc_map(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
 	{
 		// cast the pointer
-		vector_dist<point,prop,Box,Decomposition,Memory,with_id> * vd = static_cast<vector_dist<point,prop,Box,Decomposition,Memory,with_id> *>(ptr);
+		vector_dist<dim,St,prop,Decomposition,Memory,with_id> * vd = static_cast<vector_dist<dim,St,prop,Decomposition,Memory,with_id> *>(ptr);
 
 		// Resize the receive buffer, and the size of each message buffer
 		vd->hp_recv.resize(total_msg);
@@ -672,9 +696,9 @@ public:
 	 * \return an iterator
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<point>> getIterator()
+	vector_dist_iterator<openfpm::vector<Point<dim,St>>> getIterator()
 	{
-		return vector_dist_iterator<openfpm::vector<point>>(v_pos);
+		return vector_dist_iterator<openfpm::vector<Point<dim,St>>>(v_pos);
 	}
 
 	/*! \brief Get the iterator across the position of the ghost particles
@@ -682,9 +706,9 @@ public:
 	 * \return an iterator
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<point>> getGhostIterator()
+	vector_dist_iterator<openfpm::vector<Point<dim,St>>> getGhostIterator()
 	{
-		return vector_dist_iterator<openfpm::vector<point>>(v_pos,g_m);
+		return vector_dist_iterator<openfpm::vector<Point<dim,St>>>(v_pos,g_m);
 	}
 
 	/*! \brief Get the iterator across the properties of the particles
@@ -730,7 +754,7 @@ public:
 		if (hasEnding(out,".csv"))
 		{
 			// CSVWriter test
-			CSVWriter<openfpm::vector<point>, openfpm::vector<prop> > csv_writer;
+			CSVWriter<openfpm::vector<Point<dim,St>>, openfpm::vector<prop> > csv_writer;
 
 			std::string output = std::to_string(v_cl.getProcessUnitID()) + std::string("_") + out;
 
diff --git a/src/Vector/vector_dist_key.hpp b/src/Vector/vector_dist_key.hpp
index a79bc680..227ee708 100644
--- a/src/Vector/vector_dist_key.hpp
+++ b/src/Vector/vector_dist_key.hpp
@@ -48,6 +48,18 @@ public:
 		return key;
 	}
 
+	/*! \brief Convert the key into a message
+	 *
+	 */
+	std::string to_string()
+	{
+		std::stringstream ts;
+
+		ts << "x[0]=" << key;
+
+		return ts.str();
+	}
+
 	vect_dist_key_dx(int v_c, size_t key)
 	:v_c(v_c),key(key)
 	{
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index a829a298..21cae3dd 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -21,15 +21,29 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	typedef Point_test<float> p;
 	typedef Point<2,float> s;
 
-	// TODO generalize
-	if (v_cl.getProcessingUnits() != 4)
-		return;
+	// Get the default minimum number of sub-sub-domain per processor (granularity of the decomposition)
+	size_t n_sub = vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> >::getDefaultNsubsub() * v_cl.getProcessingUnits();
+	// Convert the request of having a minimum n_sub number of sub-sub domain into grid decompsition of the space
+	size_t sz = CartDecomposition<2,float>::getDefaultGrid(n_sub);
 
 	Box<2,float> box({0.0,0.0},{1.0,1.0});
-	size_t g_div[]= {16,16};
+	size_t g_div[]= {sz,sz};
 
-	// processor division on y direction
-	size_t point_div = g_div[1] / v_cl.getProcessingUnits();
+	// number of particles
+	size_t np = sz * sz;
+
+	// Calculate the number of objects this processor is going to obtain
+	size_t p_np = np / v_cl.getProcessingUnits();
+
+	// Get non divisible part
+	size_t r = np % v_cl.getProcessingUnits();
+
+	// Get the offset
+	size_t offset = v_cl.getProcessUnitID() * p_np + std::min(v_cl.getProcessUnitID(),r);
+
+	// Distribute the remain objects
+	if (v_cl.getProcessUnitID() < r)
+		p_np++;
 
 	// Create a grid info
 	grid_sm<2,void> g_info(g_div);
@@ -41,50 +55,57 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	// middle spacing
 	Point<2,float> m_spacing = spacing / 2;
 
-	// create a sub iterator
-	grid_key_dx<2> start(point_div * v_cl.getProcessUnitID(),0);
-	grid_key_dx<2> stop(point_div * (v_cl.getProcessUnitID() + 1) - 1,g_div[0]);
-	auto g_sub = g_info.getSubIterator(start,stop);
-
 	// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
 	Ghost<2,float> g(spacing.get(0) - spacing .get(0) * 0.0001);
 
 	// Vector of particles
-	vector_dist<Point<2,float>, Point_test<float>, Box<2,float>, CartDecomposition<2,float> > vd(g_info.size(),box,g);
+	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(g_info.size(),box,g);
+
+	// size_t
+	size_t cobj = 0;
 
-	auto it = vd.getIterator();
+	grid_key_dx_iterator_sp<2> it(g_info,offset,offset+p_np-1);
+	auto v_it = vd.getIterator();
 
-	while (it.isNext())
+	while (v_it.isNext() && it.isNext())
 	{
-		auto key_v = it.get();
-		auto key = g_sub.get();
+		auto key = it.get();
+		auto key_v = v_it.get();
 
 		// set the particle position
 
 		vd.template getPos<s::x>(key_v)[0] = key.get(0) * spacing[0] + m_spacing[0];
 		vd.template getPos<s::x>(key_v)[1] = key.get(1) * spacing[1] + m_spacing[1];
 
-		++g_sub;
+		cobj++;
+
+		++v_it;
 		++it;
 	}
 
+	// Both iterators must signal the end, and the number of object in the vector, must the equal to the
+	// predicted one
+	BOOST_REQUIRE_EQUAL(v_it.isNext(),false);
+	BOOST_REQUIRE_EQUAL(it.isNext(),false);
+	BOOST_REQUIRE_EQUAL(cobj,p_np);
+
 	// Debug write the particles
-	vd.write("Particles_before_map.csv");
+//	vd.write("Particles_before_map.csv");
 
 	// redistribute the particles according to the decomposition
 	vd.map();
 
 	// Debug write particles
-	vd.write("Particles_after_map.csv");
+//	vd.write("Particles_after_map.csv");
 
 	// Fill the scalar with the particle position
 	const auto & ct = vd.getDecomposition();
 
-	it = vd.getIterator();
+	v_it = vd.getIterator();
 
-	while (it.isNext())
+	while (v_it.isNext())
 	{
-		auto key = it.get();
+		auto key = v_it.get();
 
 		// fill with the processor ID where these particle live
 		vd.template getProp<p::s>(key) = vd.getPos<s::x>(key)[0] + vd.getPos<s::x>(key)[1] * 16;
@@ -92,7 +113,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 		vd.template getProp<p::v>(key)[1] = v_cl.getProcessUnitID();
 		vd.template getProp<p::v>(key)[2] = v_cl.getProcessUnitID();
 
-		++it;
+		++v_it;
 	}
 
 	//! Output the decomposition
@@ -156,59 +177,145 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	}
 }
 
-BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use )
+void print_test_v(std::string test, size_t sz)
+{
+	if (global_v_cluster->getProcessUnitID() == 0)
+		std::cout << test << " " << sz << "\n";
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d )
 {
 	typedef Point<2,float> s;
 
 	Vcluster & v_cl = *global_v_cluster;
 
-	if (v_cl.getProcessingUnits() != 4)
-		return;
-
     // set the seed
 	// create the random generator engine
 	std::srand(v_cl.getProcessUnitID());
     std::default_random_engine eg;
     std::uniform_real_distribution<float> ud(0.0f, 1.0f);
 
-	Box<2,float> box({0.0,0.0},{1.0,1.0});
-	vector_dist<Point<2,float>, Point_test<float>, Box<2,float>, CartDecomposition<2,float> > vd(4096,box);
+    size_t k = 4096 * v_cl.getProcessingUnits();
 
-	auto it = vd.getIterator();
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 1;
 
-	while (it.isNext())
+	// 2D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
 	{
-		auto key = it.get();
+		BOOST_TEST_CHECKPOINT( "Testing 2D vector k=" << k );
+		print_test_v( "Testing 2D vector k=",k);
+		Box<2,float> box({0.0,0.0},{1.0,1.0});
+		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box);
 
-		vd.template getPos<s::x>(key)[0] = ud(eg);
-		vd.template getPos<s::x>(key)[1] = ud(eg);
+		auto it = vd.getIterator();
 
-		++it;
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// Check if we have all the local particles
+		size_t cnt = 0;
+		const CartDecomposition<2,float> & ct = vd.getDecomposition();
+		it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Check if local
+			BOOST_REQUIRE_EQUAL(ct.isLocal(vd.template getPos<s::x>(key)),true);
+
+			cnt++;
+
+			++it;
+		}
+
+		//
+		v_cl.sum(cnt);
+		v_cl.execute();
+		BOOST_REQUIRE_EQUAL(cnt,k);
 	}
+}
 
-	vd.map();
+BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d )
+{
+	typedef Point<3,float> s;
 
-	// Check if we have all the local particles
-	size_t cnt = 0;
-	auto & ct = vd.getDecomposition();
-	it = vd.getIterator();
+	Vcluster & v_cl = *global_v_cluster;
 
-	while (it.isNext())
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+    size_t k = 4096 * v_cl.getProcessingUnits();
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 1;
+
+	// 3D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
 	{
-		auto key = it.get();
+		BOOST_TEST_CHECKPOINT( "Testing 3D vector k=" << k );
+		print_test_v( "Testing 3D vector k=",k);
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box);
 
-		// Check if local
-		BOOST_REQUIRE_EQUAL(ct.isLocal(vd.template getPos<s::x>(key)),true);
+		auto it = vd.getIterator();
 
-		cnt++;
+		while (it.isNext())
+		{
+			auto key = it.get();
 
-		++it;
-	}
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+			vd.template getPos<s::x>(key)[2] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// Check if we have all the local particles
+		size_t cnt = 0;
+		const CartDecomposition<3,float> & ct = vd.getDecomposition();
+		it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Check if local
+//			BOOST_REQUIRE_EQUAL(ct.isLocal(vd.template getPos<s::x>(key)),true);
 
-	//
-	v_cl.reduce(cnt);
-	v_cl.execute();
-	BOOST_REQUIRE_EQUAL(cnt,4096);
+			if (ct.isLocal(vd.template getPos<s::x>(key)) == false)
+			{
+				std::cerr << "Error " << v_cl.getProcessUnitID() << key.to_string() << "Non local\n";
+				exit(-1);
+			}
+
+			cnt++;
+
+			++it;
+		}
+
+		//
+		v_cl.sum(cnt);
+		v_cl.execute();
+		BOOST_REQUIRE_EQUAL(cnt,k);
+	}
 }
 
 BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab