diff --git a/openfpm_data b/openfpm_data
index dde50897665d36fc2723aabb0917291c9573c065..a677ffb0def932b598829ea77f86b2d10588f784 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit dde50897665d36fc2723aabb0917291c9573c065
+Subproject commit a677ffb0def932b598829ea77f86b2d10588f784
diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp
index f1c0ffba37597121cf22bcd1737ad7088d172eb6..56416ebcb90440dc56e53624d3bc896bc19f2d29 100755
--- a/src/Decomposition/CartDecomposition.hpp
+++ b/src/Decomposition/CartDecomposition.hpp
@@ -119,6 +119,9 @@ protected:
 	//! the set of all local sub-domain as vector
 	openfpm::vector<SpaceBox<dim, T>> sub_domains;
 
+	//! the global set of all sub-domains as vector of 'sub_domains' vectors
+	mutable openfpm::vector<openfpm::vector<SpaceBox<dim, T>>> sub_domains_global;
+
 	//! for each sub-domain, contain the list of the neighborhood processors
 	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
 
@@ -1223,6 +1226,16 @@ public:
 		return domain;
 	}
 
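+	/*! \brief Get the set of local sub-domains
+	 *
+	 * \return the vector of local sub-domains
+	 *
+	 */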
+	openfpm::vector<SpaceBox<dim, T>> getSubDomains() const
+	{
+		return sub_domains;
+	}
+
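+	/*! \brief Get the buffer used to collect the sub-domains of all processors
+	 *
+	 * \return a reference to the global sub-domains vector
+	 *
+	 */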
+	openfpm::vector<openfpm::vector<SpaceBox<dim, T>>> & getSubDomainsGlobal()
+	{
+		return sub_domains_global;
+	}
+
 	/*! \brief Check if the particle is local
 	 *
 	 * \warning if the particle id outside the domain the result is unreliable
diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp
index 2a60db9f96aff36ce4346d78d5cc2b9ea88a5274..cf74851475b13662fd810a65ae82c7625b20dbcb 100755
--- a/src/Decomposition/ie_ghost.hpp
+++ b/src/Decomposition/ie_ghost.hpp
@@ -38,7 +38,7 @@ class ie_ghost
 	openfpm::vector<p_box<dim,T> > vb_int;
 
 	//! Cell-list that store the geometrical information of the internal ghost boxes
-	CellList<dim,T,FAST,shift<dim,T>> geo_cell;
+	CellList<dim,T,Mem_fast<dim,T>,shift<dim,T>> geo_cell;
 
 	//! shift vectors
 	openfpm::vector<Point<dim,T>> shifts;
@@ -750,9 +750,9 @@ public:
 	 * \return An iterator with the id's of the internal boxes in which the point fall
 	 *
 	 */
-	auto getInternalIDBoxes(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
+	auto getInternalIDBoxes(Point<dim,T> & p) -> decltype(geo_cell.getCellIterator(geo_cell.getCell(p)))
 	{
-		return geo_cell.getIterator(geo_cell.getCell(p));
+		return geo_cell.getCellIterator(geo_cell.getCell(p));
 	}
 
 	/*! \brief if the point fall into the ghost of some near processor it return the processors id's in which
@@ -762,9 +762,9 @@ public:
 	 * \return iterator of the processors id's
 	 *
 	 */
-	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
+	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getCellIterator(geo_cell.getCell(p)))
 	{
-		return geo_cell.getIterator(geo_cell.getCell(p));
+		return geo_cell.getCellIterator(geo_cell.getCell(p));
 	}
 
 	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
@@ -790,7 +790,7 @@ public:
 
 		// Check with geo-cell if a particle is inside one Cell containing boxes
 
-		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+		auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p));
 
 		// For each element in the cell, check if the point is inside the box
 		// if it is, store the processor id
@@ -839,7 +839,7 @@ public:
 
 		// Check with geo-cell if a particle is inside one Cell containing boxes
 
-		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+		auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p));
 
 		// For each element in the cell, check if the point is inside the box
 		// if it is, store the processor id
@@ -883,7 +883,7 @@ public:
 
 		// Check with geo-cell if a particle is inside one Cell containing boxes
 
-		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+		auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p));
 
 		// For each element in the cell, check if the point is inside the box
 		// if it is, store the processor id
@@ -926,7 +926,7 @@ public:
 
 		// Check with geo-cell if a particle is inside one Cell containing boxes
 
-		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+		auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p));
 
 		// For each element in the cell, check if the point is inside the box
 		// if it is, store the processor id
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index cfd6791edfb3a9942bf345ba9e33759fba09c3c6..ae3ee199bd6d16dc603c88a1c71c7dea807739c8 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -19,6 +19,8 @@
 #include "Packer_Unpacker/Unpacker.hpp"
 #include "Decomposition/CartDecomposition.hpp"
 #include "data_type/aggregate.hpp"
+#include "hdf5.h"
+#include "grid_dist_id_comm.hpp"
 
 //! Internal ghost box sent to construct external ghost box into the other processors
 template<unsigned int dim>
@@ -63,7 +65,7 @@ struct Box_fix
  *
  */
 template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
-class grid_dist_id
+class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid>
 {
 	//! Domain
 	Box<dim,St> domain;
@@ -72,7 +74,10 @@ class grid_dist_id
 	Ghost<dim,St> ghost;
 
 	//! Local grids
-	openfpm::vector<device_grid> loc_grid;
+	mutable openfpm::vector<device_grid> loc_grid;
+
+	//! Old local grids
+	mutable openfpm::vector<device_grid> loc_grid_old;
 
 	//! Space Decomposition
 	Decomposition dec;
@@ -80,6 +85,12 @@ class grid_dist_id
 	//! Extension of each grid: Domain and ghost + domain
 	openfpm::vector<GBoxes<device_grid::dims>> gdb_ext;
 
+	//! Information about the local grids of all processors (global gdb_ext), filled by getGlobalGridsInfo()
+	mutable openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_global;
+
+	//! Extension of each old grid: Domain and ghost + domain
+	openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_old;
+
 	//! Size of the grid on each dimension
 	size_t g_sz[dim];
 
@@ -1012,6 +1023,121 @@ public:
 		return gdb_ext;
 	}
 
+	/*! \brief It gathers the information about the local grids of all processors
+	 *
+	 * \param gdb_ext_global vector that, on every processor, is filled with the GBoxes of the whole grid
+	 *
+	 */
+	void getGlobalGridsInfo(openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global) const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
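+		// Gather the gdb_ext of every processor on the root, then broadcast first the
+		// total size and then the full vector, so that at the end every processor holds
+		// the information about all the grids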
+		v_cl.SGather(gdb_ext,gdb_ext_global,0);
+		v_cl.execute();
+
+		size_t size_r;
+
+		if (v_cl.getProcessUnitID()  == 0)
+		{
+			// the root already holds the full vector; use size_r directly so that the
+			// buffer handed to send() is still alive when the requests are executed
+			size_r = gdb_ext_global.size();
+			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+			{
+				v_cl.send(i,0,&size_r,sizeof(size_t));
+			}
+		}
+		else
+		{
+			v_cl.recv(0,0,&size_r,sizeof(size_t));
+		}
+		v_cl.execute();
+
+		gdb_ext_global.resize(size_r);
+
+
+		if (v_cl.getProcessUnitID()  == 0)
+		{
+			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+			{
+				v_cl.send(i,0,gdb_ext_global);
+			}
+		}
+		else
+		{
+			v_cl.recv(0,0,gdb_ext_global);
+		}
+
+		v_cl.execute();
+	}
+
+	/*! \brief It gathers the local grids of all processors
+	 *
+	 * \param loc_grid_global vector that, on every processor, is filled with the local grids of the whole domain
+	 *
+	 */
+	void getGlobalGrids(openfpm::vector<openfpm::vector<device_grid>> & loc_grid_global) const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
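+		// Same gather-then-broadcast pattern as getGlobalGridsInfo, applied to the local grids themselves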
+		v_cl.SGather(loc_grid,loc_grid_global,0);
+		v_cl.execute();
+
+		size_t size_r;
+
+		if (v_cl.getProcessUnitID()  == 0)
+		{
+			// the root already holds the full vector; use size_r directly so that the
+			// buffer handed to send() is still alive when the requests are executed
+			size_r = loc_grid_global.size();
+			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+			{
+				v_cl.send(i,0,&size_r,sizeof(size_t));
+			}
+		}
+		else
+		{
+			v_cl.recv(0,0,&size_r,sizeof(size_t));
+		}
+		v_cl.execute();
+
+		loc_grid_global.resize(size_r);
+
+
+		if (v_cl.getProcessUnitID()  == 0)
+		{
+			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+			{
+				v_cl.send(i,0,loc_grid_global);
+			}
+		}
+		else
+		{
+			v_cl.recv(0,0,loc_grid_global);
+		}
+
+		v_cl.execute();
+	}
+
+	/*! \brief It returns an iterator spanning the old grid domain, i.e. the local grids as they were before the last load/map (each processor spans its old local domain)
+	 *
+	 * \return the iterator
+	 *
+	 */
+	grid_dist_iterator<dim,device_grid,FREE> getOldDomainIterator() const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+
+		grid_key_dx<dim> stop(ginfo_v.getSize());
+		grid_key_dx<dim> one;
+		one.one();
+		stop = stop - one;
+
+		grid_dist_iterator<dim,device_grid,FREE> it(loc_grid_old,gdb_ext_old,stop);
+
+		return it;
+	}
+
 	/*! \brief It return an iterator that span the full grid domain (each processor span its local domain)
 	 *
 	 * \return the iterator
@@ -1556,6 +1682,563 @@ public:
 		}
 	}
 
+	/*! \brief It moves all the grid parts that do not belong to the local processor to the respective processors
+	 *
+	 */
+	void map()
+	{
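+		// Collect the information about the new decomposition from all processors, then
+		// delegate to map_(): it labels the pieces of the old local grids with their
+		// destination processor, exchanges them and reconstructs the new local grids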
+		getGlobalGridsInfo(gdb_ext_global);
+/*
+		std::cout << "Global size: " << gdb_ext_global.size() << std::endl;
+
+		for (size_t i = 0; i < gdb_ext_global.size(); i++)
+		{
+			std::cout << "(" << gdb_ext_global.get(i).Dbox.getLow(0) << "; " << gdb_ext_global.get(i).Dbox.getLow(1) << "); (" << gdb_ext_global.get(i).Dbox.getHigh(0) << "; " << gdb_ext_global.get(i).Dbox.getHigh(1) << ")" << std::endl;
+			std::cout << "I = " << i << ", Origin is (" << gdb_ext_global.get(i).origin.get(0) << "; " << gdb_ext_global.get(i).origin.get(1) << ")" << std::endl;
+		}
+
+		if (v_cl.getProcessUnitID() == 0)
+		{
+			for (size_t i = 0; i < gdb_ext.size(); i++)
+			{
+				Box<dim,long int> box = gdb_ext.get(i).Dbox;
+				box += gdb_ext.get(i).origin;
+				std::cout << "(" << box.getLow(0) << "; " << box.getLow(1) << "); (" << box.getHigh(0) << "; " << box.getHigh(1) << ")" << std::endl;
+			}
+		}
+
+		if (v_cl.getProcessUnitID() == 0)
+		{
+			for (size_t i = 0; i < loc_grid_old.size(); i++)
+			{
+				Point<dim,St> p1;
+				Point<dim,St> p2;
+				for (size_t n = 0; n < dim; n++)
+				{
+					p1.get(n) = loc_grid_old.get(i).getGrid().getBox().getLow(n);
+					p2.get(n) = loc_grid_old.get(i).getGrid().getBox().getHigh(n);
+				}
+
+				std::cout << "Loc_grid_old: (" << p1.get(0) << "; " << p1.get(1) << "); (" << p2.get(0) << "; " << p2.get(1) << "); " << "Gdb_ext_old: (" << gdb_ext_old.get(i).Dbox.getLow(0) << "; " << gdb_ext_old.get(i).Dbox.getLow(1) << "); (" << gdb_ext_old.get(i).Dbox.getHigh(0) << "; " << gdb_ext_old.get(i).Dbox.getHigh(1) << ")" << std::endl;
+			}
+		}
+*/
+		this->template map_(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global);
+	}
+
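+	/*! \brief Save the distributed grid (local grids and their extension information) into an HDF5 file
+	 *
+	 * Each processor packs loc_grid and gdb_ext and writes the packed bytes collectively
+	 * into the "grid_dist" dataset; the "metadata" dataset stores the packed size of each
+	 * processor, so that load() can split the file among a different number of processors.
+	 *
+	 * A minimal usage sketch (mirroring the checkpoint/restart test):
+	 *
+	 * \code
+	 * grid_dist_id<2,float,scalar<float>> g_dist(sz,domain,g);
+	 * g_dist.save("grid_dist_id.h5");    // checkpoint
+	 * g_dist.load("grid_dist_id.h5");    // restart, possibly on a different number of processors
+	 * \endcode
+	 *
+	 * \param filename name of the HDF5 file to create
+	 *
+	 */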
+	inline void save(const std::string & filename) const
+	{
+		//std::cout << "Loc_grid.size() before save: " << loc_grid.size() << std::endl;
+		//std::cout << "Gdb_ext.size() before save: " << gdb_ext.size() << std::endl;
+
+		//Pack_request vector
+		size_t req = 0;
+
+		//Pack request
+		Packer<decltype(loc_grid),HeapMemory>::packRequest(loc_grid,req);
+		Packer<decltype(gdb_ext),HeapMemory>::packRequest(gdb_ext,req);
+
+		//std::cout << "Req: " << req << std::endl;
+
+		// allocate the memory
+		HeapMemory pmem;
+		//pmem.allocate(req);
+		ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(req,pmem));
+		mem.incRef();
+
+		//Packing
+
+		Pack_stat sts;
+
+		Packer<decltype(loc_grid),HeapMemory>::pack(mem,loc_grid,sts);
+		Packer<decltype(gdb_ext),HeapMemory>::pack(mem,gdb_ext,sts);
+
+	    /*****************************************************************
+	     * Create a new file with default creation and access properties.*
+	     * Then create a dataset and write data to it and close the file *
+	     * and dataset.                                                  *
+	     *****************************************************************/
+
+		int mpi_rank = v_cl.getProcessUnitID();
+		int mpi_size = v_cl.getProcessingUnits();
+
+		MPI_Comm comm = v_cl.getMPIComm();
+		MPI_Info info  = MPI_INFO_NULL;
+
+	    // Set up file access property list with parallel I/O access
+
+		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
+		H5Pset_fapl_mpio(plist_id, comm, info);
+
+		// Create a new file collectively and release property list identifier.
+		hid_t file = H5Fcreate (filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
+		H5Pclose(plist_id);
+
+		size_t sz = pmem.size();
+		//std::cout << "Pmem.size: " << pmem.size() << std::endl;
+		openfpm::vector<size_t> sz_others;
+		v_cl.allGather(sz,sz_others);
+		v_cl.execute();
+
+		size_t sum = 0;
+
+		for (size_t i = 0; i < sz_others.size(); i++)
+			sum += sz_others.get(i);
+
+		//Size for data space in file
+		hsize_t fdim[1] = {sum};
+
+		//Size for data space in file
+		hsize_t fdim2[1] = {(size_t)mpi_size};
+
+		//Create data space in file
+		hid_t file_dataspace_id = H5Screate_simple(1, fdim, NULL);
+
+		//Create data space in file
+		hid_t file_dataspace_id_2 = H5Screate_simple(1, fdim2, NULL);
+
+		//Size for data space in memory
+		hsize_t mdim[1] = {pmem.size()};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL);
+
+		//if (mpi_rank == 0)
+			//std::cout << "Total object size: " << sum << std::endl;
+
+		//Create data set in file
+		hid_t file_dataset = H5Dcreate (file, "grid_dist", H5T_NATIVE_CHAR, file_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+		//Create data set 2 in file
+		hid_t file_dataset_2 = H5Dcreate (file, "metadata", H5T_NATIVE_INT, file_dataspace_id_2, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+	    //H5Pclose(plist_id);
+	    H5Sclose(file_dataspace_id);
+	    H5Sclose(file_dataspace_id_2);
+
+	    hsize_t block[1] = {pmem.size()};
+
+	    //hsize_t stride[1] = {1};
+
+		hsize_t count[1] = {1};
+
+	    hsize_t offset[1] = {0};
+
+	    for (int i = 0; i < mpi_rank; i++)
+	    	offset[0] += sz_others.get(i);
+
+	//    std::cout << "MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Block: " << block[0] << std::endl;
+
+	    int metadata[mpi_size];
+
+	    for (int i = 0; i < mpi_size; i++)
+	    	metadata[i] = sz_others.get(i);
+
+	    //Select hyperslab in the file.
+	    file_dataspace_id = H5Dget_space(file_dataset);
+	    H5Sselect_hyperslab(file_dataspace_id, H5S_SELECT_SET, offset, NULL, count, block);
+
+	    file_dataspace_id_2 = H5Dget_space(file_dataset_2);
+
+
+	    //Create property list for collective dataset write.
+	    plist_id = H5Pcreate(H5P_DATASET_XFER);
+	    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+		//Write a data set to a file
+		H5Dwrite(file_dataset, H5T_NATIVE_CHAR, mem_dataspace_id, file_dataspace_id, plist_id, (const char *)pmem.getPointer());
+
+		//Write a data set 2 to a file
+		H5Dwrite(file_dataset_2, H5T_NATIVE_INT, H5S_ALL, file_dataspace_id_2, plist_id, metadata);
+/*
+		for (size_t i = 0; i < gdb_ext.size(); i++)
+		{
+			Box<dim,long int> box = gdb_ext.get(i).Dbox;
+			std::cout << "Dboxes saved: (" << box.getLow(0) << "; " << box.getLow(1) << "); (" << box.getHigh(0) << "; " << box.getHigh(1) << ")" << std::endl;
+		}
+
+		for (size_t i = 0; i < loc_grid.size(); i++)
+		{
+			std::cout << "loc_grids saved: (" << loc_grid.get(i).getGrid().getBox().getLow(0) << "; " << loc_grid.get(i).getGrid().getBox().getLow(1) << "); (" << loc_grid.get(i).getGrid().getBox().getHigh(0) << "; " << loc_grid.get(i).getGrid().getBox().getHigh(1) << ")" << std::endl;
+		}
+*/
+	    //Close/release resources.
+	    H5Dclose(file_dataset);
+	    H5Sclose(file_dataspace_id);
+	    H5Dclose(file_dataset_2);
+	    H5Sclose(file_dataspace_id_2);
+	    H5Sclose(mem_dataspace_id);
+	    H5Pclose(plist_id);
+	    H5Fclose(file);
+
+	    mem.decRef();
+	    delete &mem;
+	}
+
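+	/*! \brief Load one packed block (the contribution of one processor of the original run) from the HDF5 file
+	 *
+	 * \param bid id of the block to load, or -1 to take part in the collective read with an empty selection
+	 * \param mpi_size_old number of processors that produced the file
+	 * \param metadata_out packed size of each block, as read from the "metadata" dataset
+	 * \param metadata_accum prefix sum of metadata_out, i.e. the offset of each block
+	 * \param plist_id HDF5 transfer property list (collective I/O)
+	 * \param dataset_2 HDF5 dataset containing the packed grids
+	 *
+	 */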
+	void load_block(long int bid, hssize_t mpi_size_old, int * metadata_out, openfpm::vector<size_t> metadata_accum, hid_t plist_id, hid_t dataset_2)
+	{
+/*	  	if (mpi_size >= mpi_size_old)
+	  	{
+			if (mpi_rank >= mpi_size_old)
+				block[0] = 0;
+			else
+				block[0] = {(size_t)metadata_out[mpi_rank]};
+	  	}
+	  	else
+	  	{
+	  		int x = mpi_size_old/mpi_size;
+	  		int shift = mpi_rank*x;
+	  		for (int i = 0; i < x; i++)
+	  		{
+	  			//block0.get(mpi_rank).add(metadata_out[shift]);
+	  			block[0] += metadata_out[shift];
+	  			shift++;
+	  		}
+	  		int y = mpi_size_old%mpi_size;
+	  		if (mpi_rank < y)
+	  		{
+				block_add[0] += metadata_out[mpi_size*x+mpi_rank];
+				//block_add0.get(mpi_rank).add(metadata_out[mpi_size*x+mpi_rank]);
+	  		}
+	  	}*/
+
+//		std::cout << "BID: " << bid << std::endl;
+
+		hsize_t offset[1];
+		hsize_t block[1];
+
+		if (bid < mpi_size_old && bid != -1)
+		{
+			offset[0] = metadata_accum.get(bid);
+			block[0] = metadata_out[bid];
+		}
+		else
+		{
+			offset[0] = 0;
+			block[0] = 0;
+		}
+
+//		std::cout << "Offset: " << offset[0] << "; Block: " << block[0]<<  std::endl;
+//	    hsize_t offset_add[1] = {0};
+
+/*	    if (mpi_size >= mpi_size_old)
+		{
+			if (mpi_rank >= mpi_size_old)
+				offset[0] = 0;
+			else
+			{
+				for (int i = 0; i < mpi_rank; i++)
+				offset[0] += metadata_out[i];
+			}
+		}
+	    else
+	    {
+	  		int x = mpi_size_old/mpi_size;
+	  		int shift = mpi_rank*x;
+
+	  		for (int i = 0; i < shift; i++)
+	  		{
+	  			offset[0] += metadata_out[i];
+	  			//offset0.get(mpi_rank).add(metadata_out[i]);
+	  		}
+
+	  		int y = mpi_size_old%mpi_size;
+	  		if (mpi_rank < y)
+	  		{
+	  			for (int i = 0; i < mpi_size*x + mpi_rank; i++)
+	  			{
+	  				offset_add[0] += metadata_out[i];
+	  				//offset_add0.get(mpi_rank).add(metadata_out[i]);
+	  			}
+	  		}
+	    }*/
+
+	    //hsize_t stride[1] = {1};
+	    hsize_t count[1] = {1};
+
+	    //std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Offset_add: " << offset_add[0] << ", Block: " << block[0] << ", Block_add: " << block_add[0] << std::endl;
+/*
+	    std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << std::endl;
+	    for (size_t i = 0; i < offset0.get(mpi_rank).size(); i++)
+	    	std::cout << ", Offset: " << offset0.get(mpi_rank).get(i) << std::endl;
+		for (size_t i = 0; i < offset_add0.get(mpi_rank).size(); i++)
+			std::cout << ", Offset_add: " << offset_add0.get(mpi_rank).get(i) << std::endl;
+		for (size_t i = 0; i < block0.get(mpi_rank).size(); i++)
+			std::cout << ", Block: " << block0.get(mpi_rank).get(i) << std::endl;
+		for (size_t i = 0; i < block_add0.get(mpi_rank).size(); i++)
+			std::cout << ", Block_add: " << block_add0.get(mpi_rank).get(i) << std::endl;
+*/
+
+		//Select file dataspace
+		hid_t file_dataspace_id_2 = H5Dget_space(dataset_2);
+
+        H5Sselect_hyperslab(file_dataspace_id_2, H5S_SELECT_SET, offset, NULL, count, block);
+
+		//Select file dataspace
+/*		hid_t file_dataspace_id_3 = H5Dget_space(dataset_2);
+
+        H5Sselect_hyperslab(file_dataspace_id_3, H5S_SELECT_SET, offset_add, NULL, count, block_add);*/
+
+        hsize_t mdim_2[1] = {block[0]};
+//        hsize_t mdim_3[1] = {block_add[0]};
+
+
+		//Size for data space in memory
+
+		/*if (mpi_rank >= mpi_size_old)
+			mdim_2[0] = 0;
+		else
+			mdim_2[0] = metadata_out[mpi_rank];*/
+
+		//Create data space in memory
+		hid_t mem_dataspace_id_2 = H5Screate_simple(1, mdim_2, NULL);
+//		hid_t mem_dataspace_id_3 = H5Screate_simple(1, mdim_3, NULL);
+
+		//if (mpi_rank == 0)
+	/*	{
+			hssize_t size2;
+
+			size2 = H5Sget_select_npoints (mem_dataspace_id_2);
+			printf ("\nLOAD: memspace_id_2 size: %llu\n", size2);
+			size2 = H5Sget_select_npoints (file_dataspace_id_2);
+			printf ("LOAD: dataspace_id_2 size: %llu\n", size2);
+		}*/
+/*
+		if (mpi_rank == 0)
+		{
+			hssize_t size2;
+
+			size2 = H5Sget_select_npoints (mem_dataspace_id_3);
+			printf ("\nLOAD: memspace_id_3 size: %llu\n", size2);
+			size2 = H5Sget_select_npoints (file_dataspace_id_3);
+			printf ("LOAD: dataspace_id_3 size: %llu\n", size2);
+		}
+*/
+		size_t sum = 0;
+
+		for (int i = 0; i < mpi_size_old; i++)
+		{
+			sum += metadata_out[i];
+		}
+
+	//	std::cout << "LOAD: sum: " << sum << std::endl;
+
+		// allocate the memory
+		HeapMemory pmem;
+//		HeapMemory pmem2;
+		//pmem.allocate(req);
+		ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(block[0],pmem));
+		mem.incRef();
+//		ExtPreAlloc<HeapMemory> & mem2 = *(new ExtPreAlloc<HeapMemory>(block_add[0],pmem2));
+//		mem2.incRef();
+
+	  	// Read the dataset.
+	    H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_2, file_dataspace_id_2, plist_id, (char *)mem.getPointer());
+
+	    // Read the dataset.
+//		H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_3, file_dataspace_id_3, plist_id, (char *)mem2.getPointer());
+
+		mem.allocate(pmem.size());
+//		mem2.allocate(pmem2.size());
+	//	std::cout << "Mem.size(): " << mem.size() << " = " << block[0] << std::endl;
+
+		Unpack_stat ps;
+
+		openfpm::vector<device_grid> loc_grid_old_unp;
+		openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_old_unp;
+
+		Unpacker<decltype(loc_grid_old),HeapMemory>::unpack(mem,loc_grid_old_unp,ps,1);
+		Unpacker<decltype(gdb_ext_old),HeapMemory>::unpack(mem,gdb_ext_old_unp,ps,1);
+/*
+		std::cout << "Loc_grid_old.size() before merge: " << loc_grid_old.size() << std::endl;
+		std::cout << "Gdb_ext_old.size() before merge: " << gdb_ext_old.size() << std::endl;
+
+		std::cout << "Loc_grid_old_unp.size() before merge: " << loc_grid_old_unp.size() << std::endl;
+		std::cout << "Gdb_ext_old_unp.size() before merge: " << gdb_ext_old_unp.size() << std::endl;
+*/
+		for (size_t i = 0; i < loc_grid_old_unp.size(); i++)
+			loc_grid_old.add(loc_grid_old_unp.get(i));
+
+		for (size_t i = 0; i < gdb_ext_old_unp.size(); i++)
+			gdb_ext_old.add(gdb_ext_old_unp.get(i));
+
+//		std::cout << "Loc_grid_old.size() after merge: " << loc_grid_old.size() << std::endl;
+//		std::cout << "Gdb_ext_old.size() after merge: " << gdb_ext_old.size() << std::endl;
+//		std::cout << "*********************************" << std::endl;
+
+		mem.decRef();
+		delete &mem;
+
+	}
+
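+	/*! \brief Load the grid state from an HDF5 file produced by save()
+	 *
+	 * The packed blocks written by the original processors are distributed among the
+	 * current processors (the number of processors may differ from the one used for the
+	 * save), unpacked into the old local grids and finally redistributed with map()
+	 *
+	 * \param filename HDF5 file to read
+	 *
+	 */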
+	inline void load(const std::string & filename)
+	{
+		MPI_Comm comm = v_cl.getMPIComm();
+		MPI_Info info  = MPI_INFO_NULL;
+
+		int mpi_rank = v_cl.getProcessUnitID();
+		//int mpi_size = v_cl.getProcessingUnits();
+
+		// Set up file access property list with parallel I/O access
+		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
+		H5Pset_fapl_mpio(plist_id, comm, info);
+
+		//Open a file
+	    hid_t file = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, plist_id);
+	    H5Pclose(plist_id);
+
+	    //Open dataset
+	    hid_t dataset = H5Dopen (file, "metadata", H5P_DEFAULT);
+
+	    //Create property list for collective dataset read
+	  	plist_id = H5Pcreate(H5P_DATASET_XFER);
+	  	H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+		//Select file dataspace
+		hid_t file_dataspace_id = H5Dget_space(dataset);
+
+		hssize_t mpi_size_old = H5Sget_select_npoints (file_dataspace_id);
+
+		//if (mpi_rank == 0)
+			//printf ("\nOld MPI size: %llu\n", mpi_size_old);
+
+	  	//Where to read metadata
+	  	int metadata_out[mpi_size_old];
+
+	  	for (int i = 0; i < mpi_size_old; i++)
+	  	{
+	  		metadata_out[i] = 0;
+	  	}
+
+		//Size for data space in memory
+		hsize_t mdim[1] = {(size_t)mpi_size_old};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL);
+
+/*
+		if (mpi_rank == 0)
+		{
+			hssize_t size;
+
+			size = H5Sget_select_npoints (mem_dataspace_id);
+			printf ("\nmemspace_id size: %llu\n", size);
+			size = H5Sget_select_npoints (file_dataspace_id);
+			printf ("dataspace_id size: %llu\n", size);
+		}
+*/
+	  	// Read the dataset.
+	    H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace_id, file_dataspace_id, plist_id, metadata_out);
+/*
+		if (mpi_rank == 0)
+		{
+			std::cout << "Metadata_out[]: ";
+			for (int i = 0; i < mpi_size_old; i++)
+			{
+				std::cout << metadata_out[i] << " ";
+			}
+			std::cout << " " << std::endl;
+		}
+*/
+
+	    openfpm::vector<size_t> metadata_accum;
+	    metadata_accum.resize(mpi_size_old);
+
+	    metadata_accum.get(0) = 0;
+	    for (int i = 1 ; i < mpi_size_old ; i++)
+	    	metadata_accum.get(i) = metadata_accum.get(i-1) + metadata_out[i-1];
+
+	    //Open dataset
+	    hid_t dataset_2 = H5Dopen (file, "grid_dist", H5P_DEFAULT);
+
+	    //Create property list for collective dataset read
+	  	plist_id = H5Pcreate(H5P_DATASET_XFER);
+	  	H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+	  	/////////////////////////////////////
+
+	  	openfpm::vector<size_t> n_block;
+	  	n_block.resize(v_cl.getProcessingUnits());
+
+
+	  	for(size_t i = 0 ; i < n_block.size() ; i++)
+	  		n_block.get(i) = mpi_size_old / v_cl.getProcessingUnits();
+
+	  	size_t rest_block = mpi_size_old % v_cl.getProcessingUnits();
+
+	  //	std::cout << "MPI size old: " << mpi_size_old << std::endl;
+	  	//std::cout << "MPI size: " << v_cl.getProcessingUnits() << std::endl;
+
+
+	  //	std::cout << "Rest block: " << rest_block << std::endl;
+
+	  	size_t max_block;
+
+	  	if (rest_block != 0)
+	  		max_block = n_block.get(0) + 1;
+	  	else
+	  		max_block = n_block.get(0);
+
+	  	//for(size_t i = 0 ; i < n_block.size() ; i++)
+	  	for(size_t i = 0 ; i < rest_block ; i++)
+	  		n_block.get(i) += 1;
+
+
+	  	//for(size_t i = 0 ; i < n_block.size() ; i++)
+	  		//std::cout << "n_block.get(i): " << n_block.get(i) << std::endl;
+
+	  	size_t start_block = 0;
+	  	size_t stop_block = 0;
+
+
+	  	if (v_cl.getProcessUnitID() != 0)
+	  	{
+			for(size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
+				start_block += n_block.get(i);
+	  	}
+
+	  	stop_block = start_block + n_block.get(v_cl.getProcessUnitID());
+
+//	  	std::cout << "ID: " << v_cl.getProcessUnitID() << "; Start block: " << start_block << "; " << "Stop block: " << stop_block << std::endl;
+
+	  	if (mpi_rank >= mpi_size_old)
+	  		load_block(start_block,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+	  	else
+	  	{
+	  		size_t n_bl = 0;
+	  		size_t lb = start_block;
+			for ( ; lb < stop_block ; lb++, n_bl++)
+				load_block(lb,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+
+			if (n_bl < max_block)
+				load_block(-1,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+	  	}
+
+	  	////////////////////////////////////
+
+		//std::cout << "LOAD: sum: " << sum << std::endl;
+
+	    // Close the dataset.
+	    H5Dclose(dataset);
+	    H5Dclose(dataset_2);
+	    // Close the file.
+	    H5Fclose(file);
+	    H5Pclose(plist_id);
+
+		// Map the distributed grid
+		map();
+/*
+		for (size_t i = 0; i < loc_grid.size(); i++)
+		{
+			std::cout << "loc_grids loaded: (" << loc_grid.get(i).getGrid().getBox().getLow(0) << "; " << loc_grid.get(i).getGrid().getBox().getLow(1) << "); (" << loc_grid.get(i).getGrid().getBox().getHigh(0) << "; " << loc_grid.get(i).getGrid().getBox().getHigh(1) << ")" << std::endl;
+		}*/
+	}
+
 	//! Define friend classes
 	//\cond
 	friend grid_dist_id<dim,St,T,typename Decomposition::extended_type,Memory,device_grid>;
diff --git a/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cfb08eee597377cd3b7f9fd82ed8f9ac7f17b3ea
--- /dev/null
+++ b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp
@@ -0,0 +1,174 @@
+/*
+ * grid_dist_id_HDF5_chckpnt_restart_test.hpp
+ *
+ *  Created on: Nov 9, 2016
+ *      Author: Yaroslav Zaluzhnyi
+ */
+
+#ifndef SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_
+#define SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_
+
+#include "Grid/grid_dist_id.hpp"
+
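+// These tests write a distributed 2D grid to an HDF5 checkpoint and read it back,
+// verifying that every grid point keeps the value it was assigned before the save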
+BOOST_AUTO_TEST_SUITE( gd_hdf5_chckpnt_rstrt_test )
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_save_test )
+{
+
+	// Input data
+	size_t k = 2400;
+
+	float ghost_part = 0.0;
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	Vcluster & v_cl = create_vcluster();
+
+	// Skip this test on big scale
+	if (v_cl.getProcessingUnits() >= 32)
+		return;
+
+	if (v_cl.getProcessUnitID() == 0)
+			std::cout << "Saving Distributed 2D Grid..." << std::endl;
+
+	// grid size
+	size_t sz[2];
+	sz[0] = k;
+	sz[1] = k;
+
+	// Ghost
+	Ghost<2,float> g(ghost_part);
+
+	// Distributed grid with id decomposition
+	grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g);
+
+	// get the decomposition
+	auto & dec = g_dist.getDecomposition();
+
+	// check the consistency of the decomposition
+	bool val = dec.check_consistency();
+	BOOST_REQUIRE_EQUAL(val,true);
+
+	size_t count = 0;
+
+	auto it = g_dist.getDomainIterator();
+
+	while (it.isNext())
+	{
+		//key
+		auto key = it.get();
+
+		auto keyg = g_dist.getGKey(key);
+
+		g_dist.template get<0>(key) = keyg.get(0);
+
+		++it;
+		count++;
+	}
+
+	std::cout << "Count: " << count << std::endl;
+
+	openfpm::vector<size_t> count_total;
+	v_cl.allGather(count,count_total);
+	v_cl.execute();
+
+	size_t sum = 0;
+
+	for (size_t i = 0; i < count_total.size(); i++)
+		sum += count_total.get(i);
+
+	std::cout << "Sum: " << sum << std::endl;
+
+	timer t;
+	t.start();
+	// Save the grid
+    g_dist.save("grid_dist_id.h5");
+	t.stop();
+
+	std::cout << "Saving time: " << t.getwct() << std::endl;
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test )
+{
+
+	// Input data
+	size_t k = 2400;
+
+	float ghost_part = 0.0;
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	Vcluster & v_cl = create_vcluster();
+
+	// Skip this test on big scale
+	if (v_cl.getProcessingUnits() >= 32)
+		return;
+
+	if (v_cl.getProcessUnitID() == 0)
+		std::cout << "Loading Distributed 2D Grid..." << std::endl;
+
+	// grid size
+	size_t sz[2];
+	sz[0] = k;
+	sz[1] = k;
+
+	// Ghost
+	Ghost<2,float> g(ghost_part);
+
+	// Distributed grid with id decomposition
+	grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g);
+
+	g_dist.getDecomposition().write("Before_load_grid_decomposition");
+	g_dist.write("Before_Loaded_grid");
+
+	timer t;
+	t.start();
+	// Load the grid
+	g_dist.load("grid_dist_id.h5");
+	t.stop();
+
+	g_dist.write("Loaded_grid");
+	g_dist.getDecomposition().write("Loaded_grid_decomposition");
+
+	std::cout << "Loading time: " << t.getwct() << std::endl;
+
+	auto it = g_dist.getDomainIterator();
+
+	size_t count = 0;
+
+	while (it.isNext())
+	{
+		//key
+		auto key = it.get();
+
+		//BOOST_CHECK_CLOSE(g_dist.template get<0>(key),1,0.0001);
+		//std::cout << "Element: " << g_dist.template get<0>(key) << std::endl;
+
+		auto keyg = g_dist.getGKey(key);
+
+		BOOST_REQUIRE_EQUAL(g_dist.template get<0>(key), keyg.get(0));
+
+		++it;
+		count++;
+	}
+
+	std::cout << "COOOOOOUNT: " << count << std::endl;
+
+	openfpm::vector<size_t> count_total;
+	v_cl.allGather(count,count_total);
+	v_cl.execute();
+
+	size_t sum = 0;
+
+	for (size_t i = 0; i < count_total.size(); i++)
+		sum += count_total.get(i);
+
+	BOOST_REQUIRE_EQUAL(sum, (size_t)k*k);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_ */
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d4309776938fc06faaa406ae46dd8a6e148b22b4
--- /dev/null
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -0,0 +1,476 @@
+/*
+ * grid_dist_id_comm.hpp
+ *
+ *  Created on: Nov 13, 2016
+ *      Author: yaroslav
+ */
+
+#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
+#define SRC_GRID_GRID_DIST_ID_COMM_HPP_
+
+#include "Vector/vector_dist_ofb.hpp"
+#include "data_type/scalar.hpp"
+
+
+/*! \brief This class is a helper for the communication of grid_dist_id
+ *
+ * \tparam dim Dimensionality of the grid
+ * \tparam St Type of space where the grid is living
+ * \tparam T object the grid is storing
+ * \tparam Decomposition Class that decomposes the grid, for example CartDecomposition
+ * \tparam Memory Is the allocator
+ * \tparam device_grid type of the base structure that is going to store the data
+ *
+ * \see grid_dist_id
+ *
+ */
+
+template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
+class grid_dist_id_comm
+{
+	//! VCluster
+	Vcluster & v_cl;
+
+	//! For each processor id, the position of the corresponding send request in the map procedure
+	openfpm::vector<size_t> p_map_req;
+
+	//! Stores the list of processors that communicate with us (local processor)
+	openfpm::vector<size_t> prc_recv_map;
+
+	//! Stores the size of the elements added for each processor that communicate with us (local processor)
+	openfpm::vector<size_t> recv_sz_map;
+
+	//! For each near processor, outgoing intersection grid
+	//! \warning m_oGrid is assumed to be an ordered list
+	//! first id is grid
+	//! second id is the processor id
+	openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid;
+
+public:
+
+	/*! \brief Reconstruct the local grids
+	 *
+	 * \param m_oGrid_recv Vector of labeled grids to combine into the local grids
+	 * \param loc_grid local grids to be filled
+	 * \param gdb_ext information about the local grids (domain and ghost + domain)
+	 * \param cd_sm Cell-decomposer of the grid
+	 */
+	inline void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv, openfpm::vector<device_grid> & loc_grid, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm)
+	{
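+		// For every received grid, find the local sub-domain that contains it (using the
+		// centre of its bounding box) and copy its values into the corresponding local grid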
+		size_t count2 = 0;
+		for (size_t a = 0; a < m_oGrid_recv.size(); a++)
+		{
+			for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
+			{
+				device_grid g = m_oGrid_recv.get(a).template get<0>(k);
+
+				size_t count = 0;
+
+
+				auto it = g.getIterator();
+
+				while (it.isNext())
+				{
+					//auto key = it.get();
+
+					//if (g.template get<0>(key) != 1)
+						//std::cout << "WRONG???????" << std::endl;
+
+					++it;
+					count++;
+				}
+
+				SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k);
+
+				//device_grid gr_send(sz);
+				//gr_send.setMemory();
+
+				//std::cout << "B: (" << b.getLow(0) << "; " << b.getLow(1) << "); (" << b.getHigh(0) << "; " << b.getHigh(1) << "); " << "G: (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl;
+
+				// Set the dimensions of the local grid
+				//g.resize(l_res);
+
+				Point<dim,St> p;
+				for (size_t n = 0; n < dim; n++)
+					p.get(n) = g.getGrid().getBox().getHigh(n);
+
+				//std::cout << "G after resize: (" << g.getGrid().getBox().getLow(0) << "; " << g.getGrid().getBox().getLow(1) << "); (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl;
+
+				Point<dim,St> point;
+				for (size_t n = 0; n < dim; n++)
+					point.get(n) = (b.getHigh(n) + b.getLow(n))/2;
+
+				for (size_t j = 0; j < gdb_ext.size(); j++)
+				{
+					// Local sub-domain
+					SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox;
+					sub += gdb_ext.get(j).origin;
+
+					if (sub.isInside(point) == true)
+					{
+						grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
+						grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
+
+						std::string start2 = start.to_string();
+						std::string stop2 = stop.to_string();
+
+						auto it = loc_grid.get(j).getSubIterator(start,stop);
+
+						// Copy selected elements into a local grid
+						while (it.isNext())
+						{
+							auto key = it.get();
+							std::string str = key.to_string();
+							grid_key_dx<dim> key2 = key - start;
+
+							//std::cout << "Key: " << str << std::endl;
+							loc_grid.get(j).get_o(key) = g.get_o(key2);
+							count2++;
+
+							++it;
+						}
+					}
+				}
+			}
+		}
+		//std::cout << "Count after: " << count2 << std::endl;
+	}
+
+
+	/*! \brief Reconstruct the local grids (overload that also takes the list of involved processors)
+	 *
+	 * \param m_oGrid_recv Vector of labeled grids to combine into the local grids
+	 * \param prc_r ranks of the processors involved in the communication
+	 *
+	 * \note currently a placeholder, the reconstruction is done by the other overload
+	 */
+	inline void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv, openfpm::vector<device_grid> & loc_grid, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<size_t> & prc_r)
+	{
+
+	}
+
+	/*! \brief Label the intersection grids with the processor they have to be sent to
+	 *
+	 * For every old local sub-domain the intersections with the new (global) decomposition
+	 * are copied into small grids and stored in lbl_b under the destination processor id.
+	 *
+	 * \param lbl_b labeled grids (output), one vector per destination processor
+	 * \param prc_sz for each processor the number of grids to send to it
+	 */
+	inline void labelIntersectionGridsProcessor(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global, openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & lbl_b, openfpm::vector<size_t> & prc_sz)
+	{
+		// resize the label buffer
+		lbl_b.resize(v_cl.getProcessingUnits());
+
+		size_t count2 = 0;
+
+		// Label all the intersection grids with the processor id where they should go
+
+		for (size_t i = 0; i < gdb_ext_old.size(); i++)
+		{
+			// Local old sub-domain in global coordinates
+			SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
+			sub_dom += gdb_ext_old.get(i).origin;
+
+			for (size_t j = 0; j < gdb_ext_global.size(); j++)
+			{
+				size_t p_id = 0;
+
+				// Intersection box
+				SpaceBox<dim,long int> inte_box;
+
+				// Global new sub-domain in global coordinates
+				SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
+				sub_dom_new += gdb_ext_global.get(j).origin;
+
+				bool intersect = false;
+
+				if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
+					intersect = sub_dom.Intersect(sub_dom_new, inte_box);
+
+				if (intersect == true)
+				{
+					//// DEBUG/////
+					count2++;
+					//////////////
+
+					//std::cout << "Inte_box: (" << inte_box.getLow(0) << "; " << inte_box.getLow(1) << "); (" << inte_box.getHigh(0) << "; " << inte_box.getHigh(1) << ")" << std::endl;
+
+					auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);
+
+					// Get processor ID that store intersection box
+					Point<dim,St> p;
+					for (size_t n = 0; n < dim; n++)
+						p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;
+
+					//std::cout << "Point: (" << p.get(0) << "; " << p.get(1) << ")" << std::endl;
+
+					p_id = dec.processorID(p);
+					prc_sz.get(p_id)++;
+
+					//std::cout << "P_id: " << p_id << std::endl;
+
+					// Transform coordinates to local
+					auto inte_box_local = inte_box;
+
+					inte_box_local -= gdb_ext_old.get(i).origin;
+
+					//std::cout << "gdb_ext_old.get(i): (" << sub_dom.getLow(0) << "; " << sub_dom.getLow(1) << "); (" << sub_dom.getHigh(0) << "; " << sub_dom.getHigh(1) << ")" << std::endl;
+
+					//std::cout << "gdb_ext_global.get(j): (" << sub_dom_new.getLow(0) << "; " << sub_dom_new.getLow(1) << "); (" << sub_dom_new.getHigh(0) << "; " << sub_dom_new.getHigh(1) << ")" << std::endl;
+
+					//std::cout << "Inte_box_local: (" << inte_box_local.getLow(0) << "; " << inte_box_local.getLow(1) << "); (" << inte_box_local.getHigh(0) << "; " << inte_box_local.getHigh(1) << ")" << std::endl;
+
+					// Grid corresponding for gdb_ext_old.get(i) box
+					device_grid & gr = loc_grid_old.get(i);
+
+					//std::cout << "loc_grid_old.get(i): (" << gr.getGrid().getBox().getLow(0) << "; " << gr.getGrid().getBox().getLow(1) << "); (" << gr.getGrid().getBox().getHigh(0) << "; " << gr.getGrid().getBox().getHigh(1) << ")" << std::endl;
+
+					//for (size_t l = 0; l < dim; l++)
+						//std::cout << "loc_grid_old.get(i).size on " << l << " dimension: " << gr.getGrid().size(l) << std::endl;
+					// Size of the grid to send
+					size_t sz[dim];
+					for (size_t l = 0; l < dim; l++)
+					{
+						sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
+						//std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl;
+					}
+
+					// Grid to send
+					device_grid gr_send(sz);
+					gr_send.setMemory();
+
+					// Sub iterator across intersection box inside local grid
+					grid_key_dx<dim> start = inte_box_local.getKP1();
+					grid_key_dx<dim> stop = inte_box_local.getKP2();
+
+					Point<dim,St> p1;
+					for (size_t n = 0; n < dim; n++)
+						p1.get(n) = gr_send.getGrid().getBox().getLow(n);
+
+					//std::cout << "Grid send P1: (" << p1.get(0) << "; " << p1.get(1) << ")" << std::endl;
+
+					Point<dim,St> p2;
+					for (size_t n = 0; n < dim; n++)
+						p2.get(n) = gr_send.getGrid().getBox().getHigh(n);
+
+					//std::cout << "Grid send P2: (" << p2.get(0) << "; " << p2.get(1) << ")" << std::endl;
+/*
+					Point<dim,St> p3;
+					for (size_t n = 0; n < dim; n++)
+						p3.get(n) = gr.getGrid().getBox().getLow(n);
+
+					std::cout << "Grid local P1: (" << p3.get(0) << "; " << p3.get(1) << ")" << std::endl;
+
+					Point<dim,St> p4;
+					for (size_t n = 0; n < dim; n++)
+						p4.get(n) = gr.getGrid().getBox().getHigh(n);
+
+					std::cout << "Grid local P2: (" << p4.get(0) << "; " << p4.get(1) << ")" << std::endl;
+
+*/
+					std::string start2 = start.to_string();
+					std::string stop2 = stop.to_string();
+
+					//std::cout << "Start: " << start2 << "; Stop: " << stop2 << std::endl;
+
+					auto it = gr.getSubIterator(start,stop);
+
+					// Copy selected elements into a new sub-grid
+					while (it.isNext())
+					{
+						auto key = it.get();
+						grid_key_dx<dim> key2 = key - start;
+						std::string str = key.to_string();
+
+						//std::cout << "Key: " << str << std::endl;
+						gr_send.get_o(key2) = gr.get_o(key);
+/*
+						////////// DEBUG ///////////////
+						if (gr.template get<0>(key) == 1)
+						{
+							count++;
+						}
+						else if (gr_send.template get<0>(key2) != 1)
+						{
+							std::cout << "AHHHHHHHHHH????????" << std::endl;
+						}
+*/
+						////////////////
+
+						//gr_send.set(key,gr,key);
+
+						++it;
+					}
+
+					aggregate<device_grid,SpaceBox<dim,long int>> aggr;
+
+					aggr.template get<0>() = gr_send;
+					aggr.template get<1>() = inte_box;
+
+					// Add to the labeling vector
+					lbl_b.get(p_id).add(aggr);
+				}
+			}
+		}
+		//std::cout << "Count for points: " << count << std::endl;
+		//std::cout << "Count for inte_boxes: " << count2 << std::endl;
+	}
+
+	/*! \brief Moves all the grid parts that do not belong to the local processor to the respective processors
+	 *
+	 * It is called when the decomposition changes (for example after a load): the pieces of
+	 * the old local grids are labeled with their destination processor, exchanged, and the
+	 * new local grids are reconstructed from the received pieces.
+	 *
+	 * \param dec new decomposition
+	 * \param cd_sm Cell-decomposer of the grid
+	 * \param loc_grid new local grids (output)
+	 * \param loc_grid_old old local grids
+	 * \param gdb_ext information about the new local grids
+	 * \param gdb_ext_old information about the old local grids
+	 * \param gdb_ext_global information about the new grids of all processors
+	 *
+	 */
+	void map_(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global)
+	{
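+		// The mapping is done in three steps: label the pieces of the old grids with their
+		// destination processor, exchange them with SSendRecv, and reconstruct the new
+		// local grids from what has been received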
+		// Processor communication size
+		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
+
+		// Contains the processor id of each box (basically where they have to go)
+		labelIntersectionGridsProcessor(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,m_oGrid,prc_sz);
+/*
+		for (size_t i = 0; i < m_oGrid.size(); i++)
+		{
+			for (size_t k = 0; k < m_oGrid.get(i).size(); k++)
+			{
+				device_grid g = m_oGrid.get(i).template get<0>(k);
+
+				auto it = g.getIterator();
+
+				while (it.isNext())
+				{
+					auto key = it.get();
+
+					if (g.template get<0>(key) != 1)
+						std::cout << "WROOOOOOONG" << std::endl;
+
+					++it;
+				}
+			}
+		}
+*/
+
+		// Calculate the sending buffer size for each processor, put this information in
+		// a contiguous buffer
+		p_map_req.resize(v_cl.getProcessingUnits());
+
+		// Vector of number of sending grids for each involved processor
+		openfpm::vector<size_t> prc_sz_r;
+		// Vector of ranks of involved processors
+		openfpm::vector<size_t> prc_r;
+
+		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+		{
+			if (m_oGrid.get(i).size() != 0)
+			{
+				p_map_req.get(i) = prc_r.size();
+				prc_r.add(i);
+				prc_sz_r.add(m_oGrid.get(i).size());
+			}
+		}
+/*
+		for (size_t i = 0; i < m_oGrid.size(); i++)
+		{
+			if(m_oGrid.get(i).size() == 0)
+				m_oGrid.remove(i);
+		}
+*/
+
+		decltype(m_oGrid) m_oGrid_new;
+		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
+		{
+			if (m_oGrid.get(i).size() != 0)
+				m_oGrid_new.add(m_oGrid.get(i));
+		}
+
+		// Vector for receiving of intersection grids
+		openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid_recv;
+
+		//std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_r.size(): " << prc_r.size() << std::endl;
+
+		//std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; m_oGrid_new.size(): " << m_oGrid_new.size() << std::endl;
+/*
+		for (size_t i = 0; i < m_oGrid.size(); i++)
+		{
+			std::cout << "Processor ID:" << v_cl.getProcessUnitID() << "; I: " << i << ", Size: " << m_oGrid.get(i).size() << std::endl;
+		}
+*/
+/*
+		for (size_t i = 0; i < m_oGrid_new.size(); i++)
+		{
+			for (size_t k = 0; k < m_oGrid_new.get(i).size(); k++)
+			{
+				device_grid g = m_oGrid_new.get(i).template get<0>(k);
+
+				auto it = g.getIterator();
+
+				while (it.isNext())
+				{
+					auto key = it.get();
+
+					if (g.template get<0>(key) != 1)
+						std::cout << "WRONG BEFORE SENDRCV" << std::endl;
+
+					++it;
+				}
+			}
+		}
+*/
+
+		// Send and receive the intersection grids
+		v_cl.SSendRecv(m_oGrid_new,m_oGrid_recv,prc_r,prc_recv_map,recv_sz_map);
+/*
+		for (size_t i = 0; i < m_oGrid_recv.size(); i++)
+		{
+			for (size_t k = 0; k < m_oGrid_recv.get(i).size(); k++)
+			{
+				device_grid g = m_oGrid_recv.get(i).template get<0>(k);
+
+				auto it = g.getIterator();
+
+				while (it.isNext())
+				{
+					auto key = it.get();
+
+					if (g.template get<0>(key) != 1)
+						std::cout << "WRONG AFTER SENDRCV" << std::endl;
+
+					++it;
+				}
+			}
+		}
+*/
+/*
+		std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; m_oGrid_recv.size(): " << m_oGrid_recv.size() << std::endl;
+
+		for (size_t i = 0; i < m_oGrid_recv.size(); i++)
+		{
+			std::cout << "Processor ID:" << v_cl.getProcessUnitID() << "; I_recv: " << i << ", Size: " << m_oGrid_recv.get(i).size() << std::endl;
+		}
+
+		for (size_t i = 0; i < prc_r.size(); i++)
+			std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_r: " << prc_r.get(i) << std::endl;
+
+		for (size_t i = 0; i < prc_recv_map.size(); i++)
+			std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_recv_map: " << prc_recv_map.get(i) << std::endl;
+
+		for (size_t i = 0; i < recv_sz_map.size(); i++)
+			std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; recv_sz_map: " << recv_sz_map.get(i) << std::endl;
+*/
+		// Reconstruct the new local grids
+		grids_reconstruct(m_oGrid_recv,loc_grid,gdb_ext,cd_sm);
+	}
+
+	/*! \brief Constructor
+	 *
+	 */
+	grid_dist_id_comm()
+	:v_cl(create_vcluster())
+	{
+
+	}
+};
+
+
+#endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */
diff --git a/src/Vector/performance/vector_dist_performance_util.hpp b/src/Vector/performance/vector_dist_performance_util.hpp
index 7cf20686b7012fd88746fe85d2e0173bf3087247..216af97e812a8d20b5bc62d8c6708924fc23976b 100644
--- a/src/Vector/performance/vector_dist_performance_util.hpp
+++ b/src/Vector/performance/vector_dist_performance_util.hpp
@@ -15,6 +15,73 @@
 #define N_VERLET_TEST 3
 #define N_STAT_TEST 30
 
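+/*! \brief Update the warning level of a performance test
+ *
+ * The candidate level is -1 (more than 2 sigma faster than the reference), 0 (within
+ * 2 sigma), 1 (between 2 and 3 sigma slower) or 2 (more than 3 sigma slower); the
+ * warning level is only ever raised, never lowered
+ *
+ * \param warning_level warning level to update
+ * \param mean measured mean
+ * \param mean_ref reference mean
+ * \param sigma standard deviation of the reference
+ *
+ */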
+static inline void warning_set(int & warning_level, double mean, double mean_ref, double sigma)
+{
+	int warning_level_candidate;
+
+	if (mean - mean_ref < -2.0*sigma )
+		warning_level_candidate = -1;
+	else if (mean - mean_ref < 2.0*sigma)
+		warning_level_candidate = 0;
+	else if (mean - mean_ref < 3.0*sigma)
+		warning_level_candidate = 1;
+	else
+		warning_level_candidate = 2;
+
+	if (warning_level_candidate > warning_level)
+		warning_level = warning_level_candidate;
+}
+
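+/*! \brief Set the chart-area options of a GoogleChart according to the warning level
+ *
+ * \param chart_area option string to fill (green border for level -1, yellow for 1, red for 2)
+ * \param lvl warning level
+ *
+ */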
+static inline void addchartarea(std::string & chart_area, int lvl)
+{
+	std::string color;
+
+	if (lvl == -1)
+	{
+		chart_area = std::string(",chartArea: {\
+		    backgroundColor: {\
+		        stroke: '#00FF00',\
+		        strokeWidth: 6\
+		    }\
+		}");
+	}
+	else if (lvl == 0)
+	{
+		// NOTHING TO DO
+	}
+	else if (lvl == 1)
+	{
+		chart_area = std::string(",chartArea: {\
+		    backgroundColor: {\
+		        stroke: '#FFFF00',\
+		        strokeWidth: 6\
+		    }\
+		}");
+	}
+	else if (lvl == 2)
+	{
+		chart_area = std::string(",chartArea: {\
+		    backgroundColor: {\
+		        stroke: '#FF0000',\
+		        strokeWidth: 6\
+		    }\
+		}");
+	}
+
+}
+
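+/*! \brief Add an "Updated: day/month/year  hour:min:sec" line to the performance report
+ *
+ * \param cg GoogleChart object the HTML is added to
+ *
+ */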
+static inline void addUpdtateTime(GoogleChart & cg)
+{
+    time_t t = time(0);   // get time now
+    struct tm * now = localtime( & t );
+
+    std::stringstream str;
+
+    str << "<h3>Updated: " << now->tm_mday << "/" << now->tm_mon + 1 << "/" << now->tm_year+1900 << "     " << now->tm_hour << ":" << now->tm_min << ":" << now->tm_sec << "</h3>" << std::endl;
+
+    cg.addHTML(str.str());
+}
+
 /*! \brief Standard deviation
  *
  * \param measures set of measures
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index 057070eb816d6c2aa554d5356ff07d5874022925..87c6eaf87cb5c24dcda0ca1f16a397c9211c6b39 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -8,6 +8,8 @@
 #ifndef VECTOR_HPP_
 #define VECTOR_HPP_
 
+#include "../../openfpm_data/src/NN/CellList/CellListFast_gen.hpp"
+#include "../../openfpm_data/src/NN/CellList/CellListFast_gen.hpp"
 #include "HDF5_XdmfWriter/HDF5_XdmfWriter.hpp"
 #include "VCluster/VCluster.hpp"
 #include "Space/Shape/Point.hpp"
@@ -16,7 +18,6 @@
 #include "Vector/vector_dist_key.hpp"
 #include "memory/PtrMemory.hpp"
 #include "NN/CellList/CellList.hpp"
-#include "NN/CellList/CellListFast_hilb.hpp"
 #include "util/common.hpp"
 #include "util/object_util.hpp"
 #include "memory/ExtPreAlloc.hpp"
@@ -31,6 +32,8 @@
 #include "NN/VerletList/VerletList.hpp"
 #include "vector_dist_comm.hpp"
 #include "DLB/LB_Model.hpp"
+#include "NN/CellList/ParticleIt_Cells.hpp"
+#include "NN/CellList/ProcKeys.hpp"
 
 #define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error");
 
@@ -78,8 +81,8 @@ struct gcl
 };
 
 //! General function t get a cell-list
-template<unsigned int dim, typename St, typename Vector>
-struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector>
+template<unsigned int dim, typename St, typename Vector, typename Mem_type>
+struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, St> >,Vector>
 {
 	/*! \brief Get the Cell list based on the type
 	 *
@@ -90,7 +93,7 @@ struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector>
 	 * \return the constructed cell-list
 	 *
 	 */
-	static inline CellList_hilb<dim, St, FAST, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
+	static inline CellList_gen<dim, St, Process_keys_hilb, Mem_type, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
 	{
 		return vd.getCellList_hilb(r_cut,g);
 	}
@@ -672,7 +675,7 @@ public:
 	 * \return the Cell list
 	 *
 	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellListSym(St r_cut)
+	template<typename CellL = CellList<dim, St, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellListSym(St r_cut)
 	{
 #ifdef SE_CLASS1
 		if ((opt & BIND_DEC_TO_GHOST))
@@ -702,33 +705,6 @@ public:
 		return cell_list;
 	}
 
-	/*! \brief return the neighborhood cells of a cells to do symmetric interactions
-	 *
-	 * \warning Used in in combination of getNNIteratorSym in a Cell-list
-	 *
-	 * \return the neighborhood cells of a cell
-	 *
-	 *
-	 */
-/*	const openfpm::vector<subsub_lin<dim>> & getNNCells(size_t cell) const
-	{
-		return getDecomposition().getDomainCellNNSym();
-	}*/
-
-	/*! \brief Construct a cell list symmetric based on a cut of radius
-	 *
-	 * \tparam CellL CellList type to construct
-	 *
-	 * \param r_cut interation radius, or size of each cell
-	 *
-	 * \return the Cell list
-	 *
-	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellListSymNoBind(St r_cut)
-	{
-		return getCellList(r_cut);
-	}
-
 
 	/*! \brief Construct a cell list starting from the stored particles
 	 *
@@ -739,7 +715,7 @@ public:
 	 * \return the Cell list
 	 *
 	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut)
+	template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList(St r_cut)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -764,7 +740,7 @@ public:
 	 * \return the Cell list
 	 *
 	 */
-	template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut)
+	template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList_hilb(St r_cut)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -787,7 +763,7 @@ public:
 	 * \param cell_list Cell list to update
 	 *
 	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellList(CellL & cell_list)
+	template<typename CellL> void updateCellList(CellL & cell_list)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -824,7 +800,7 @@ public:
 	 * \param cell_list Cell list to update
 	 *
 	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellListSym(CellL & cell_list)
+	template<typename CellL = CellList<dim, St, Mem_fast<dim,St>, shift<dim, St> > > void updateCellListSym(CellL & cell_list)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -869,7 +845,7 @@ public:
 	 * \return the CellList
 	 *
 	 */
-	template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge)
+	template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -886,7 +862,7 @@ public:
 		// Processor bounding box
 		cl_param_calculate(pbox, div, r_cut, enlarge);
 
-		cell_list.Initialize(pbox, div);
+		cell_list.Initialize(pbox, div, g_m);
 		cell_list.set_ndec(getDecomposition().get_ndec());
 
 		updateCellList(cell_list);
@@ -909,7 +885,7 @@ public:
 	 * \return The Cell-list
 	 *
 	 */
-	template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge)
+	template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge)
 	{
 #ifdef SE_CLASS3
 		se3.getNN();
@@ -1128,7 +1104,7 @@ public:
 	 * \param m an order of a hilbert curve
 	 *
 	 */
-	template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder (int32_t m)
+	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<dim,St>,shift<dim,St> > > void reorder (int32_t m)
 	{
 		reorder(m,getDecomposition().getGhost());
 	}
@@ -1146,7 +1122,7 @@ public:
 	 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
 	 *
 	 */
-	template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge)
+	template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<dim,St>,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge)
 	{
 		// reset the ghost part
 		v_pos.resize(g_m);
@@ -1170,7 +1146,7 @@ public:
 			div[i] = 1 << m;
 		}
 
-		cell_list.Initialize(pbox,div);
+		cell_list.Initialize(pbox,div,g_m);
 
 		// for each particle add the particle to the cell list
 
@@ -1572,6 +1548,517 @@ public:
 		dec.getDistribution().setDistTol(md.distributionTol());
 	}
 
+	inline void save(const std::string & filename) const
+	{
+		//Pack_request vector
+		size_t req = 0;
+
+		//std::cout << "V_pos.size() before save: " << v_pos.size() << std::endl;
+
+		//Pack request
+		Packer<decltype(v_pos),HeapMemory>::packRequest(v_pos,req);
+		Packer<decltype(v_prp),HeapMemory>::packRequest(v_prp,req);
+
+		std::cout << "Req: " << req << std::endl;
+
+		// allocate the memory
+		HeapMemory pmem;
+		//pmem.allocate(req);
+		ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(req,pmem));
+		mem.incRef();
+
+		//Packing
+
+		Pack_stat sts;
+
+		Packer<decltype(v_pos),HeapMemory>::pack(mem,v_pos,sts);
+		Packer<decltype(v_prp),HeapMemory>::pack(mem,v_prp,sts);
+
+	    /*****************************************************************
+	     * Create a new file with default creation and access properties.*
+	     * Then create a dataset and write data to it and close the file *
+	     * and dataset.                                                  *
+	     *****************************************************************/
+
+		int mpi_rank = v_cl.getProcessUnitID();
+		int mpi_size = v_cl.getProcessingUnits();
+
+		MPI_Comm comm = v_cl.getMPIComm();
+		MPI_Info info  = MPI_INFO_NULL;
+	    // Set up file access property list with parallel I/O access
+
+		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
+		H5Pset_fapl_mpio(plist_id, comm, info);
+
+		// Create a new file collectively and release property list identifier.
+		hid_t file = H5Fcreate (filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
+		H5Pclose(plist_id);
+
+		size_t sz = pmem.size();
+		openfpm::vector<size_t> sz_others;
+		v_cl.allGather(sz,sz_others);
+		v_cl.execute();
+
+		size_t sum = 0;
+
+		for (size_t i = 0; i < sz_others.size(); i++)
+			sum += sz_others.get(i);
+
+		//Size for data space in file
+		hsize_t fdim[1] = {sum};
+
+		//Size for metadata data space in file
+		hsize_t fdim2[1] = {(size_t)mpi_size};
+
+		//Create data space in file
+		hid_t file_dataspace_id = H5Screate_simple(1, fdim, NULL);
+
+		//Create data space in file
+		hid_t file_dataspace_id_2 = H5Screate_simple(1, fdim2, NULL);
+
+		//Size for data space in memory
+		hsize_t mdim[1] = {pmem.size()};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL);
+
+		//Create data set in file
+		hid_t file_dataset = H5Dcreate (file, "vector_dist", H5T_NATIVE_CHAR, file_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+		//Create data set 2 in file
+		hid_t file_dataset_2 = H5Dcreate (file, "metadata", H5T_NATIVE_INT, file_dataspace_id_2, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+	    H5Sclose(file_dataspace_id);
+	    H5Sclose(file_dataspace_id_2);
+
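+	    // Every processor writes one block of pmem.size() byte; its offset inside
+	    // the dataset is the sum of the block sizes of the lower-rank processors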
+	    hsize_t block[1] = {pmem.size()};
+	    hsize_t count[1] = {1};
+	    hsize_t offset[1] = {0};
+
+	    for (int i = 0; i < mpi_rank; i++)
+	    	offset[0] += sz_others.get(i);
+
+	    int metadata[mpi_size];
+
+	    for (int i = 0; i < mpi_size; i++)
+	    	metadata[i] = sz_others.get(i);
+
+	    //Select hyperslab in the file.
+	    file_dataspace_id = H5Dget_space(file_dataset);
+	    H5Sselect_hyperslab(file_dataspace_id, H5S_SELECT_SET, offset, NULL, count, block);
+
+	    file_dataspace_id_2 = H5Dget_space(file_dataset_2);
+
+
+	    //Create property list for collective dataset write.
+	    plist_id = H5Pcreate(H5P_DATASET_XFER);
+	    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+		//Write the packed particle data (each processor its own hyperslab)
+		H5Dwrite(file_dataset, H5T_NATIVE_CHAR, mem_dataspace_id, file_dataspace_id, plist_id, (const char *)pmem.getPointer());
+
+		//Write the metadata: the packed-buffer size of every processor
+		H5Dwrite(file_dataset_2, H5T_NATIVE_INT, H5S_ALL, file_dataspace_id_2, plist_id, metadata);
+
+	    //Close/release resources.
+	    H5Dclose(file_dataset);
+	    H5Sclose(file_dataspace_id);
+	    H5Dclose(file_dataset_2);
+	    H5Sclose(file_dataspace_id_2);
+	    H5Sclose(mem_dataspace_id);
+	    H5Pclose(plist_id);
+	    H5Fclose(file);
+
+	    // Release the packing buffer
+	    mem.decRef();
+	    delete &mem;
+	}
+
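+	/*! \brief Load one block of the saved vector and append its particles to v_pos / v_prp
+	 *
+	 * \param bid block to load (a negative or out-of-range id produces an empty read)
+	 * \param mpi_size_old number of processors of the run that saved the file
+	 * \param metadata_out size in byte of every saved block
+	 * \param metadata_accum offset in byte of every saved block inside the dataset
+	 * \param plist_id dataset transfer property list (collective MPI-IO)
+	 * \param dataset_2 dataset containing the packed particle data
+	 *
+	 */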
+	void load_block(long int bid, hssize_t mpi_size_old, int * metadata_out, openfpm::vector<size_t> metadata_accum, hid_t plist_id, hid_t dataset_2)
+	{
+
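+		// Offset and size (in byte) of the block 'bid' inside the "vector_dist" dataset;
+		// an invalid id selects an empty block, so the collective read still takes place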
+		hsize_t offset[1];
+		hsize_t block[1];
+
+		if (bid < mpi_size_old && bid != -1)
+		{
+			offset[0] = metadata_accum.get(bid);
+			block[0] = metadata_out[bid];
+		}
+		else
+		{
+			offset[0] = 0;
+			block[0] = 0;
+		}
+
+	    hsize_t count[1] = {1};
+
+		//Select file dataspace
+		hid_t file_dataspace_id_2 = H5Dget_space(dataset_2);
+
+        H5Sselect_hyperslab(file_dataspace_id_2, H5S_SELECT_SET, offset, NULL, count, block);
+
+		//Size for data space in memory
+		hsize_t mdim_2[1] = {block[0]};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id_2 = H5Screate_simple(1, mdim_2, NULL);
+
+		// allocate the memory
+		HeapMemory pmem;
+		ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(block[0],pmem));
+		mem.incRef();
+
+	  	// Read (collectively) the block of packed data assigned to this processor
+	    H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_2, file_dataspace_id_2, plist_id, (char *)mem.getPointer());
+
+	    H5Sclose(mem_dataspace_id_2);
+	    H5Sclose(file_dataspace_id_2);
+
+		mem.allocate(pmem.size());
+
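+		// Unpack the block into temporary vectors and append them to the local particle set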
+		Unpack_stat ps;
+
+		openfpm::vector<Point<dim, St>> v_pos_unp;
+
+		openfpm::vector<prop> v_prp_unp;
+
+		Unpacker<decltype(v_pos_unp),HeapMemory>::unpack(mem,v_pos_unp,ps,1);
+		Unpacker<decltype(v_prp_unp),HeapMemory>::unpack(mem,v_prp_unp,ps,1);
+
+		mem.decRef();
+		delete &mem;
+
+		for (size_t i = 0; i < v_pos_unp.size(); i++)
+			v_pos.add(v_pos_unp.get(i));
+
+		for (size_t i = 0; i < v_prp_unp.size(); i++)
+			v_prp.add(v_prp_unp.get(i));
+
+		g_m = v_pos.size();
+	}
+
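+	/*! \brief Load the distributed vector from an HDF5 file produced by save()
+	 *
+	 * The file may have been written by a different number of processors: the saved
+	 * blocks (one per processor of the previous run) are redistributed across the
+	 * current processors and unpacked into v_pos / v_prp.
+	 *
+	 * \param filename file to load
+	 *
+	 */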
+	inline void load(const std::string & filename)
+	{
+		v_pos.clear();
+		v_prp.clear();
+
+		g_m = 0;
+
+		MPI_Comm comm = v_cl.getMPIComm();
+		MPI_Info info  = MPI_INFO_NULL;
+
+		int mpi_rank = v_cl.getProcessUnitID();
+		//int mpi_size = v_cl.getProcessingUnits();
+
+		// Set up file access property list with parallel I/O access
+		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
+		H5Pset_fapl_mpio(plist_id, comm, info);
+
+		//Open a file
+	    hid_t file = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, plist_id);
+	    H5Pclose(plist_id);
+
+	    //Open dataset
+	    hid_t dataset = H5Dopen (file, "metadata", H5P_DEFAULT);
+
+	    //Create property list for collective dataset read
+	  	plist_id = H5Pcreate(H5P_DATASET_XFER);
+	  	H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+		//Select file dataspace
+		hid_t file_dataspace_id = H5Dget_space(dataset);
+
+		hssize_t mpi_size_old = H5Sget_select_npoints (file_dataspace_id);
+	  	//Metadata: the packed-buffer size of every processor of the previous run
+	  	int metadata_out[mpi_size_old];
+
+	  	for (int i = 0; i < mpi_size_old; i++)
+	  	{
+	  		metadata_out[i] = 0;
+	  	}
+
+		//Size for data space in memory
+		hsize_t mdim[1] = {(size_t)mpi_size_old};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL);
+	  	// Read the metadata collectively
+	    H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace_id, file_dataspace_id, plist_id, metadata_out);
+
+	    H5Sclose(mem_dataspace_id);
+	    H5Sclose(file_dataspace_id);
+
+	    openfpm::vector<size_t> metadata_accum;
+	    metadata_accum.resize(mpi_size_old);
+
+	    metadata_accum.get(0) = 0;
+	    for (int i = 1 ; i < mpi_size_old ; i++)
+	    	metadata_accum.get(i) = metadata_accum.get(i-1) + metadata_out[i-1];
+
+	    //Open the dataset with the packed particle data, reusing the collective transfer property list
+	    hid_t dataset_2 = H5Dopen (file, "vector_dist", H5P_DEFAULT);
+
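+	  	// Distribute the mpi_size_old saved blocks across the current processors:
+	  	// every processor gets mpi_size_old / N blocks and the first mpi_size_old % N
+	  	// processors (N = current number of processors) get one extra block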
+	  	openfpm::vector<size_t> n_block;
+	  	n_block.resize(v_cl.getProcessingUnits());
+
+	  	for(size_t i = 0 ; i < n_block.size() ; i++)
+	  		n_block.get(i) = mpi_size_old / v_cl.getProcessingUnits();
+
+	  	size_t rest_block = mpi_size_old % v_cl.getProcessingUnits();
+
+	  	size_t max_block;
+
+	  	if (rest_block != 0)
+	  		max_block = n_block.get(0) + 1;
+	  	else
+	  		max_block = n_block.get(0);
+
+	  	for(size_t i = 0 ; i < rest_block ; i++)
+	  		n_block.get(i) += 1;
+
+	  	size_t start_block = 0;
+	  	size_t stop_block = 0;
+
+	  	for(size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
+	  		start_block += n_block.get(i);
+
+	  	stop_block = start_block + n_block.get(v_cl.getProcessUnitID());
+
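+	  	// Every processor reads its assigned blocks; a processor that owns fewer than
+	  	// max_block blocks issues one extra empty read, so all the processors take part
+	  	// in the same number of collective H5Dread calls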
+	  	if (mpi_rank >= mpi_size_old)
+	  		load_block(start_block,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+	  	else
+	  	{
+	  		size_t n_bl = 0;
+	  		size_t lb = start_block;
+			for ( ; lb < stop_block ; lb++, n_bl++)
+				load_block(lb,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+
+			if (n_bl < max_block)
+				load_block(-1,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+	  	}
+
+	    // Close the dataset.
+	    H5Dclose(dataset);
+	    H5Dclose(dataset_2);
+	    // Close the file.
+	    H5Fclose(file);
+	    H5Pclose(plist_id);
+
+		// Map particles
+		//map();
+	}
+
 	/*! \brief Output particle position and properties
 	 *
 	 * \param out output
diff --git a/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp b/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c5da9f9b057b66e4833294420ddac61f1540a28
--- /dev/null
+++ b/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp
@@ -0,0 +1,204 @@
+/*
+ * vector_dist_HDF5_chckpnt_restart_test.hpp
+ *
+ *  Created on: Jun 12, 2016
+ *      Author: Yaroslav Zaluzhnyi
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_
+#define SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_
+
+#include "vector_dist.hpp"
+#include "Packer_Unpacker/Pack_selector.hpp"
+#include "Packer_Unpacker/Packer.hpp"
+#include "Packer_Unpacker/Unpacker.hpp"
+#include "Vector/performance/vector_dist_performance_util.hpp"
+
+
+#include "hdf5.h"
+
+BOOST_AUTO_TEST_SUITE( vd_hdf5_chckpnt_rstrt_test )
+
+// Input data
+
+// Number of particles
+size_t k = 1000000;
+
+// Dimensionality
+const size_t dim = 3;
+
+BOOST_AUTO_TEST_CASE( vector_dist_hdf5_save_test )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessUnitID() == 0)
+		std::cout << "Saving distributed vector" << std::endl;
+
+	Box<dim,float> box;
+
+	for (size_t i = 0; i < dim; i++)
+	{
+		box.setLow(i,0.0);
+		box.setHigh(i,1.0);
+	}
+
+	// Boundary conditions
+	size_t bc[dim];
+
+	const size_t Ng = cbrt(k);
+
+	// we create a Grid iterator
+	size_t sz[dim] = {Ng,Ng,Ng};
+
+	for (size_t i = 0; i < dim; i++)
+		bc[i] = NON_PERIODIC;
+
+	// ghost
+	Ghost<dim,float> ghost(1.0/(Ng-2));
+
+	vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost);
+
+	// Put particles
+
+	auto it = vd.getGridIterator(sz);
+
+	while (it.isNext())
+	{
+		vd.add();
+
+		auto key = it.get();
+
+		vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
+		vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
+		vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);
+
+		++it;
+	}
+
+	//BOOST_REQUIRE_EQUAL(it.getSpacing(0),1.0f/(Ng-1));
+	//BOOST_REQUIRE_EQUAL(it.getSpacing(1),1.0f/(Ng-1));
+	//BOOST_REQUIRE_EQUAL(it.getSpacing(2),1.0f/(Ng-1));
+
+	vd.map();
+
+	// Put forces
+
+	auto it2 = vd.getDomainIterator();
+
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		//Put the forces
+		for (size_t i = 0; i < dim; i++)
+			vd.template getProp<0>(key)[i] = 0.51234 + vd.getPos(key)[0] + vd.getPos(key)[1]+ vd.getPos(key)[2];
+
+		++it2;
+	}
+
+	timer t;
+	t.start();
+	// Save the vector
+	vd.save("vector_dist.h5");
+	t.stop();
+
+	std::cout << "Saving time: " << t.getwct() << std::endl;
+}
+
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_hdf5_load_test )
+{
+	Vcluster & v_cl = create_vcluster();
+
+	if (v_cl.getProcessUnitID() == 0)
+		std::cout << "Loading distributed vector" << std::endl;
+
+	Box<dim,float> box;
+
+	for (size_t i = 0; i < dim; i++)
+	{
+		box.setLow(i,0.0);
+		box.setHigh(i,1.0);
+	}
+
+	// Boundary conditions
+	size_t bc[dim];
+
+	for (size_t i = 0; i < dim; i++)
+		bc[i] = NON_PERIODIC;
+
+
+	const size_t Ng = cbrt(k);
+
+	// we create a Grid iterator
+	size_t sz[dim] = {Ng,Ng,Ng};
+
+	// ghost
+	Ghost<dim,float> ghost(1.0/(Ng-2));
+
+	vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost);
+
+	timer t;
+	t.start();
+	// Load the vector
+	vd.load("vector_dist.h5");
+	t.stop();
+
+	std::cout << "Loading time: " << t.getwct() << std::endl;
+
+	/////////////////// Checking data ///////////////////////
+
+	// Check total number of particles
+	size_t n_part = vd.size_local();
+	openfpm::vector<size_t> tot_n_part;
+	v_cl.allGather(n_part,tot_n_part);
+	v_cl.execute();
+
+	size_t sum = 0;
+
+	for (size_t i = 0; i < tot_n_part.size(); i++)
+		sum += tot_n_part.get(i);
+
+	BOOST_REQUIRE_EQUAL(sum,k);
+
+    // Check spacing (positions)
+
+	auto it = vd.getGridIterator(sz);
+
+	BOOST_REQUIRE_EQUAL(it.getSpacing(0),1.0f/(Ng-1));
+	BOOST_REQUIRE_EQUAL(it.getSpacing(1),1.0f/(Ng-1));
+	BOOST_REQUIRE_EQUAL(it.getSpacing(2),1.0f/(Ng-1));
+
+
+	// Check properties
+
+	auto it2 = vd.getDomainIterator();
+
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		//Check the force property stored before the save
+		for (size_t i = 0; i < dim; i++)
+			BOOST_CHECK_CLOSE(vd.template getProp<0>(key)[i],0.51234 + vd.getPos(key)[0] + vd.getPos(key)[1]+ vd.getPos(key)[2],0.0001);
+
+		++it2;
+	}
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+
+#endif /* SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ */
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index cba0f756a41f0a2918ec3803de866308ad9a386c..5acc8093cd51101b70ac03d1f3eb27a479c6aac4 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -1064,7 +1064,7 @@ void Test_interacting(Box<3,float> & box)
 
 				Point<3,float> xp = vd.getPos(p);
 
-				auto Np = NN.getIterator(NN.getCell(xp));
+				auto Np = NN.getCellIterator(NN.getCell(xp));
 
 				while (Np.isNext())
 				{
diff --git a/src/main.cpp b/src/main.cpp
index 38e281838cccc35b3937f7b02a370ba7f7a2051d..6bdee796eaec9c4315bb4e5f10342068a0e1b7ea 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -46,6 +46,11 @@ int main(int argc, char* argv[])
 #include "Decomposition/Distribution/metis_util_unit_test.hpp"
 #include "dec_optimizer_unit_test.hpp"
 #include "Vector/vector_dist_unit_test.hpp"
+#include "Vector/vector_dist_HDF5_chckpnt_restart_test.hpp"
+#include "Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp"
+#ifdef PERFORMANCE_TEST
+#include "pdata_performance.hpp"
+#endif
 #include "Decomposition/Distribution/Distribution_unit_tests.hpp"
 #include "Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp"
 //#include "DLB/DLB_unit_test.hpp"
diff --git a/src/pdata_performance.cpp b/src/pdata_performance.cpp
index ec7027f26451dd9530cf52c5e758a0bee8e8d5da..4137197a0214c242b1c1247ed35dd96186ceb1d6 100644
--- a/src/pdata_performance.cpp
+++ b/src/pdata_performance.cpp
@@ -32,73 +32,6 @@ boost::property_tree::ptree pt;
 
 BOOST_AUTO_TEST_SUITE( performance )
 
-static inline void warning_set(int & warning_level, double mean, double mean_ref, double sigma)
-{
-	int warning_level_candidate;
-
-	if (mean - mean_ref < -2.0*sigma )
-		warning_level_candidate = -1;
-	else if (mean - mean_ref < 2.0*sigma)
-		warning_level_candidate = 0;
-	else if (mean - mean_ref < 3.0*sigma)
-		warning_level_candidate = 1;
-	else
-		warning_level_candidate = 2;
-
-	if (warning_level_candidate > warning_level)
-		warning_level = warning_level_candidate;
-}
-
-static inline void addchartarea(std::string & chart_area, int lvl)
-{
-	std::string color;
-
-	if (lvl == -1)
-	{
-		chart_area = std::string(",chartArea: {\
-		    backgroundColor: {\
-		        stroke: '#00FF00',\
-		        strokeWidth: 6\
-		    }\
-		}");
-	}
-	else if (lvl == 0)
-	{
-		// NOTHING TO DO
-	}
-	else if (lvl == 1)
-	{
-		chart_area = std::string(",chartArea: {\
-		    backgroundColor: {\
-		        stroke: '#FFFF00',\
-		        strokeWidth: 6\
-		    }\
-		}");
-	}
-	else if (lvl == 2)
-	{
-		chart_area = std::string(",chartArea: {\
-		    backgroundColor: {\
-		        stroke: '#FF0000',\
-		        strokeWidth: 6\
-		    }\
-		}");
-	}
-
-}
-
-void addUpdtateTime(GoogleChart & cg)
-{
-    time_t t = time(0);   // get time now
-    struct tm * now = localtime( & t );
-
-    std::stringstream str;
-
-    str << "<h3>Updated: " << now->tm_mday << "/" << now->tm_mon + 1 << "/" << now->tm_year+1900 << "     " << now->tm_hour << ":" << now->tm_min << ":" << now->tm_sec << std::endl;
-
-	cg.addHTML(str.str());
-}
-
 #include "Vector/performance/vector_dist_performance_util.hpp"
 #include "Vector/performance/verlet_performance_tests.hpp"
 #include "Vector/performance/cell_list_part_reorder.hpp"