diff --git a/CHANGELOG.md b/CHANGELOG.md
index de399263a465615738f454fd106f2d636ee5b9f3..62f1c56e62ec98e752de829d8d925e4bff9c64f1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
 
 - Installation/detection of PETSC
 - 2D Fixing IO in binary for vector
+- 1D Fixing grid writer in ASCII mode
 
 ## [1.0.0] 13 September 2017
 
diff --git a/example/Grid/0_simple/main.cpp b/example/Grid/0_simple/main.cpp
index 30ae5a9472b27365b89de2a6e0e4a5147fdcdafa..339d420e1d5b4d7738ec5c0d259ec0f5b6e8b232 100644
--- a/example/Grid/0_simple/main.cpp
+++ b/example/Grid/0_simple/main.cpp
@@ -9,6 +9,7 @@
  * \subpage Grid_2_solve_eq
  * \subpage Grid_3_gs
  * \subpage Grid_3_gs_3D
+ * \subpage Grid_3_gs_3D_vector
  *
  */
 
diff --git a/example/Grid/3_gray_scott_3d/Makefile b/example/Grid/3_gray_scott_3d/Makefile
index 170e428569ba200362dfa7bbc4a5d533f4006696..04db7e20fa78c9a519f254e71d61f27e47df69f8 100644
--- a/example/Grid/3_gray_scott_3d/Makefile
+++ b/example/Grid/3_gray_scott_3d/Makefile
@@ -7,7 +7,7 @@ LDIR =
 OBJ = main.o
 
 %.o: %.cpp
-	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+	$(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
 gray_scott: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
diff --git a/example/Grid/3_gray_scott_3d/main.cpp b/example/Grid/3_gray_scott_3d/main.cpp
index 88e814cba1bc08693a77912e74c9049be081d104..7c7e8f4f2b842b709bd80b4413273f0d3943881e 100644
--- a/example/Grid/3_gray_scott_3d/main.cpp
+++ b/example/Grid/3_gray_scott_3d/main.cpp
@@ -6,6 +6,8 @@
  *
  * \page Grid_3_gs_3D Gray Scott in 3D
  *
+ * [TOC]
+ *
  * # Solving a gray scott-system in 3D # {#e3_gs_gray_scott}
  *
  * This example is just an extension of the 2D Gray scott example.
@@ -17,9 +19,25 @@
  * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
  * \endhtmlonly
  *
+ * This example is essentially the previous example adapted to 3D,
+ * with the improvement of using a stencil iterator.
+ *
+ * ## Stencil iterator {#e3_gs_grat_scott_si}
+ *
+ * A stencil iterator requires that you define a stencil,
+ *
+ * \snippet Grid/3_gray_scott_3d/main.cpp stencil def
+ *
+ * once the stencil is defined it is
+ * possible to get and use a stencil iterator
+ *
+ * \snippet Grid/3_gray_scott_3d/main.cpp stencil get and use
+ *
+ * The rest of the example remains the same, with the exception
+ * that the code has been extended to 3D.
+ *
  * \see \ref Grid_2_solve_eq
  *
- * \snippet Grid/3_gray_scott/main.cpp constants
  * 
  */
 
@@ -110,26 +128,10 @@ int main(int argc, char* argv[])
         double K = 0.053;
         double F = 0.014;
 
-	//! \cond [init lib] \endcond
-
-	/*!
-	 * \page Grid_3_gs_3D Gray Scott in 3D
-	 *
-	 * Here we create 2 distributed grid in 2D Old and New. In particular because we want that
-	 * the second grid is distributed across processors in the same way we pass the decomposition
-	 * of the Old grid to the New one in the constructor with **Old.getDecomposition()**. Doing this,
-	 * we force the two grid to have the same decomposition.
-	 *
-	 * \snippet Grid/3_gray_scott/main.cpp init grid
-	 *
-	 */
-
-	//! \cond [init grid] \endcond
-
 	grid_dist_id<3, double, aggregate<double,double>> Old(sz,domain,g,bc);
 
 	// New grid with the decomposition of the old grid
-        grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g);
+    grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g);
 
 	
 	// spacing of the grid on x and y
@@ -149,33 +151,39 @@ int main(int argc, char* argv[])
 	timer tot_sim;
 	tot_sim.start();
 
+	//! \cond [stencil def] \endcond
+
 	static grid_key_dx<3> star_stencil_3D[7] = {{0,0,0},
                                          	    {0,0,-1},
-						    {0,0,1},
-						    {0,-1,0},
-						    {0,1,0},
-						    {-1,0,0},
-						    {1,0,0}};
+												{0,0,1},
+												{0,-1,0},
+												{0,1,0},
+												{-1,0,0},
+												{1,0,0}};
+
+	//! \cond [stencil def] \endcond
 
 	for (size_t i = 0; i < timeSteps; ++i)
 	{
 		if (i % 300 == 0)
 			std::cout << "STEP: " << i << std::endl;
 
+		//! \cond [stencil get and use] \endcond
+
 		auto it = Old.getDomainIteratorStencil(star_stencil_3D);
 
 		while (it.isNext())
 		{
 			// center point
-			auto Cp = it.getStencil<0>();
+			auto Cp = it.getStencilGrid<0>();
 
 			// plus,minus X,Y,Z
-			auto mx = it.getStencil<1>();
-			auto px = it.getStencil<2>();
-			auto my = it.getStencil<3>();
-			auto py = it.getStencil<4>();
-			auto mz = it.getStencil<5>();
-			auto pz = it.getStencil<6>();
+			auto mx = it.getStencilGrid<1>();
+			auto px = it.getStencilGrid<2>();
+			auto my = it.getStencilGrid<3>();
+			auto py = it.getStencilGrid<4>();
+			auto mz = it.getStencilGrid<5>();
+			auto pz = it.getStencilGrid<6>();
 
 			// update based on Eq 2
 			New.get<U>(Cp) = Old.get<U>(Cp) + uFactor * (
@@ -206,6 +214,8 @@ int main(int argc, char* argv[])
 			++it;
 		}
 
+		//! \cond [stencil get and use] \endcond
+
 		// Here we copy New into the old grid in preparation of the new step
 		// It would be better to alternate, but using this we can show the usage
 		// of the function copy. To note that copy work only on two grid of the same
@@ -216,11 +226,11 @@ int main(int argc, char* argv[])
 		// After copy we synchronize again the ghost part U and V
 		Old.ghost_get<U,V>();
 
-		// Every 30 time step we output the configuration for
+		// Every 500 time steps we output the configuration for
 		// visualization
-		if (i % 60 == 0)
+		if (i % 500 == 0)
 		{
-			Old.write_frame("output",count,VTK_WRITER | FORMAT_BINARY);
+			Old.save("output_" + std::to_string(count));
 			count++;
 		}
 	}
@@ -246,4 +256,13 @@ int main(int argc, char* argv[])
 	openfpm_finalize();
 
 	//! \cond [finalize] \endcond
+
+	/*!
+	 * \page Grid_3_gs_3D Gray Scott in 3D
+	 *
+	 * # Full code # {#code}
+	 *
+	 * \include Grid/3_gray_scott_3d/main.cpp
+	 *
+	 */
 }
diff --git a/example/Vector/7_SPH_dlb_opt/main.cpp b/example/Vector/7_SPH_dlb_opt/main.cpp
index d576c1a1d0f69647fc4ebe816c8c3585d6e0e43c..dff03e4625269cbb3f4d372fc76f765dc798dd8f 100644
--- a/example/Vector/7_SPH_dlb_opt/main.cpp
+++ b/example/Vector/7_SPH_dlb_opt/main.cpp
@@ -1118,6 +1118,7 @@ int main(int argc, char* argv[])
 		{
 			vd.deleteGhost();
 			vd.write_frame("Geometry",write,VTK_WRITER | FORMAT_BINARY);
+                        vd.getDecomposition().write("dec" + std::to_string(write));
 			vd.ghost_get<type,rho,Pressure,velocity>(SKIP_LABELLING);
 			write++;
 
diff --git a/openfpm_data b/openfpm_data
index f7ca1bc2fe8eeb9c6e3bdf34898341ffa91f8c1c..0ab602dd6a7f7897685651f58ec76eab7be96370 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit f7ca1bc2fe8eeb9c6e3bdf34898341ffa91f8c1c
+Subproject commit 0ab602dd6a7f7897685651f58ec76eab7be96370
diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc
index afd54c557ca81549f429a12fb10d2e8989dde33f..e356c1f644c459d47927b845e8cb1a8d06b43103 100644
--- a/openfpm_pdata.doc
+++ b/openfpm_pdata.doc
@@ -38,7 +38,7 @@ PROJECT_NAME           = "OpenFPM_pdata"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 1.0.0
+PROJECT_NUMBER         = 1.1.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/src/Grid/Iterators/grid_dist_id_iterator.hpp b/src/Grid/Iterators/grid_dist_id_iterator.hpp
index 6cfc5f54a3b08bc9cf95d037c5a764920ec91ac9..612477712a0368ae3fca5c561a15c6f83b343e26 100644
--- a/src/Grid/Iterators/grid_dist_id_iterator.hpp
+++ b/src/Grid/Iterators/grid_dist_id_iterator.hpp
@@ -48,7 +48,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 	size_t g_c;
 
 	//! List of the grids we are going to iterate
-	const openfpm::vector<device_grid> & gList;
+	openfpm::vector<device_grid> & gList;
 
 	//! Extension of each grid: domain and ghost + domain
 	const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext;
@@ -59,13 +59,19 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 	//! stop point (is the grid size)
 	grid_key_dx<dim> stop;
 
+	//! Pointer to the device grid currently selected (set by selectValidGrid)
+	device_grid * dg;
+
 	/*! \brief from g_c increment g_c until you find a valid grid
 	 *
 	 */
 	void selectValidGrid()
 	{
 		// When the grid has size 0 potentially all the other informations are garbage
-		while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++;
+		while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) )
+		{g_c++;}
+		// when no valid grid is left g_c == gList.size(): do not index gList past its end
+		dg = (g_c < gList.size()) ? &gList.get(g_c) : nullptr;
 
 		// get the next grid iterator
 		if (g_c < gList.size())
@@ -83,7 +89,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 	 * \param stop end point
 	 *
 	 */
-	grid_dist_iterator(const openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, const grid_key_dx<dim> & stop)
+	grid_dist_iterator(openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, const grid_key_dx<dim> & stop)
 	:g_c(0),gList(gk),gdb_ext(gdb_ext),stop(stop)
 	{
 		// Initialize the current iterator
@@ -100,7 +106,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 	 * \param stencil_pnt stencil points
 	 *
 	 */
-	grid_dist_iterator(const openfpm::vector<device_grid> & gk,
+	grid_dist_iterator(openfpm::vector<device_grid> & gk,
 			           const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
 					   const grid_key_dx<dim> & stop,
 					   const grid_key_dx<dim> (& stencil_pnt)[stencil::nsp])
@@ -151,7 +157,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 		// If there are no other grid stop
 
 		if (g_c >= gList.size())
-			return false;
+		{return false;}
 
 		return true;
 	}
@@ -240,6 +246,18 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil>
 	{
 		return grid_dist_lin_dx(g_c,a_it.template getStencil<id>());
 	}
+
+	/*! \brief Return the key of the stencil point id, bound to the current device grid
+	 *
+	 * \tparam id index of the stencil point
+	 *
+	 * \return key made of the device grid pointer and the stencil point returned by the sub-iterator
+	 *
+	 */
+	template<unsigned int id> inline grid_dist_g_dx<device_grid> getStencilGrid()
+	{
+		return grid_dist_g_dx<device_grid>(dg,a_it.template getStencil<id>());
+	}
 };
 
 
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 49cb6b5866a30a328df89163530d324dac12db2d..4c3f5c3d8d0c52adaaa1554eb36a137643b90b75 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -1199,7 +1199,7 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
+	template <unsigned int p = 0>inline auto get(const grid_dist_key_dx<dim> & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
 	{
 #ifdef SE_CLASS2
 		check_valid(this,8);
@@ -1215,7 +1215,7 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
+	template <unsigned int p = 0>inline auto get(const grid_dist_key_dx<dim> & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
 	{
 #ifdef SE_CLASS2
 		check_valid(this,8);
@@ -1231,7 +1231,39 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto get(const grid_dist_lin_dx & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
+	template <unsigned int p = 0>inline auto get(grid_dist_g_dx<device_grid> & v1) const -> typename std::add_lvalue_reference<decltype(v1.getSub()->template get<p>(v1.getKey()))>::type
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return v1.getSub()->template get<p>(v1.getKey());
+	}
+
+	/*! \brief Get the reference of the selected element
+	 *
+	 * \tparam p property to get (is an integer)
+	 * \param v1 key carrying the pointer to the local grid and the key of the element inside it
+	 *
+	 * \return the selected element, read from the grid stored in the key
+	 *
+	 */
+	template <unsigned int p = 0>inline auto get(grid_dist_g_dx<device_grid> & v1) -> typename std::add_lvalue_reference<decltype(v1.getSub()->template get<p>(v1.getKey()))>::type
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return v1.getSub()->template get<p>(v1.getKey());
+	}
+
+	/*! \brief Get the reference of the selected element
+	 *
+	 * \tparam p property to get (is an integer)
+	 * \param v1 grid_key that identify the element in the grid
+	 *
+	 * \return the selected element
+	 *
+	 */
+	template <unsigned int p = 0>inline auto get(const grid_dist_lin_dx & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
 	{
 #ifdef SE_CLASS2
 		check_valid(this,8);
@@ -1247,7 +1279,7 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto get(const grid_dist_lin_dx & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
+	template <unsigned int p = 0>inline auto get(const grid_dist_lin_dx & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
 	{
 #ifdef SE_CLASS2
 		check_valid(this,8);
@@ -1263,7 +1295,7 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto getProp(const grid_dist_key_dx<dim> & v1) const -> decltype(this->template get<p>(v1))
+	template <unsigned int p = 0>inline auto getProp(const grid_dist_key_dx<dim> & v1) const -> decltype(this->template get<p>(v1))
 	{
 		return this->template get<p>(v1);
 	}
@@ -1276,7 +1308,7 @@ public:
 	 * \return the selected element
 	 *
 	 */
-	template <unsigned int p>inline auto getProp(const grid_dist_key_dx<dim> & v1) -> decltype(this->template get<p>(v1))
+	template <unsigned int p = 0>inline auto getProp(const grid_dist_key_dx<dim> & v1) -> decltype(this->template get<p>(v1))
 	{
 		return this->template get<p>(v1);
 	}
@@ -1366,6 +1398,12 @@ public:
 																						  	  	  	 g_id_to_internal_ghost_box);
 	}
 
+	// copy bench test
+	double mem_mem_time = 0.0;
+	double mem_ite_time = 0.0;
+
+	int mem_select = 0;
+
 	/*! \brief Copy the give grid into this grid
 	 *
 	 * It copy the first grid into the given grid (No ghost)
@@ -1373,21 +1411,52 @@ public:
 	 * \warning the Decomposition must be ensured to be the same, otherwise crashes can happen, if you want to copy the grid independently from the decomposition please use the operator equal
 	 *
 	 * \param g Grid to copy
+	 * \param use_memcpy use memcpy function if possible
 	 *
 	 * \return itself
 	 *
 	 */
-	grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & copy(grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g)
+	grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & copy(grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g, bool use_memcpy = true)
 	{
-		auto it = this->getDomainIterator();
+		if (T::noPointers() == true && use_memcpy)
+		{
+			for (size_t i = 0 ; i < this->getN_loc_grid() ; i++)
+			{
+				auto & gs_src = this->get_loc_grid(i).getGrid();
 
-		while (it.isNext())
+				long int start = gs_src.LinId(gdb_ext.get(i).Dbox.getKP1());
+				long int stop = gs_src.LinId(gdb_ext.get(i).Dbox.getKP2());
+
+				if (stop < start) {continue;}
+
+				void * dst = static_cast<void *>(static_cast<char *>(this->get_loc_grid(i).getPointer()) + start*sizeof(T));
+				void * src = static_cast<void *>(static_cast<char *>(g.get_loc_grid(i).getPointer()) + start*sizeof(T));
+
+				memcpy(dst,src,sizeof(T) * (stop + 1 - start));
+			}
+		}
+		else
 		{
-			auto key = it.get();
+			grid_key_dx<dim> cnt[1];
+			cnt[0].zero();
+
+			for (size_t i = 0 ; i < this->getN_loc_grid() ; i++)
+			{
+				auto & dst = this->get_loc_grid(i);
+				auto & src = g.get_loc_grid(i);
 
-			this->loc_grid.get(key.getSub()).get_o(key.getKey()) = g.loc_grid.get(key.getSub()).get_o(key.getKey());
+				auto it = this->get_loc_grid_iterator_stencil(i,cnt);
 
-			++it;
+				while (it.isNext())
+				{
+					// center point
+					auto Cp = it.template getStencil<0>();
+
+					dst.get_o(Cp) = src.get_o(Cp);
+
+					++it;
+				}
+			}
 		}
 
 		return *this;
@@ -1510,6 +1579,36 @@ public:
 		return loc_grid.get(i);
 	}
 
+	/*! \brief Get an iterator over the Dbox of the i-th local grid
+	 *
+	 * \param i sub-domain
+	 *
+	 * \return sub-grid iterator (no stencil) spanning from Dbox.getKP1() to Dbox.getKP2()
+	 *
+	 */
+	grid_key_dx_iterator_sub<dim,no_stencil> get_loc_grid_iterator(size_t i)
+	{
+		return grid_key_dx_iterator_sub<dim,no_stencil>(loc_grid.get(i).getGrid(),
+				 gdb_ext.get(i).Dbox.getKP1(),
+				 gdb_ext.get(i).Dbox.getKP2());
+	}
+
+	/*! \brief Get an iterator with stencil offsets over the Dbox of the i-th local grid
+	 *
+	 * \param i sub-domain
+	 * \param stencil_pnt stencil points
+	 *
+	 * \return sub-grid iterator spanning from Dbox.getKP1() to Dbox.getKP2()
+	 */
+	template<unsigned int Np>
+	grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>> get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np])
+	{
+		return grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>>(loc_grid.get(i).getGrid(),
+													 gdb_ext.get(i).Dbox.getKP1(),
+													 gdb_ext.get(i).Dbox.getKP2(),
+													 stencil_pnt);
+	}
+
 	/*! \brief Return the number of local grid
 	 *
 	 * \return the number of local grid
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
index b41d03de4c39d92d105726aab9c122c203b3199b..406468ceedf518161f10ff0d95808d0730941a58 100644
--- a/src/Grid/grid_dist_id_comm.hpp
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -26,7 +26,7 @@ struct grid_unpack_selector_with_prp
 	 * \param ps unpack status
 	 *
 	 */
-	template<template<typename,typename> class op, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims> & sub2, device_grid & gd, Unpack_stat & ps)
+	template<template<typename,typename> class op, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims,stencil_offset_compute<device_grid::dims,1>> & sub2, device_grid & gd, Unpack_stat & ps)
 	{
 		std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
 	}
@@ -48,7 +48,7 @@ struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
 	 * \param ps unpack status
 	 *
 	 */
-	template<template<typename,typename> class op, unsigned int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims> & sub2, device_grid & gd, Unpack_stat & ps)
+	template<template<typename,typename> class op, unsigned int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims,stencil_offset_compute<device_grid::dims,1>> & sub2, device_grid & gd, Unpack_stat & ps)
 	{
 		PtrMemory * ptr1;
 
@@ -84,11 +84,19 @@ struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
 
 		// Merge the information
 
-		auto it_src = gs.getIterator();
+		grid_key_dx<device_grid::dims> cnt[1];
+		cnt[0].zero();
+
+		auto it_src = gs.getIteratorStencil(cnt);
 
 		while (sub2.isNext())
 		{
-			object_s_di_op<op,decltype(gs.get_o(it_src.get())),decltype(gd.get_o(sub2.get())),OBJ_ENCAP,prp...>(gs.get_o(it_src.get()),gd.get_o(sub2.get()));
+			object_s_di_op<op,
+			            decltype(gs.get_o(it_src.template getStencil<0>())),
+						decltype(gd.get_o(sub2.template getStencil<0>())),
+						OBJ_ENCAP,prp...>
+			(gs.get_o(it_src.template getStencil<0>()),
+			 gd.get_o(sub2.template getStencil<0>()));
 
 			++sub2;
 			++it_src;
@@ -123,7 +131,7 @@ struct grid_call_serialize_variadic<device_grid, Memory, index_tuple<prp...>>
 	 * \param ps unpack status
 	 *
 	 */
-	template<template<typename,typename> class op, typename T> inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims> & sub2, device_grid & dg, Unpack_stat & ps)
+	template<template<typename,typename> class op, typename T> inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims,stencil_offset_compute<device_grid::dims,1>> & sub2, device_grid & dg, Unpack_stat & ps)
 	{
 		const bool result = has_pack_gen<typename T::type>::value == false;
 
@@ -148,7 +156,7 @@ struct grid_unpack_with_prp
 	 * \param ps unpack status
 	 *
 	 */
-	template<unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims> & sub2, device_grid & dg, Unpack_stat & ps)
+	template<unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, grid_key_dx_iterator_sub<device_grid::dims,stencil_offset_compute<device_grid::dims,1>> & sub2, device_grid & dg, Unpack_stat & ps)
 	{
 		typedef index_tuple<prp...> ind_prop_to_pack;
 		grid_call_serialize_variadic<device_grid,Memory,ind_prop_to_pack>::template call_unpack<op,T>(recv_buf, sub2, dg, ps);
@@ -239,8 +247,11 @@ class grid_dist_id_comm
 				if (bx_dst.isValid() == false)
 					continue;
 
-				grid_key_dx_iterator_sub<dim> sub_src(loc_grid.get(i).getGrid(),bx_src.getKP1(),bx_src.getKP2());
-				grid_key_dx_iterator_sub<dim> sub_dst(loc_grid.get(sub_id_dst).getGrid(),bx_dst.getKP1(),bx_dst.getKP2());
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_src(loc_grid.get(i).getGrid(),bx_src.getKP1(),bx_src.getKP2(),cnt);
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_dst(loc_grid.get(sub_id_dst).getGrid(),bx_dst.getKP1(),bx_dst.getKP2(),cnt);
 
 #ifdef SE_CLASS1
 
@@ -258,7 +269,7 @@ class grid_dist_id_comm
 				while (sub_src.isNext())
 				{
 					// Option 1
-					gd.set(sub_dst.get(),gs,sub_src.get());
+					gd.set(sub_dst.template getStencil<0>(),gs,sub_src.template getStencil<0>());
 
 					++sub_src;
 					++sub_dst;
@@ -313,8 +324,11 @@ class grid_dist_id_comm
 				if (bx_dst.isValid() == false)
 					continue;
 
-				grid_key_dx_iterator_sub<dim> sub_src(loc_grid.get(i).getGrid(),bx_src.getKP1(),bx_src.getKP2());
-				grid_key_dx_iterator_sub<dim> sub_dst(loc_grid.get(sub_id_dst).getGrid(),bx_dst.getKP1(),bx_dst.getKP2());
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_src(loc_grid.get(i).getGrid(),bx_src.getKP1(),bx_src.getKP2(),cnt);
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_dst(loc_grid.get(sub_id_dst).getGrid(),bx_dst.getKP1(),bx_dst.getKP2(),cnt);
 
 #ifdef SE_CLASS1
 
@@ -332,7 +346,7 @@ class grid_dist_id_comm
 				while (sub_src.isNext())
 				{
 					// write the object in the last element
-					object_s_di_op<op,decltype(gs.get_o(sub_src.get())),decltype(gd.get_o(sub_dst.get())),OBJ_ENCAP,prp...>(gs.get_o(sub_src.get()),gd.get_o(sub_dst.get()));
+					object_s_di_op<op,decltype(gs.get_o(sub_src.template getStencil<0>())),decltype(gd.get_o(sub_dst.template getStencil<0>())),OBJ_ENCAP,prp...>(gs.get_o(sub_src.template getStencil<0>()),gd.get_o(sub_dst.template getStencil<0>()));
 
 					++sub_src;
 					++sub_dst;
@@ -664,8 +678,12 @@ public:
 
 				// Pack a size_t for the internal ghost id
 				Packer<size_t,HeapMemory>::packRequest(req);
+
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// Create a sub grid iterator spanning the internal ghost layer
-				grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2(),cnt);
 				// and pack the internal ghost grid
 				Packer<device_grid,HeapMemory>::template packRequest<prp...>(loc_grid.get(sub_id),sub_it,req);
 			}
@@ -706,8 +724,12 @@ public:
 
 				// Pack a size_t for the internal ghost id
 				Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
+
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// Create a sub grid iterator spanning the internal ghost layer
-				grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2(),cnt);
 				// and pack the internal ghost grid
 				Packer<device_grid,HeapMemory>::template pack<prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
 			}
@@ -794,8 +816,11 @@ public:
 				Box<dim,size_t> box = eg_box.get(i).bid.get(l_id).l_e_box;
 				size_t sub_id = eg_box.get(i).bid.get(l_id).sub;
 
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// sub-grid where to unpack
-				grid_key_dx_iterator_sub<dim> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2(),cnt);
 
 				// Unpack
 				Unpacker<device_grid,HeapMemory>::template unpack<prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
@@ -849,8 +874,12 @@ public:
 
 				// Pack a size_t for the internal ghost id
 				Packer<size_t,HeapMemory>::packRequest(req);
+
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// Create a sub grid iterator spanning the internal ghost layer
-				grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2(),cnt);
 				// and pack the internal ghost grid
 				Packer<device_grid,HeapMemory>::template packRequest<prp...>(loc_grid.get(sub_id),sub_it,req);
 			}
@@ -891,8 +920,12 @@ public:
 
 				// Pack a size_t for the internal ghost id
 				Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
+
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// Create a sub grid iterator spanning the internal ghost layer
-				grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2(),cnt);
 				// and pack the internal ghost grid
 				Packer<device_grid,HeapMemory>::template pack<prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
 			}
@@ -980,8 +1013,11 @@ public:
 				size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
 				box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
 
+				grid_key_dx<dim> cnt[1];
+				cnt[0].zero();
+
 				// sub-grid where to unpack
-				grid_key_dx_iterator_sub<dim> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2());
+				grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,1>> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2(),cnt);
 
 				grid_unpack_with_prp<op,prp_object,device_grid,Memory>::template unpacking<prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
 			}
diff --git a/src/Grid/grid_dist_key.hpp b/src/Grid/grid_dist_key.hpp
index 420a281af57106a3529e0a8973b2dc4862c495cc..6ea1925dfa92d0cced8ecb4301ba890fd1fd38c3 100644
--- a/src/Grid/grid_dist_key.hpp
+++ b/src/Grid/grid_dist_key.hpp
@@ -248,4 +248,101 @@ public:
 	}
 };
 
+/*! \brief Distributed linearized key
+ *
+ * Instead of storing the sub-domain index, it stores directly a pointer to the local grid
+ *
+ */
+template<typename device_grid>
+class grid_dist_g_dx
+{
+	//! Pointer to the local (sub-domain) grid the key refers to
+	device_grid * dg;
+
+	//! Linearized index of the element inside the local grid
+	size_t key;
+
+public:
+
+	/*! \brief Return the sub-domain grid
+	 *
+	 * \return pointer to the local grid stored in the key
+	 */
+	inline device_grid * getSub() const
+	{
+		return dg;
+	}
+
+
+	/*! \brief Get the key
+	 *
+	 * \return the local key
+	 *
+	 */
+	inline size_t getKey() const
+	{
+		return key;
+	}
+
+
+	/*! \brief Get the reference key
+	 *
+	 * \return a reference to the local key, it can be used to modify it
+	 *
+	 */
+	inline size_t & getKeyRef()
+	{
+		return key;
+	}
+
+	/*! \brief Check if two keys are the same
+	 *
+	 * \param key_t key to check
+	 *
+	 * \return true if both keys point to the same grid and have the same local key
+	 *
+	 */
+	inline bool operator==(const grid_dist_g_dx & key_t) const
+	{
+		// keys that point to different local grids are never equal
+		if (dg != key_t.dg)
+		{return false;}
+
+		// same grid: compare the linearized position
+
+		return getKey() == key_t.getKey();
+	}
+
+
+	/*! \brief Constructor set the sub-domain grid and the position in local coordinates
+	 *
+	 * \param dg pointer to the sub-domain grid
+	 * \param key key
+	 *
+	 */
+	inline grid_dist_g_dx(device_grid * dg, size_t key)
+	:dg(dg),key(key)
+	{
+	}
+
+	//! Default constructor: null grid pointer and key 0, so no member is left indeterminate
+	inline grid_dist_g_dx():dg(nullptr),key(0){}
+
+	/*! \brief convert the key to string
+	 *
+	 * \return a string with the grid pointer and the linearized key
+	 */
+	std::string to_string() const
+	{
+		std::stringstream str;
+
+		str << "sub_domain=" << dg << " ";
+		str << "lin_id=" << key << " ";
+
+		str << "\n";
+
+		return str.str();
+	}
+};
+
 #endif
diff --git a/src/Grid/grid_dist_util.hpp b/src/Grid/grid_dist_util.hpp
index 77e2383cbdef22ef28ffee634e15d109f3731329..ed84a123e6c92160f1c6c84d9c7ec3a3e307303d 100644
--- a/src/Grid/grid_dist_util.hpp
+++ b/src/Grid/grid_dist_util.hpp
@@ -124,7 +124,7 @@ template<int dim, typename Decomposition> inline void create_gdb_ext(openfpm::ve
 
 	// fill the spacing
 	for (size_t i = 0 ; i < dim ; i++)
-		spacing[i] = cd_sm.getCellBox().getP2()[i];
+	{spacing[i] = cd_sm.getCellBox().getP2()[i];}
 }
 
 /*! \brief it store a box, its unique id and the sub-domain from where it come from
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index 46f3cbc2762b987cdf24596b1b9eacb3f02e32fe..51beb4b0da09de6ec683fb17c31eb58307c68fe9 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -61,8 +61,12 @@
 #define NO_GHOST 0
 #define WITH_GHOST 2
 
+#define GCL_NON_SYMMETRIC 0
+#define GCL_SYMMETRIC 1
+#define GCL_HILBERT 2
+
 //! General function t get a cell-list
-template<unsigned int dim, typename St, typename CellL, typename Vector>
+template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
 struct gcl
 {
 	/*! \brief Get the Cell list based on the type
@@ -81,8 +85,8 @@ struct gcl
 };
 
 //! General function t get a cell-list
-template<unsigned int dim, typename St, typename Vector, typename Mem_type>
-struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, St> >,Vector>
+template<unsigned int dim, typename St, typename CellL, typename Vector>
+struct gcl<dim,St,CellL,Vector,GCL_HILBERT>
 {
 	/*! \brief Get the Cell list based on the type
 	 *
@@ -93,12 +97,31 @@ struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, S
 	 * \return the constructed cell-list
 	 *
 	 */
-	static inline CellList_gen<dim, St, Process_keys_hilb, Mem_type, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
+	static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
 	{
 		return vd.getCellList_hilb(r_cut,g);
 	}
 };
 
+//! Specialization of gcl selected with GCL_SYMMETRIC: it gets the cell-list from getCellListSym
+template<unsigned int dim, typename St, typename CellL, typename Vector>
+struct gcl<dim,St,CellL,Vector,GCL_SYMMETRIC>
+{
+	/*! \brief Get the symmetric Cell list
+	 *
+	 * \param vd Distributed vector
+	 * \param r_cut Cut-off radius
+	 * \param g Ghost (not used by this specialization)
+	 *
+	 * \return the constructed cell-list
+	 *
+	 */
+	static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
+	{
+		return vd.getCellListSym(r_cut);
+	}
+};
+
 #define CELL_MEMFAST(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_fast, shift<dim, St> >
 #define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal, shift<dim, St> >
 #define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw, shift<dim, St> >
@@ -1001,7 +1024,7 @@ public:
 		}
 		else
 		{
-			CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost());
+			CellL cli_tmp = gcl<dim,St,CellL,self,GCL_NON_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost());
 
 			cell_list.swap(cli_tmp);
 		}
@@ -1038,7 +1061,7 @@ public:
 		}
 		else
 		{
-			CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost());
+			CellL cli_tmp = gcl<dim,St,CellL,self,GCL_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost());
 
 			cell_list.swap(cli_tmp);
 		}
@@ -1079,7 +1102,8 @@ public:
 		// Processor bounding box
 		cl_param_calculate(pbox, div, r_cut, enlarge);
 
-		cell_list.Initialize(pbox, div, g_m);
+		cell_list.Initialize(pbox, div);
+		cell_list.set_gm(g_m);
 		cell_list.set_ndec(getDecomposition().get_ndec());
 
 		updateCellList(cell_list,no_se3);
@@ -1119,7 +1143,8 @@ public:
 		// Processor bounding box
 		cl_param_calculate(pbox,div, r_cut, enlarge);
 
-		cell_list.Initialize(pbox, div, g_m);
+		cell_list.Initialize(pbox, div);
+		cell_list.set_gm(g_m);
 		cell_list.set_ndec(getDecomposition().get_ndec());
 
 		updateCellList(cell_list);
@@ -1379,7 +1404,8 @@ public:
 			div[i] = 1 << m;
 		}
 
-		cell_list.Initialize(pbox,div,g_m);
+		cell_list.Initialize(pbox,div);
+		cell_list.set_gm(g_m);
 
 		// for each particle add the particle to the cell list
 
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index a130c7ae789c37c181e941ff3bf22210e276c10c..7b873d5c6a0aef39dc213781d5e116d15cef4dec 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -270,12 +270,21 @@ void Test2D_ghost(Box<2,float> & box)
 	}
 }
 
-//! types to use in the tests each element in the list is tested
-typedef boost::mpl::list<vector_dist<2,float, Point_test<float> >,
-		                 vector_dist<2,float, Point_test<float>,memory_traits_inte<Point_test<float>>::type,memory_traits_inte> > test_types;
+BOOST_AUTO_TEST_CASE( vector_dist_ghost )
+{
+	// distributed vector under test: 2D, float coordinates, Point_test<float> properties
+	using vd_type = vector_dist<2,float, Point_test<float>>;
+	// first run: box from (0,0) to (1,1)
+	Box<2,float> unit_box({0.0,0.0},{1.0,1.0});
+	Test2D_ghost<vd_type>(unit_box);
+	// second run: box from (-1,-1) to (2.5,2.5)
+	Box<2,float> wide_box({-1.0,-1.0},{2.5,2.5});
+	Test2D_ghost<vd_type>(wide_box);
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_ghost_inte )
 {
+	typedef vector_dist<2,float, Point_test<float>,memory_traits_inte<Point_test<float>>::type,memory_traits_inte> vector;
+
 	Box<2,float> box({0.0,0.0},{1.0,1.0});
 	Test2D_ghost<vector>(box);