diff --git a/src/Amr/grid_dist_amr.hpp b/src/Amr/grid_dist_amr.hpp
index 56803c19b189f13849751d02a43cc59a0db06fca..6ac740458cbbefa05dc21b0277a4134c7ce6d31b 100644
--- a/src/Amr/grid_dist_amr.hpp
+++ b/src/Amr/grid_dist_amr.hpp
@@ -115,18 +115,6 @@ class grid_dist_amr
 
 };
 
-/*! \brief It contain the offset necessary to move to coarser and finer level grids
- *
- */
-template<unsigned int dim>
-struct offset_mv
-{
-	//! offset to move up on an upper grid (coarse)
-	Point<dim,long int> up;
-
-	//! offset to move on the lower grid (finer)
-	Point<dim,long int> dw;
-};
 
 /*! \brief AMR Adaptive Multi Resolution Grid
  *
@@ -466,6 +454,16 @@ public:
 		return gd_array.get(lvl).getGridIterator();
 	}
 
+	/*! \brief Get an iterator to a sub-domain of the grid at a given level
+	 *
+	 * \param lvl level at which to iterate
+	 * \param start iterator start point
+	 * \param stop iterator stop point
+	 *
+	 * \return an iterator over the grid points between start and stop
+	 *
+	 */
+	auto getGridIterator(size_t lvl, grid_key_dx<dim> & start, grid_key_dx<dim> & stop) -> decltype(gd_array.get(lvl).getGridIterator(start,stop))
+	{
+		return gd_array.get(lvl).getGridIterator(start,stop);
+	}
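+
+	// A minimal usage sketch (mirroring the GPU unit test below; amr_g is an
+	// initialized grid_dist_amr and the bounds are illustrative):
+	//
+	//   grid_key_dx<2> start({9,9});
+	//   grid_key_dx<2> stop({12,12});
+	//   auto it = amr_g.getGridIterator(1,start,stop);
+	//   while (it.isNext())
+	//   {
+	//       auto key = it.get_dist();   // distributed key, usable with insertFlush/get
+	//       ++it;
+	//   }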
+
 #ifdef __NVCC__
 
 	/*! \brief Get an iterator to the grid
@@ -718,6 +716,41 @@ public:
 		return gd_array.get(lvl).remove(v1);
 	}
 
+	/*! \brief construct the links between adjacent levels for the padding (boundary) points
+	 *
+	 *
+	 */
+	void construct_level_connections()
+	{
+		for (size_t lvl = 0 ; lvl < gd_array.size() ; lvl++)
+		{
+			// every level except the finest links down to the next finer level,
+			// the finest level links up to the next coarser one
+			if (lvl != gd_array.size() - 1)
+			{gd_array.get(lvl).construct_link_dw(gd_array.get(lvl+1),mv_off.get(lvl));}
+			else if (lvl != 0)
+			{gd_array.get(lvl).construct_link_up(gd_array.get(lvl-1));}
+		}
+	}
+
+	/*! \brief tag the boundary (padding) points on every level
+	 *
+	 * \tparam stencil_type type of stencil used to detect the boundaries
+	 *
+	 */
+	template<typename stencil_type>
+	void tagBoundaries()
+	{
+		for (size_t lvl = 0 ; lvl < gd_array.size() ; lvl++)
+		{
+			gd_array.get(lvl).template tagBoundaries<stencil_type>();
+		}
+	}
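+
+	// Typical call sequence (taken from the GPU unit test): after filling the
+	// levels on the host, move the data to the device, tag the boundary points,
+	// then build the inter-level links.
+	//
+	//   amr_g.hostToDevice<0>();
+	//   amr_g.tagBoundaries<NNStar<2>>();
+	//   amr_g.construct_level_connections();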
+
 	//////////////////////////////////////
 
 	/*! \brief It synchronize the ghost parts
@@ -918,6 +951,32 @@ public:
 
 		return ret;
 	}
+
+#ifdef __NVCC__
+
+	/*! \brief Move the memory from the device to the host, for every level
+	 *
+	 * \tparam prp properties to move
+	 *
+	 */
+	template<unsigned int ... prp> void deviceToHost()
+	{
+		for (size_t i = 0 ; i < gd_array.size() ; i++)
+		{
+			gd_array.get(i).template deviceToHost<prp ...>();
+		}
+	}
+
+	/*! \brief Move the memory from the host to the device, for every level
+	 *
+	 * \tparam prp properties to move
+	 *
+	 */
+	template<unsigned int ... prp> void hostToDevice()
+	{
+		for (size_t i = 0 ; i < gd_array.size() ; i++)
+		{
+			gd_array.get(i).template hostToDevice<prp ...>();
+		}
+	}
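+
+	// Example (mirroring the unit test): move property 0 of every level to the
+	// device after host-side inserts, and back before host-side checks.
+	//
+	//   amr_g.hostToDevice<0>();
+	//   amr_g.deviceToHost<0>();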
+
+#endif
 };
 
 template<unsigned int dim, typename St, typename T>
diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu
index 267d9b1829a4eba4d74dc74062a3707cca9bcc7a..ff23114819bb222c3ab67b987aa2f9281b8e1457 100644
--- a/src/Amr/tests/amr_base_gpu_unit_tests.cu
+++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu
@@ -191,4 +191,138 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu )
 	BOOST_REQUIRE_EQUAL(count_c,correct_result_cell);*/
 }
 
+BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test )
+{
+	auto & v_cl = create_vcluster();
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	Ghost<2,long int> g(1);
+	sgrid_dist_amr_gpu<2,float,aggregate<float>> amr_g(domain,g);
+
+	size_t g_sz[2] = {17,17};
+
+	size_t n_lvl = 3;
+
+	amr_g.initLevels(n_lvl,g_sz);
+
+	grid_key_dx<2> start({5,5});
+	grid_key_dx<2> start_lvl_dw({9,9});
+	grid_key_dx<2> stop_lvl_dw({12,12});
+	grid_key_dx<2> start_lvl_dw2({19,19});
+	grid_key_dx<2> stop_lvl_dw2({23,23});
+
+	auto it = amr_g.getGridIterator(0,start,start);
+	auto it2 = amr_g.getGridIterator(1,start_lvl_dw,stop_lvl_dw);
+	auto it3 = amr_g.getGridIterator(2,start_lvl_dw2,stop_lvl_dw2);
+//	it.setGPUInsertBuffer(4);
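+
+	// it visits the single coarse point (5,5); it2 and it3 visit the finer
+	// patches that cover it on levels 1 and 2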
+
+	auto & lvl_0 = amr_g.getDistGrid(0);
+	auto & lvl_1 = amr_g.getDistGrid(1);
+	auto & lvl_2 = amr_g.getDistGrid(2);
+
+	// Add points in level 0
+
+	while (it.isNext())
+	{
+		auto key = it.get_dist();
+
+		lvl_0.template insertFlush<0>(key) = 1.0;
+
+		++it;
+	}
+
+	while (it2.isNext())
+	{
+		auto key = it2.get_dist();
+
+		lvl_1.template insertFlush<0>(key) = 2.0;
+
+		++it2;
+	}
+
+	while (it3.isNext())
+	{
+		auto key = it3.get_dist();
+
+		lvl_2.template insertFlush<0>(key) = 3.0;
+
+		++it3;
+	}
+
+	amr_g.hostToDevice<0>();
+	amr_g.tagBoundaries<NNStar<2>>();
+	amr_g.construct_level_connections();
+
+	/////////////////////////////////////////////////////////////
+
+	auto & lvl_zero_d = amr_g.getDistGrid(0);
+	auto & lvl_one_d = amr_g.getDistGrid(1);
+	auto & lvl_two_d = amr_g.getDistGrid(2);
+
+	// For each local grid
+
+	for (int i = 0 ; i < lvl_zero_d.getN_loc_grid() ; i++)
+	{
+
+		// Check
+		auto & lvl_zero = lvl_zero_d.get_loc_grid(i);
+		auto & lvl_one = lvl_one_d.get_loc_grid(i);
+		auto & lvl_two = lvl_two_d.get_loc_grid(i);
+
+		auto & offs_dw_link = lvl_zero.getDownLinksOffsets();
+		auto & dw_links = lvl_zero.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link.size(),1);
+		BOOST_REQUIRE_EQUAL(dw_links.size(),4);
+
+		auto & indexL0 = lvl_zero.private_get_blockMap().getIndexBuffer();
+		auto & indexL1 = lvl_one.private_get_blockMap().getIndexBuffer();
+		auto & indexL2 = lvl_two.private_get_blockMap().getIndexBuffer();
+
+		auto & dataL0 = lvl_zero.private_get_blockMap().getDataBuffer();
+		auto & dataL1 = lvl_one.private_get_blockMap().getDataBuffer();
+		auto & dataL2 = lvl_two.private_get_blockMap().getDataBuffer();
+
+		dw_links.template deviceToHost<0,1>();
+
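+		// the level-0 point (5,5) refines (ratio 2) to the 2x2 patch
+		// (10,10)-(11,11) on level 1, inside the inserted box (9,9)-(12,12):
+		// hence 4 down-links, each pointing at the level-1 value 2.0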
+		BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(0))[dw_links.template get<1>(0)],2);
+		BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(1))[dw_links.template get<1>(1)],2);
+		BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(2))[dw_links.template get<1>(2)],2);
+		BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(3))[dw_links.template get<1>(3)],2);
+
+		auto & offs_dw_link_1 = lvl_one.getDownLinksOffsets();
+		auto & dw_links_1 = lvl_one.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link_1.size(),12);
+		BOOST_REQUIRE_EQUAL(dw_links_1.size(),9);
+
+		dw_links_1.template deviceToHost<0,1>();
+
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(0))[dw_links_1.template get<1>(0)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(1))[dw_links_1.template get<1>(1)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(2))[dw_links_1.template get<1>(2)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(3))[dw_links_1.template get<1>(3)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(4))[dw_links_1.template get<1>(4)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(5))[dw_links_1.template get<1>(5)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(6))[dw_links_1.template get<1>(6)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(7))[dw_links_1.template get<1>(7)],3);
+		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(8))[dw_links_1.template get<1>(8)],3);
+	}
+
+/*	grid_key_dx<2> k({8,8});
+	grid_key_dx<2> k2({16,16});
+
+	lvl_zero.insertFlush<0>(k) = 1.0;
+	lvl_one.insertFlush<0>(k2) = 5.0;
+
+	lvl_one.template hostToDevice<0>();
+	lvl_one.tagBoundaries(v_cl.getmgpuContext());*/
+
+	/////////////////////////////////////////////////////////////
+
+
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 909a6a881f3e399548f8da496bd706d6b1f7d54a..f7a2c3857982a40cd95f91ae5fc763f0280c01fc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -63,6 +63,9 @@ if (CUDA_FOUND)
         if (TEST_COVERAGE)
 		target_compile_options(pdata PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage">)
         endif()
+	if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+		# target_compile_options(pdata PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -G>)
+	endif()
 endif()
 
 if(TEST_PERFORMANCE)
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 5046837912f1a84e4ae7c5513b8f3f5b677550b5..5246e6292ec847f5887bc8fd71a1a51feb6f46b1 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -29,6 +29,18 @@
 #include "Grid/cuda/grid_dist_id_iterator_gpu.cuh"
 #endif
 
+/*! \brief It contains the offsets necessary to move to coarser and finer level grids
+ *
+ */
+template<unsigned int dim>
+struct offset_mv
+{
+	//! offset to move up to the upper (coarser) grid
+	Point<dim,long int> up;
+
+	//! offset to move down to the lower (finer) grid
+	Point<dim,long int> dw;
+};
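+
+// A sketch of how these offsets are consumed (see construct_link_dw below):
+// the dw component is copied into a Point<dim,int> and passed, together with
+// the finer grid, to the local sparse grid.
+//
+//   Point<2,int> p_dw;
+//   for (int j = 0 ; j < 2 ; j++)
+//   {p_dw.get(j) = mv_off.get(i).dw.get(j);}   // mv_off lives in grid_dist_amr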
 
 //! Internal ghost box sent to construct external ghost box into the other processors
 template<unsigned int dim>
@@ -80,6 +92,8 @@ template<unsigned int dim,
 		 typename device_grid=grid_cpu<dim,T> >
 class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid>
 {
+	typedef grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> self;
+
 	//! Domain
 	Box<dim,St> domain;
 
@@ -2030,7 +2044,8 @@ public:
 	 *
 	 * In case of dense grid this function is equivalent to get, in case of sparse
 	 * grid this function insert a grid point. When the point already exist it return
-	 * a reference to the already existing point
+ * a reference to the already existing point. In the case of a massive insert on sparse
+ * grids it gives a reference to the inserted element in the insert buffer
 	 *
 	 * \tparam p property to get (is an integer)
 	 * \param v1 grid_key that identify the element in the grid
@@ -2052,6 +2067,35 @@ public:
 	}
 
 
+	/*! \brief insert an element in the grid
+	 *
+	 * In case of a dense grid this function is equivalent to get; in case of a sparse
+	 * grid it inserts a grid point. When the point already exists it returns
+	 * a reference to the existing point. Unlike the buffered insert, on sparse grids
+	 * the point is inserted (flushed) immediately and a reference to the inserted
+	 * element is returned
+	 *
+	 * \warning This function is not fast: an unlucky insert can potentially cost O(N),
+	 *          where N is the number of points (worst case)
+	 *
+	 * \tparam p property to get (is an integer)
+	 * \param v1 grid_key that identifies the element in the grid
+	 *
+	 * \return a reference to the inserted element
+	 *
+	 */
+	template <unsigned int p,typename bg_key>inline auto insertFlush(const grid_dist_key_dx<dim,bg_key> & v1)
+	-> typename std::add_lvalue_reference
+	<
+		decltype(loc_grid.get(v1.getSub()).template insertFlush<p>(v1.getKey()))
+	>::type
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+
+		return loc_grid.get(v1.getSub()).template insertFlush<p>(v1.getKey());
+	}
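+
+	// Usage sketch (as in the AMR GPU unit test): write a value at a
+	// distributed key coming from a grid iterator.
+	//
+	//   auto key = it.get_dist();
+	//   lvl_0.template insertFlush<0>(key) = 1.0;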
+
 	/*! \brief Get the reference of the selected element
 	 *
 	 * \tparam p property to get (is an integer)
@@ -2587,6 +2631,67 @@ public:
 		{loc_grid.get(i).clear();}
 	}
 
+	/*! \brief construct the links between levels
+	 *
+	 * \param grid_up grid one level up (coarser)
+	 * \param grid_dw grid one level down (finer)
+	 *
+	 */
+	void construct_link(self & grid_up, self & grid_dw)
+	{
+		for (int i = 0 ; i < loc_grid.size() ; i++)
+		{
+			loc_grid.get(i).construct_link(grid_up.get_loc_grid(i),grid_dw.get_loc_grid(i),v_cl.getmgpuContext());
+		}
+	}
+
+	/*! \brief construct the links between the current level and the level down
+	 *
+	 * \param grid_dw grid one level down (finer)
+	 * \param mvof offsets (one per local grid) to move to the finer level
+	 *
+	 */
+	void construct_link_dw(self & grid_dw, openfpm::vector<offset_mv<dim>> & mvof)
+	{
+		for (int i = 0 ; i < loc_grid.size() ; i++)
+		{
+			Point<dim,int> p_dw;
+			for(int j = 0 ; j < dim ; j++)
+			{p_dw.get(j) = mvof.get(i).dw.get(j);}
+
+			loc_grid.get(i).construct_link_dw(grid_dw.get_loc_grid(i),p_dw,v_cl.getmgpuContext());
+		}
+	}
+
+	/*! \brief construct the links between the current level and the level up
+	 *
+	 * \param grid_up grid one level up (coarser)
+	 *
+	 */
+	void construct_link_up(self & grid_up)
+	{
+		for (int i = 0 ; i < loc_grid.size() ; i++)
+		{
+			loc_grid.get(i).construct_link_up(grid_up.get_loc_grid(i),v_cl.getmgpuContext());
+		}
+	}
+
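+	// Note: the per-level link constructors above are driven by
+	// grid_dist_amr::construct_level_connections() (grid_dist_amr.hpp),
+	// which walks the levels and links each one to the next finer level.
+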
+	/*! \brief tag the boundary (padding) points on each local grid
+	 *
+	 * \tparam stencil_type type of stencil used to detect the boundaries
+	 *
+	 */
+	template<typename stencil_type>
+	void tagBoundaries()
+	{
+		for (size_t i = 0 ; i < loc_grid.size() ; i++)
+		{
+			loc_grid.get(i).template tagBoundaries<stencil_type>(v_cl.getmgpuContext());
+		}
+	}
+
 	/*! \brief It move all the grid parts that do not belong to the local processor to the respective processor
 	 *
 	 */
diff --git a/src/main.cpp b/src/main.cpp
index 9131b24776789c4d75c6bba483d39980b6f13240..305cdf76381f307a2ee4fad09abfb1e9cc928289 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -34,8 +34,8 @@ void timeout_cycle()
 // initialization function:
 bool init_unit_test()
 {
-  std::thread to (timeout_cycle);
-  to.detach();
+//  std::thread to (timeout_cycle);
+//  to.detach();
   return true;
 }