diff --git a/openfpm_data b/openfpm_data
index ffe4514e58db3f2102debf89fa393f19d2ec70a8..02ef67f834e1f2b36f781f380e436f821c9a7945 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit ffe4514e58db3f2102debf89fa393f19d2ec70a8
+Subproject commit 02ef67f834e1f2b36f781f380e436f821c9a7945
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 59380a248705abd7837258e2e6ef9e31dfe98ba1..e3dcad616729c2159662ff5dc6f042a426f45adf 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -2190,7 +2190,7 @@ public:
 	 * \tparam prp... Properties to synchronize
 	 *
 	 */
-	template<int... prp> void ghost_get()
+	template<int... prp> void ghost_get(size_t opt = 0)
 	{
 #ifdef SE_CLASS2
 		check_valid(this,8);
@@ -2217,7 +2217,8 @@ public:
 																								  use_bx_def,
 																								  loc_grid,
 																								  ginfo_v,
-																								  g_id_to_external_ghost_box);
+																								  g_id_to_external_ghost_box,
+																								  opt);
 	}
 
 	/*! \brief It synchronize the ghost parts
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
index ade70484b4c6f669e916c4f26b155c3595eb9195..c3a061d3f6f84c62daed9334927bbb18eb73f463 100644
--- a/src/Grid/grid_dist_id_comm.hpp
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -175,6 +175,13 @@ class grid_dist_id_comm
 	//! Memory for the ghost receiving buffer
 	Memory g_recv_prp_mem;
 
+	//! start (pointers) and end (pointers2) of the packed send data, one entry for each ig_box element
+	openfpm::vector<void *> pointers;
+	openfpm::vector<void *> pointers2;
+
+	//! Options of the current ghost synchronization (tested against RUN_ON_DEVICE when receiving)
+	size_t opt;
+
 	/*! \brief Sync the local ghost part
 	 *
 	 * \tparam prp... properties to sync
@@ -339,6 +346,10 @@ class grid_dist_id_comm
 
 		gd->recv_buffers.last().resize(msg_i);
 		gd->recv_proc.add(i);
+
+		if (gd->opt & RUN_ON_DEVICE)
+		{return gd->recv_buffers.last().getDevicePointer();}
+
 		return gd->recv_buffers.last().getPointer();
 	}
 
@@ -463,11 +474,15 @@ class grid_dist_id_comm
 									const openfpm::vector<ep_box_grid<dim>> & eg_box,
 									const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
 									const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
-									Unpack_stat & ps)
+									Unpack_stat & ps,
+									size_t opt)
 	{
 		// Unpack the ghost box global-id
 
 		size_t g_id;
+		// the gid is unpacked on the host, so first copy it from device to host
+		if (opt & RUN_ON_DEVICE)
+		{emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));}
 		Unpacker<size_t,mem>::unpack(emem,g_id,ps);
 
 		size_t l_id = 0;
@@ -505,7 +520,7 @@ class grid_dist_id_comm
 
 		// Unpack
 		loc_grid.get(sub_id).remove(box);
-		Unpacker<device_grid,mem>::template unpack<decltype(sub2),prp...>(emem,sub2,loc_grid.get(sub_id),ps);
+		Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getmgpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getmgpuContext());
 
 		// Copy the information on the other grid
 		for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
@@ -533,7 +548,8 @@ class grid_dist_id_comm
 							const std::vector<size_t> & prp_recv,
 							ExtPreAlloc<Memory> & prRecv_prp,
 							const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
-							const openfpm::vector<e_box_multi<dim>> & eb_gid_list)
+							const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
+							size_t opt)
 	{
 		if (device_grid::isCompressed() == false)
 		{
@@ -555,7 +571,7 @@ class grid_dist_id_comm
 
 					unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
 																eg_box,g_id_to_external_ghost_box,eb_gid_list,
-																ps);
+																ps,opt);
 				}
 			}
 		}
@@ -576,7 +592,7 @@ class grid_dist_id_comm
 
 					unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
 																eg_box,g_id_to_external_ghost_box,eb_gid_list,
-																ps);
+																ps,opt);
 				}
 			}
 		}
@@ -952,7 +968,8 @@ public:
 										 bool use_bx_def,
 										 openfpm::vector<device_grid> & loc_grid,
 										 const grid_sm<dim,void> & ginfo,
-										 std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box)
+										 std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
+										 size_t opt)
 	{
 #ifdef PROFILE_SCOREP
 		SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
@@ -967,6 +984,8 @@ public:
 		send_pointer.clear();
 		send_size.clear();
 
+		this->opt = opt;
+
 		size_t req = 0;
 
 		// first we initialize the pack buffer on all internal grids
@@ -1002,7 +1021,7 @@ public:
 
 		// Finalize calculation
 		for (size_t i = 0 ; i < loc_grid.size() ; i++)
-		{loc_grid.get(i).packCalculate(req,v_cl.getmgpuContext());}
+		{loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());}
 
 		// resize the property buffer memory
 		g_send_prp_mem.resize(req);
@@ -1015,12 +1034,21 @@ public:
 		// Pack information
 		Pack_stat sts;
 
+		pointers.clear();
+		pointers2.clear();
+
 		// Pack the information for each processor and send it
 		for ( size_t i = 0 ; i < ig_box.size() ; i++ )
 		{
 
 			sts.mark();
-			void * pointer = prAlloc_prp.getPointerEnd();
+
+			void * pointer;
+
+			if (opt & RUN_ON_DEVICE)
+			{pointer = prAlloc_prp.getDevicePointerEnd();}
+			else
+			{pointer = prAlloc_prp.getPointerEnd();}
 
 			// for each ghost box
 			for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
@@ -1039,6 +1067,7 @@ public:
 
 				// Pack a size_t for the internal ghost id
 				Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
+				prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd());
 				// Create a sub grid iterator spanning the internal ghost layer
 				auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2());
 				// and pack the internal ghost grid
@@ -1046,10 +1075,24 @@ public:
 			}
 			// send the request
 
-			void * pointer2 = prAlloc_prp.getPointerEnd();
+			void * pointer2;
 
+			if (opt & RUN_ON_DEVICE)
+			{pointer2 = prAlloc_prp.getDevicePointerEnd();}
+			else
+			{pointer2 = prAlloc_prp.getPointerEnd();}
+
+			pointers.add(pointer);
+			pointers2.add(pointer2);
+		}
+
+		for (size_t i = 0 ; i < loc_grid.size() ; i++)
+		{loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts);}
+
+		for ( size_t i = 0 ; i < ig_box.size() ; i++ )
+		{
 			// This function send (or queue for sending) the information
-			send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
+			send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
 		}
 
 		// Calculate the total information to receive from each processors
@@ -1064,9 +1107,15 @@ public:
 
 		queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);
 
+		for (size_t i = 0 ; i < loc_grid.size() ; i++)
+		{loc_grid.get(i).removeCopyReset();}
+
 		ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def);
 
-		merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list);
+		merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);
+
+		for (size_t i = 0 ; i < loc_grid.size() ; i++)
+		{loc_grid.get(i).removeCopyFinalize(v_cl.getmgpuContext());}
 	}
 
 	/*! \brief It merge the information in the ghost with the
diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
index 53881df76d3ac5956a56663e483cce928e9e8fa2..8f0a5ced7f530d9620bf1b077d14862063b3de3b 100644
--- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
+++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
@@ -207,19 +207,24 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
 
 	/////// GPU insert + flush
 
-	Box<2,size_t> box({1,1},{1,1});
+	Box<2,size_t> box({1,1},{15,15});
 	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
 
 	/////// GPU Run kernel
 
-	gdist.setInsertBuffer(1);
+	gdist.setInsertBuffer(225);
 
 	float c = 5.0;
 
 	gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
 	gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
 
-//	gdist.template ghost_get<0>();
+	gdist.template deviceToHost<0>();
+//	gdist.write("broken");
+
+	gdist.template ghost_get<0>(RUN_ON_DEVICE);
 }
 
+
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp
index b095138c1f73a1b3314ee0e1f2e8c12d8c645af5..6694ac553e107c2ca61861a46e561800acfad3fe 100644
--- a/src/Vector/vector_dist_comm.hpp
+++ b/src/Vector/vector_dist_comm.hpp
@@ -26,7 +26,6 @@ constexpr int NO_CHANGE_ELEMENTS = 4;
 
 constexpr int BIND_DEC_TO_GHOST = 1;
 
-constexpr int RUN_ON_DEVICE = 1024;
 constexpr int MAP_LOCAL = 2;
 
 constexpr int GHOST_SYNC = 0;
@@ -1173,6 +1172,9 @@ class vector_dist_comm
 		}
 		else
 		{
+			// if there are no properties to send, skip this step
+			if (sizeof...(prp) == 0)	{return;}
+
 			// Fill the send buffer
 			for (size_t i = 0; i < g_opart.size(); i++)
 			{
diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in
index 3c92ef09cc9df7eb992221ea0d51147772008550..c38415e8e75b5cbc443ac8ed6eafc387f97559e2 100644
--- a/src/config/config_cmake.h.in
+++ b/src/config/config_cmake.h.in
@@ -163,3 +163,5 @@ ${DEFINE_TEST_COVERAGE_MODE}
 
 /* Version number of package */
 #define VERSION "1.0.0"
+
+#define OPENFPM_PDATA