diff --git a/openfpm_data b/openfpm_data
index a227090455a3364dec5964b762bc2165658af189..8c459e2608db17c0ad8fd2a27d29e4de9b01cc2c 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit a227090455a3364dec5964b762bc2165658af189
+Subproject commit 8c459e2608db17c0ad8fd2a27d29e4de9b01cc2c
diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
index e0a79e8af450795f365bd148e7375091255d8e4d..0851a3e50776fc4587bbfac1884b1efabc856bae 100644
--- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
+++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
@@ -543,10 +543,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove )
 
 	gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
 
-	gdist.deviceToHost<0>();
-	gdist.write_debug("Test_out");
-
-/*	for (int i = 0 ; i < 10 ; i++)
+	for (int i = 0 ; i < 10 ; i++)
 	{
 		gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
 	}
@@ -555,7 +552,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove )
 
 	typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
 
-	gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
+	gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
 		u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1);
 		v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1);
 	});
@@ -564,7 +561,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove )
 
 	// Now we check that ghost is correct
 
-	auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2});
+	auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3});
 
 	bool match = true;
 
@@ -594,7 +591,125 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove )
 		++it3;
 	}
 
-	BOOST_REQUIRE_EQUAL(match,true);*/
+	BOOST_REQUIRE_EQUAL(match,true);
+
+	gdist.template deviceToHost<0,1,2,3>();
+
+	auto it4 = gdist.getDomainGhostIterator();
+	Box<3,long int> bin({0,0,0},{59,59,59});
+
+	match = true;
+
+	while (it4.isNext())
+	{
+		auto p = it4.get();
+
+		// We have to check we have no point in the ghost area
+		auto gkey = it4.getGKey(p);
+
+		if (bin.isInside(gkey.toPoint()) == false)
+		{match = false;}
+
+		++it4;
+	}
+
+	BOOST_REQUIRE_EQUAL(match,true);
+}
+
+BOOST_AUTO_TEST_CASE( sgrid_gpu_test_skip_labelling )
+{
+	size_t sz[3] = {60,60,60};
+	periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
+
+	Ghost<3,long int> g(1);
+
+	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
+
+	gdist.template setBackgroundValue<0>(666);
+	gdist.template setBackgroundValue<1>(666);
+	gdist.template setBackgroundValue<2>(666);
+	gdist.template setBackgroundValue<3>(666);
+
+	/////// GPU insert + flush
+
+	Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
+
+	/////// GPU Run kernel
+
+	float c = 5.0;
+
+	typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
+
+	gdist.addPoints(box.getKP1(),box.getKP2(),
+			        [] __device__ (int i, int j, int k)
+			        {
+						return true;
+			        },
+			        [c] __device__ (InsertBlockT & data, int i, int j, int k)
+			        {
+			        	data.template get<0>() = c + i + j + k;
+			        	data.template get<1>() = c + 1000 + i + j + k;
+			        }
+			        );
+
+	gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
+
+	gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
+
+	// Now run the convolution
+
+	typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
+
+	gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
+		u_out = 5*u(i,j,k);
+		v_out = 5*v(i,j,k);
+	});
+
+	gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING);
+
+	gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
+		u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1);
+		v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1);
+	});
+
+
+	gdist.deviceToHost<0,1,2,3>();
+
+	// Now we check that ghost is correct
+
+	auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3});
+
+	bool match = true;
+
+	while (it3.isNext())
+	{
+		auto p = it3.get();
+
+		auto p_xp1 = p.move(0,1);
+		auto p_xm1 = p.move(0,-1);
+		auto p_yp1 = p.move(1,1);
+		auto p_ym1 = p.move(1,-1);
+		auto p_zp1 = p.move(2,1);
+		auto p_zm1 = p.move(2,-1);
+
+		float sub1 = gdist.template get<2>(p);
+		float sub2 = gdist.template get<3>(p);
+
+		if (sub1 != 6.0*5 || sub2 != 6.0*5)
+		{
+			std::cout << sub1 << "  " << sub2 << std::endl;
+			std::cout << gdist.template get<0>(p_xp1) << "   " << gdist.template get<0>(p_xm1) << std::endl;
+			std::cout << gdist.template get<1>(p_xp1) << "   " << gdist.template get<1>(p_xm1) << std::endl;
+			match = false;
+			break;
+		}
+
+		++it3;
+	}
+
+	BOOST_REQUIRE_EQUAL(match,true);
 }
 
 BOOST_AUTO_TEST_SUITE_END()