From 5dbbc3f5191012ed1ca9a956fa1578a846745819 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Fri, 13 Sep 2019 00:28:01 +0200
Subject: [PATCH] Fixing CUDA compilation + AMR distributed GPU working

---
 openfpm_data                                  |   2 +-
 src/Amr/grid_dist_amr.hpp                     |  16 +-
 src/Amr/tests/amr_base_gpu_unit_tests.cu      | 112 ++-
 src/Grid/Iterators/grid_dist_id_iterator.hpp  |  24 +-
 src/Grid/cuda/grid_dist_id_kernels.cuh        |  49 +-
 src/Grid/grid_dist_id.hpp                     |  57 +-
 .../tests/sgrid_dist_id_gpu_unit_tests.cu     | 127 ++-
 test_data/sgrid_gpu_output_1_0.vtk            | 911 ++++++++++++++++++
 test_data/sgrid_gpu_output_2_0.vtk            | 499 ++++++++++
 test_data/sgrid_gpu_output_2_1.vtk            | 423 ++++++++
 10 files changed, 2128 insertions(+), 92 deletions(-)
 create mode 100644 test_data/sgrid_gpu_output_1_0.vtk
 create mode 100644 test_data/sgrid_gpu_output_2_0.vtk
 create mode 100644 test_data/sgrid_gpu_output_2_1.vtk

diff --git a/openfpm_data b/openfpm_data
index 5056c7251..b7d419281 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 5056c72513114e2491a8fc1b4ddd4899ecd3e242
+Subproject commit b7d4192813d4aaccd703df1472b9bc9da6c9a7b6
diff --git a/src/Amr/grid_dist_amr.hpp b/src/Amr/grid_dist_amr.hpp
index e247f172d..56803c19b 100644
--- a/src/Amr/grid_dist_amr.hpp
+++ b/src/Amr/grid_dist_amr.hpp
@@ -466,6 +466,20 @@ public:
 		return gd_array.get(lvl).getGridIterator();
 	}
 
+#ifdef __NVCC__
+
+	/*! \brief Get an iterator to the grid
+	 *
+	 * \return an iterator to the grid
+	 *
+	 */
+	auto getGridIteratorGPU(size_t lvl) -> decltype(gd_array.get(lvl).getGridIteratorGPU())
+	{
+		return gd_array.get(lvl).getGridIteratorGPU();
+	}
+
+#endif
+
 	/*! \brief Get an iterator to the grid
 	 *
 	 * \return an iterator to the grid
@@ -912,7 +926,7 @@ using sgrid_dist_amr = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition
 #ifdef __NVCC__
 
 template<unsigned int dim, typename St, typename T, unsigned int blockEdgeSize = 8>
-using sgrid_dist_amr_gpu = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition<dim,St,CudaMemory,memory_traits_inte>,CudaMemory,SparseGridGpu<dim,T,blockEdgeSize>>;
+using sgrid_dist_amr_gpu = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition<dim,St,CudaMemory,memory_traits_inte>,CudaMemory,SparseGridGpu<dim,T,blockEdgeSize,IntPow<blockEdgeSize,dim>::value >>;
 
 #endif
 
diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu
index f0337ca16..267d9b182 100644
--- a/src/Amr/tests/amr_base_gpu_unit_tests.cu
+++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu
@@ -18,6 +18,58 @@
 #include "Point_test.hpp"
 #include "Grid/tests/grid_dist_id_util_tests.hpp"
 
+struct amr_launch_sparse
+{
+	template<typename grid_type, typename ite_type>
+	__device__ void operator()(grid_type & grid, ite_type itg, float spacing, Point<3,float> center)
+	{
+		GRID_ID_3_GLOBAL(itg);
+
+	    __shared__ bool is_block_empty;
+
+	    if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
+	    {is_block_empty = true;}
+
+	    grid.init();
+
+	    int offset = 0;
+	    grid_key_dx<3,int> blk;
+	    bool out = grid.getInsertBlockOffset(itg,key,blk,offset);
+
+	    auto blockId = grid.getBlockLinId(blk);
+
+	    const float x = keyg.get(0)*spacing - center.get(0);
+	    const float y = keyg.get(1)*spacing - center.get(1);
+	    const float z = keyg.get(2)*spacing - center.get(2);
+
+	    float radius = sqrt((float) (x*x + y*y + z*z));
+
+	    bool is_active = radius < 0.4 && radius > 0.3;
+
+	    if (is_active == true)
+	    {is_block_empty = false;}
+
+	    __syncthreads();
+
+	    if (is_block_empty == false)
+	    {
+	        auto ec = grid.insertBlock(blockId);
+
+	        if ( is_active == true)
+	        {
+	            ec.template get<0>()[offset] = x+y+z;
+	            ec.template get<grid_type::pMask>()[offset] = 1;
+	        }
+	    }
+
+	    __syncthreads();
+
+	    grid.flush_block_insert();
+	}
+};
+
+
+
 BOOST_AUTO_TEST_SUITE( amr_grid_dist_id_test )
 
 
@@ -27,39 +79,73 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu )
 	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
 
 
-	Ghost<3,float> g(0.05);
+	Ghost<3,long int> g(1);
 	sgrid_dist_amr_gpu<3,float,aggregate<float>> amr_g(domain3,g);
 
 	size_t g_sz[3] = {4,4,4};
 
-	size_t n_lvl = 10;
+	size_t n_lvl = 6;
 
-//	amr_g.initLevels(n_lvl,g_sz);
+	amr_g.initLevels(n_lvl,g_sz);
 
-
-/*	for (size_t i = 0 ; i < amr_g.getNLvl() ; i++)
+	for (size_t i = 0 ; i < amr_g.getNLvl() ; i++)
 	{
 		// Fill the AMR with something
 
 		size_t count = 0;
 
-		auto it = amr_g.getGridIterator(i);
+		auto it = amr_g.getGridIteratorGPU(i);
+		it.setGPUInsertBuffer(1);
 
-		while (it.isNext())
-		{
-			auto key = it.get_dist();
-			auto akey = amr_g.getAMRKey(i,key);
+		Point<3,float> center({0.5,0.5,0.5});
+
+		it.launch(amr_launch_sparse(),it.getSpacing(0),center);
+		amr_g.getDistGrid(i).template flush<smax_<0>>(FLUSH_ON_DEVICE);
+
+		amr_g.getDistGrid(i).template deviceToHost<0>();
 
-			amr_g.template insert<0>(akey) = 3.0;
+		auto it2 = amr_g.getDistGrid(i).getDomainIterator();
+
+		while (it2.isNext())
+		{
+			auto key = it2.get();
+			auto keyg = it2.getGKey(key);
 
 			count++;
 
-			++it;
+			++it2;
+		}
+
+		auto & v_cl = create_vcluster();
+
+		v_cl.sum(count);
+		v_cl.execute();
+
+		switch(i)
+		{
+		case 0:
+			BOOST_REQUIRE_EQUAL(count,0);
+			break;
+		case 1:
+			BOOST_REQUIRE_EQUAL(count,30);
+			break;
+		case 2:
+			BOOST_REQUIRE_EQUAL(count,282);
+			break;
+		case 3:
+			BOOST_REQUIRE_EQUAL(count,2192);
+			break;
+		case 4:
+			BOOST_REQUIRE_EQUAL(count,16890);
+			break;
+		case 5:
+			BOOST_REQUIRE_EQUAL(count,136992);
+			break;
 		}
 	}
 
 	// Iterate across all the levels initialized
-	auto it = amr_g.getDomainIterator();
+/*	auto it = amr_g.getDomainIterator();
 
 	size_t count = 0;
 
diff --git a/src/Grid/Iterators/grid_dist_id_iterator.hpp b/src/Grid/Iterators/grid_dist_id_iterator.hpp
index eacea46e2..b7d5e1ef7 100644
--- a/src/Grid/Iterators/grid_dist_id_iterator.hpp
+++ b/src/Grid/Iterators/grid_dist_id_iterator.hpp
@@ -93,24 +93,6 @@ class grid_dist_iterator
 			// get the next grid iterator
 			if (g_c < gList.size())
 			{
-				// Sub iterator are used
-/*				if (impl == FREE)
-				{
-					if (gdb_ext.get(g_c).Dbox.isValid() == false)
-					{g_c++;}
-					else
-					{
-						a_it.reinitialize(gList.get(g_c).getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
-						if (a_it.isNext() == false)	{g_c++;}
-					}
-				}
-				else
-				{
-					// Full iterator (no subset)
-					a_it.reinitialize(gList.get(g_c).getIterator());
-					if (a_it.isNext() == false)	{g_c++;}
-				}*/
-
 				selvg<impl == FREE>::call(a_it,gdb_ext,gList,g_c);
 			}
 		} while (g_c < gList.size() && a_it.isNext() == false);
@@ -276,12 +258,12 @@ class grid_dist_iterator
 		// Get the sub-domain id
 		size_t sub_id = k.getSub();
 
-		grid_key_dx<dim> k_glob = k.getKey();
+		auto k_glob = k.getKey();
 
 		// shift
-		k_glob = k_glob + gdb_ext.get(sub_id).origin;
+		auto k_glob2 = k_glob + gdb_ext.get(sub_id).origin;
 
-		return k_glob;
+		return k_glob2;
 	}
 
 	/*! \brief Return the stencil point offset
diff --git a/src/Grid/cuda/grid_dist_id_kernels.cuh b/src/Grid/cuda/grid_dist_id_kernels.cuh
index 9c4e10232..2032c06b5 100644
--- a/src/Grid/cuda/grid_dist_id_kernels.cuh
+++ b/src/Grid/cuda/grid_dist_id_kernels.cuh
@@ -8,9 +8,54 @@
 #ifndef GRID_DIST_ID_KERNELS_CUH_
 #define GRID_DIST_ID_KERNELS_CUH_
 
+#ifdef CUDA_GPU
 
-template<typename grid_type, typename func_t,typename ... args_t>
-__global__ void grid_apply_functor(grid_type g, ite_gpu<grid_type::d> ite, func_t f, args_t ... args)
+template<unsigned int dim>
+struct ite_gpu_dist
+{
+	dim3 thr;
+	dim3 wthr;
+
+	grid_key_dx<dim,int> start;
+	grid_key_dx<dim,int> stop;
+
+	grid_key_dx<dim,int> start_base;
+
+	grid_key_dx<dim,int> origin;
+
+	ite_gpu_dist(ite_gpu<dim> & ite)
+	{
+		thr = ite.thr;
+		wthr = ite.wthr;
+
+		start = ite.start;
+		stop = ite.stop;
+	}
+
+	size_t nblocks()
+	{
+		return wthr.x * wthr.y * wthr.z;
+	}
+};
+
+#define GRID_ID_3_GLOBAL(ite_gpu) grid_key_dx<3,int> key;\
+								  grid_key_dx<3,int> keyg;\
+							  key.set_d(0,threadIdx.x + blockIdx.x * blockDim.x + ite_gpu.start.get(0));\
+    						  key.set_d(1,threadIdx.y + blockIdx.y * blockDim.y + ite_gpu.start.get(1));\
+							  key.set_d(2,threadIdx.z + blockIdx.z * blockDim.z + ite_gpu.start.get(2));\
+							  \
+							  keyg.set_d(0,key.get(0) + ite_gpu.origin.get(0));\
+    						  keyg.set_d(1,key.get(1) + ite_gpu.origin.get(1));\
+							  keyg.set_d(2,key.get(2) + ite_gpu.origin.get(2));\
+										 \
+										 if (key.get(0) > ite_gpu.stop.get(0) || key.get(1) > ite_gpu.stop.get(1) || key.get(2) > ite_gpu.stop.get(2))\
+    									 {return;}
+
+
+#endif
+
+template<typename grid_type, typename ite_gpu_type,typename func_t,typename ... args_t>
+__global__ void grid_apply_functor(grid_type g, ite_gpu_type ite, func_t f, args_t ... args)
 {
 	f(g,ite,args...);
 }
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 44c3ded42..ffef983d5 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -26,6 +26,7 @@
 #ifdef __NVCC__
 #include "SparseGridGpu/SparseGridGpu.hpp"
 #include "cuda/grid_dist_id_kernels.cuh"
+#include "Grid/cuda/grid_dist_id_iterator_gpu.cuh"
 #endif
 
 
@@ -1134,8 +1135,8 @@ public:
 	 */
 	void setBackgroundValue(T & bv)
 	{
-		for (size_t i = 0 ; i < loc_grid.size() ; i++)
-		{meta_copy<T>::meta_copy_(bv,loc_grid.get(i).getBackgroundValue());}
+		setBackground_impl<T,decltype(loc_grid)> func(bv,loc_grid);
+		boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop>>(func);
 	}
 
 	/*! \brief set the background value
@@ -1715,7 +1716,7 @@ public:
 	/*! /brief Get a grid Iterator
 	 *
 	 * In case of dense grid getGridIterator is equivalent to getDomainIterator
-	 * in case if sparse distributed grid getDomainIterator go across all the
+	 * in case of sparse grid getDomainIterator go across all the
 	 * inserted point get grid iterator run across all grid points independently
 	 * that the point has been insert or not
 	 *
@@ -1728,6 +1729,47 @@ public:
 		return it_dec;
 	}
 
+#ifdef __NVCC__
+
+	/*! /brief Get a grid Iterator in GPU
+	 *
+	 * In case of dense grid getGridIterator is equivalent to getDomainIteratorGPU
+	 * in case of sparse distributed grid getDomainIterator go across all the
+	 * inserted point getGridIteratorGPU run across all grid points independently
+	 * that the point has been insert or not
+	 *
+	 * \param start point
+	 * \param stop point
+	 *
+	 * \return a Grid iterator
+	 *
+	 */
+	inline grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>>
+	getGridIteratorGPU(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop)
+	{
+		grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> it_dec(loc_grid,getDecomposition(), g_sz, start, stop);
+		return it_dec;
+	}
+
+	/*! /brief Get a grid Iterator in GPU
+	 *
+	 * In case of dense grid getGridIterator is equivalent to getDomainIteratorGPU
+	 * in case of sparse distributed grid getDomainIterator go across all the
+	 * inserted point getGridIteratorGPU run across all grid points independently
+	 * that the point has been insert or not
+	 *
+	 * \return a Grid iterator
+	 *
+	 */
+	inline grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>>
+	getGridIteratorGPU()
+	{
+		grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> it_dec(loc_grid,getDecomposition(), g_sz);
+		return it_dec;
+	}
+
+#endif
+
 	/*! /brief Get a grid Iterator running also on ghost area
 	 *
 	 * In case of dense grid getGridIterator is equivalent to getDomainIterator
@@ -2641,6 +2683,8 @@ public:
 #ifdef __NVCC__
 
 	/*! \brief Set the size of the gpu insert buffer pool
+	 *
+	 * Indicate the maximum number of inserts each GPU block can do
 	 *
 	 * \param size of the insert pool
 	 *
@@ -2664,7 +2708,12 @@ public:
 			loc_grid.get(i).setGPUInsertBuffer(ite.nblocks(),gpu_insert_pool_size);
 			loc_grid.get(i).initializeGPUInsertBuffer();
 
-			grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),ite,func_t(),args...);
+			ite_gpu_dist<dim> itd = ite;
+
+			for (int j = 0 ; j < dim ; j++)
+			{itd.origin.set_d(j,gdb_ext.get(i).origin.get(j));}
+
+			grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),itd,func_t(),args...);
 
 			it.nextGrid();
 		}
diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
index 083830f66..5187dcc14 100644
--- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
+++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
@@ -8,8 +8,8 @@ BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite )
 template<unsigned int p>
 struct insert_kernel
 {
-	template<typename SparseGridGpu_type>
-	__device__ void operator()(SparseGridGpu_type & sg, ite_gpu<SparseGridGpu_type::d> & ite, float c)
+	template<typename SparseGridGpu_type, typename ite_type>
+	__device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
 	{
 	    sg.init();
 
@@ -30,22 +30,34 @@ struct insert_kernel
 	    {return;}
 	    if (SparseGridGpu_type::d >= 2 && y+ite.start.get(1) > ite.stop.get(1))
 	    {return;}
-	    if (SparseGridGpu_type::d >= 3 && z+ite.start.get(1) > ite.stop.get(2))
+	    if (SparseGridGpu_type::d >= 3 && z+ite.start.get(2) > ite.stop.get(2))
 	    {return;}
 
-	    grid_key_dx<SparseGridGpu_type::d, size_t> coord({x+ite.start.get(0), y+ite.start.get(1), z+ite.start.get(2)});
-
-	//    size_t pos = sg.getLinId(coord);
-	//    printf("insertValues: bDim=(%d,%d), bId=(%d,%d), tId=(%d,%d) : "
-	//           "pos=%ld, coord={%d,%d}, value=%d\n",
-	//           bDimX, bDimY,
-	//           bIdX, bIdY,
-	//           tIdX, tIdY,
-	//           pos,
-	//           x, y,
-	//           x); //debug
-
-	    sg.template insert<p>(coord) = c;
+	    grid_key_dx<SparseGridGpu_type::d, size_t> coord;
+	    grid_key_dx<SparseGridGpu_type::d, size_t> coord_glob;
+
+	    if (SparseGridGpu_type::d >= 2)
+	    {
+	    	coord.set_d(0,x+ite.start.get(0));
+	    	coord_glob.set_d(0,x+ite.start.get(0)+ite.origin.get(0));
+	    	coord.set_d(1,y+ite.start.get(1));
+	    	coord_glob.set_d(1,y+ite.start.get(1)+ite.origin.get(1));
+	    }
+	    else if (SparseGridGpu_type::d >= 3)
+	    {
+		    coord.set_d(0,x+ite.start.get(0));
+		    coord_glob.set_d(0,x+ite.start.get(0)+ite.origin.get(0));
+		    coord.set_d(1,y+ite.start.get(1));
+		    coord_glob.set_d(1,y+ite.start.get(1)+ite.origin.get(1));
+		    coord.set_d(2,z+ite.start.get(2));
+		    coord_glob.set_d(2,z+ite.start.get(2)+ite.origin.get(2));
+	    }
+
+
+	    if (SparseGridGpu_type::d >= 2)
+	    {sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1);}
+	    else
+	    {sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1) + coord_glob.get(2);}
 
 	    __syncthreads();
 
@@ -80,35 +92,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 
 	gdist.template setBackgroundValue<0>(666);
 
-	/////// CPU insert
-
-/*	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
-
-	while (it.isNext())
-	{
-		auto p = it.get_dist();
-
-		gdist.template insert<0>(p) = 1.0;
-
-		++it;
-	}
-
-	gdist.template flush<>();
-
-	Box<2,size_t> box2({0,0},{15,15});
-	auto it2 = gdist.getGridIterator(box2.getKP1(),box2.getKP2());
-
-	while (it2.isNext())
-	{
-		auto p = it2.get_dist();
-
-		std::cout << gdist.template get<0>(p) << std::endl;
-
-		++it2;
-	}*/
-
-	/////// host to device
-
 	/////// GPU insert + flush
 
 	Box<2,size_t> box({1,1},{1,1});
@@ -116,7 +99,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 
 	/////// GPU Run kernel
 
-	gdist.setInsertBuffer(128);
+	gdist.setInsertBuffer(1);
 
 	float c = 5.0;
 
@@ -137,10 +120,16 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 
 			if (p2.get(0) == box.getLow(0) && p2.get(1) == box.getLow(1))
 			{
-				BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 5.0);
+				BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 7.0);
 			}
 			else
 			{
+				if (gdist.template get<0>(p) != 666.0)
+				{
+					float f = gdist.template get<0>(p);
+					std::cout << "ERROR: " << gdist.template get<0>(p) << std::endl;
+				}
+
 				BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 666.0);
 			}
 
@@ -148,6 +137,8 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 		}
 	}
 
+	return;
+
 	//
 
 	c = 3.0;
@@ -189,14 +180,50 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 			++it;
 		}
 	}
+}
+
 
-	////////////////////////////////////
+BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output )
+{
+	auto & v_cl = create_vcluster();
+
+	if (v_cl.size() > 3){return;}
+
+	size_t sz[2] = {17,17};
+	periodicity<2> bc = {PERIODIC,PERIODIC};
+
+	Ghost<2,long int> g(1);
+
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
+
+	gdist.template setBackgroundValue<0>(666);
+
+	/////// GPU insert + flush
+
+	Box<2,size_t> box({1,1},{15,15});
+	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
+
+	/////// GPU Run kernel
 
 	gdist.setInsertBuffer(128);
-	gdist.template iterateGPU<stencil_kernel<0>>();
 
+	float c = 5.0;
 
-}
+	gdist.template iterateGridGPU<insert_kernel<0>>(it,c);
+	gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
 
+	gdist.template deviceToHost<0>();
+
+	gdist.write("sgrid_gpu_output");
+
+	std::string file_test("sgrid_gpu_output_" + std::to_string(v_cl.size()) + "_" + std::to_string(v_cl.rank())  + ".vtk");
+	std::string file("sgrid_gpu_output_" + std::to_string(v_cl.rank()) + ".vtk");
+
+	bool test = compare(file,"test_data/" + file_test);
+
+	BOOST_REQUIRE_EQUAL(true,test);
+}
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/test_data/sgrid_gpu_output_1_0.vtk b/test_data/sgrid_gpu_output_1_0.vtk
new file mode 100644
index 000000000..b043e51ba
--- /dev/null
+++ b/test_data/sgrid_gpu_output_1_0.vtk
@@ -0,0 +1,911 @@
+# vtk DataFile Version 3.0
+grids
+ASCII
+DATASET POLYDATA
+POINTS 225 float
+0.058824 0.058824 0.0
+0.117647 0.058824 0.0
+0.176471 0.058824 0.0
+0.235294 0.058824 0.0
+0.294118 0.058824 0.0
+0.352941 0.058824 0.0
+0.058824 0.117647 0.0
+0.117647 0.117647 0.0
+0.176471 0.117647 0.0
+0.235294 0.117647 0.0
+0.294118 0.117647 0.0
+0.352941 0.117647 0.0
+0.058824 0.176471 0.0
+0.117647 0.176471 0.0
+0.176471 0.176471 0.0
+0.235294 0.176471 0.0
+0.294118 0.176471 0.0
+0.352941 0.176471 0.0
+0.058824 0.235294 0.0
+0.117647 0.235294 0.0
+0.176471 0.235294 0.0
+0.235294 0.235294 0.0
+0.294118 0.235294 0.0
+0.352941 0.235294 0.0
+0.058824 0.294118 0.0
+0.117647 0.294118 0.0
+0.176471 0.294118 0.0
+0.235294 0.294118 0.0
+0.294118 0.294118 0.0
+0.352941 0.294118 0.0
+0.058824 0.352941 0.0
+0.117647 0.352941 0.0
+0.176471 0.352941 0.0
+0.235294 0.352941 0.0
+0.294118 0.352941 0.0
+0.352941 0.352941 0.0
+0.411765 0.058824 0.0
+0.470588 0.058824 0.0
+0.529412 0.058824 0.0
+0.588235 0.058824 0.0
+0.647059 0.058824 0.0
+0.705882 0.058824 0.0
+0.764706 0.058824 0.0
+0.823529 0.058824 0.0
+0.411765 0.117647 0.0
+0.470588 0.117647 0.0
+0.529412 0.117647 0.0
+0.588235 0.117647 0.0
+0.647059 0.117647 0.0
+0.705882 0.117647 0.0
+0.764706 0.117647 0.0
+0.823529 0.117647 0.0
+0.411765 0.176471 0.0
+0.470588 0.176471 0.0
+0.529412 0.176471 0.0
+0.588235 0.176471 0.0
+0.647059 0.176471 0.0
+0.705882 0.176471 0.0
+0.764706 0.176471 0.0
+0.823529 0.176471 0.0
+0.411765 0.235294 0.0
+0.470588 0.235294 0.0
+0.529412 0.235294 0.0
+0.588235 0.235294 0.0
+0.647059 0.235294 0.0
+0.705882 0.235294 0.0
+0.764706 0.235294 0.0
+0.823529 0.235294 0.0
+0.411765 0.294118 0.0
+0.470588 0.294118 0.0
+0.529412 0.294118 0.0
+0.588235 0.294118 0.0
+0.647059 0.294118 0.0
+0.705882 0.294118 0.0
+0.764706 0.294118 0.0
+0.823529 0.294118 0.0
+0.411765 0.352941 0.0
+0.470588 0.352941 0.0
+0.529412 0.352941 0.0
+0.588235 0.352941 0.0
+0.647059 0.352941 0.0
+0.705882 0.352941 0.0
+0.764706 0.352941 0.0
+0.823529 0.352941 0.0
+0.882353 0.058824 0.0
+0.882353 0.117647 0.0
+0.882353 0.176471 0.0
+0.882353 0.235294 0.0
+0.882353 0.294118 0.0
+0.882353 0.352941 0.0
+0.058824 0.411765 0.0
+0.117647 0.411765 0.0
+0.176471 0.411765 0.0
+0.235294 0.411765 0.0
+0.294118 0.411765 0.0
+0.352941 0.411765 0.0
+0.058824 0.470588 0.0
+0.117647 0.470588 0.0
+0.176471 0.470588 0.0
+0.235294 0.470588 0.0
+0.294118 0.470588 0.0
+0.352941 0.470588 0.0
+0.058824 0.529412 0.0
+0.117647 0.529412 0.0
+0.176471 0.529412 0.0
+0.235294 0.529412 0.0
+0.294118 0.529412 0.0
+0.352941 0.529412 0.0
+0.058824 0.588235 0.0
+0.117647 0.588235 0.0
+0.176471 0.588235 0.0
+0.235294 0.588235 0.0
+0.294118 0.588235 0.0
+0.352941 0.588235 0.0
+0.058824 0.647059 0.0
+0.117647 0.647059 0.0
+0.176471 0.647059 0.0
+0.235294 0.647059 0.0
+0.294118 0.647059 0.0
+0.352941 0.647059 0.0
+0.058824 0.705882 0.0
+0.117647 0.705882 0.0
+0.176471 0.705882 0.0
+0.235294 0.705882 0.0
+0.294118 0.705882 0.0
+0.352941 0.705882 0.0
+0.058824 0.764706 0.0
+0.117647 0.764706 0.0
+0.176471 0.764706 0.0
+0.235294 0.764706 0.0
+0.294118 0.764706 0.0
+0.352941 0.764706 0.0
+0.058824 0.823529 0.0
+0.117647 0.823529 0.0
+0.176471 0.823529 0.0
+0.235294 0.823529 0.0
+0.294118 0.823529 0.0
+0.352941 0.823529 0.0
+0.411765 0.411765 0.0
+0.470588 0.411765 0.0
+0.529412 0.411765 0.0
+0.588235 0.411765 0.0
+0.647059 0.411765 0.0
+0.705882 0.411765 0.0
+0.764706 0.411765 0.0
+0.823529 0.411765 0.0
+0.411765 0.470588 0.0
+0.470588 0.470588 0.0
+0.529412 0.470588 0.0
+0.588235 0.470588 0.0
+0.647059 0.470588 0.0
+0.705882 0.470588 0.0
+0.764706 0.470588 0.0
+0.823529 0.470588 0.0
+0.411765 0.529412 0.0
+0.470588 0.529412 0.0
+0.529412 0.529412 0.0
+0.588235 0.529412 0.0
+0.647059 0.529412 0.0
+0.705882 0.529412 0.0
+0.764706 0.529412 0.0
+0.823529 0.529412 0.0
+0.411765 0.588235 0.0
+0.470588 0.588235 0.0
+0.529412 0.588235 0.0
+0.588235 0.588235 0.0
+0.647059 0.588235 0.0
+0.705882 0.588235 0.0
+0.764706 0.588235 0.0
+0.823529 0.588235 0.0
+0.411765 0.647059 0.0
+0.470588 0.647059 0.0
+0.529412 0.647059 0.0
+0.588235 0.647059 0.0
+0.647059 0.647059 0.0
+0.705882 0.647059 0.0
+0.764706 0.647059 0.0
+0.823529 0.647059 0.0
+0.411765 0.705882 0.0
+0.470588 0.705882 0.0
+0.529412 0.705882 0.0
+0.588235 0.705882 0.0
+0.647059 0.705882 0.0
+0.705882 0.705882 0.0
+0.764706 0.705882 0.0
+0.823529 0.705882 0.0
+0.411765 0.764706 0.0
+0.470588 0.764706 0.0
+0.529412 0.764706 0.0
+0.588235 0.764706 0.0
+0.647059 0.764706 0.0
+0.705882 0.764706 0.0
+0.764706 0.764706 0.0
+0.823529 0.764706 0.0
+0.411765 0.823529 0.0
+0.470588 0.823529 0.0
+0.529412 0.823529 0.0
+0.588235 0.823529 0.0
+0.647059 0.823529 0.0
+0.705882 0.823529 0.0
+0.764706 0.823529 0.0
+0.823529 0.823529 0.0
+0.882353 0.411765 0.0
+0.882353 0.470588 0.0
+0.882353 0.529412 0.0
+0.882353 0.588235 0.0
+0.882353 0.647059 0.0
+0.882353 0.705882 0.0
+0.882353 0.764706 0.0
+0.882353 0.823529 0.0
+0.058824 0.882353 0.0
+0.117647 0.882353 0.0
+0.176471 0.882353 0.0
+0.235294 0.882353 0.0
+0.294118 0.882353 0.0
+0.352941 0.882353 0.0
+0.411765 0.882353 0.0
+0.470588 0.882353 0.0
+0.529412 0.882353 0.0
+0.588235 0.882353 0.0
+0.647059 0.882353 0.0
+0.705882 0.882353 0.0
+0.764706 0.882353 0.0
+0.823529 0.882353 0.0
+0.882353 0.882353 0.0
+VERTICES 225 450
+1 0
+1 1
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+1 10
+1 11
+1 12
+1 13
+1 14
+1 15
+1 16
+1 17
+1 18
+1 19
+1 20
+1 21
+1 22
+1 23
+1 24
+1 25
+1 26
+1 27
+1 28
+1 29
+1 30
+1 31
+1 32
+1 33
+1 34
+1 35
+1 36
+1 37
+1 38
+1 39
+1 40
+1 41
+1 42
+1 43
+1 44
+1 45
+1 46
+1 47
+1 48
+1 49
+1 50
+1 51
+1 52
+1 53
+1 54
+1 55
+1 56
+1 57
+1 58
+1 59
+1 60
+1 61
+1 62
+1 63
+1 64
+1 65
+1 66
+1 67
+1 68
+1 69
+1 70
+1 71
+1 72
+1 73
+1 74
+1 75
+1 76
+1 77
+1 78
+1 79
+1 80
+1 81
+1 82
+1 83
+1 84
+1 85
+1 86
+1 87
+1 88
+1 89
+1 90
+1 91
+1 92
+1 93
+1 94
+1 95
+1 96
+1 97
+1 98
+1 99
+1 100
+1 101
+1 102
+1 103
+1 104
+1 105
+1 106
+1 107
+1 108
+1 109
+1 110
+1 111
+1 112
+1 113
+1 114
+1 115
+1 116
+1 117
+1 118
+1 119
+1 120
+1 121
+1 122
+1 123
+1 124
+1 125
+1 126
+1 127
+1 128
+1 129
+1 130
+1 131
+1 132
+1 133
+1 134
+1 135
+1 136
+1 137
+1 138
+1 139
+1 140
+1 141
+1 142
+1 143
+1 144
+1 145
+1 146
+1 147
+1 148
+1 149
+1 150
+1 151
+1 152
+1 153
+1 154
+1 155
+1 156
+1 157
+1 158
+1 159
+1 160
+1 161
+1 162
+1 163
+1 164
+1 165
+1 166
+1 167
+1 168
+1 169
+1 170
+1 171
+1 172
+1 173
+1 174
+1 175
+1 176
+1 177
+1 178
+1 179
+1 180
+1 181
+1 182
+1 183
+1 184
+1 185
+1 186
+1 187
+1 188
+1 189
+1 190
+1 191
+1 192
+1 193
+1 194
+1 195
+1 196
+1 197
+1 198
+1 199
+1 200
+1 201
+1 202
+1 203
+1 204
+1 205
+1 206
+1 207
+1 208
+1 209
+1 210
+1 211
+1 212
+1 213
+1 214
+1 215
+1 216
+1 217
+1 218
+1 219
+1 220
+1 221
+1 222
+1 223
+1 224
+POINT_DATA 225
+SCALARS attr0 float
+LOOKUP_TABLE default
+7.000000
+8.000000
+9.000000
+10.000000
+11.000000
+12.000000
+8.000000
+9.000000
+10.000000
+11.000000
+12.000000
+13.000000
+9.000000
+10.000000
+11.000000
+12.000000
+13.000000
+14.000000
+10.000000
+11.000000
+12.000000
+13.000000
+14.000000
+15.000000
+11.000000
+12.000000
+13.000000
+14.000000
+15.000000
+16.000000
+12.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+32.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+32.000000
+33.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+32.000000
+33.000000
+34.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+32.000000
+33.000000
+34.000000
+35.000000
+SCALARS domain float
+LOOKUP_TABLE default
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
diff --git a/test_data/sgrid_gpu_output_2_0.vtk b/test_data/sgrid_gpu_output_2_0.vtk
new file mode 100644
index 000000000..2081c271d
--- /dev/null
+++ b/test_data/sgrid_gpu_output_2_0.vtk
@@ -0,0 +1,499 @@
+# vtk DataFile Version 3.0
+grids
+ASCII
+DATASET POLYDATA
+POINTS 122 float
+0.058824 0.058824 0.0
+0.117647 0.058824 0.0
+0.176471 0.058824 0.0
+0.235294 0.058824 0.0
+0.294118 0.058824 0.0
+0.352941 0.058824 0.0
+0.058824 0.117647 0.0
+0.117647 0.117647 0.0
+0.176471 0.117647 0.0
+0.235294 0.117647 0.0
+0.294118 0.117647 0.0
+0.352941 0.117647 0.0
+0.058824 0.176471 0.0
+0.117647 0.176471 0.0
+0.176471 0.176471 0.0
+0.235294 0.176471 0.0
+0.294118 0.176471 0.0
+0.352941 0.176471 0.0
+0.058824 0.235294 0.0
+0.117647 0.235294 0.0
+0.176471 0.235294 0.0
+0.235294 0.235294 0.0
+0.294118 0.235294 0.0
+0.352941 0.235294 0.0
+0.058824 0.294118 0.0
+0.117647 0.294118 0.0
+0.176471 0.294118 0.0
+0.235294 0.294118 0.0
+0.294118 0.294118 0.0
+0.352941 0.294118 0.0
+0.058824 0.352941 0.0
+0.117647 0.352941 0.0
+0.176471 0.352941 0.0
+0.235294 0.352941 0.0
+0.294118 0.352941 0.0
+0.352941 0.352941 0.0
+0.411765 0.058824 0.0
+0.470588 0.058824 0.0
+0.529412 0.058824 0.0
+0.588235 0.058824 0.0
+0.647059 0.058824 0.0
+0.705882 0.058824 0.0
+0.411765 0.117647 0.0
+0.470588 0.117647 0.0
+0.529412 0.117647 0.0
+0.588235 0.117647 0.0
+0.647059 0.117647 0.0
+0.705882 0.117647 0.0
+0.411765 0.176471 0.0
+0.470588 0.176471 0.0
+0.529412 0.176471 0.0
+0.588235 0.176471 0.0
+0.647059 0.176471 0.0
+0.705882 0.176471 0.0
+0.411765 0.235294 0.0
+0.470588 0.235294 0.0
+0.529412 0.235294 0.0
+0.588235 0.235294 0.0
+0.647059 0.235294 0.0
+0.705882 0.235294 0.0
+0.411765 0.294118 0.0
+0.470588 0.294118 0.0
+0.529412 0.294118 0.0
+0.588235 0.294118 0.0
+0.647059 0.294118 0.0
+0.705882 0.294118 0.0
+0.411765 0.352941 0.0
+0.470588 0.352941 0.0
+0.529412 0.352941 0.0
+0.588235 0.352941 0.0
+0.647059 0.352941 0.0
+0.705882 0.352941 0.0
+0.058824 0.411765 0.0
+0.117647 0.411765 0.0
+0.176471 0.411765 0.0
+0.235294 0.411765 0.0
+0.294118 0.411765 0.0
+0.352941 0.411765 0.0
+0.058824 0.470588 0.0
+0.117647 0.470588 0.0
+0.176471 0.470588 0.0
+0.235294 0.470588 0.0
+0.294118 0.470588 0.0
+0.352941 0.470588 0.0
+0.411765 0.411765 0.0
+0.470588 0.411765 0.0
+0.529412 0.411765 0.0
+0.588235 0.411765 0.0
+0.647059 0.411765 0.0
+0.705882 0.411765 0.0
+0.411765 0.470588 0.0
+0.470588 0.470588 0.0
+0.529412 0.470588 0.0
+0.588235 0.470588 0.0
+0.647059 0.470588 0.0
+0.705882 0.470588 0.0
+0.764706 0.058824 0.0
+0.823529 0.058824 0.0
+0.882353 0.058824 0.0
+0.764706 0.117647 0.0
+0.823529 0.117647 0.0
+0.882353 0.117647 0.0
+0.764706 0.176471 0.0
+0.823529 0.176471 0.0
+0.882353 0.176471 0.0
+0.764706 0.235294 0.0
+0.823529 0.235294 0.0
+0.882353 0.235294 0.0
+0.764706 0.294118 0.0
+0.823529 0.294118 0.0
+0.882353 0.294118 0.0
+0.764706 0.352941 0.0
+0.823529 0.352941 0.0
+0.882353 0.352941 0.0
+0.764706 0.411765 0.0
+0.823529 0.411765 0.0
+0.882353 0.411765 0.0
+0.058824 0.529412 0.0
+0.117647 0.529412 0.0
+0.176471 0.529412 0.0
+0.235294 0.529412 0.0
+0.294118 0.529412 0.0
+VERTICES 122 244
+1 0
+1 1
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+1 10
+1 11
+1 12
+1 13
+1 14
+1 15
+1 16
+1 17
+1 18
+1 19
+1 20
+1 21
+1 22
+1 23
+1 24
+1 25
+1 26
+1 27
+1 28
+1 29
+1 30
+1 31
+1 32
+1 33
+1 34
+1 35
+1 36
+1 37
+1 38
+1 39
+1 40
+1 41
+1 42
+1 43
+1 44
+1 45
+1 46
+1 47
+1 48
+1 49
+1 50
+1 51
+1 52
+1 53
+1 54
+1 55
+1 56
+1 57
+1 58
+1 59
+1 60
+1 61
+1 62
+1 63
+1 64
+1 65
+1 66
+1 67
+1 68
+1 69
+1 70
+1 71
+1 72
+1 73
+1 74
+1 75
+1 76
+1 77
+1 78
+1 79
+1 80
+1 81
+1 82
+1 83
+1 84
+1 85
+1 86
+1 87
+1 88
+1 89
+1 90
+1 91
+1 92
+1 93
+1 94
+1 95
+1 96
+1 97
+1 98
+1 99
+1 100
+1 101
+1 102
+1 103
+1 104
+1 105
+1 106
+1 107
+1 108
+1 109
+1 110
+1 111
+1 112
+1 113
+1 114
+1 115
+1 116
+1 117
+1 118
+1 119
+1 120
+1 121
+POINT_DATA 122
+SCALARS attr0 float
+LOOKUP_TABLE default
+7.000000
+8.000000
+9.000000
+10.000000
+11.000000
+12.000000
+8.000000
+9.000000
+10.000000
+11.000000
+12.000000
+13.000000
+9.000000
+10.000000
+11.000000
+12.000000
+13.000000
+14.000000
+10.000000
+11.000000
+12.000000
+13.000000
+14.000000
+15.000000
+11.000000
+12.000000
+13.000000
+14.000000
+15.000000
+16.000000
+12.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+13.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+14.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+19.000000
+20.000000
+21.000000
+20.000000
+21.000000
+22.000000
+21.000000
+22.000000
+23.000000
+22.000000
+23.000000
+24.000000
+23.000000
+24.000000
+25.000000
+24.000000
+25.000000
+26.000000
+25.000000
+26.000000
+27.000000
+15.000000
+16.000000
+17.000000
+18.000000
+19.000000
+SCALARS domain float
+LOOKUP_TABLE default
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
diff --git a/test_data/sgrid_gpu_output_2_1.vtk b/test_data/sgrid_gpu_output_2_1.vtk
new file mode 100644
index 000000000..379943d7b
--- /dev/null
+++ b/test_data/sgrid_gpu_output_2_1.vtk
@@ -0,0 +1,423 @@
+# vtk DataFile Version 3.0
+grids
+ASCII
+DATASET POLYDATA
+POINTS 103 float
+0.764706 0.470588 0.0
+0.823529 0.470588 0.0
+0.882353 0.470588 0.0
+0.764706 0.529412 0.0
+0.823529 0.529412 0.0
+0.882353 0.529412 0.0
+0.764706 0.588235 0.0
+0.823529 0.588235 0.0
+0.882353 0.588235 0.0
+0.764706 0.647059 0.0
+0.823529 0.647059 0.0
+0.882353 0.647059 0.0
+0.764706 0.705882 0.0
+0.823529 0.705882 0.0
+0.882353 0.705882 0.0
+0.764706 0.764706 0.0
+0.823529 0.764706 0.0
+0.882353 0.764706 0.0
+0.764706 0.823529 0.0
+0.823529 0.823529 0.0
+0.882353 0.823529 0.0
+0.764706 0.882353 0.0
+0.823529 0.882353 0.0
+0.882353 0.882353 0.0
+0.058824 0.588235 0.0
+0.117647 0.588235 0.0
+0.176471 0.588235 0.0
+0.235294 0.588235 0.0
+0.294118 0.588235 0.0
+0.352941 0.588235 0.0
+0.058824 0.647059 0.0
+0.117647 0.647059 0.0
+0.176471 0.647059 0.0
+0.235294 0.647059 0.0
+0.294118 0.647059 0.0
+0.352941 0.647059 0.0
+0.058824 0.705882 0.0
+0.117647 0.705882 0.0
+0.176471 0.705882 0.0
+0.235294 0.705882 0.0
+0.294118 0.705882 0.0
+0.352941 0.705882 0.0
+0.058824 0.764706 0.0
+0.117647 0.764706 0.0
+0.176471 0.764706 0.0
+0.235294 0.764706 0.0
+0.294118 0.764706 0.0
+0.352941 0.764706 0.0
+0.058824 0.823529 0.0
+0.117647 0.823529 0.0
+0.176471 0.823529 0.0
+0.235294 0.823529 0.0
+0.294118 0.823529 0.0
+0.352941 0.823529 0.0
+0.058824 0.882353 0.0
+0.117647 0.882353 0.0
+0.176471 0.882353 0.0
+0.235294 0.882353 0.0
+0.294118 0.882353 0.0
+0.352941 0.882353 0.0
+0.411765 0.588235 0.0
+0.470588 0.588235 0.0
+0.529412 0.588235 0.0
+0.588235 0.588235 0.0
+0.647059 0.588235 0.0
+0.705882 0.588235 0.0
+0.411765 0.647059 0.0
+0.470588 0.647059 0.0
+0.529412 0.647059 0.0
+0.588235 0.647059 0.0
+0.647059 0.647059 0.0
+0.705882 0.647059 0.0
+0.411765 0.705882 0.0
+0.470588 0.705882 0.0
+0.529412 0.705882 0.0
+0.588235 0.705882 0.0
+0.647059 0.705882 0.0
+0.705882 0.705882 0.0
+0.411765 0.764706 0.0
+0.470588 0.764706 0.0
+0.529412 0.764706 0.0
+0.588235 0.764706 0.0
+0.647059 0.764706 0.0
+0.705882 0.764706 0.0
+0.411765 0.823529 0.0
+0.470588 0.823529 0.0
+0.529412 0.823529 0.0
+0.588235 0.823529 0.0
+0.647059 0.823529 0.0
+0.705882 0.823529 0.0
+0.411765 0.882353 0.0
+0.470588 0.882353 0.0
+0.529412 0.882353 0.0
+0.588235 0.882353 0.0
+0.647059 0.882353 0.0
+0.705882 0.882353 0.0
+0.352941 0.529412 0.0
+0.411765 0.529412 0.0
+0.470588 0.529412 0.0
+0.529412 0.529412 0.0
+0.588235 0.529412 0.0
+0.647059 0.529412 0.0
+0.705882 0.529412 0.0
+VERTICES 103 206
+1 0
+1 1
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+1 10
+1 11
+1 12
+1 13
+1 14
+1 15
+1 16
+1 17
+1 18
+1 19
+1 20
+1 21
+1 22
+1 23
+1 24
+1 25
+1 26
+1 27
+1 28
+1 29
+1 30
+1 31
+1 32
+1 33
+1 34
+1 35
+1 36
+1 37
+1 38
+1 39
+1 40
+1 41
+1 42
+1 43
+1 44
+1 45
+1 46
+1 47
+1 48
+1 49
+1 50
+1 51
+1 52
+1 53
+1 54
+1 55
+1 56
+1 57
+1 58
+1 59
+1 60
+1 61
+1 62
+1 63
+1 64
+1 65
+1 66
+1 67
+1 68
+1 69
+1 70
+1 71
+1 72
+1 73
+1 74
+1 75
+1 76
+1 77
+1 78
+1 79
+1 80
+1 81
+1 82
+1 83
+1 84
+1 85
+1 86
+1 87
+1 88
+1 89
+1 90
+1 91
+1 92
+1 93
+1 94
+1 95
+1 96
+1 97
+1 98
+1 99
+1 100
+1 101
+1 102
+POINT_DATA 103
+SCALARS attr0 float
+LOOKUP_TABLE default
+26.000000
+27.000000
+28.000000
+27.000000
+28.000000
+29.000000
+28.000000
+29.000000
+30.000000
+29.000000
+30.000000
+31.000000
+30.000000
+31.000000
+32.000000
+31.000000
+32.000000
+33.000000
+32.000000
+33.000000
+34.000000
+33.000000
+34.000000
+35.000000
+16.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+17.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+18.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+19.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+23.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+24.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+25.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+26.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+27.000000
+28.000000
+29.000000
+30.000000
+31.000000
+32.000000
+20.000000
+21.000000
+22.000000
+23.000000
+24.000000
+25.000000
+26.000000
+SCALARS domain float
+LOOKUP_TABLE default
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
+1.0
-- 
GitLab