From 4bacfc470407c34e03f62f8356410fcf535b3bea Mon Sep 17 00:00:00 2001
From: Incardona Pietro <incardon@mpi-cbg.de>
Date: Thu, 17 Mar 2022 18:20:48 +0100
Subject: [PATCH] Fix CUDA grid: add lambda-launch test and allow element
 access from a const grid_gpu_ker

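Add a test (gpu_computation_lambda) that assigns scalar, vector and
tensor properties of a grid_gpu from a __device__ lambda launched with
CUDA_LAUNCH_LAMBDA, and adjust grid_gpu_ker so that a kernel object
captured by value inside a (non-mutable) lambda can still write to the
grid: data_ becomes mutable and the const get<p>() overload now returns
a mutable reference (a get_debug<p>() variant is added alongside it).

The launch pattern exercised by the new test, as a minimal sketch taken
(abridged) from the test added below; see the full test for the exact
identifiers and the remaining property assignments:

    auto c3_k = c3.toKernel();

    auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
    {
        grid_key_dx<3,int> p({blockIdx.x * blockDim.x + threadIdx.x,
            blockIdx.y * blockDim.y + threadIdx.y,
            blockIdx.z * blockDim.z + threadIdx.z});

        // write through the by-value captured kernel object
        c3_k.template get<0>(p) = 5.0;
    };

    auto ite = c3.getGPUIterator({0,0,0},{63,63,63});
    CUDA_LAUNCH_LAMBDA(ite,lamb);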
---
 src/Grid/cuda/cuda_grid_gpu_tests.cu | 64 ++++++++++++++++++++++++++++
 src/Grid/cuda/map_grid_cuda_ker.cuh  | 26 +++++++++--
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/src/Grid/cuda/cuda_grid_gpu_tests.cu b/src/Grid/cuda/cuda_grid_gpu_tests.cu
index eaae6bfd..0f412174 100644
--- a/src/Grid/cuda/cuda_grid_gpu_tests.cu
+++ b/src/Grid/cuda/cuda_grid_gpu_tests.cu
@@ -160,6 +160,70 @@ BOOST_AUTO_TEST_CASE (gpu_computation)
 	#endif
 }
 
+BOOST_AUTO_TEST_CASE (gpu_computation_lambda)
+{
+	#ifdef CUDA_GPU
+
+	{
+	size_t sz[3] = {64,64,64};
+	grid_gpu<3, aggregate<float,float[2],float[2][2]> > c3(sz);
+
+	c3.setMemory();
+
+	// Assign
+
+	auto c3_k = c3.toKernel();
+
+	auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
+	{
+		grid_key_dx<3,int> p({blockIdx.x * blockDim.x + threadIdx.x,
+			blockIdx.y * blockDim.y + threadIdx.y,
+			blockIdx.z * blockDim.z + threadIdx.z});
+
+		c3_k.template get<0>(p) = 5.0;
+
+		c3_k.template get<1>(p)[0] = 5.0;
+		c3_k.template get<1>(p)[1] = 5.0;
+
+		c3_k.template get<2>(p)[0][0] = 5.0;
+		c3_k.template get<2>(p)[0][1] = 5.0;
+		c3_k.template get<2>(p)[1][0] = 5.0;
+		c3_k.template get<2>(p)[1][1] = 5.0;
+	};
+
+	auto ite = c3.getGPUIterator({0,0,0},{63,63,63});
+
+	CUDA_LAUNCH_LAMBDA(ite,lamb);
+
+	c3.deviceToHost<0,1,2>();
+
+	auto it = c3.getIterator();
+
+	bool good = true;
+	while(it.isNext())
+	{
+		auto key = it.get();
+
+		good &= c3.template get<0>(key) == 5.0;
+
+		good &= c3.template get<1>(key)[0] == 5.0;
+		good &= c3.template get<1>(key)[1] == 5.0;
+
+		good &= c3.template get<2>(key)[0][0] == 5.0;
+		good &= c3.template get<2>(key)[0][1] == 5.0;
+		good &= c3.template get<2>(key)[1][0] == 5.0;
+		good &= c3.template get<2>(key)[1][1] == 5.0;
+
+		++it;
+	}
+
+	BOOST_REQUIRE_EQUAL(good,true);
+
+	}
+
+	#endif
+}
+
 BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
 {
 	#ifdef CUDA_GPU
diff --git a/src/Grid/cuda/map_grid_cuda_ker.cuh b/src/Grid/cuda/map_grid_cuda_ker.cuh
index aa04eb6d..ca942f57 100644
--- a/src/Grid/cuda/map_grid_cuda_ker.cuh
+++ b/src/Grid/cuda/map_grid_cuda_ker.cuh
@@ -158,7 +158,7 @@ class grid_gpu_ker
 	typedef typename layout_base<T_>::type layout;
 
 	//! layout data
-	layout data_;
+	mutable layout data_;
 
 
 
@@ -264,14 +264,32 @@ public:
 	 * \return the const reference of the element
 	 *
 	 */
-	template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get_c<p>(data_,g1,grid_key_dx<dim>()))>
-	__device__ __host__ inline const r_type get(const grid_key_dx<dim,ids_type> & v1) const
+	template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get<p>(data_,g1,grid_key_dx<dim>()))>
+	__device__ __host__ inline r_type get_debug(const grid_key_dx<dim,ids_type> & v1) const
 	{
 #ifdef SE_CLASS1
 		if (check_bound(v1) == false)
 		{fill_grid_error_array_overflow<dim,p>(this->template getPointer<p>(),v1);}
 #endif
-		return layout_base<T_>::template get_c<p>(data_,g1,v1);
+
+		return layout_base<T_>::template get<p>(data_,g1,v1);
+	}
+
+	/*! \brief Get the reference of the selected element
+	 *
+	 * \param v1 grid_key that identifies the element in the grid
+	 *
+	 * \return the reference of the element (non-const: data_ is mutable,
+	 *         so the element can be written even from a const kernel object)
+	 */
+	template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get<p>(data_,g1,grid_key_dx<dim>()))>
+	__device__ __host__ inline r_type get(const grid_key_dx<dim,ids_type> & v1) const
+	{
+#ifdef SE_CLASS1
+		if (check_bound(v1) == false)
+		{fill_grid_error_array_overflow<dim,p>(this->template getPointer<p>(),v1);}
+#endif
+		return layout_base<T_>::template get<p>(data_,g1,v1);
 	}
 
 	/*! \brief Get the reference of the selected element
-- 
GitLab