From d0fb39194c4ad237e2b5ac90686031de00f5d572 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Tue, 16 Jun 2020 16:00:58 +0200
Subject: [PATCH] Candidate release 3.0.0

---
 .../1_gray_scott_3d_sparse_gpu/main.cu        | 276 ++++++++++++++++++
 openfpm_data                                  |   2 +-
 openfpm_pdata.doc                             |   2 +-
 .../tests/sgrid_dist_id_gpu_unit_tests.cu     |  89 +++++-
 4 files changed, 362 insertions(+), 7 deletions(-)
 create mode 100644 example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu

diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu b/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu
new file mode 100644
index 000000000..e9dd9ecd4
--- /dev/null
+++ b/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu
@@ -0,0 +1,276 @@
+#include "util/cuda/cuda_launch.hpp"
+#include "Grid/grid_dist_id.hpp"
+#include "data_type/aggregate.hpp"
+#include "timer.hpp"
+
+/*!
+ *
+ * \page Grid_3_gs_3D_sparse_gpu Gray Scott in 3D using sparse grids on GPU
+ *
+ * [TOC]
+ *
+ * # Solving a gray scott-system in 3D using Sparse grids on gpu # {#e3_gs_gray_scott_gpu}
+ *
+ * This example show how to solve a Gray-Scott system in 3D using sparse grids on gpu
+ *
+ * In figure is the final solution of the problem
+ *
+ * \htmlonly
+ * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
+ * \endhtmlonly
+ *
+ * More or less this example is the adaptation of the dense example in 3D
+ *
+ * \see \ref Grid_3_gs_3D
+ *
+ * # Initializetion
+ *
+ * On gpu we can add points using the function addPoints this function take 2 lamda functions the first take 3 arguments (in 3D)
+ * i,j,k these are the global coordinates for a point. We can return either true either false. In case of true the point is
+ * created in case of false the point is not inserted. The second lamda is instead used to initialize the point inserted.
+ * The arguments of the second lambda are the data argument we use to initialize the point and the global coordinates i,j,k
+ *
+ * After we add the points we have to flush the added points. This us achieved using the function flush the template parameters
+ * indicate how we have to act on the points. Consider infact we are adding points already exist ... do we have to add it using the max
+ * or the min. **FLUSH_ON_DEVICE** say instead that the operation is performed using the GPU
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points
+ *
+ * The function can also called with a specified range
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points sub
+ *
+ * # Update
+ *
+ * to calculate the right-hand-side we use the function **conv2** this function can be used to do a convolution that involve
+ * two properties
+ *
+ * The function accept a lambda function where the first 2 arguments are the output of the same type of the two property choosen.
+ *
+ * The arguments 3 and 4 contain the properties of two selected properties. while i,j,k are the coordinates we have to calculate the
+ * convolution. The call **conv2** also accept template parameters the first two indicate the source porperties, the other two are the destination properties. While the
+ * last is the extension of the stencil. In this case we use 1.
+ *
+ * The lambda function is defined as
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu lambda
+ *
+ * and used in the body loop
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu body
+ *
+ */
+
+constexpr int U = 0;
+constexpr int V = 1;
+
+constexpr int U_next = 2;
+constexpr int V_next = 3;
+
+constexpr int x = 0;
+constexpr int y = 1;
+constexpr int z = 2;
+
+typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float> > SparseGridType;
+
+void init(SparseGridType & grid, Box<3,float> & domain)
+{
+	//! \cond [create points] \endcond
+
+	typedef typename GetAddBlockType<SparseGridType>::type InsertBlockT;
+
+	grid.addPoints([] __device__ (int i, int j, int k)
+			        {
+						return true;
+			        },
+			        [] __device__ (InsertBlockT & data, int i, int j, int k)
+			        {
+			        	data.template get<U>() = 1.0;
+			        	data.template get<V>() = 0.0;
+			        }
+			        );
+
+
+	grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
+
+	//! \cond [create points] \endcond
+
+	long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
+	long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
+	long int z_start = grid.size(1)*1.55f/domain.getHigh(2);
+
+	long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
+	long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
+	long int z_stop = grid.size(1)*1.85f/domain.getHigh(2);
+
+	//! \cond [create points sub] \endcond
+
+	grid_key_dx<3> start({x_start,y_start,z_start});
+	grid_key_dx<3> stop ({x_stop,y_stop,z_stop});
+
+        grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
+                                {
+                                                return true;
+                                },
+                                [] __device__ (InsertBlockT & data, int i, int j, int k)
+                                {
+                                        data.template get<U>() = 0.5;
+                                        data.template get<V>() = 0.24;
+                                }
+                                );
+
+	grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
+
+	//! \cond [create points sub] \endcond
+}
+
+
+int main(int argc, char* argv[])
+{
+	openfpm_init(&argc,&argv);
+
+	// domain
+	Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
+	
+	// grid size
+        size_t sz[3] = {256,256,256};
+
+	// Define periodicity of the grid
+	periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
+	
+	// Ghost in grid unit
+	Ghost<3,long int> g(1);
+	
+	// deltaT
+	float deltaT = 0.25;
+
+	// Diffusion constant for specie U
+	float du = 2*1e-5;
+
+	// Diffusion constant for specie V
+	float dv = 1*1e-5;
+
+	// Number of timesteps
+        size_t timeSteps = 15000;
+
+	// K and F (Physical constant in the equation)
+    float K = 0.053;
+    float F = 0.014;
+
+	sgrid_dist_id_gpu<3, float, aggregate<float,float,float,float>> grid(sz,domain,g,bc);
+
+	// spacing of the grid on x and y
+	float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};
+
+	init(grid,domain);
+
+	// sync the ghost
+	grid.template ghost_get<U,V>(RUN_ON_DEVICE);
+
+	// because we assume that spacing[x] == spacing[y] we use formula 2
+	// and we calculate the prefactor of Eq 2
+	float uFactor = deltaT * du/(spacing[x]*spacing[x]);
+	float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
+
+	timer tot_sim;
+	tot_sim.start();
+
+	for (size_t i = 0; i < timeSteps ; ++i)
+	{
+		std::cout << "STEP: " << i << std::endl;
+/*		if (i % 300 == 0)
+		{
+			std::cout << "STEP: " << i << std::endl;
+			grid.write_frame("out",i,VTK_WRITER);
+		}*/
+
+		//! \cond [stencil get and use] \endcond
+
+		typedef typename GetCpBlockType<decltype(grid),0,1>::type CpBlockType;
+
+		//! \cond [lambda] \endcond
+
+		auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
+				                                   CpBlockType & u, CpBlockType & v,
+				                                   int i, int j, int k){
+
+				float uc = u(i,j,k);
+				float vc = v(i,j,k);
+
+				u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
+                                                       u(i,j-1,k) + u(i,j+1,k) +
+                                                       u(i,j,k-1) + u(i,j,k+1) - 6.0*uc) - deltaT * uc*vc*vc
+                                                       - deltaT * F * (uc - 1.0);
+
+
+				v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
+                                                       v(i,j+1,k) + v(i,j-1,k) +
+                                                       v(i,j,k-1) + v(i,j,k+1) - 6.0*vc) + deltaT * uc*vc*vc
+					               - deltaT * (F+K) * vc;
+				};
+
+		//! \cond [lambda] \endcond
+
+		//! \cond [body] \endcond
+
+		if (i % 2 == 0)
+		{
+			grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
+
+			// After copy we synchronize again the ghost part U and V
+
+			grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
+		}
+		else
+		{
+			grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
+
+			// After copy we synchronize again the ghost part U and V
+			grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
+		}
+
+		//! \cond [body] \endcond
+
+		// Every 500 time step we output the configuration for
+		// visualization
+//		if (i % 500 == 0)
+//		{
+//			grid.save("output_" + std::to_string(count));
+//			count++;
+//		}
+	}
+	
+	tot_sim.stop();
+	std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;
+
+	grid.deviceToHost<U,V,U_next,V_next>();
+	grid.write("final");
+
+	//! \cond [time stepping] \endcond
+
+	/*!
+	 * \page Grid_3_gs_3D_sparse Gray Scott in 3D
+	 *
+	 * ## Finalize ##
+	 *
+	 * Deinitialize the library
+	 *
+	 * \snippet Grid/3_gray_scott/main.cpp finalize
+	 *
+	 */
+
+	//! \cond [finalize] \endcond
+
+	openfpm_finalize();
+
+	//! \cond [finalize] \endcond
+
+	/*!
+	 * \page Grid_3_gs_3D_sparse Gray Scott in 3D
+	 *
+	 * # Full code # {#code}
+	 *
+	 * \include Grid/3_gray_scott_3d/main.cpp
+	 *
+	 */
+}
diff --git a/openfpm_data b/openfpm_data
index ef3a3381d..b9f0bd4a0 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit ef3a3381d938c4feedfedf0495605ddbee6cfcb6
+Subproject commit b9f0bd4a0b073481a9a1e971d84c55e566fd8d4c
diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc
index 0c4e8ff29..2f1dd77b7 100644
--- a/openfpm_pdata.doc
+++ b/openfpm_pdata.doc
@@ -38,7 +38,7 @@ PROJECT_NAME           = "OpenFPM_pdata"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 2.0.0
+PROJECT_NUMBER         = 3.0.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
index 2133ad95f..5a221b6b1 100644
--- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
+++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
@@ -677,11 +677,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_skip_labelling )
 
 	gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING);
 
-	//////////////////////////////////// DEBUG ///////////////////////////////
-	gdist.deviceToHost<0,1,2,3>();
-	gdist.write_debug("DEBUG");
-	//////////////////////////////////////////////////////////////////////////
-
 	gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
 		u_out = 2*u(i,j,k);
 		v_out = 2*v(i,j,k);
@@ -732,4 +727,88 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_skip_labelling )
 	BOOST_REQUIRE_EQUAL(match,true);
 }
 
+BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv_background )
+{
+	size_t sz[3] = {60,60,60};
+	periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
+
+	Ghost<3,long int> g(1);
+
+	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
+
+	gdist.template setBackgroundValue<0>(666);
+	gdist.template setBackgroundValue<1>(666);
+	gdist.template setBackgroundValue<2>(666);
+	gdist.template setBackgroundValue<3>(666);
+
+	/////// GPU insert + flush
+
+	Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
+
+	/////// GPU Run kernel
+
+	float c = 5.0;
+
+	typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
+
+	gdist.addPoints(box.getKP1(),box.getKP2(),
+			        [] __device__ (int i, int j, int k)
+			        {
+						return (i == 30 && j == 30 && k == 30);
+			        },
+			        [c] __device__ (InsertBlockT & data, int i, int j, int k)
+			        {
+			        	data.template get<0>() = 0;
+			        	data.template get<1>() = 0;
+			        }
+			        );
+
+	gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
+
+	gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
+
+	// Now run the convolution
+
+	typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
+
+	gdist.template conv2<0,1,2,3,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
+		u_out = u(i+1,j,k) + u(i,j+1,k) + u(i,j,k+1);
+		v_out = v(i+1,j,k) + v(i,j+1,k) + v(i,j,k+1);
+	});
+
+	gdist.deviceToHost<0,1,2,3>();
+
+	// Now we check that ghost is correct
+
+	auto it3 = gdist.getDomainIterator();
+
+	bool match = true;
+
+	int count = 0;
+
+	while (it3.isNext())
+	{
+		auto p = it3.get();
+
+		float sub1 = gdist.template get<2>(p);
+		float sub2 = gdist.template get<3>(p);
+
+		if (sub1 != 3*666.0 || sub2 != 3*666.0)
+		{
+			std::cout << sub1 << " " << sub2 << std::endl;
+			match = false;
+			break;
+		}
+
+		count++;
+
+		++it3;
+	}
+
+	BOOST_REQUIRE(count == 0 || count == 1);
+	BOOST_REQUIRE_EQUAL(match,true);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab