From 926a696f9e4017832a8838fa4e753f4dcbd96c78 Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Mon, 15 Mar 2021 11:50:24 +0100 Subject: [PATCH] Fixing documentation for Sparse grid --- .../1_gray_scott_3d_sparse/main.cpp | 23 +- .../1_gray_scott_3d_sparse_gpu/main.cu | 4 + .../Makefile | 0 .../config.cfg | 0 .../main.cu | 59 +- .../Makefile | 0 .../config.cfg | 0 .../main.cpp | 8 +- .../Makefile | 0 .../config.cfg | 0 .../main.cpp | 6 +- .../Makefile | 0 .../config.cfg | 0 .../main.cu | 0 .../Makefile | 39 ++ .../config.cfg | 0 .../3_gray_scott_3d_sparse_gpu_cs_opt/main.cu | 544 ++++++++++++++++++ .../Makefile | 26 + .../config.cfg | 2 + .../main.cpp | 426 ++++++++++++++ .../5_gray_scott_3d_surface_cs_opt/Makefile | 26 + .../5_gray_scott_3d_surface_cs_opt/config.cfg | 2 + .../5_gray_scott_3d_surface_cs_opt/main.cpp | 472 +++++++++++++++ .../Makefile | 0 .../config.cfg | 2 + .../main.cu | 0 .../Makefile | 39 ++ .../config.cfg | 2 + .../main.cu | 284 +++++++++ .../SparseGrid/8_filling_benchmark/Makefile | 26 + .../SparseGrid/8_filling_benchmark/config.cfg | 2 + .../SparseGrid/8_filling_benchmark/main.cpp | 226 ++++++++ .../8_filling_benchmark_gpu/Makefile | 39 ++ .../8_filling_benchmark_gpu/config.cfg | 2 + .../8_filling_benchmark_gpu/main.cu | 221 +++++++ example/Vector/1_gpu_first_step/Makefile | 7 +- example/Vector/1_gpu_first_step/main.cu | 21 +- example/Vector/7_SPH_dlb_gpu/Makefile | 7 +- example/Vector/7_SPH_dlb_gpu/main.cu | 77 ++- 39 files changed, 2498 insertions(+), 94 deletions(-) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt => 2_gray_scott_3d_sparse_gpu_opt}/Makefile (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_cs => 2_gray_scott_3d_sparse_gpu_opt}/config.cfg (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt => 2_gray_scott_3d_sparse_gpu_opt}/main.cu (75%) rename example/SparseGrid/{1_gray_scott_3d_sparse_opt => 2_gray_scott_3d_sparse_opt}/Makefile (100%) rename 
example/SparseGrid/{1_gray_scott_3d_sparse_cs => 2_gray_scott_3d_sparse_opt}/config.cfg (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_opt => 2_gray_scott_3d_sparse_opt}/main.cpp (96%) rename example/SparseGrid/{1_gray_scott_3d_sparse_cs => 3_gray_scott_3d_sparse_cs}/Makefile (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_opt => 3_gray_scott_3d_sparse_cs}/config.cfg (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_cs => 3_gray_scott_3d_sparse_cs}/main.cpp (98%) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_cs => 3_gray_scott_3d_sparse_gpu_cs}/Makefile (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt => 3_gray_scott_3d_sparse_gpu_cs}/config.cfg (100%) rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_cs => 3_gray_scott_3d_sparse_gpu_cs}/main.cu (100%) create mode 100644 example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/Makefile rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt_weak_scal => 3_gray_scott_3d_sparse_gpu_cs_opt}/config.cfg (100%) create mode 100644 example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/main.cu create mode 100644 example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/Makefile create mode 100644 example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/config.cfg create mode 100644 example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/main.cpp create mode 100644 example/SparseGrid/5_gray_scott_3d_surface_cs_opt/Makefile create mode 100644 example/SparseGrid/5_gray_scott_3d_surface_cs_opt/config.cfg create mode 100644 example/SparseGrid/5_gray_scott_3d_surface_cs_opt/main.cpp rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt_weak_scal => 6_gray_scott_3d_sparse_gpu_opt_weak_scal}/Makefile (100%) create mode 100644 example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg rename example/SparseGrid/{1_gray_scott_3d_sparse_gpu_opt_weak_scal => 6_gray_scott_3d_sparse_gpu_opt_weak_scal}/main.cu (100%) create mode 100644 
example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/Makefile create mode 100644 example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/config.cfg create mode 100644 example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/main.cu create mode 100644 example/SparseGrid/8_filling_benchmark/Makefile create mode 100644 example/SparseGrid/8_filling_benchmark/config.cfg create mode 100644 example/SparseGrid/8_filling_benchmark/main.cpp create mode 100644 example/SparseGrid/8_filling_benchmark_gpu/Makefile create mode 100644 example/SparseGrid/8_filling_benchmark_gpu/config.cfg create mode 100644 example/SparseGrid/8_filling_benchmark_gpu/main.cu diff --git a/example/SparseGrid/1_gray_scott_3d_sparse/main.cpp b/example/SparseGrid/1_gray_scott_3d_sparse/main.cpp index 872edcb83..f26892a96 100644 --- a/example/SparseGrid/1_gray_scott_3d_sparse/main.cpp +++ b/example/SparseGrid/1_gray_scott_3d_sparse/main.cpp @@ -1,9 +1,10 @@ /*! \page SparseGrid SparseGrid * * \subpage Grid_3_gs_3D_sparse - * \subpage Grid_3_gs_3D_sparse_cs - * \subpage Grid_3_gs_3D_sparse_opt * \subpage Grid_3_gs_3D_sparse_gpu + * \subpage Grid_3_gs_3D_sparse_opt + * \subpage Grid_3_gs_3D_sparse_gpu_opt + * \subpage Grid_3_gs_3D_sparse_cs * \subpage Grid_3_gs_3D_sparse_gpu_cs * */ @@ -14,14 +15,13 @@ /*! * - * \page Grid_3_gs_3D_sparse Gray Scott in 3D using sparse grids + * \page Grid_3_gs_3D_sparse Gray Scott in\subpage Grid_3_gs_3D_sparse_opt 3D using sparse grids * * [TOC] * - * # Solving a gray scott-system in 3D using Sparse grids# {#e3_gs_gray_scott_sparse} + * # Solving a gray scott-system in 3D using Sparse grids on GPU optimized # {#e3_gs_gray_scott_sparse_gpu_opt} * - * This example show how to solve a Gray-Scott system in 3D using sparse grids in this case we well use a more - * complex geometry + * This example show how to solve a Gray-Scott system in 3D using sparse grids on gpu (optimized). 
The problem is the same as \ref Grid_3_gs_3D * * In figure is the final solution of the problem * @@ -34,16 +34,7 @@ * \see \ref Grid_3_gs_3D * * - * We recall here the main differences between sparse and dense. - * - * * **get** function return now constant values, so cannot be used to get values, a get in write is an insert - * a get on a point position that has not been inserted return the background value - * - * * **insert** function create/overwrite the points value - * - * * **getDomainIterator** return an iterator on the existing points - * - * * **getGridIterator** return an iterator on the dense version of the grid + * * * * diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu b/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu index cd0eb3077..794f5bc08 100644 --- a/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu +++ b/example/SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu @@ -72,8 +72,12 @@ constexpr int x = 0; constexpr int y = 1; constexpr int z = 2; +//! \cond [grid definition] \endcond + typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float> > SparseGridType; +//! \cond [grid definition] \endcond + void init(SparseGridType & grid, Box<3,float> & domain) { //! 
\cond [create points] \endcond diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/Makefile b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/Makefile rename to example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/config.cfg b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/config.cfg similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/config.cfg rename to example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/config.cfg diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/main.cu b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/main.cu similarity index 75% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/main.cu rename to example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/main.cu index 31f24b12f..7a6f2e643 100644 --- a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/main.cu +++ b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/main.cu @@ -1,3 +1,8 @@ +//#define VCLUSTER_PERF_REPORT <------ Activate telemetry for the VCluster data-structure +//#define SYNC_BEFORE_TAKE_TIME <------ Force synchronization of the kernels everytime we take the time with the structure timer. +// Use this option for telemetry and GPU otherwise the result are unreliable +//#define ENABLE_GRID_DIST_ID_PERF_STATS <------ Activate telementry for the grid data-structure + #include "Decomposition/Distribution/BoxDistribution.hpp" #include "Grid/grid_dist_id.hpp" #include "data_type/aggregate.hpp" @@ -5,11 +10,11 @@ /*! 
* - * \page Grid_3_gs_3D_sparse_gpu Gray Scott in 3D using sparse grids on GPU + * \page Grid_3_gs_3D_sparse_gpu_opt Gray Scott in 3D using sparse grids on GPU (Optimized) * * [TOC] * - * # Solving a gray scott-system in 3D using Sparse grids on gpu # {#e3_gs_gray_scott_gpu} + * # Solving a gray scott-system in 3D using Sparse grids on gpu (Optimized) # {#e3_gs_gray_scott_gpu} * * This example show how to solve a Gray-Scott system in 3D using sparse grids on gpu * @@ -23,41 +28,15 @@ * * \see \ref Grid_3_gs_3D * - * # Initializetion - * - * On gpu we can add points using the function addPoints this function take 2 lamda functions the first take 3 arguments (in 3D) - * i,j,k these are the global coordinates for a point. We can return either true either false. In case of true the point is - * created in case of false the point is not inserted. The second lamda is instead used to initialize the point inserted. - * The arguments of the second lambda are the data argument we use to initialize the point and the global coordinates i,j,k - * - * After we add the points we have to flush the added points. This us achieved using the function flush the template parameters - * indicate how we have to act on the points. Consider infact we are adding points already exist ... do we have to add it using the max - * or the min. **FLUSH_ON_DEVICE** say instead that the operation is performed using the GPU - * - * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points - * - * The function can also called with a specified range - * - * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points sub - * - * # Update + * # Optimizations * - * to calculate the right-hand-side we use the function **conv2** this function can be used to do a convolution that involve - * two properties + * Instead of using the default decomposition algorithm based on parmetis we use BoxDistribution. This decomposition divide the space equally + * across processors. 
The way to use a different algorithm for decomposing the sparse grid is given by changing the type of the Sparse grid + * + * \snippet SparseGrid/2_gray_scott_3d_sparse_gpu_opt/main.cu grid definition * - * The function accept a lambda function where the first 2 arguments are the output of the same type of the two property choosen. - * - * The arguments 3 and 4 contain the properties of two selected properties. while i,j,k are the coordinates we have to calculate the - * convolution. The call **conv2** also accept template parameters the first two indicate the source porperties, the other two are the destination properties. While the - * last is the extension of the stencil. In this case we use 1. - * - * The lambda function is defined as - * - * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu lambda - * - * and used in the body loop - * - * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu body + * Because the geometry is fixed we are also using the option SKIP_LABELLING. With this option active after a normal ghost_get we are able to + * activate certain optimization patterns in constructions of the sending buffers and merging data. * */ @@ -73,10 +52,14 @@ constexpr int x = 0; constexpr int y = 1; constexpr int z = 2; +//! \cond [grid definition] \endcond + typedef CartDecomposition<3,float, CudaMemory, memory_traits_inte, BoxDistribution<3,float> > Dec; typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,CudaMemory, Dec> SparseGridType; +//! \cond [grid definition] \endcond + void init(SparseGridType & grid, Box<3,float> & domain) { //! 
\cond [create points] \endcond @@ -210,13 +193,13 @@ int main(int argc, char* argv[]) u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) + u(i,j-1,k) + u(i,j+1,k) + - u(i,j,k-1) + u(i,j,k+1) - 6.0*uc) - deltaT * uc*vc*vc - - deltaT * F * (uc - 1.0); + u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc + - deltaT * F * (uc - 1.0f); v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) + v(i,j+1,k) + v(i,j-1,k) + - v(i,j,k-1) + v(i,j,k+1) - 6.0*vc) + deltaT * uc*vc*vc + v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc - deltaT * (F+K) * vc; }; diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_opt/Makefile b/example/SparseGrid/2_gray_scott_3d_sparse_opt/Makefile similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_opt/Makefile rename to example/SparseGrid/2_gray_scott_3d_sparse_opt/Makefile diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_cs/config.cfg b/example/SparseGrid/2_gray_scott_3d_sparse_opt/config.cfg similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_cs/config.cfg rename to example/SparseGrid/2_gray_scott_3d_sparse_opt/config.cfg diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp b/example/SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp similarity index 96% rename from example/SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp rename to example/SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp index ef1df5864..ac02c2ca2 100644 --- a/example/SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp +++ b/example/SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp @@ -26,7 +26,7 @@ * * Two optimization has been done. 
The first is to change the layout to struct of arrays defining the grid with * - * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp grid definition + * \snippet SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp grid definition * * The second is using the function **conv_cross2** to calculate the right-hand-side * this function can be used to do a convolution that involve points in a cross stencil like in figure that involve @@ -60,11 +60,11 @@ * * The lambda function is defined as * - * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp lambda + * \snippet SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp lambda * * and used in the body loop * - * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp body + * \snippet SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp body * * To note that instead of copy we split the properties where we are acting at every iteration * @@ -263,7 +263,7 @@ int main(int argc, char* argv[]) * * Deinitialize the library * - * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp finalize + * \snippet SparseGrid/2_gray_scott_3d_sparse_opt/main.cpp finalize * */ diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_cs/Makefile b/example/SparseGrid/3_gray_scott_3d_sparse_cs/Makefile similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_cs/Makefile rename to example/SparseGrid/3_gray_scott_3d_sparse_cs/Makefile diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_opt/config.cfg b/example/SparseGrid/3_gray_scott_3d_sparse_cs/config.cfg similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_opt/config.cfg rename to example/SparseGrid/3_gray_scott_3d_sparse_cs/config.cfg diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp b/example/SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp similarity index 98% rename from example/SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp rename to example/SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp index 9b468421d..98444f92f 100644 --- 
a/example/SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp +++ b/example/SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp @@ -51,18 +51,18 @@ * The initialization involve the creation of 3 sphere and one cylinder channel connecting them in order to do it we * create an iterator over the grid (inserted and not inserted) point with **getGridIterator** * - * \snippet SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp init sphere channel + * \snippet SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp init sphere channel * * After creating the domain we make a perturbation in the up sphere * - * \snippet SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp perturbation + * \snippet SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp perturbation * * # Boundary conditions * * For this example we use mirror on direction X Y Z If the point is missing. If the point is missing in both direction than * the second derivative is considered zero * - * \snippet SparseGrid/1_gray_scott_3d_sparse_cs/main.cpp boundary condition + * \snippet SparseGrid/3_gray_scott_3d_sparse_cs/main.cpp boundary condition * */ diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/Makefile b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/Makefile similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/Makefile rename to example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/Makefile diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/config.cfg b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/config.cfg similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt/config.cfg rename to example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/config.cfg diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/main.cu similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu rename to example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs/main.cu diff --git 
a/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/Makefile b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/Makefile new file mode 100644 index 000000000..50373464f --- /dev/null +++ b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/Makefile @@ -0,0 +1,39 @@ +include ../../example.mk + +### internally the example disable with the preprocessor its code if not compiled with nvcc +CUDA_CC= +CUDA_CC_LINK= +ifeq (, $(shell which nvcc)) + CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) + INCLUDE_PATH_NVCC= + CUDA_CC_LINK=mpic++ +else + CUDA_CC=nvcc -ccbin=mpic++ + CUDA_CC_LINK=nvcc -ccbin=mpic++ +endif + +gray_scott_sparse_gpu_test: OPT += -DTEST_RUN +gray_scott_sparse_gpu_test: gray_scott_sparse_gpu + +CC=mpic++ + +LDIR = + +OBJ = main.o + +%.o: %.cu + $(CUDA_CC) $(OPT) -O3 -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + +gray_scott_sparse_gpu: $(OBJ) + $(CUDA_CC_LINK) -o $@ $^ $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_gpu + +run: gray_scott_sparse_gpu_test + mpirun -np 4 ./gray_scott_sparse_gpu + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_gpu + diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/config.cfg similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg rename to example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/config.cfg diff --git a/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/main.cu b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/main.cu new file mode 100644 index 000000000..71e6019c7 --- /dev/null +++ b/example/SparseGrid/3_gray_scott_3d_sparse_gpu_cs_opt/main.cu @@ -0,0 +1,544 @@ +//#define VCLUSTER_PERF_REPORT +//#define SYNC_BEFORE_TAKE_TIME +//#define ENABLE_GRID_DIST_ID_PERF_STATS +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" + +/*! 
+ * + * \page Grid_3_gs_3D_sparse_gpu_cs_opt Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * [TOC] + * + * # Solving a gray scott-system in 3D using Sparse grids# {#e3_gs_gray_scott} + * + * This example show how to solve a Gray-Scott system in 3D using sparse grids on gpu with complex geometry + * + * In figure is the final solution of the problem + * + * \htmlonly +<table border="1" bgcolor="black"> + <tr> + <td> + <img src="http://ppmcore.mpi-cbg.de/web/images/examples/1_gray_scott_3d_sparse_cs/gs_3d_sparse_cs_section.png" style="width: 500px;" /> + </td> + <td> + <img src="http://ppmcore.mpi-cbg.de/web/images/examples/1_gray_scott_3d_sparse_cs/gs_3d_sparse_cs.png" style="width: 500px;" /> + </td> + </tr> +</table> +\endhtmlonly + * + * More or less this example is the same of \ref e3_gs_gray_scott_cs on gpu using what we learned in \ref e3_gs_gray_scott_gpu + * + * + */ + +#ifdef __NVCC__ + +constexpr int U = 0; +constexpr int V = 1; +constexpr int U_next = 2; +constexpr int V_next = 3; + +typedef sgrid_dist_id_gpu<3,double,aggregate<double,double,double,double> > sgrid_type; + +void init(sgrid_type & grid, Box<3,double> & domain) +{ + auto it = grid.getGridIterator(); + Point<3,double> p[8]= {{0.35,0.35,0.35}, + {0.35,2.0,2.0}, + {2.0,0.35,2.0}, + {2.0,2.0,0.35}, + {0.35,0.35,2.0}, + {0.35,2.0,0.35}, + {2.0,0.35,0.35}, + {2.0,2.0,2.0}}; + + +// Point<3,double> u({1.0,0.0,0.0}); +// Box<3,double> channel_box(p3,p1); + + double spacing_x = grid.spacing(0); + double spacing_y = grid.spacing(1); + double spacing_z = grid.spacing(2); + + typedef typename GetAddBlockType<sgrid_type>::type InsertBlockT; + + // Draw spheres + for (int i = 0 ; i < 8 ; i++) + { + Sphere<3,double> sph(p[i],0.3); + + Box<3,size_t> bx; + + for (int i = 0 ; i < 3 ; i++) + { + bx.setLow(i,(size_t)((sph.center(i) - 0.31)/grid.spacing(i))); + bx.setHigh(i,(size_t)((sph.center(i) + 0.31)/grid.spacing(i))); + } + + 
grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,sph] __device__ (int i, int j, int k) + { + Point<3,double> pc({i*spacing_x,j*spacing_y,k*spacing_z}); + + // Check if the point is in the domain + if (sph.isInside(pc) ) + {return true;} + + return false; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + data.template get<V>() = 0.0; + } + ); + + grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + grid.removeUnusedBuffers(); + } + + //channels + + Box<3,double> b({0.25,0.25,0.25},{2.1,2.1,2.1}); + + for (int k = 0 ; k < 3 ; k++) + { + for (int s = 0 ; s < 2 ; s++) + { + for (int i = 0 ; i < 2 ; i++) + { + Point<3,double> u({1.0*(((s+i)%2) == 0 && k != 2),1.0*(((s+i+1)%2) == 0 && k != 2),(k == 2)*1.0}); + Point<3,double> c({(i == 0)?0.35:2.0,(s == 0)?0.35:2.0,(k == 0)?0.35:2.0}); + + Box<3,size_t> bx; + + for (int i = 0 ; i < 3 ; i++) + { + if (c[i] == 2.0) + { + if (u[i] == 1.0) + { + bx.setLow(i,(size_t)(0.34/grid.spacing(i))); + bx.setHigh(i,(size_t)(2.01/grid.spacing(i))); + } + else + { + bx.setLow(i,(size_t)((c[i] - 0.11)/grid.spacing(i))); + bx.setHigh(i,(size_t)((c[i] + 0.11)/grid.spacing(i))); + } + } + else + { + if (u[i] == 1.0) + { + bx.setLow(i,(size_t)(0.34/grid.spacing(i))); + bx.setHigh(i,(size_t)(2.01/grid.spacing(i))); + } + else + { + bx.setLow(i,(size_t)((c[i] - 0.11)/grid.spacing(i))); + bx.setHigh(i,(size_t)((c[i] + 0.11)/grid.spacing(i))); + } + } + } + + grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c,b] __device__ (int i, int j, int k) + { + Point<3,double> pc({i*spacing_x,j*spacing_y,k*spacing_z}); + Point<3,double> pcs({i*spacing_x,j*spacing_y,k*spacing_z}); + Point<3,double> vp; + + // shift + pc -= c; + + // calculate the distance from the diagonal + vp.get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1); + vp.get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2); + vp.get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0); + + double 
distance = vp.norm(); + + // Check if the point is in the domain + if (distance < 0.1 && b.isInside(pcs) == true ) + {return true;} + + return false; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + data.template get<V>() = 0.0; + } + ); + + grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + grid.removeUnusedBuffers(); + } + } + } + + // cross channel + + int s = 0; + for (int s = 0 ; s < 2 ; s++) + { + for (int i = 0 ; i < 2 ; i++) + { + Point<3,double> c({(i == 0)?0.35:2.0,(s == 0)?0.35:2.0,0.35}); + Point<3,double> u({(i == 0)?1.0:-1.0,(s == 0)?1.0:-1.0,1.0}); + + Box<3,size_t> bx; + + for (int k = 0 ; k < 16; k++) + { + for (int s = 0 ; s < 3 ; s++) + { + if (u[s] > 0.0) + { + bx.setLow(s,(c[s] + k*(u[s]/9.0))/grid.spacing(s) ); + bx.setHigh(s,(c[s] + (k+3)*(u[s]/9.0) )/ grid.spacing(s) ); + } + else + { + bx.setLow(s,(c[s] + (k+3)*(u[s]/9.0) )/grid.spacing(s) ); + bx.setHigh(s,(c[s] + k*(u[s]/9.0))/ grid.spacing(s) ); + } + } + + grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c,b] __device__ (int i, int j, int k) + { + Point<3,double> pc({i*spacing_x,j*spacing_y,k*spacing_z}); + Point<3,double> pcs({i*spacing_x,j*spacing_y,k*spacing_z}); + Point<3,double> vp; + + // shift + pc -= c; + + // calculate the distance from the diagonal + vp.get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1); + vp.get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2); + vp.get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0); + + double distance = vp.norm() / sqrt(3.0); + + // Check if the point is in the domain + if (distance < 0.1 && b.isInside(pcs) == true ) + {return true;} + + return false; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + data.template get<V>() = 0.0; + } + ); + + grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + grid.removeUnusedBuffers(); + } + } + + } + + long int x_start = 
grid.size(0)*1.95f/domain.getHigh(0); + long int y_start = grid.size(1)*1.95f/domain.getHigh(1); + long int z_start = grid.size(1)*1.95f/domain.getHigh(2); + + long int x_stop = grid.size(0)*2.05f/domain.getHigh(0); + long int y_stop = grid.size(1)*2.05f/domain.getHigh(1); + long int z_stop = grid.size(1)*2.05f/domain.getHigh(2); + + grid_key_dx<3> start({x_start,y_start,z_start}); + grid_key_dx<3> stop ({x_stop,y_stop,z_stop}); + + grid.addPoints(start,stop,[] __device__ (int i, int j, int k) + { + return true; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 0.5; + data.template get<V>() = 0.24; + } + ); + + grid.template flush<smin_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + + grid.removeUnusedBuffers(); +} + + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {384,384,384}; + + // Define periodicity of the grid + periodicity<3> bc = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 0.2; + + // Diffusion constant for specie U + double du = 2*1e-5; + + // Diffusion constant for specie V + double dv = 1*1e-5; + +#ifdef TEST_RUN + // Number of timesteps + size_t timeSteps = 300; +#else + // Number of timesteps + size_t timeSteps = 50000; +#endif + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + sgrid_type grid(sz,domain,g,bc); + + grid.template setBackgroundValue<0>(-0.5); + grid.template setBackgroundValue<1>(-0.5); + grid.template setBackgroundValue<2>(-0.5); + grid.template setBackgroundValue<3>(-0.5); + + // spacing of the grid on x and y + double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + init(grid,domain); + + // sync the ghost + grid.template ghost_get<U,V>(RUN_ON_DEVICE); + + // because we assume that spacing[x] == spacing[y] we use formula 2 + // and 
we calculate the prefactor of Eq 2 + double uFactor = deltaT * du/(spacing[0]*spacing[0]); + double vFactor = deltaT * dv/(spacing[0]*spacing[0]); + + grid.template deviceToHost<U,V>(); + + timer tot_sim; + tot_sim.start(); + + for (size_t i = 0; i < timeSteps; ++i) + { + //! \cond [stencil get and use] \endcond + + typedef typename GetCpBlockType<decltype(grid),0,1>::type CpBlockType; + + auto func = [uFactor,vFactor,deltaT,F,K] __device__ (double & u_out, double & v_out, + CpBlockType & u, CpBlockType & v, + int i, int j, int k){ + + double uc = u(i,j,k); + double vc = v(i,j,k); + + double u_px = u(i+1,j,k); + double u_mx = u(i-1,j,k); + + double u_py = u(i,j+1,k); + double u_my = u(i,j-1,k); + + double u_pz = u(i,j,k+1); + double u_mz = u(i,j,k-1); + + double v_px = v(i+1,j,k); + double v_mx = v(i-1,j,k); + + double v_py = v(i,j+1,k); + double v_my = v(i,j-1,k); + + double v_pz = v(i,j,k+1); + double v_mz = v(i,j,k-1); + + // U fix + + if (u_mx < -0.1 && u_px < -0.1) + { + u_mx = uc; + u_px = uc; + } + + if (u_mx < -0.1) + {u_mx = u_px;} + + if (u_px < -0.1) + {u_px = u_mx;} + + if (u_my < -0.1 && u_py < -0.1) + { + u_my = uc; + u_py = uc; + } + + if (u_my < -0.1) + {u_my = u_py;} + + if (u_py < -0.1) + {u_py = u_my;} + + if (u_mz < -0.1 && u_pz < -0.1) + { + u_mz = uc; + u_pz = uc; + } + + if (u_mz < -0.1) + {u_mz = u_pz;} + + if (u_pz < -0.1) + {u_pz = u_mz;} + + // V fix + + if (v_mx < -0.1 && v_px < -0.1) + { + v_mx = uc; + v_px = uc; + } + + if (v_mx < -0.1) + {v_mx = v_px;} + + if (v_px < -0.1) + {v_px = v_mx;} + + if (v_my < -0.1 && v_py < -0.1) + { + v_my = uc; + v_py = uc; + } + + if (v_my < -0.1) + {v_my = v_py;} + + if (v_py < -0.1) + {v_py = v_my;} + + if (v_mz < -0.1 && v_pz < -0.1) + { + v_mz = uc; + v_pz = uc; + } + + if (v_mz < -0.1) + {v_mz = v_pz;} + + if (v_pz < -0.1) + {v_pz = v_mz;} + + u_out = uc + uFactor *(u_mx + u_px + + u_my + u_py + + u_mz + u_pz - 6.0*uc) - deltaT * uc*vc*vc + - deltaT * F * (uc - 1.0); + + + v_out = vc + vFactor 
*(v_mx + v_px + + v_py + v_my + + v_mz + v_pz - 6.0*vc) + deltaT * uc*vc*vc + - deltaT * (F+K) * vc; + + }; + + if (i % 2 == 0) + { + grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func); + + cudaDeviceSynchronize(); + + // After copy we synchronize again the ghost part U and V + + grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING); + } + else + { + grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func); + + cudaDeviceSynchronize(); + + // After copy we synchronize again the ghost part U and V + grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING); + } + + //! \cond [stencil get and use] \endcond + + // After copy we synchronize again the ghost part U and V + + // Every 500 time step we output the configuration for + // visualization +/* if (i % 500 == 0) + { + grid.save("output_" + std::to_string(count)); + count++; + }*/ + + std::cout << "STEP: " << i << std::endl; +/* if (i % 300 == 0) + { + grid.template deviceToHost<U,V>(); + grid.write_frame("out",i); + }*/ + } + + tot_sim.stop(); + std::cout << "Total simulation: " << tot_sim.getwct() << std::endl; + + grid.print_stats(); + + create_vcluster().print_stats(); + + grid.template deviceToHost<U,V>(); + grid.write("Final"); + + //! \cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_sparse_gpu_cs Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! 
+ * \page Grid_3_gs_3D_sparse_gpu_cs Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * # Full code # {#code} + * + * \include SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu + * + */ +} + +#else + +int main(int argc, char* argv[]) +{ + return 0; +} + +#endif + diff --git a/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/Makefile b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/Makefile new file mode 100644 index 000000000..54f2679d3 --- /dev/null +++ b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/Makefile @@ -0,0 +1,26 @@ +include ../../example.mk + +CC=mpic++ + +LDIR = + +OBJ = main.o +gray_scott_sparse_surface_cs_test: OPT += -DTEST_RUN +gray_scott_sparse_surface_cs_test: gray_scott_sparse_surface_cs + +%.o: %.cpp + $(CC) -O3 -g $(OPT) -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + +gray_scott_sparse_surface_cs: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_surface_cs + +run: gray_scott_sparse_cs_surface_test + mpirun -np 4 ./gray_scott_sparse_surface_cs + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_surface_cs + diff --git a/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/config.cfg b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/config.cfg new file mode 100644 index 000000000..1eecbac35 --- /dev/null +++ b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cpp Makefile diff --git a/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/main.cpp b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/main.cpp new file mode 100644 index 000000000..33eea4a72 --- /dev/null +++ b/example/SparseGrid/4_gray_scott_3d_sparse_surface_cs/main.cpp @@ -0,0 +1,426 @@ +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" + +/*! 
+ * + * + */ + +constexpr int U = 0; +constexpr int V = 1; +constexpr int phi = 2; +constexpr int normal = 3; +constexpr int tgrad_u = 4; +constexpr int tgrad_v = 5; +constexpr int U_next = 6; +constexpr int V_next = 7; + +constexpr int x = 0; +constexpr int y = 1; +constexpr int z = 2; + +typedef sgrid_dist_id<3,double,aggregate<double,double,double,double[3],double[3],double[3],double,double> > sgrid_type; + +void init(sgrid_type & grid, Box<3,double> & domain) +{ + //! \cond [init sphere channel] \endcond + + auto it = grid.getGridIterator(); + Point<3,double> p1({0.5,0.5,0.5}); + + double sx = grid.spacing(0); + + Sphere<3,double> sph1(p1,0.3); + Sphere<3,double> sph2(p1,0.3 - sx*10); + Sphere<3,double> sph_zero(p1,0.3 - sx*5); + + while (it.isNext()) + { + // Get the local grid key + auto key = it.get_dist(); + auto keyg = it.get(); + + Point<3,double> pc; + Point<3,double> vp; + + for (int i = 0 ; i < 3 ; i++) + {pc.get(i) = keyg.get(i) * it.getSpacing(i);} + + // Check if the point is in the first sphere + if (sph1.isInside(pc) == true && sph2.isInside(pc) == false) + { + Point<3,double> pn = pc - p1; + pn /= pn.norm(); + double theta = acos(pn * Point<3,double>({0.0,0.0,1.0})); + Point<3,double> pn_ = pn; + pn_[2] = 0.0; + pn_ /= pn_.norm(); + double aphi = acos(pn_ * Point<3,double>({1.0,0.0,0.0})); + + // Create a perturbation in the solid angle + if (theta > 0.6 && theta < 0.8 && aphi > 0.0 && aphi < 0.2) + { + grid.template insert<U>(key) = 0.5; + grid.template insert<V>(key) = 0.25; + } + else + { + grid.template insert<U>(key) = 1.0; + grid.template insert<V>(key) = 0.0; + } + grid.template insert<phi>(key) = sph_zero.distance(pc); + grid.template insert<normal>(key)[0] = pn[0]; + grid.template insert<normal>(key)[1] = pn[1]; + grid.template insert<normal>(key)[2] = pn[2]; + + // Old values U and V + grid.template insert<U_next>(key) = 0.0; + grid.template insert<V_next>(key) = 0.0; + } + + ++it; + } + + //! 
\cond [init sphere channel] \endcond +} + +template<unsigned int U_src,unsigned int V_src,unsigned int U_dst, unsigned int V_dst> +void extend(sgrid_type & grid) +{ + double delta = 1e-10; + double max = 0.0; + auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + // center point + auto Cp = it.get(); + + // plus,minus X,Y,Z + auto mx = Cp.move(0,-1); + auto px = Cp.move(0,+1); + auto my = Cp.move(1,-1); + auto py = Cp.move(1,1); + auto mz = Cp.move(2,-1); + auto pz = Cp.move(2,1); + + double s = grid.get<phi>(Cp) / sqrt(fabs(grid.get<phi>(Cp)) + delta); + + double Uext = 0.0; + double Vext = 0.0; + + double dir = s*grid.get<normal>(Cp)[x]; + + if (dir > 0) + { + Uext += dir * (grid.get<U_src>(Cp) - grid.get<U_src>(mx)); + Vext += dir * (grid.get<V_src>(Cp) - grid.get<V_src>(mx)); + } + else if (dir < 0) + { + Uext += dir * (grid.get<U_src>(px) - grid.get<U_src>(Cp)); + Vext += dir * (grid.get<V_src>(px) - grid.get<V_src>(Cp)); + } + + + dir = s*grid.get<normal>(Cp)[y]; + if (dir > 0) + { + Uext += dir * (grid.get<U_src>(Cp) - grid.get<U_src>(my)); + Vext += dir * (grid.get<V_src>(Cp) - grid.get<V_src>(my)); + } + else if (dir < 0) + { + Uext += dir * (grid.get<U_src>(py) - grid.get<U_src>(Cp)); + Vext += dir * (grid.get<V_src>(py) - grid.get<V_src>(Cp)); + } + + dir = s*grid.get<normal>(Cp)[z]; + if (dir > 0) + { + Uext += dir * (grid.get<U_src>(Cp) - grid.get<U_src>(mz)); + Vext += dir * (grid.get<V_src>(Cp) - grid.get<V_src>(mz)); + } + else if (dir < 0) + { + Uext += dir * (grid.get<U_src>(pz) - grid.get<U_src>(Cp)); + Vext += dir * (grid.get<V_src>(pz) - grid.get<V_src>(Cp)); + } + + if (Uext >= max) + { + max = Uext; + } + + grid.insert<U_dst>(Cp) = grid.get<U_src>(Cp) - 1.0*Uext; + grid.insert<V_dst>(Cp) = grid.get<V_src>(Cp) - 1.0*Vext; + + // Next point in the grid + ++it; + } + + std::cout << "UEX max: " << max << std::endl; +} + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,double> 
domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {512,512,512}; + + // Define periodicity of the grid + periodicity<3> bc = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 0.3; + + // Diffusion constant for specie U + double du = 1*1e-5; + + // Diffusion constant for specie V + double dv = 0.5*1e-5; + +//#ifdef TEST_RUN + // Number of timesteps +// size_t timeSteps = 200; +//#else + // Number of timesteps + size_t timeSteps = 150000; +//#endif + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + sgrid_type grid(sz,domain,g,bc); + + + // spacing of the grid on x and y + double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + init(grid,domain); + + // sync the ghost + size_t count = 0; + grid.template ghost_get<U,V>(); + + // because we assume that spacing[x] == spacing[y] we use formula 2 + // and we calculate the prefactor of Eq 2 + double uFactor = deltaT * du; + double vFactor = deltaT * dv; + + auto & v_cl = create_vcluster(); + + timer tot_sim; + tot_sim.start(); + + for (size_t i = 0; i < timeSteps ; ++i) + { + { + auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + // center point + auto Cp = it.get(); + + // plus,minus X,Y,Z + auto mx = Cp.move(0,-1); + auto px = Cp.move(0,+1); + auto my = Cp.move(1,-1); + auto py = Cp.move(1,1); + auto mz = Cp.move(2,-1); + auto pz = Cp.move(2,1); + + grid.insert<tgrad_u>(Cp)[0] = 0.0; + grid.insert<tgrad_u>(Cp)[1] = 0.0; + grid.insert<tgrad_u>(Cp)[2] = 0.0; + grid.insert<tgrad_v>(Cp)[0] = 0.0; + grid.insert<tgrad_v>(Cp)[1] = 0.0; + grid.insert<tgrad_v>(Cp)[2] = 0.0; + + //! 
\cond [boundary condition] \endcond + + if (grid.existPoint(mz) == true && grid.existPoint(pz) == true && + grid.existPoint(my) == true && grid.existPoint(py) == true && + grid.existPoint(mx) == true && grid.existPoint(px) == true ) + { + Point<3,double> gradU; + gradU[x] = (grid.get<U>(Cp) - grid.get<U>(mx)) / grid.spacing(0); + gradU[y] = (grid.get<U>(Cp) - grid.get<U>(my)) / grid.spacing(1); + gradU[z] = (grid.get<U>(Cp) - grid.get<U>(mz)) / grid.spacing(2); + + Point<3,double> gradV; + gradV[x] = (grid.get<V>(Cp) - grid.get<V>(mx)) / grid.spacing(0); + gradV[y] = (grid.get<V>(Cp) - grid.get<V>(my)) / grid.spacing(1); + gradV[z] = (grid.get<V>(Cp) - grid.get<V>(mz)) / grid.spacing(2); + + Point<3,double> PgradU; + Point<3,double> PgradV; + + PgradU.zero(); + PgradV.zero(); + + for (int i = 0 ; i < 3 ; i++) + { + for (int j = 0 ; j < 3 ; j++) + { + grid.insert<tgrad_u>(Cp)[i] += (((i == j)?1.0:0.0) - grid.get<normal>(Cp)[i]*grid.get<normal>(Cp)[j])*gradU[j]; + grid.insert<tgrad_v>(Cp)[i] += (((i == j)?1.0:0.0) - grid.get<normal>(Cp)[i]*grid.get<normal>(Cp)[j])*gradV[j]; + } + } + } + ++it; + } + } + +// Old.write_frame("Init_condition",i); + + { + auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + // center point + auto Cp = it.get(); + + // plus,minus X,Y,Z + auto mx = Cp.move(0,-1); + auto px = Cp.move(0,+1); + auto my = Cp.move(1,-1); + auto py = Cp.move(1,1); + auto mz = Cp.move(2,-1); + auto pz = Cp.move(2,1); + + //! 
\cond [boundary condition] \endcond + + // Mirror z + + if (grid.existPoint(mz) == true && grid.existPoint(pz) == true && + grid.existPoint(my) == true && grid.existPoint(py) == true && + grid.existPoint(mx) == true && grid.existPoint(px) == true ) + { + double lapU = 0; + double lapV = 0; + + //Div + lapU += (grid.get<tgrad_u>(px)[0] - grid.get<tgrad_u>(Cp)[0]) / grid.spacing(0); + lapV += (grid.get<tgrad_v>(px)[0] - grid.get<tgrad_v>(Cp)[0]) / grid.spacing(0); + lapU += (grid.get<tgrad_u>(py)[1] - grid.get<tgrad_u>(Cp)[1]) / grid.spacing(1); + lapV += (grid.get<tgrad_v>(py)[1] - grid.get<tgrad_v>(Cp)[1]) / grid.spacing(1); + lapU += (grid.get<tgrad_u>(pz)[2] - grid.get<tgrad_u>(Cp)[2]) / grid.spacing(2); + lapV += (grid.get<tgrad_v>(pz)[2] - grid.get<tgrad_v>(Cp)[2]) / grid.spacing(2); + + // update based on Eq 2 + grid.insert<U_next>(Cp) = grid.get<U>(Cp) + uFactor * lapU + + - deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) + + - deltaT * F * (grid.get<U>(Cp) - 1.0); + + + // update based on Eq 2 + grid.insert<V_next>(Cp) = grid.get<V>(Cp) + vFactor * lapV + + deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) + + - deltaT * (F+K) * grid.get<V>(Cp); + } + + // Next point in the grid + ++it; + } + } + +// New.write_frame("update",i); + + // Extend + + if (i % 5 == 0) + { + for (int j = 0 ; j < 2 ; j++) + { + if (j % 2 == 0) + {extend<U_next,V_next,U,V>(grid);} + else + {extend<U,V,U_next,V_next>(grid);} + + // Here we copy New into the old grid in preparation of the new step + // It would be better to alternate, but using this we can show the usage + // of the function copy. To note that copy work only on two grid of the same + // decomposition. 
If you want to copy also the decomposition, or force to be + // exactly the same, use Old = New + //New.copy_sparse(Old); + } + } + +/* auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + // center point + auto Cp = it.get(); + + // update based on Eq 2 + grid.insert<U>(Cp) = grid.get<U_next>(Cp); + grid.insert<V>(Cp) = grid.get<V_next>(Cp); + + ++it; + }*/ + + //! \cond [stencil get and use] \endcond + + // After copy we synchronize again the ghost part U and V + grid.ghost_get<U,V>(); + + // Every 500 time step we output the configuration for + // visualization + if (i % 500 == 0) + { + grid.save("output_" + std::to_string(count)); + count++; + } + + if (v_cl.rank() == 0) + {std::cout << "STEP: " << i << " " << std::endl;} + if (i % 100 == 0) + { + grid.write_frame("out",i); + } + } + + tot_sim.stop(); + std::cout << "Total simulation: " << tot_sim.getwct() << std::endl; + + //! \cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet Grid/3_gray_scott/main.cpp finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! 
+ * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * # Full code # {#code} + * + * \include Grid/3_gray_scott_3d/main.cpp + * + */ +} diff --git a/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/Makefile b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/Makefile new file mode 100644 index 000000000..d6018ae5d --- /dev/null +++ b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/Makefile @@ -0,0 +1,26 @@ +include ../../example.mk + +CC=mpic++ + +LDIR = + +OBJ = main.o +gray_scott_sparse_surface_cs_test: OPT += -DTEST_RUN +gray_scott_sparse_surface_cs_test: gray_scott_sparse_surface_cs + +%.o: %.cpp + $(CC) -O3 -g $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH) + +gray_scott_sparse_surface_cs: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_surface_cs + +run: gray_scott_sparse_cs_surface_test + mpirun -np 4 ./gray_scott_sparse_surface_cs + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_surface_cs + diff --git a/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/config.cfg b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/config.cfg new file mode 100644 index 000000000..1eecbac35 --- /dev/null +++ b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cpp Makefile diff --git a/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/main.cpp b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/main.cpp new file mode 100644 index 000000000..8dba1fcdf --- /dev/null +++ b/example/SparseGrid/5_gray_scott_3d_surface_cs_opt/main.cpp @@ -0,0 +1,472 @@ +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" + +/*! 
+ * + */ + +constexpr int U = 0; +constexpr int V = 1; +constexpr int phi = 2; +constexpr int normal = 3; +constexpr int tgrad_u = 4; +constexpr int tgrad_v = 5; +constexpr int U_next = 6; +constexpr int V_next = 7; + +constexpr int x = 0; +constexpr int y = 1; +constexpr int z = 2; + +typedef sgrid_dist_soa<3,double,aggregate<double,double,double,double[3],double[3],double[3],double,double> > sgrid_type; + +void init(sgrid_type & grid, Box<3,double> & domain) +{ + //! \cond [init sphere channel] \endcond + + auto it = grid.getGridIterator(); + Point<3,double> p1({0.5,0.5,0.5}); + + double sx = grid.spacing(0); + + Sphere<3,double> sph1(p1,0.3); + Sphere<3,double> sph2(p1,0.3 - sx*10); + Sphere<3,double> sph_zero(p1,0.3 - sx*5); + + while (it.isNext()) + { + // Get the local grid key + auto key = it.get_dist(); + auto keyg = it.get(); + + Point<3,double> pc; + Point<3,double> vp; + + for (int i = 0 ; i < 3 ; i++) + {pc.get(i) = keyg.get(i) * it.getSpacing(i);} + + // Check if the point is in the first sphere + if (sph1.isInside(pc) == true && sph2.isInside(pc) == false) + { + Point<3,double> pn = pc - p1; + pn /= pn.norm(); + double theta = acos(pn * Point<3,double>({0.0,0.0,1.0})); + Point<3,double> pn_ = pn; + pn_[2] = 0.0; + pn_ /= pn_.norm(); + double aphi = acos(pn_ * Point<3,double>({1.0,0.0,0.0})); + + // Create a perturbation in the solid angle + if (theta > 0.6 && theta < 0.8 && aphi > 0.0 && aphi < 0.2) + { + grid.template insert<U>(key) = 0.5; + grid.template insert<V>(key) = 0.25; + } + else + { + grid.template insert<U>(key) = 1.0; + grid.template insert<V>(key) = 0.0; + } + grid.template insert<phi>(key) = sph_zero.distance(pc); + grid.template insert<normal>(key)[0] = pn[0]; + grid.template insert<normal>(key)[1] = pn[1]; + grid.template insert<normal>(key)[2] = pn[2]; + + // Old values U and V + grid.template insert<U_next>(key) = 0.0; + grid.template insert<V_next>(key) = 0.0; + } + + ++it; + } + + //! 
\cond [init sphere channel] \endcond +} + +template<unsigned int U_src,unsigned int V_src,unsigned int U_dst, unsigned int V_dst> +void extend(sgrid_type & grid, size_t (& sz)[3],double (& spacing)[3]) +{ + double delta = 1e-10; + double max = 0.0; + + auto func_extend = [&grid,delta,&spacing](auto & grid, auto & ids, + unsigned char * mask_sum) + { + Vc::double_v phi_c; + Vc::double_v s; + + Vc::double_v Uext = 0.0; + Vc::double_v Vext = 0.0; + + Vc::double_v n[3]; + Vc::double_v dir; + + Vc::double_v Uc; + Vc::double_v Vc; + Vc::double_v Uc_xm; + Vc::double_v Vc_xm; + Vc::double_v Uc_ym; + Vc::double_v Vc_ym; + Vc::double_v Uc_zm; + Vc::double_v Vc_zm; + + Vc::double_v Uc_xp; + Vc::double_v Vc_xp; + Vc::double_v Uc_yp; + Vc::double_v Vc_yp; + Vc::double_v Uc_zp; + Vc::double_v Vc_zp; + + load_crs<x,0,phi>(phi_c,grid,ids); + load_crs_v<x,0,x,normal>(n[x],grid,ids); + load_crs_v<x,0,y,normal>(n[y],grid,ids); + load_crs_v<x,0,z,normal>(n[z],grid,ids); + + load_crs<x,0,U_src>(Uc,grid,ids); + load_crs<x,0,V_src>(Vc,grid,ids); + load_crs<x,-1,U_src>(Uc_xm,grid,ids); + load_crs<x,-1,V_src>(Vc_xm,grid,ids); + load_crs<y,-1,U_src>(Uc_ym,grid,ids); + load_crs<y,-1,V_src>(Vc_ym,grid,ids); + load_crs<z,-1,U_src>(Uc_zm,grid,ids); + load_crs<z,-1,V_src>(Vc_zm,grid,ids); + load_crs<x,1,U_src>(Uc_xp,grid,ids); + load_crs<x,1,V_src>(Vc_xp,grid,ids); + load_crs<y,1,U_src>(Uc_yp,grid,ids); + load_crs<y,1,V_src>(Vc_yp,grid,ids); + load_crs<z,1,U_src>(Uc_zp,grid,ids); + load_crs<z,1,V_src>(Vc_zp,grid,ids); + + s = phi_c / sqrt(phi_c*phi_c + delta*delta); + + dir = s*n[0]; + auto dir_pos = dir > 0; + auto dir_neg = dir < 0; + + Uext += Vc::iif(dir_pos,dir * (Uc - Uc_xm)/spacing[0],Vc::double_v(0.0)); + Vext += Vc::iif(dir_pos,dir * (Vc - Vc_xm)/spacing[0],Vc::double_v(0.0)); + Uext += Vc::iif(dir_neg,dir * (Uc_xp - Uc)/spacing[0],Vc::double_v(0.0)); + Vext += Vc::iif(dir_neg,dir * (Vc_xp - Vc)/spacing[0],Vc::double_v(0.0)); + + dir = s*n[1]; + dir_pos = dir > 0; + dir_neg = dir < 0; + 
+ Uext += Vc::iif(dir_pos,dir * (Uc - Uc_ym)/spacing[1],Vc::double_v(0.0)); + Vext += Vc::iif(dir_pos,dir * (Vc - Vc_ym)/spacing[1],Vc::double_v(0.0)); + Uext += Vc::iif(dir_neg,dir * (Uc_yp - Uc)/spacing[1],Vc::double_v(0.0)); + Vext += Vc::iif(dir_neg,dir * (Vc_yp - Vc)/spacing[1],Vc::double_v(0.0)); + + dir = s*n[2]; + dir_pos = dir > 0; + dir_neg = dir < 0; + + Uext += Vc::iif(dir_pos,dir * (Uc - Uc_zm)/spacing[2],Vc::double_v(0.0)); + Vext += Vc::iif(dir_pos,dir * (Vc - Vc_zm)/spacing[2],Vc::double_v(0.0)); + Uext += Vc::iif(dir_neg,dir * (Uc_zp - Uc)/spacing[2],Vc::double_v(0.0)); + Vext += Vc::iif(dir_neg,dir * (Vc_zp - Vc)/spacing[2],Vc::double_v(0.0)); + + Uext = Uc - 0.0003*Uext; + Vext = Vc - 0.0003*Vext; + + store_crs<U_dst>(grid,Uext,ids); + store_crs<V_dst>(grid,Vext,ids); + }; + + grid.template conv_cross_ids<1,double>({0,0,0},{sz[0] - 1, sz[1] - 1, sz[2] - 1},func_extend); +} + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {512,512,512}; + + // Define periodicity of the grid + periodicity<3> bc = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 0.3; + + // Diffusion constant for specie U + double du = 1*1e-5; + + // Diffusion constant for specie V + double dv = 0.5*1e-5; + +//#ifdef TEST_RUN + // Number of timesteps +// size_t timeSteps = 200; +//#else + // Number of timesteps + size_t timeSteps = 100000; +//#endif + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + sgrid_type grid(sz,domain,g,bc); + + + // spacing of the grid on x and y + double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + init(grid,domain); + + // sync the ghost + size_t count = 0; + grid.template ghost_get<U,V>(); + + // because we assume that spacing[x] == spacing[y] we use formula 2 + // and we calculate the prefactor 
of Eq 2 + double uFactor = deltaT * du; + double vFactor = deltaT * dv; + + auto & v_cl = create_vcluster(); + + timer tot_sim; + tot_sim.start(); + + for (size_t i = 0; i < timeSteps ; ++i) + { + auto func_grad = [&grid,&spacing](auto & grid, auto & ids, + unsigned char * mask_sum){ + + Vc::double_v n[3]; + + Vc::double_v Uc; + Vc::double_v xmU; + Vc::double_v ymU; + Vc::double_v zmU; + + Vc::double_v Vc; + Vc::double_v xmV; + Vc::double_v ymV; + Vc::double_v zmV; + + Vc::double_v u_out[3]; + Vc::double_v v_out[3]; + + load_crs<x,-1,U>(xmU,grid,ids); + load_crs<y,-1,U>(ymU,grid,ids); + load_crs<z,-1,U>(zmU,grid,ids); + load_crs<x,0,U>(Uc,grid,ids); + + load_crs<x,-1,V>(xmV,grid,ids); + load_crs<y,-1,V>(ymV,grid,ids); + load_crs<z,-1,V>(zmV,grid,ids); + load_crs<x,0,V>(Vc,grid,ids); + + load_crs_v<x,0,x,normal>(n[x],grid,ids); + load_crs_v<x,0,y,normal>(n[y],grid,ids); + load_crs_v<x,0,z,normal>(n[z],grid,ids); + + u_out[0] = (1.0-n[0]*n[0])*(Uc - xmU)/spacing[0] + (-n[1]*n[1])*(Uc - ymU)/spacing[1] + (-n[2]*n[2])*(Uc - zmU)/spacing[2]; + u_out[1] = (-n[0]*n[0])*(Uc - xmU)/spacing[0] + (1.0-n[1]*n[1])*(Uc - ymU)/spacing[1] + (-n[2]*n[2])*(Uc - zmU)/spacing[2]; + u_out[2] = (-n[0]*n[0])*(Uc - xmU)/spacing[0] + (-n[1]*n[1])*(Uc - ymU)/spacing[1] + (1.0-n[2]*n[2])*(Uc - zmU)/spacing[2]; + + v_out[0] = (1.0-n[0]*n[0])*(Vc - xmV)/spacing[0] + (-n[1]*n[1])*(Vc - ymV)/spacing[1] + (-n[2]*n[2])*(Vc - zmV)/spacing[2]; + v_out[1] = (-n[0]*n[0])*(Vc - xmV)/spacing[0] + (1.0-n[1]*n[1])*(Vc - ymV)/spacing[1] + (-n[2]*n[2])*(Vc - zmV)/spacing[2]; + v_out[2] = (-n[0]*n[0])*(Vc - xmV)/spacing[0] + (-n[1]*n[1])*(Vc - ymV)/spacing[1] + (1.0-n[2]*n[2])*(Vc - zmV)/spacing[2]; + + Vc::Mask<double> surround; + + for (int i = 0 ; i < Vc::double_v::Size ; i++) + {surround[i] = (mask_sum[i] == 6);} + + u_out[0] = Vc::iif(surround,u_out[0],Vc::double_v(0.0)); + u_out[1] = Vc::iif(surround,u_out[1],Vc::double_v(0.0)); + u_out[2] = Vc::iif(surround,u_out[2],Vc::double_v(0.0)); + + v_out[0] = 
Vc::iif(surround,v_out[0],Vc::double_v(0.0)); + v_out[1] = Vc::iif(surround,v_out[1],Vc::double_v(0.0)); + v_out[2] = Vc::iif(surround,v_out[2],Vc::double_v(0.0)); + + store_crs_v<tgrad_u,x>(grid,u_out[0],ids); + store_crs_v<tgrad_u,y>(grid,u_out[1],ids); + store_crs_v<tgrad_u,z>(grid,u_out[2],ids); + + store_crs_v<tgrad_v,x>(grid,v_out[0],ids); + store_crs_v<tgrad_v,y>(grid,v_out[1],ids); + store_crs_v<tgrad_v,z>(grid,v_out[2],ids); + }; + + grid.template conv_cross_ids<1,double>({0,0,0},{sz[0]-1,sz[1] - 1,sz[2] - 1},func_grad); + + auto func_lap = [&grid,&spacing,uFactor,vFactor,deltaT,K,F](auto & grid, auto & ids, + unsigned char * mask_sum){ + + Vc::double_v gradU_px; + Vc::double_v gradU_py; + Vc::double_v gradU_pz; + + Vc::double_v gradU_x; + Vc::double_v gradU_y; + Vc::double_v gradU_z; + + Vc::double_v gradV_px; + Vc::double_v gradV_py; + Vc::double_v gradV_pz; + + Vc::double_v gradV_x; + Vc::double_v gradV_y; + Vc::double_v gradV_z; + + Vc::double_v lapU; + Vc::double_v lapV; + + Vc::double_v Uc; + Vc::double_v Vc; + + Vc::double_v outU; + Vc::double_v outV; + + load_crs_v<x,1,x,tgrad_u>(gradU_px,grid,ids); + load_crs_v<y,1,y,tgrad_u>(gradU_py,grid,ids); + load_crs_v<z,1,z,tgrad_u>(gradU_pz,grid,ids); + + load_crs_v<x,0,x,tgrad_u>(gradU_x,grid,ids); + load_crs_v<x,0,y,tgrad_u>(gradU_y,grid,ids); + load_crs_v<x,0,z,tgrad_u>(gradU_z,grid,ids); + + load_crs_v<x,1,x,tgrad_v>(gradV_px,grid,ids); + load_crs_v<y,1,y,tgrad_v>(gradV_py,grid,ids); + load_crs_v<z,1,z,tgrad_v>(gradV_pz,grid,ids); + + load_crs_v<x,0,x,tgrad_v>(gradV_x,grid,ids); + load_crs_v<x,0,y,tgrad_v>(gradV_y,grid,ids); + load_crs_v<x,0,z,tgrad_v>(gradV_z,grid,ids); + + load_crs<x,0,U>(Uc,grid,ids); + load_crs<x,0,V>(Vc,grid,ids); + + lapU += (gradU_px - gradU_x) / spacing[0]; + lapV += (gradV_px - gradV_x) / spacing[0]; + lapU += (gradU_py - gradU_y) / spacing[1]; + lapV += (gradV_py - gradV_y) / spacing[1]; + lapU += (gradU_pz - gradU_z) / spacing[2]; + lapV += (gradV_pz - gradV_z) / spacing[2]; 
+ + // update based on Eq 2 + outU = Uc + uFactor * lapU + + - deltaT * Uc * Vc * Vc + + - deltaT * F * (Uc - 1.0); + + + // update based on Eq 2 + outV = Vc + vFactor * lapV + + deltaT * Uc * Vc * Vc + + - deltaT * (F+K) * Vc; + + Vc::Mask<double> surround; + + for (int i = 0 ; i < Vc::double_v::Size ; i++) + {surround[i] = (mask_sum[i] == 6);} + + + outU = Vc::iif(surround,outU,Uc); + outV = Vc::iif(surround,outV,Vc); + + store_crs<U_next>(grid,outU,ids); + store_crs<V_next>(grid,outV,ids); + }; + + grid.template conv_cross_ids<1,double>({0,0,0},{sz[0]-1,sz[1] - 1,sz[2] - 1},func_lap); + +// New.write_frame("update",i); + + // Extend + + if (i % 5 == 0) + { + for (int j = 0 ; j < 2 ; j++) + { + if (j % 2 == 0) + {extend<U_next,V_next,U,V>(grid,sz,spacing);} + else + {extend<U,V,U_next,V_next>(grid,sz,spacing);} + + // Here we copy New into the old grid in preparation of the new step + // It would be better to alternate, but using this we can show the usage + // of the function copy. To note that copy work only on two grid of the same + // decomposition. If you want to copy also the decomposition, or force to be + // exactly the same, use Old = New + //New.copy_sparse(Old); + } + } + +/* auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + // center point + auto Cp = it.get(); + + // update based on Eq 2 + grid.insert<U>(Cp) = grid.get<U_next>(Cp); + grid.insert<V>(Cp) = grid.get<V_next>(Cp); + + ++it; + }*/ + + //! \cond [stencil get and use] \endcond + + // After copy we synchronize again the ghost part U and V + grid.ghost_get<U,V>(); + + // Every 500 time step we output the configuration for + // visualization + if (i % 500 == 0) + { +// grid.save("output_" + std::to_string(count)); + count++; + } + + if (v_cl.rank() == 0) + {std::cout << "STEP: " << i << " " << std::endl;} + if (i % 1000 == 0) + { + grid.write_frame("out",i); + } + } + + tot_sim.stop(); + std::cout << "Total simulation: " << tot_sim.getwct() << std::endl; + + //! 
\cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet Grid/3_gray_scott/main.cpp finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! + * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * # Full code # {#code} + * + * \include Grid/3_gray_scott_3d/main.cpp + * + */ +} diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/Makefile b/example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/Makefile similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/Makefile rename to example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/Makefile diff --git a/example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg b/example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg new file mode 100644 index 000000000..699be429e --- /dev/null +++ b/example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cu Makefile diff --git a/example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/main.cu b/example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/main.cu similarity index 100% rename from example/SparseGrid/1_gray_scott_3d_sparse_gpu_opt_weak_scal/main.cu rename to example/SparseGrid/6_gray_scott_3d_sparse_gpu_opt_weak_scal/main.cu diff --git a/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/Makefile b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/Makefile new file mode 100644 index 000000000..50373464f --- /dev/null +++ b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/Makefile @@ -0,0 +1,39 @@ +include ../../example.mk + +### internally the example disable with the preprocessor its code if not compiled with nvcc +CUDA_CC= +CUDA_CC_LINK= +ifeq (, $(shell which nvcc)) + CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) + 
INCLUDE_PATH_NVCC= + CUDA_CC_LINK=mpic++ +else + CUDA_CC=nvcc -ccbin=mpic++ + CUDA_CC_LINK=nvcc -ccbin=mpic++ +endif + +gray_scott_sparse_gpu_test: OPT += -DTEST_RUN +gray_scott_sparse_gpu_test: gray_scott_sparse_gpu + +CC=mpic++ + +LDIR = + +OBJ = main.o + +%.o: %.cu + $(CUDA_CC) $(OPT) -O3 -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + +gray_scott_sparse_gpu: $(OBJ) + $(CUDA_CC_LINK) -o $@ $^ $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_gpu + +run: gray_scott_sparse_gpu_test + mpirun -np 4 ./gray_scott_sparse_gpu + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_gpu + diff --git a/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/config.cfg b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/config.cfg new file mode 100644 index 000000000..699be429e --- /dev/null +++ b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cu Makefile diff --git a/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/main.cu b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/main.cu new file mode 100644 index 000000000..04904f393 --- /dev/null +++ b/example/SparseGrid/7_gray_scott_3d_sparse_gpu_sphere_expanding/main.cu @@ -0,0 +1,284 @@ +#define SYNC_BEFORE_TAKE_TIME +#include "Decomposition/Distribution/BoxDistribution.hpp" +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" + +/*! 
+ * + * \page Grid_3_gs_3D_sparse_gpu_cs Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * [TOC] + * + * # Solving a gray scott-system in 3D using Sparse grids# {#e3_gs_gray_scott} + * + * This example show how to solve a Gray-Scott system in 3D using sparse grids on gpu with complex geometry + * + * In figure is the final solution of the problem + * + * \htmlonly +<table border="1" bgcolor="black"> + <tr> + <td> + <img src="http://ppmcore.mpi-cbg.de/web/images/examples/1_gray_scott_3d_sparse_cs/gs_3d_sparse_cs_section.png" style="width: 500px;" /> + </td> + <td> + <img src="http://ppmcore.mpi-cbg.de/web/images/examples/1_gray_scott_3d_sparse_cs/gs_3d_sparse_cs.png" style="width: 500px;" /> + </td> + </tr> +</table> +\endhtmlonly + * + * More or less this example is the same of \ref e3_gs_gray_scott_cs on gpu using what we learned in \ref e3_gs_gray_scott_gpu + * + * + */ + +#ifdef __NVCC__ + +constexpr int U = 0; +constexpr int V = 1; +constexpr int U_next = 2; +constexpr int V_next = 3; + +typedef CartDecomposition<3,double, CudaMemory, memory_traits_inte, BoxDistribution<3,double> > Dec; + +typedef sgrid_dist_id_gpu<3,double,aggregate<double,double,double,double>, CudaMemory,Dec > sgrid_type; + +void draw_oscillation_shock(sgrid_type & grid, Box<3,double> & domain) +{ + auto it = grid.getGridIterator(); + Point<3,double> p({1.25,1.25,1.25}); + + +// Point<3,double> u({1.0,0.0,0.0}); +// Box<3,double> channel_box(p3,p1); + + double spacing_x = grid.spacing(0); + double spacing_y = grid.spacing(1); + double spacing_z = grid.spacing(2); + + typedef typename GetAddBlockType<sgrid_type>::type InsertBlockT; + + // Draw a shock expanding from 0.4 to 0.8 and than contracting from 0.8 to 0.4 + for (int i = 0 ; i < 100 ; i++) + { + Sphere<3,double> sph(p,0.2 + (double)i/160.0); + Sphere<3,double> sph2(p,0.4 + (double)i/160.0); + + Box<3,size_t> bx; + + for (int j = 0 ; j < 3 ; j++) + { + bx.setLow(j,(size_t)((sph.center(j) - 0.4 - 
(double)i/160.0)/grid.spacing(j))); + bx.setHigh(j,(size_t)((sph.center(j) + 0.4 + (double)i/160.0)/grid.spacing(j))); + } + + timer t_add; + t_add.start(); + + grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,sph,sph2] __device__ (int i, int j, int k) + { + Point<3,double> pc({i*spacing_x,j*spacing_y,k*spacing_z}); + + // Check if the point is in the domain + if (sph2.isInside(pc) ) + { + if (sph.isInside(pc) == false) + {return true;} + } + + return false; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + data.template get<V>() = 0.0; + } + ); + + t_add.stop(); + + timer t_flush; + t_flush.start(); + grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + t_flush.stop(); + + timer t_ghost; + t_ghost.start(); + grid.template ghost_get<U,V>(RUN_ON_DEVICE); + t_ghost.stop(); + timer t_ghost2; + t_ghost2.start(); + grid.template ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING); + t_ghost2.stop(); + std::cout << t_ghost.getwct() << std::endl; + + std::cout << "TIME ghost1: " << t_ghost.getwct() << " ghost2: " << t_ghost2.getwct() << " flush: " << t_flush.getwct() << " " << std::endl; + + + grid.removeUnusedBuffers(); + + } + + std::cout << "Second Pass" <<std::endl; + + for (int i = 0 ; i < 100 ; i++) + { + Sphere<3,double> sph(p,0.2 + (double)i/160.0); + Sphere<3,double> sph2(p,0.4 + (double)i/160.0); + + Box<3,size_t> bx; + + for (int j = 0 ; j < 3 ; j++) + { + bx.setLow(j,(size_t)((sph.center(j) - 0.4 - (double)i/160.0)/grid.spacing(j))); + bx.setHigh(j,(size_t)((sph.center(j) + 0.4 + (double)i/160.0)/grid.spacing(j))); + } + + timer t_add; + t_add.start(); + + grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,sph,sph2] __device__ (int i, int j, int k) + { + Point<3,double> pc({i*spacing_x,j*spacing_y,k*spacing_z}); + + // Check if the point is in the domain + if (sph2.isInside(pc) ) + { + if (sph.isInside(pc) == false) + {return true;} + } + + return false; + 
}, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + data.template get<V>() = 0.0; + } + ); + + t_add.stop(); + + + timer t_flush; + t_flush.start(); + grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE); + t_flush.stop(); +// grid.removeUnusedBuffers(); + + + timer t_ghost; + t_ghost.start(); + grid.template ghost_get<U,V>(RUN_ON_DEVICE); + t_ghost.stop(); + timer t_ghost2; + t_ghost2.start(); + grid.template ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING); + t_ghost2.stop(); + + std::cout << "TIME ghost1: " << t_ghost.getwct() << " ghost2: " << t_ghost2.getwct() << " flush: " << t_flush.getwct() << " " << std::endl; + +// if (i % 10 == 0) +// { +// grid.template deviceToHost<U,V>(); +// grid.write_frame("Final",i); +// } + } + +} + + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {384,384,384}; + + // Define periodicity of the grid + periodicity<3> bc = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 0.025; + + // Diffusion constant for specie U + double du = 2*1e-5; + + // Diffusion constant for specie V + double dv = 1*1e-5; + +#ifdef TEST_RUN + // Number of timesteps + size_t timeSteps = 300; +#else + // Number of timesteps + size_t timeSteps = 50000; +#endif + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + grid_sm<3,void> gv({3,1,1}); + + sgrid_type grid(sz,domain,g,bc,0,gv); + + grid.template setBackgroundValue<0>(-0.5); + grid.template setBackgroundValue<1>(-0.5); + grid.template setBackgroundValue<2>(-0.5); + grid.template setBackgroundValue<3>(-0.5); + + // spacing of the grid on x and y + double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + draw_oscillation_shock(grid,domain); + + grid.template deviceToHost<U,V>(); + 
grid.write("Final"); + + //! \cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_sparse_gpu_cs Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! + * \page Grid_3_gs_3D_sparse_gpu_cs Gray Scott in 3D using sparse grids on gpu in complex geometry + * + * # Full code # {#code} + * + * \include SparseGrid/1_gray_scott_3d_sparse_gpu_cs/main.cu + * + */ +} + +#else + +int main(int argc, char* argv[]) +{ + return 0; +} + +#endif + diff --git a/example/SparseGrid/8_filling_benchmark/Makefile b/example/SparseGrid/8_filling_benchmark/Makefile new file mode 100644 index 000000000..5bcf5675e --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark/Makefile @@ -0,0 +1,26 @@ +include ../../example.mk + +CC=mpic++ + +LDIR = + +OBJ = main.o +gray_scott_sparse_opt_test: OPT += -DTEST_RUN +gray_scott_sparse_opt_test: gray_scott_sparse_opt + +%.o: %.cpp + $(CC) -mavx -O3 -g $(OPT) -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + +gray_scott_sparse_opt: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_opt + +run: gray_scott_sparse_opt_test + mpirun -np 4 ./gray_scott_sparse_opt + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_opt + diff --git a/example/SparseGrid/8_filling_benchmark/config.cfg b/example/SparseGrid/8_filling_benchmark/config.cfg new file mode 100644 index 000000000..1eecbac35 --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cpp Makefile diff --git a/example/SparseGrid/8_filling_benchmark/main.cpp b/example/SparseGrid/8_filling_benchmark/main.cpp new file mode 100644 index 000000000..ba0e077f0 --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark/main.cpp @@ -0,0 +1,226 @@ + +#include 
"util/cuda/cuda_launch.hpp" +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" + +/*! + * + * \page Grid_3_gs_3D_sparse_opt Gray Scott in 3D using sparse grids optimized on CPU + * + * [TOC] + * + * # Solving a gray scott-system in 3D using sparse grids optimized on CPU # {#e3_gs_gray_scott_opt} + * + * This example show how to solve a Gray-Scott system in 3D using sparse grids in an optimized way + * + * In figure is the final solution of the problem + * + * \htmlonly + * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/> + * \endhtmlonly + * + * More or less this example is the adaptation of the dense example in 3D + * + * \see \ref Grid_3_gs_3D + * + * This example is the same as \ref e3_gs_gray_scott_sparse the difference is optimizing for speed. + * + * Two optimization has been done. The first is to change the layout to struct of arrays defining the grid with + * + * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp grid definition + * + * The second is using the function **conv_cross2** to calculate the right-hand-side + * this function can be used to do a convolution that involve points in a cross stencil like in figure that involve + * two properties + * +\verbatim + + * + * + * * x * * + * + * + +\endverbatim + * + * The function accept a lambda function where the first 2 arguments are the output in form of Vc::double_v. If we use float + * we have to use Vc::float_v or Vc::int_v in case the property is an integer. Vc variables come from the Vc library that is + * now integrated in openfpm. + * + *\htmlonly + * <a href="https://github.com/VcDevel/Vc" >Vc Library</a> + *\endhtmlonly + * + * Vc::double_v in general pack 1,2,4 doubles dependently from the fact we choose to activate no-SSE,SSE or AVX at compiler level. + * The arguments 3 and 4 contain the properties of two selected properties in the cross pattern given by xm xp ym yp zm zp. 
+ * The last argument is instead the mask. The mask can be accessed to check the number of existing points. For example if
+ * we have a cross stencil in 3D with stencil size = 1 then we expect 6 points. Note that the mask is an array because if Vc::double_v
+ * contains 4 doubles then the mask has 4 elements accessed with the array operator []. The call **conv_cross2** also accepts
+ * template parameters: the first two indicate the source properties, the other two are the destination properties. While the
+ * last is the extension of the stencil. In this case we use 1.
+ *
+ * The lambda function is defined as
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp lambda
+ *
+ * and used in the body loop
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp body
+ *
+ * To note that instead of copy we split the properties where we are acting at every iteration
+ *
+ */
+
+constexpr int U = 0;
+constexpr int V = 1;
+
+constexpr int U_next = 2;
+constexpr int V_next = 3;
+
+constexpr int x = 0;
+constexpr int y = 1;
+constexpr int z = 2;
+
+void init(sgrid_dist_soa<3,double,aggregate<double,double,double,double> > & grid, Box<3,double> & domain)
+{
+	for (int i = 0 ; i < 10 ; i++)
+	{
+		timer t;
+		t.start();
+
+		auto it = grid.getGridIterator();
+
+		while (it.isNext())
+		{
+			// Get the local grid key
+			auto key = it.get_dist();
+
+			// Old values U and V
+			grid.template insert<U>(key) = 1.0;
+
+			++it;
+		}
+
+		t.stop();
+		std::cout << "Time populate: " << t.getwct() << std::endl;
+
+		grid.clear();
+
+		timer t2;
+		t2.start();
+
+		auto it2 = grid.getGridIterator();
+
+		while (it2.isNext())
+		{
+			// Get the local grid key
+			auto key = it2.get_dist();
+
+			// Old values U and V
+			grid.template insert<U>(key) = 5.0;
+
+			++it2;
+		}
+
+		t2.stop();
+		std::cout << "Time populate: " << t2.getwct() << std::endl;
+
+	}
+}
+
+
+int main(int argc, char* argv[])
+{
+	openfpm_init(&argc,&argv);
+
+	// domain
+	Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
+
+	// 
grid size + size_t sz[3] = {256,256,256}; + + // Define periodicity of the grid + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 0.25; + + // Diffusion constant for specie U + double du = 2*1e-5; + + // Diffusion constant for specie V + double dv = 1*1e-5; + + // Number of timesteps +#ifdef TEST_RUN + size_t timeSteps = 200; +#else + size_t timeSteps = 5000; +#endif + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + //! \cond [grid definition] \endcond + + sgrid_dist_soa<3, double, aggregate<double,double,double,double>> grid(sz,domain,g,bc); + + //! \cond [grid definition] \endcond + + // spacing of the grid on x and y + double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + init(grid,domain); + + // sync the ghost + size_t count = 0; + grid.template ghost_get<U,V>(); + + // because we assume that spacing[x] == spacing[y] we use formula 2 + // and we calculate the prefactor of Eq 2 + double uFactor = deltaT * du/(spacing[x]*spacing[x]); + double vFactor = deltaT * dv/(spacing[x]*spacing[x]); + + timer tot_sim; + tot_sim.start(); + + auto & v_cl = create_vcluster(); + + tot_sim.stop(); + std::cout << "Total simulation: " << tot_sim.getwct() << std::endl; + + grid.write("final"); + + //! \cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_sparse_opt Gray Scott in 3D using sparse grids optimized on CPU + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! 
+ * \page Grid_3_gs_3D_sparse_opt Gray Scott in 3D using sparse grids optimized on CPU + * + * # Full code # {#code} + * + * \include SparseGrid/1_gray_scott_3d_sparse_opt/main.cpp + * + */ +} diff --git a/example/SparseGrid/8_filling_benchmark_gpu/Makefile b/example/SparseGrid/8_filling_benchmark_gpu/Makefile new file mode 100644 index 000000000..450a40d1c --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark_gpu/Makefile @@ -0,0 +1,39 @@ +include ../../example.mk + +### internally the example disable with the preprocessor its code if not compiled with nvcc +CUDA_CC= +CUDA_CC_LINK= +ifeq (, $(shell which nvcc)) + CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) + INCLUDE_PATH_NVCC= + CUDA_CC_LINK=mpic++ +else + CUDA_CC=nvcc -ccbin=mpic++ + CUDA_CC_LINK=nvcc -ccbin=mpic++ +endif + +gray_scott_sparse_gpu_test: OPT += -DTEST_RUN +gray_scott_sparse_gpu_test: gray_scott_sparse_gpu + +CC=mpic++ + +LDIR = + +OBJ = main.o + +%.o: %.cu + $(CUDA_CC) -use_fast_math -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + +gray_scott_sparse_gpu: $(OBJ) + $(CUDA_CC_LINK) -o $@ $^ $(LIBS_PATH) $(LIBS) + +all: gray_scott_sparse_gpu + +run: gray_scott_sparse_gpu_test + mpirun -np 4 ./gray_scott_sparse_gpu + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott_sparse_gpu + diff --git a/example/SparseGrid/8_filling_benchmark_gpu/config.cfg b/example/SparseGrid/8_filling_benchmark_gpu/config.cfg new file mode 100644 index 000000000..699be429e --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark_gpu/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cu Makefile diff --git a/example/SparseGrid/8_filling_benchmark_gpu/main.cu b/example/SparseGrid/8_filling_benchmark_gpu/main.cu new file mode 100644 index 000000000..524b5790f --- /dev/null +++ b/example/SparseGrid/8_filling_benchmark_gpu/main.cu @@ -0,0 +1,221 @@ +#define VCLUSTER_PERF_REPORT +#define SYNC_BEFORE_TAKE_TIME +#define ENABLE_GRID_DIST_ID_PERF_STATS +#include "Decomposition/Distribution/BoxDistribution.hpp" 
+#include "util/cuda/cuda_launch.hpp"
+#include "Grid/grid_dist_id.hpp"
+#include "data_type/aggregate.hpp"
+#include "timer.hpp"
+
+/*!
+ *
+ * \page Grid_3_gs_3D_sparse_gpu Gray Scott in 3D using sparse grids on GPU
+ *
+ * [TOC]
+ *
+ * # Solving a gray scott-system in 3D using Sparse grids on gpu # {#e3_gs_gray_scott_gpu}
+ *
+ * This example shows how to solve a Gray-Scott system in 3D using sparse grids on gpu
+ *
+ * In figure is the final solution of the problem
+ *
+ * \htmlonly
+ * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
+ * \endhtmlonly
+ *
+ * More or less this example is the adaptation of the dense example in 3D
+ *
+ * \see \ref Grid_3_gs_3D
+ *
+ * # Initialization
+ *
+ * On gpu we can add points using the function addPoints. This function takes 2 lambda functions: the first takes 3 arguments (in 3D)
+ * i,j,k; these are the global coordinates for a point. We can return either true or false. In case of true the point is
+ * created, in case of false the point is not inserted. The second lambda is instead used to initialize the point inserted.
+ * The arguments of the second lambda are the data argument we use to initialize the point and the global coordinates i,j,k
+ *
+ * After we add the points we have to flush the added points. This is achieved using the function flush; the template parameters
+ * indicate how we have to act on the points. Consider in fact that we could be adding points that already exist ... do we have to add them using the max
+ * or the min. **FLUSH_ON_DEVICE** says instead that the operation is performed using the GPU
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points
+ *
+ * The function can also be called with a specified range
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu create points sub
+ *
+ * # Update
+ *
+ * To calculate the right-hand-side we use the function **conv2**; this function can be used to do a convolution that involves
+ * two properties
+ *
+ * The function accepts a lambda function where the first 2 arguments are the output of the same type of the two properties chosen.
+ *
+ * The arguments 3 and 4 contain the values of the two selected properties, while i,j,k are the coordinates where we have to calculate the
+ * convolution. The call **conv2** also accepts template parameters: the first two indicate the source properties, the other two are the destination properties. While the
+ * last is the extension of the stencil. In this case we use 1.
+ *
+ * The lambda function is defined as
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu lambda
+ *
+ * and used in the body loop
+ *
+ * \snippet SparseGrid/1_gray_scott_3d_sparse_gpu/main.cu body
+ *
+ */
+
+#ifdef __NVCC__
+
+constexpr int U = 0;
+constexpr int V = 1;
+
+constexpr int U_next = 2;
+constexpr int V_next = 3;
+
+constexpr int x = 0;
+constexpr int y = 1;
+constexpr int z = 2;
+
+typedef CartDecomposition<3,float, CudaMemory, memory_traits_inte, BoxDistribution<3,float> > Dec;
+
+typedef sgrid_dist_id_gpu<3,float,aggregate<float>,CudaMemory, Dec> SparseGridType;
+
+void init(SparseGridType & grid, Box<3,float> & domain)
+{
+	//! 
\cond [create points] \endcond + + typedef typename GetAddBlockType<SparseGridType>::type InsertBlockT; + + for (int i = 0 ; i < 10 ; i++) + { + timer t; + t.start(); + + grid.addPoints([] __device__ (int i, int j, int k) + { + return true; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 1.0; + } + ); + + + grid.template flush<smax_<U>>(flush_type::FLUSH_ON_DEVICE); + + t.stop(); + + std::cout << "Time populate: " << t.getwct() << std::endl; + + timer t2; + cudaDeviceSynchronize(); + t2.start(); + + grid.addPoints([] __device__ (int i, int j, int k) + { + return true; + }, + [] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<U>() = 5.0; + } + ); + + + grid.template flush<sRight_<U>>(flush_type::FLUSH_ON_DEVICE); + + t2.stop(); + + std::cout << "Time populate: " << t2.getwct() << std::endl; + } +} + + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {512,512,512}; + + // Define periodicity of the grid + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + float deltaT = 0.25; + + // Diffusion constant for specie U + float du = 2*1e-5; + + // Diffusion constant for specie V + float dv = 1*1e-5; + + // Number of timesteps +#ifdef TEST_RUN + size_t timeSteps = 300; +#else + size_t timeSteps = 15000; +#endif + + // K and F (Physical constant in the equation) + float K = 0.053; + float F = 0.014; + + SparseGridType grid(sz,domain,g,bc); + + // spacing of the grid on x and y + float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)}; + + init(grid,domain); + + // sync the ghost + grid.deviceToHost<U>(); + grid.write("final"); + grid.print_stats(); + + //! \cond [time stepping] \endcond + + /*! 
+ * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet Grid/3_gray_scott/main.cpp finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*! + * \page Grid_3_gs_3D_sparse Gray Scott in 3D + * + * # Full code # {#code} + * + * \include Grid/3_gray_scott_3d/main.cpp + * + */ +} + +#else + +int main(int argc, char* argv[]) +{ + return 0; +} + +#endif + diff --git a/example/Vector/1_gpu_first_step/Makefile b/example/Vector/1_gpu_first_step/Makefile index 1599114c9..30dbf24e5 100644 --- a/example/Vector/1_gpu_first_step/Makefile +++ b/example/Vector/1_gpu_first_step/Makefile @@ -4,6 +4,8 @@ CUDA_CC= ifdef CUDA_ON_CPU CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= + CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000 + LIBS_SELECT=$(LIBS_CUDA_ON_CPU) else ifeq (, $(shell which nvcc)) CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) @@ -11,6 +13,7 @@ else else CUDA_CC=nvcc -ccbin=mpic++ endif + LIBS_SELECT=$(LIBS) endif @@ -21,13 +24,13 @@ OBJ = main.o gpu_fstep: %.o: %.cu - $(CUDA_CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC) + $(CUDA_CC) -O3 -g $(CUDA_OPTIONS) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp $(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) gpu_fstep: $(OBJ) - $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) all: gpu_fstep diff --git a/example/Vector/1_gpu_first_step/main.cu b/example/Vector/1_gpu_first_step/main.cu index 0bb965e80..30a69fcde 100644 --- a/example/Vector/1_gpu_first_step/main.cu +++ b/example/Vector/1_gpu_first_step/main.cu @@ -99,6 +99,21 @@ * \snippet Vector/1_gpu_first_step/main.cu using_openmpi * * * MPI must be compiled with CUDA support (in general installing OpenFPM with -g should attempt to install OpenMPI with CUDA support) + * + * ## Macro CUDA_LAUNCH + * + * When we want to launch a kernel "my_kernel" on CUDA we in 
general use the Nvidia CUDA syntax + * + * my_kernel<<<wthr,thr>>>(arguments ... ) + * + * Where wthr is the number of workgroups and thr is the number of threads in a workgroup and arguments... are the arguments to pass to the kernel. + * Equivalently we can launch a kernel with the macro CUDA_LAUNCH_DIM3(my_kernel,wthr,thr,arguments...) or CUDA_LAUNCH(my_kernel,ite,arguments) where + * ite has been taken using getDomainIteratorGPU. There are several advantage on using CUDA_LAUNCH. The first advantage in using the macro is enabling SE_CLASS1 + * all kernel launch become synchronous and an error check is performed before continue to the next kernel making debugging easier. Another feature is the possibility + * to run CUDA code on CPU without a GPU. compiling with "CUDA_ON_CPU=1 make" (Note openfpm must be compiled with GPU support (-g) or with CUDA_ON_CPU support + * (-c "... --enable_cuda_on_cpu"). You can compile this example on CPU. You do not have to change a single line of code for this example. (Check the video to see this + * feature in action). All the openfpm GPU example and CUDA example can run on CPU if they use CUDA_LAUNCH as macro. We are planning to support + * AMD GPUs as well using this system. * * ## Full code ## {#code_e0_sim} * @@ -211,7 +226,8 @@ int main(int argc, char* argv[]) //! \cond [launch_domain_it] \endcond auto ite = vd.getDomainIteratorGPU(); - translate_fill_prop<<<ite.wthr,ite.thr>>>(vd.toKernel()); + // translate_fill_prop<<<ite.wthr,ite.thr>>>(vd.toKernel()); + CUDA_LAUNCH(translate_fill_prop,ite,vd.toKernel()); //! 
\cond [launch_domain_it] \endcond @@ -230,7 +246,8 @@ int main(int argc, char* argv[]) for (int j = 0 ; j < 100 ; j++) { auto ite = vd.getDomainIteratorGPU(); - translate_fill_prop<<<ite.wthr,ite.thr>>>(vd.toKernel()); + // translate_fill_prop<<<ite.wthr,ite.thr>>>(vd.toKernel()); + CUDA_LAUNCH(translate_fill_prop,ite,vd.toKernel()); vd.map(RUN_ON_DEVICE); vd.template ghost_get<0,1,2>(RUN_ON_DEVICE); diff --git a/example/Vector/7_SPH_dlb_gpu/Makefile b/example/Vector/7_SPH_dlb_gpu/Makefile index 907d4ccd1..374603b68 100644 --- a/example/Vector/7_SPH_dlb_gpu/Makefile +++ b/example/Vector/7_SPH_dlb_gpu/Makefile @@ -9,6 +9,8 @@ ifdef CUDA_ON_CPU CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= CUDA_CC_LINK=mpic++ + CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000 + LIBS_SELECT=$(LIBS_CUDA_ON_CPU) else ifeq (, $(shell which nvcc)) CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) @@ -18,6 +20,7 @@ else CUDA_CC=nvcc -ccbin=mpic++ CUDA_CC_LINK=nvcc -ccbin=mpic++ endif + LIBS_SELECT=$(LIBS) endif @@ -33,13 +36,13 @@ sph_dlb_test: OPT += -DTEST_RUN sph_dlb_test: sph_dlb %.o: %.cu - $(CUDA_CC) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + $(CUDA_CC) $(CUDA_OPTIONS) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp $(CC) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH) sph_dlb: $(OBJ) - $(CUDA_CC_LINK) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + $(CUDA_CC_LINK) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) all: sph_dlb diff --git a/example/Vector/7_SPH_dlb_gpu/main.cu b/example/Vector/7_SPH_dlb_gpu/main.cu index 08d91fbe2..50735f75d 100644 --- a/example/Vector/7_SPH_dlb_gpu/main.cu +++ b/example/Vector/7_SPH_dlb_gpu/main.cu @@ -39,6 +39,21 @@ * * \snippet Vector/7_SPH_dlb_gpu/main.cu mark_to_remove_kernel * + * ## Macro CUDA_LAUNCH + * + * When we want to launch a kernel "my_kernel" on CUDA we in general use the Nvidia CUDA syntax + * + * my_kernel<<<wthr,thr>>>(arguments ... 
) + * + * Where wthr is the number of workgroups and thr is the number of threads in a workgroup and arguments... are the arguments to pass to the kernel. + * Equivalently we can launch a kernel with the macro CUDA_LAUNCH_DIM3(my_kernel,wthr,thr,arguments...) or CUDA_LAUNCH(my_kernel,ite,arguments) where + * ite has been taken using getDomainIteratorGPU. There are several advantage on using CUDA_LAUNCH. The first advantage in using the macro is enabling SE_CLASS1 + * all kernel launch become synchronous and an error check is performed before continue to the next kernel making debugging easier. Another feature is the possibility + * to run CUDA code on CPU without a GPU. compiling with "CUDA_ON_CPU=1 make" (Note openfpm must be compiled with GPU support (-g) or with CUDA_ON_CPU support + * (-c "... --enable_cuda_on_cpu"). You can compile this example on CPU. You do not have to change a single line of code for this example. (Check the video to see this + * feature in action). All the openfpm GPU example and CUDA example can run on CPU if they use CUDA_LAUNCH as macro. We are planning to support + * AMD GPUs as well using this system. 
+ * * \include Vector/7_SPH_dlb_gpu_opt/main.cu * */ @@ -195,7 +210,10 @@ inline void EqState(particles & vd) { auto it = vd.getDomainIteratorGPU(); - EqState_gpu<<<it.wthr,it.thr>>>(vd.toKernel(),B); + // You can use standard CUDA kernel launch or the macro CUDA_LAUNCH + + //EqState_gpuning<<<it.wthr,it.thr>>>(vd.toKernel(),B); + CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B) } @@ -301,16 +319,16 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap Point<3,real_number> xa = vd.getPos(a); // Take the mass of the particle dependently if it is FLUID or BOUNDARY - real_number massa = (vd.getProp<type>(a) == FLUID)?MassFluid:MassBound; + real_number massa = (vd.template getProp<type>(a) == FLUID)?MassFluid:MassBound; // Get the density of the of the particle a - real_number rhoa = vd.getProp<rho>(a); + real_number rhoa = vd.template getProp<rho>(a); // Get the pressure of the particle a - real_number Pa = vd.getProp<Pressure>(a); + real_number Pa = vd.template getProp<Pressure>(a); // Get the Velocity of the particle a - Point<3,real_number> va = vd.getProp<velocity>(a); + Point<3,real_number> va = vd.template getProp<velocity>(a); // Reset the force counter (- gravity on zeta direction) vd.template getProp<force>(a)[0] = 0.0; @@ -319,7 +337,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap vd.template getProp<drho>(a) = 0.0; // We threat FLUID particle differently from BOUNDARY PARTICLES ... 
- if (vd.getProp<type>(a) != FLUID) + if (vd.template getProp<type>(a) != FLUID) { // If it is a boundary particle calculate the delta rho based on equation 2 @@ -339,14 +357,14 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap if (a == b) {++Np; continue;}; // get the mass of the particle - real_number massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; + real_number massb = (vd.template getProp<type>(b) == FLUID)?MassFluid:MassBound; // Get the velocity of the particle b - Point<3,real_number> vb = vd.getProp<velocity>(b); + Point<3,real_number> vb = vd.template getProp<velocity>(b); // Get the pressure and density of particle b - real_number Pb = vd.getProp<Pressure>(b); - real_number rhob = vd.getProp<rho>(b); + real_number Pb = vd.template getProp<Pressure>(b); + real_number rhob = vd.template getProp<rho>(b); // Get the distance between p and q Point<3,real_number> dr = xa - xb; @@ -374,7 +392,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap ++Np; } - vd.getProp<red>(a) = max_visc; + vd.template getProp<red>(a) = max_visc; } else { @@ -395,10 +413,10 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap // if (p == q) skip this particle if (a == b) {++Np; continue;}; - real_number massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; - Point<3,real_number> vb = vd.getProp<velocity>(b); - real_number Pb = vd.getProp<Pressure>(b); - real_number rhob = vd.getProp<rho>(b); + real_number massb = (vd.template getProp<type>(b) == FLUID)?MassFluid:MassBound; + Point<3,real_number> vb = vd.template getProp<velocity>(b); + real_number Pb = vd.template getProp<Pressure>(b); + real_number rhob = vd.template getProp<rho>(b); // Get the distance between p and q Point<3,real_number> dr = xa - xb; @@ -415,7 +433,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap Point<3,real_number> DW; DWab(dr,DW,r,false); - real_number factor 
= - massb*((vd.getProp<Pressure>(a) + vd.getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc)); + real_number factor = - massb*((vd.template getProp<Pressure>(a) + vd.template getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc)); vd.template getProp<force>(a)[0] += factor * DW.get(0); vd.template getProp<force>(a)[1] += factor * DW.get(1); @@ -427,7 +445,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap ++Np; } - vd.getProp<red>(a) = max_visc; + vd.template getProp<red>(a) = max_visc; } } @@ -438,7 +456,8 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N // Update the cell-list vd.updateCellList(NN); - calc_forces_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),NN.toKernel(),W_dap,cbar); + //calc_forces_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),NN.toKernel(),W_dap,cbar); + CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel(),NN.toKernel(),W_dap,cbar) max_visc = reduce_local<red,_max_>(vd); } @@ -448,11 +467,11 @@ __global__ void max_acceleration_and_velocity_gpu(vector_type vd) { auto a = GET_PARTICLE(vd); - Point<3,real_number> acc(vd.getProp<force>(a)); - vd.getProp<red>(a) = norm(acc); + Point<3,real_number> acc(vd.template getProp<force>(a)); + vd.template getProp<red>(a) = norm(acc); - Point<3,real_number> vel(vd.getProp<velocity>(a)); - vd.getProp<red2>(a) = norm(vel); + Point<3,real_number> vel(vd.template getProp<velocity>(a)); + vd.template getProp<red2>(a) = norm(vel); } void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel) @@ -460,7 +479,8 @@ void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_n // Calculate the maximum acceleration auto part = vd.getDomainIteratorGPU(); - max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel()); + // 
max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel()); + CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel()); max_acc = reduce_local<red,_max_>(vd); max_vel = reduce_local<red2,_max_>(vd); @@ -566,7 +586,8 @@ void verlet_int(particles & vd, real_number dt) real_number dt205 = dt*dt*0.5; real_number dt2 = dt*2.0; - verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205); + // verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205); + CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205); //! \cond [remove_marked_part] \endcond @@ -646,7 +667,8 @@ void euler_int(particles & vd, real_number dt) real_number dt205 = dt*dt*0.5; - euler_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt205); + // euler_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt205); + CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205); // remove the particles remove_marked<red>(vd); @@ -722,7 +744,8 @@ inline void sensor_pressure(Vector & vd, // if the probe is inside the processor domain if (vd.getDecomposition().isLocal(probes.get(i)) == true) { - sensor_pressure_gpu<<<1,1>>>(vd.toKernel(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); + // sensor_pressure_gpu<<<1,1>>>(vd.toKernel(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); + CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); // move calculated pressure on press_tmp_.deviceToHost(); @@ -892,7 +915,7 @@ int main(int argc, char* argv[]) // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU vd.hostToDevicePos(); - vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity>(); + vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>(); vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE); -- GitLab