From 8c2a0b50809bd9e3ced6415a5137c4b30828f56d Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Tue, 18 Dec 2018 17:44:08 +0100 Subject: [PATCH] Latest update --- build.sh | 9 ++++--- example/Vector/0_simple/main.cpp | 2 ++ example/Vector/7_SPH_dlb_gpu/main.cu | 26 ++++++++++++++++--- example/Vector/7_SPH_dlb_gpu_opt/main.cu | 8 +++--- example/Vector/7_SPH_dlb_opt/main_dbg.cpp | 19 -------------- openfpm_io | 2 +- src/Grid/grid_dist_id.hpp | 2 +- src/Grid/tests/grid_dist_id_unit_test.cpp | 4 --- src/Vector/cuda/vector_dist_gpu_unit_tests.cu | 13 ++++++++++ src/Vector/vector_dist.hpp | 19 ++++++++++---- 10 files changed, 64 insertions(+), 40 deletions(-) diff --git a/build.sh b/build.sh index 34810b769..bf6365ff7 100755 --- a/build.sh +++ b/build.sh @@ -17,18 +17,19 @@ echo "Branch name: $branch" rm -rf $HOME/openfpm_dependencies/openfpm_pdata/0 if [ x"$hostname" == x"cifarm-centos-node.mpi-cbg.de" ]; then - ./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_pdata/$branch/ 4 + ./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4 echo 4 > $HOME/openfpm_dependencies/openfpm_pdata/$branch/MPI/version - rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full fi if [ x"$hostname" == x"cifarm-ubuntu-node.mpi-cbg.de" ]; then - rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full + ./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4 + echo 4 > $HOME/openfpm_dependencies/openfpm_pdata/$branch/MPI/version fi if [ x"$hostname" == x"cifarm-mac-node.mpi-cbg.de" ]; then export PATH="/usr/local/bin:$PATH" - rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full + ./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4 + echo 4 > $HOME/openfpm_dependencies/openfpm_io/$branch/MPI/version fi diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp index a93f5302e..b056ed10e 100644 --- a/example/Vector/0_simple/main.cpp +++ b/example/Vector/0_simple/main.cpp @@ -17,8 +17,10 @@ * 
\subpage Vector_6_complex_usage * \subpage Vector_7_sph_dlb * \subpage Vector_7_sph_dlb_opt + * \subpage Vector_7_sph_dlb_gpu * \subpage Vector_7_sph_dlb_gpu_opt * \subpage Vector_8_DEM + * \subpage Vector_9_gpu_cuda_interop * */ diff --git a/example/Vector/7_SPH_dlb_gpu/main.cu b/example/Vector/7_SPH_dlb_gpu/main.cu index 4d0a58f75..32c330aa3 100644 --- a/example/Vector/7_SPH_dlb_gpu/main.cu +++ b/example/Vector/7_SPH_dlb_gpu/main.cu @@ -8,7 +8,7 @@ * * * This example show the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with - * \ref{SPH_dlb} is that here we use GPU and 1.2 Millions particles. + * \ref SPH_dlb is that here we use GPUs and 1.2 million particles. * * \htmlonly * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br> @@ -25,8 +25,21 @@ * </div> * \endhtmlonly * + * This example uses all the features explained in example \ref e3_md_gpu. Additionally, this example shows how to remove particles + * on the GPU using a bulk remove function * - * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion + * ## Bulk remove + * + * In SPH we need to remove particles that go out of bounds. OpenFPM provides the function \b remove_marked for this purpose. + * + * \snippet Vector/7_SPH_dlb_gpu/main.cu remove_marked_part + * + * where vd is the vector_dist_gpu and red is the property that marks which particles must be removed. 
We mark the particles to be removed in the kernel function. + * We check whether a particle goes out of the region of interest or its density goes critically far from the rest density + * + * \snippet Vector/7_SPH_dlb_gpu/main.cu mark_to_remove_kernel + * + * \include Vector/7_SPH_dlb_gpu_opt/main.cu * */ @@ -523,7 +536,9 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2; vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); - // Check if the particle go out of range in space and in density + //! \cond [mark_to_remove_kernel] \endcond + + // Check if the particle goes out of range in space or in density; if it does, mark it to be removed later if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) @@ -531,6 +546,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number else {vd.template getProp<red>(a) = 0;} + //! \cond [mark_to_remove_kernel] \endcond vd.template getProp<velocity_prev>(a)[0] = velX; vd.template getProp<velocity_prev>(a)[1] = velY; @@ -550,9 +566,13 @@ void verlet_int(particles & vd, real_number dt) verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205); + //! \cond [remove_marked_part] \endcond + // remove the particles marked remove_marked<red>(vd); + //! 
\cond [remove_marked_part] \endcond + // increment the iteration counter cnt++; } diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index ba92a1a6c..111ce72fb 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -4,7 +4,7 @@ * [TOC] * * - * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu_opt} + * # SPH with Dynamic load Balancing on GPU (Optimized) # {#SPH_dlb_gpu_opt} * * * This example show the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with @@ -29,9 +29,11 @@ * * ## GPU ## {#e7_sph_inclusion} * - * This example is the port on GPU of the following example \ref{SPH_dlb} + * This example is an optimization of the example \ref SPH_dlb_gpu. All the optimizations applied in this example have been explained + * in \ref e3_md_gpu_opt so we will not go into the details here. + * + * We report the full code here. * - * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion * */ diff --git a/example/Vector/7_SPH_dlb_opt/main_dbg.cpp b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp index d49c96ad6..be6ec8ec7 100644 --- a/example/Vector/7_SPH_dlb_opt/main_dbg.cpp +++ b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp @@ -1,22 +1,3 @@ -/*! 
- * \page Vector_7_sph_dlb_dbg Vector 7 SPH Dam break simulation (Debugging video) - * - * - * [TOC] - * - * - * # SPH with Dynamic load Balancing (Debugging video) # {#SPH_dlb} - * - * \htmlonly - * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/> - * \endhtmlonly - * - * ## Inclusion ## {#e7_sph_inclusion} - * - * - * \snippet Vector/7_SPH_dlb/main.cpp inclusion - * - */ //#define SE_CLASS1 //#define STOP_ON_ERROR diff --git a/openfpm_io b/openfpm_io index 4b2e3c09b..59be4882c 160000 --- a/openfpm_io +++ b/openfpm_io @@ -1 +1 @@ -Subproject commit 4b2e3c09bc57921cf0e1bb2f1056ec93cbb9b090 +Subproject commit 59be4882c3060f71b10057707b3b2699a01a2643 diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 7aaacaed4..e053a9707 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -60,7 +60,7 @@ struct Box_fix * ### Synchronize a distributed grid for complex structures * \snippet grid_dist_id_unit_test.cpp Synchronized distributed grid complex * ### Usage of a grid dist iterator sub - * \snippet grid_dist_id_unit_test.cpp Usage of a sub_grid iterator + * \snippet grid_dist_id_iterators_unit_tests.hpp Usage of a sub_grid iterator * ### Construct two grid with the same decomposition * \snippet grid_dist_id_unit_test.cpp Construct two grid with the same decomposition * diff --git a/src/Grid/tests/grid_dist_id_unit_test.cpp b/src/Grid/tests/grid_dist_id_unit_test.cpp index 6c077f7b9..7b8b480c3 100644 --- a/src/Grid/tests/grid_dist_id_unit_test.cpp +++ b/src/Grid/tests/grid_dist_id_unit_test.cpp @@ -290,8 +290,6 @@ void Test1D(const Box<1,float> & domain, long int k) { BOOST_TEST_CHECKPOINT( "Testing 1D grid k=" << k ); - //! [Create and access a distributed grid] - // grid size size_t sz[1]; sz[0] = k; @@ -329,8 +327,6 @@ void Test1D(const Box<1,float> & domain, long int k) ++dom; } - //! 
[Create and access a distributed grid] - // Get the virtual cluster machine Vcluster<> & vcl = g_dist.getVC(); diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index 8bb80a1e3..9f8ff1faa 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -430,8 +430,14 @@ void vdist_calc_gpu_test() // Boundary conditions size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + //! [Create a gpu vector] + vector_dist_gpu<3,St,aggregate<St,St[3],St[3]>> vd(1000,domain,bc,g); + //! [Create a gpu vector] + + //! [Fill gpu vector and move to GPU] + srand(v_cl.rank()*10000); auto it = vd.getDomainIterator(); @@ -463,6 +469,8 @@ void vdist_calc_gpu_test() // Ok we redistribute the particles (GPU based) vd.map(RUN_ON_DEVICE); + //! [Fill gpu vector and move to GPU] + vd.deviceToHostPos(); vd.template deviceToHostProp<0,1,2>(); @@ -577,8 +585,13 @@ void vdist_calc_gpu_test() } vd_cpu.template ghost_get<0,1,2>(); + + //! [Fill the ghost on GPU] + vd.template ghost_get<0,1,2>(RUN_ON_DEVICE); + //! [Fill the ghost on GPU] + vd.deviceToHostPos(); vd.template deviceToHostProp<0,1,2>(); diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 325e3c3f4..5e924f579 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -191,20 +191,29 @@ enum reorder_opt /*! 
\brief Distributed vector * - * This class reppresent a distributed vector, the distribution of the structure + * This class represents a distributed vector, the distribution of the structure * is based on the positional information of the elements the vector store * * ## Create a vector of random elements on each processor 2D - * \snippet vector_dist_unit_test.hpp Create a vector of random elements on each processor 2D + * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 2D * * ## Create a vector of random elements on each processor 3D - * \snippet vector_dist_unit_test.hpp Create a vector of random elements on each processor 3D + * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 3D * * ## Create a vector of elements distributed on a grid like way - * \snippet vector_dist_unit_test.hpp Create a vector of elements distributed on a grid like way + * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of elements distributed on a grid like way * * ## Redistribute the particles and sync the ghost properties - * \snippet vector_dist_unit_test.hpp Redistribute the particles and sync the ghost properties + * \snippet Vector/tests/vector_dist_unit_test.cpp Redistribute the particles and sync the ghost properties + * + * ## Create a GPU distributed vector [St = float or double] + * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Create a gpu vector + * + * ## Fill a GPU vector_dist on CPU and move the information to GPU and redistribute [St = float or double] + * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill gpu vector and move to GPU + * + * ## Fill the ghost on GPU + * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill the ghost on GPU * * \tparam dim Dimensionality of the space where the elements lives * \tparam St type of space float, double ... -- GitLab