Commit 8c2a0b50 authored by incardon's avatar incardon

Latest update

parent 7766f252
......@@ -17,18 +17,19 @@ echo "Branch name: $branch"
rm -rf $HOME/openfpm_dependencies/openfpm_pdata/0
if [ x"$hostname" == x"cifarm-centos-node.mpi-cbg.de" ]; then
./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_pdata/$branch/ 4
./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4
echo 4 > $HOME/openfpm_dependencies/openfpm_pdata/$branch/MPI/version
rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full
fi
if [ x"$hostname" == x"cifarm-ubuntu-node.mpi-cbg.de" ]; then
rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full
./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4
echo 4 > $HOME/openfpm_dependencies/openfpm_pdata/$branch/MPI/version
fi
if [ x"$hostname" == x"cifarm-mac-node.mpi-cbg.de" ]; then
export PATH="/usr/local/bin:$PATH"
rm -rf $HOME/openfpm_dependencies/openfpm_pdata/full
./install_MPI_mpich.sh $HOME/openfpm_dependencies/openfpm_io/$branch/ 4
echo 4 > $HOME/openfpm_dependencies/openfpm_io/$branch/MPI/version
fi
......
......@@ -17,8 +17,10 @@
* \subpage Vector_6_complex_usage
* \subpage Vector_7_sph_dlb
* \subpage Vector_7_sph_dlb_opt
* \subpage Vector_7_sph_dlb_gpu
* \subpage Vector_7_sph_dlb_gpu_opt
* \subpage Vector_8_DEM
* \subpage Vector_9_gpu_cuda_interop
*
*/
......
......@@ -8,7 +8,7 @@
*
*
* This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
* \ref{SPH_dlb} is that here we use GPU and 1.2 Millions particles.
* \ref SPH_dlb is that here we use GPUs and 1.2 million particles.
*
* \htmlonly
* <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
......@@ -25,8 +25,21 @@
* </div>
* \endhtmlonly
*
* This example uses all the features explained in example \ref e3_md_gpu. Additionally, this example shows how to remove particles
* on the GPU using a bulk remove function.
*
* \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
* ## Bulk remove
*
* In SPH we need to remove particles that go out of bounds. OpenFPM provides the function \b remove_marked for this purpose.
*
* \snippet Vector/7_SPH_dlb_gpu/main.cu remove_marked_part
*
* where vd is the vector_dist_gpu and red is the property that marks which particles must be removed. We mark the particles to be removed in the kernel function:
* we check whether a particle goes out of the region of interest or its density deviates critically from the rest density.
*
* \snippet Vector/7_SPH_dlb_gpu/main.cu mark_to_remove_kernel
*
* \include Vector/7_SPH_dlb_gpu_opt/main.cu
*
*/
......@@ -523,7 +536,9 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
// Check if the particle go out of range in space and in density
//! \cond [mark_to_remove_kernel] \endcond
// Check if the particle goes out of range in space or in density; if it does, mark it so it can be removed later
if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
......@@ -531,6 +546,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
else
{vd.template getProp<red>(a) = 0;}
//! \cond [mark_to_remove_kernel] \endcond
vd.template getProp<velocity_prev>(a)[0] = velX;
vd.template getProp<velocity_prev>(a)[1] = velY;
......@@ -550,9 +566,13 @@ void verlet_int(particles & vd, real_number dt)
verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205);
//! \cond [remove_marked_part] \endcond
// remove the particles marked
remove_marked<red>(vd);
//! \cond [remove_marked_part] \endcond
// increment the iteration counter
cnt++;
}
......
......@@ -4,7 +4,7 @@
* [TOC]
*
*
* # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu_opt}
* # SPH with Dynamic load Balancing on GPU (Optimized) # {#SPH_dlb_gpu_opt}
*
*
* This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
......@@ -29,9 +29,11 @@
*
* ## GPU ## {#e7_sph_inclusion}
*
* This example is the port on GPU of the following example \ref{SPH_dlb}
* This example is an optimization of the example \ref SPH_dlb_gpu. All the optimizations applied in this example have already been explained
* in \ref e3_md_gpu_opt, so we will not go into the details.
*
* We report the full code here.
*
* \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
*
*/
......
/*!
* \page Vector_7_sph_dlb_dbg Vector 7 SPH Dam break simulation (Debugging video)
*
*
* [TOC]
*
*
* # SPH with Dynamic load Balancing (Debugging video) # {#SPH_dlb}
*
* \htmlonly
* <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/>
* \endhtmlonly
*
* ## Inclusion ## {#e7_sph_inclusion}
*
*
* \snippet Vector/7_SPH_dlb/main.cpp inclusion
*
*/
//#define SE_CLASS1
//#define STOP_ON_ERROR
......
openfpm_io @ 59be4882
Subproject commit 4b2e3c09bc57921cf0e1bb2f1056ec93cbb9b090
Subproject commit 59be4882c3060f71b10057707b3b2699a01a2643
......@@ -60,7 +60,7 @@ struct Box_fix
* ### Synchronize a distributed grid for complex structures
* \snippet grid_dist_id_unit_test.cpp Synchronized distributed grid complex
* ### Usage of a grid dist iterator sub
* \snippet grid_dist_id_unit_test.cpp Usage of a sub_grid iterator
* \snippet grid_dist_id_iterators_unit_tests.hpp Usage of a sub_grid iterator
* ### Construct two grid with the same decomposition
* \snippet grid_dist_id_unit_test.cpp Construct two grid with the same decomposition
*
......
......@@ -290,8 +290,6 @@ void Test1D(const Box<1,float> & domain, long int k)
{
BOOST_TEST_CHECKPOINT( "Testing 1D grid k=" << k );
//! [Create and access a distributed grid]
// grid size
size_t sz[1];
sz[0] = k;
......@@ -329,8 +327,6 @@ void Test1D(const Box<1,float> & domain, long int k)
++dom;
}
//! [Create and access a distributed grid]
// Get the virtual cluster machine
Vcluster<> & vcl = g_dist.getVC();
......
......@@ -430,8 +430,14 @@ void vdist_calc_gpu_test()
// Boundary conditions
size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
//! [Create a gpu vector]
vector_dist_gpu<3,St,aggregate<St,St[3],St[3]>> vd(1000,domain,bc,g);
//! [Create a gpu vector]
//! [Fill gpu vector and move to GPU]
srand(v_cl.rank()*10000);
auto it = vd.getDomainIterator();
......@@ -463,6 +469,8 @@ void vdist_calc_gpu_test()
// Ok we redistribute the particles (GPU based)
vd.map(RUN_ON_DEVICE);
//! [Fill gpu vector and move to GPU]
vd.deviceToHostPos();
vd.template deviceToHostProp<0,1,2>();
......@@ -577,8 +585,13 @@ void vdist_calc_gpu_test()
}
vd_cpu.template ghost_get<0,1,2>();
//! [Fill the ghost on GPU]
vd.template ghost_get<0,1,2>(RUN_ON_DEVICE);
//! [Fill the ghost on GPU]
vd.deviceToHostPos();
vd.template deviceToHostProp<0,1,2>();
......
......@@ -191,20 +191,29 @@ enum reorder_opt
/*! \brief Distributed vector
*
* This class reppresent a distributed vector, the distribution of the structure
* This class represents a distributed vector; the distribution of the structure
* is based on the positional information of the elements the vector stores
*
* ## Create a vector of random elements on each processor 2D
* \snippet vector_dist_unit_test.hpp Create a vector of random elements on each processor 2D
* \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 2D
*
* ## Create a vector of random elements on each processor 3D
* \snippet vector_dist_unit_test.hpp Create a vector of random elements on each processor 3D
* \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 3D
*
* ## Create a vector of elements distributed on a grid like way
* \snippet vector_dist_unit_test.hpp Create a vector of elements distributed on a grid like way
* \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of elements distributed on a grid like way
*
* ## Redistribute the particles and sync the ghost properties
* \snippet vector_dist_unit_test.hpp Redistribute the particles and sync the ghost properties
* \snippet Vector/tests/vector_dist_unit_test.cpp Redistribute the particles and sync the ghost properties
*
* ## Create a gpu distributed vector [St = float or double]
* \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Create a gpu vector
*
* ## Fill a GPU vector_dist on CPU and move the information to GPU and redistribute [St = float or double]
* \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill gpu vector and move to GPU
*
* ## Fill the ghost on GPU
* \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill the ghost on GPU
*
* \tparam dim Dimensionality of the space where the elements live
* \tparam St type of space float, double ...
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment