diff --git a/example/Vector/7_SPH_dlb_gpu_opt/Makefile b/example/Vector/7_SPH_dlb_gpu_opt/Makefile index dc1e101e100bf680bf39213e620bc199b6d5bc0a..24d51560f8247cbc23768c84358db11d9d91a5c1 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/Makefile +++ b/example/Vector/7_SPH_dlb_gpu_opt/Makefile @@ -8,7 +8,7 @@ ifdef CUDA_ON_CPU CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= CUDA_CC_LINK=mpic++ - CUDA_OPTIONS= + CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000 else ifeq (, $(shell which nvcc)) CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index 17f6f7f3d70a6113a315b34f6e333085798fe91c..140ca52b6fc60b2a16485c77024c6d15560775c9 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -51,7 +51,6 @@ #include "Vector/vector_dist.hpp" #include <math.h> #include "Draw/DrawParticles.hpp" -#include <cuda_profiler_api.h> @@ -313,19 +312,19 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap Point<3,real_number> xa = vd.getPos(a); // Type of the particle - unsigned int typea = vd.getProp<type>(a); + unsigned int typea = vd.template getProp<type>(a); // Take the mass of the particle dependently if it is FLUID or BOUNDARY //real_number massa = (typea == FLUID)?MassFluid:MassBound; // Get the density of the of the particle a - real_number rhoa = vd.getProp<rho>(a); + real_number rhoa = vd.template getProp<rho>(a); // Get the pressure of the particle a - real_number Pa = vd.getProp<Pressure>(a); + real_number Pa = vd.template getProp<Pressure>(a); // Get the Velocity of the particle a - Point<3,real_number> va = vd.getProp<velocity>(a); + Point<3,real_number> va = vd.template getProp<velocity>(a); Point<3,real_number> force_; force_.get(0) = 0.0f; @@ -348,12 +347,12 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap // if (p == q) skip this particle this condition should be done in the r^2 = 0 if (a == b) {++Np; continue;}; - unsigned int typeb = vd.getProp<type>(b); + unsigned int typeb = vd.template getProp<type>(b); real_number massb = (typeb == FLUID)?MassFluid:MassBound; - Point<3,real_number> vb = vd.getProp<velocity>(b); - real_number Pb = vd.getProp<Pressure>(b); - real_number rhob = vd.getProp<rho>(b); + Point<3,real_number> vb = vd.template getProp<velocity>(b); + real_number Pb = vd.template getProp<Pressure>(b); + real_number rhob = vd.template getProp<rho>(b); // Get the distance between p and q Point<3,real_number> dr = xa - xb; @@ -387,7 +386,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap ++Np; } - vd.getProp<red>(a) = max_visc; + vd.template getProp<red>(a) = max_visc; vd.template getProp<force>(a)[0] = force_.get(0); vd.template getProp<force>(a)[1] = force_.get(1); @@ -414,11 +413,11 @@ __global__ void max_acceleration_and_velocity_gpu(vector_type vd) { auto a = GET_PARTICLE(vd); - Point<3,real_number> acc(vd.getProp<force>(a)); - vd.getProp<red>(a) = norm(acc); + Point<3,real_number> acc(vd.template getProp<force>(a)); + vd.template getProp<red>(a) = norm(acc); - Point<3,real_number> vel(vd.getProp<velocity>(a)); - vd.getProp<red2>(a) = norm(vel); + Point<3,real_number> vel(vd.template getProp<velocity>(a)); + vd.template getProp<red2>(a) = norm(vel); } void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel) @@ -426,7 +425,7 @@ void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_n // Calculate the maximum acceleration auto part = vd.getDomainIteratorGPU(); - max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel()); + CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel()); max_acc = reduce_local<red,_max_>(vd); max_vel = reduce_local<red2,_max_>(vd); @@ -529,7 +528,7 @@ void verlet_int(particles & vd, real_number dt) real_number dt205 = dt*dt*0.5; real_number dt2 = dt*2.0; - verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205); + CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205); // remove the particles marked remove_marked<red>(vd); @@ -605,7 +604,7 @@ void euler_int(particles & vd, real_number dt) real_number dt205 = dt*dt*0.5; - euler_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt205); + CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205); // remove the particles remove_marked<red>(vd); @@ -681,7 +680,7 @@ inline void sensor_pressure(Vector & vd, // if the probe is inside the processor domain if (vd.getDecomposition().isLocal(probes.get(i)) == true) { - sensor_pressure_gpu<<<1,1>>>(vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); + CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); vd.merge<Pressure>(NN); @@ -720,7 +719,9 @@ int main(int argc, char* argv[]) // initialize the library openfpm_init(&argc,&argv); +#ifndef CUDA_ON_CPU cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); +#endif // It contain for each time-step the value detected by the probes openfpm::vector<openfpm::vector<real_number>> press_t; diff --git a/install b/install index 27a6a8d7192eebdb3973c91553ae3f2ee94d177b..88d0da947c9807f008ece653815ae1469c38e078 100755 --- a/install +++ b/install @@ -433,10 +433,9 @@ if [ x"$cuda_on_cpu" == x"YES" ]; then fi if [ x"$gpu_support" == x"1" ]; then echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -ldl -lboost_filesystem -lboost_system" >> example.mk - echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -lboost_filesystem -lboost_system" >> example.mk + echo "LIBS_CUDA_ON_CPU=-lvcluster_cuda_on_cpu -lofpmmemory_cuda_on_cpu -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -lboost_filesystem -lboost_system" >> example.mk else echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $lin_alg_lib -ldl -lboost_filesystem -lboost_system $optional_boost" >> example.mk - echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $lin_alg_lib -lboost_filesystem -lboost_system $optional_boost" >> example.mk fi echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) "$(cat cuda_options)" -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk cp example.mk src/example.mk