diff --git a/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu b/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu index 8d20134e5772b9ff99171c246059cf54d45988fe..791963be4adc60d137f91cee0f4697655a0acf62 100644 --- a/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu +++ b/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu @@ -262,7 +262,7 @@ template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggr //! \cond [calc_force_sorted] \endcond - CUDA_LAUNCH(calc_force_gpu,it2.wthr,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2); + CUDA_LAUNCH(calc_force_gpu,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2); //! \cond [calc_force_sorted] \endcond diff --git a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu index 7e7eab9d99f8c2b10a2aa04e124e098e5cbe0891..bb4757617af0646f583365875fea074239a3f32a 100644 --- a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu @@ -62,7 +62,6 @@ #include "Vector/vector_dist.hpp" #include <math.h> #include "Draw/DrawParticles.hpp" -#include <cuda_profiler_api.h> @@ -324,16 +323,16 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN Point<3,real_number> xa = vd.getPos(a); // Type of the particle - unsigned int typea = vd.getProp<type>(a); + unsigned int typea = vd.template getProp<type>(a); // Get the density of the of the particle a - real_number rhoa = vd.getProp<rho>(a); + real_number rhoa = vd.template getProp<rho>(a); // Get the pressure of the particle a - real_number Pa = vd.getProp<Pressure>(a); + real_number Pa = vd.template getProp<Pressure>(a); // Get the Velocity of the particle a - Point<3,real_number> va = vd.getProp<velocity>(a); + Point<3,real_number> va = vd.template getProp<velocity>(a); Point<3,real_number> force_; force_.get(0) = 0.0f; @@ -356,12 +355,12 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN // if (p == q) skip this particle this condition should be done in the r^2 = 0 //if (a == b) {++Np; continue;}; - unsigned int typeb = vd.getProp<type>(b); + unsigned int typeb = vd.template getProp<type>(b); real_number massb = (typeb == FLUID)?MassFluid:MassBound; - Point<3,real_number> vb = vd.getProp<velocity>(b); - real_number Pb = vd.getProp<Pressure>(b); - real_number rhob = vd.getProp<rho>(b); + Point<3,real_number> vb = vd.template getProp<velocity>(b); + real_number Pb = vd.template getProp<Pressure>(b); + real_number rhob = vd.template getProp<rho>(b); // Get the distance between p and q Point<3,real_number> dr = xa - xb; @@ -395,7 +394,7 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN ++Np; } - vd.getProp<red>(a) = max_visc; + vd.template getProp<red>(a) = max_visc; vd.template getProp<force>(a)[0] = force_.get(0); vd.template getProp<force>(a)[1] = force_.get(1); @@ -417,10 +416,10 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, Point<3,real_number> xa = vd.getPos(a); // Type of the particle - unsigned int typea = vd.getProp<type>(a); + unsigned int typea = vd.template getProp<type>(a); // Get the Velocity of the particle a - Point<3,real_number> va = vd.getProp<velocity>(a); + Point<3,real_number> va = vd.template getProp<velocity>(a); real_number drho_ = 0.0f; @@ -439,10 +438,10 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, // if (p == q) skip this particle this condition should be done in the r^2 = 0 //if (a == b) {++Np; continue;}; - unsigned int typeb = vd.getProp<type>(b); + unsigned int typeb = vd.template getProp<type>(b); real_number massb = (typeb == FLUID)?MassFluid:MassBound; - Point<3,real_number> vb = vd.getProp<velocity>(b); + Point<3,real_number> vb = vd.template getProp<velocity>(b); // Get the distance between p and q Point<3,real_number> dr = xa - xb; @@ -467,7 +466,7 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, ++Np; } - vd.getProp<red>(a) = max_visc; + vd.template getProp<red>(a) = max_visc; vd.template getProp<drho>(a) = drho_; } @@ -519,11 +518,11 @@ __global__ void max_acceleration_and_velocity_gpu(vector_type vd) { auto a = GET_PARTICLE(vd); - Point<3,real_number> acc(vd.getProp<force>(a)); - vd.getProp<red>(a) = norm(acc); + Point<3,real_number> acc(vd.template getProp<force>(a)); + vd.template getProp<red>(a) = norm(acc); - Point<3,real_number> vel(vd.getProp<velocity>(a)); - vd.getProp<red2>(a) = norm(vel); + Point<3,real_number> vel(vd.template getProp<velocity>(a)); + vd.template getProp<red2>(a) = norm(vel); } void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel) @@ -638,7 +637,7 @@ void verlet_int(particles & vd, real_number dt) real_number dt205 = dt*dt*0.5; real_number dt2 = dt*2.0; - verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205); + CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205); // remove the particles marked remove_marked<red>(vd); @@ -790,7 +789,7 @@ inline void sensor_pressure(Vector & vd, // if the probe is inside the processor domain if (vd.getDecomposition().isLocal(probes.get(i)) == true) { - sensor_pressure_gpu<<<1,1>>>(vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); + CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel()); vd.merge<Pressure>(NN); @@ -818,7 +817,9 @@ int main(int argc, char* argv[]) openfpm::vector_gpu<aggregate<int>> fluid_ids; openfpm::vector_gpu<aggregate<int>> border_ids; +#ifdef CUDIFY_USE_CUDA cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); +#endif // It contain for each time-step the value detected by the probes openfpm::vector<openfpm::vector<real_number>> press_t; diff --git a/example/Vector/9_gpu_cuda_interop/main.cu b/example/Vector/9_gpu_cuda_interop/main.cu index a9cc33b72a72bc6e7bbe95e02e484df06e955a32..b8e3a0b009a586cdea5c1174295d5e3516a94834 100644 --- a/example/Vector/9_gpu_cuda_interop/main.cu +++ b/example/Vector/9_gpu_cuda_interop/main.cu @@ -229,7 +229,7 @@ int main(int argc, char* argv[]) vd.template hostToDeviceProp<0,1,2>(); - print_data_particle_50<<<100,1>>>((float *)vd.getPropVector().template getDeviceBuffer<0>(), + CUDA_LAUNCH_DIM3(print_data_particle_50,100,1,(float *)vd.getPropVector().template getDeviceBuffer<0>(), (float *)vd.getPropVector().template getDeviceBuffer<1>(), (float *)vd.getPropVector().template getDeviceBuffer<2>(), vd.getPropVector().capacity()); diff --git a/install b/install index 6f83ff2674450869af5d8a26e7eef3b97be30ad4..91591d5e927a9c31e288544bcf1c4fc82e843a19 100755 --- a/install +++ b/install @@ -90,6 +90,16 @@ while getopts di:v:smghc:nul FLAG; do esac done + +if [ x"$gpu_support" == x"0" ]; then + if [[ $configure_options == *"--with-cuda-on-backend=CUDA"* ]]; then + gpu_support=1 + fi +else + configure_options=" $configure_options --with-cuda-on-backend=CUDA" +fi + + ### Configure options i_dir_old=$i_dir @@ -298,10 +308,6 @@ configure_options=" $configure_options --with-parmetis=$i_dir/PARMETIS " ./script/install_Metis.sh $i_dir $CC $CXX $ncore configure_options=" $configure_options --with-metis=$i_dir/METIS " -if [ x"$gpu_support" == x"1" ]; then - configure_options=" $configure_options --with-cuda-on-backend=CUDA" -fi - MPI_installed=0 MPI_System_prv=1 METIS_installed=1