diff --git a/CHANGELOG.md b/CHANGELOG.md index cc7d1d3242ce0f89a6bb3373c2593d9a77763734..8f9b375ab449f0a5523d6989877a9cd8d490f012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,34 @@ # Change Log All notable changes to this project will be documented in this file. -## [2.0.0] December 2018 +## [2.0.0] December 2018 (Codename Elisa) ### Added -- Adding GPU support (see example 3_molecular_dynamic_gpu) +- Adding GPU support (see example 1_gpu_first_step + 3_molecular_dynamic_gpu + 7_sph_dlb_gpu + 7_sph_dlb_gpu_opt) + +### Fixed + +- Detection of clang 10.0.0 on mac-osx mojave +- In VTK binary format all 64-bit types are cast to 32 bit. Either long/unsigned_long are bugged in the Paraview version we tested, or I do not understand how they work. ### Changed - The type Vcluster now is templated and the standard Vcluster is Vcluster<> + Most probably you have to change in your code from Vcluster to Vcluster<> + +## [1.1.1] December 2018 (Codename Poisson) + +### Fixed + +- Detection of clang 10.0.0 on mac-osx mojave + +## [1.0.X ] End of life (These versions are no longer supported) -## [1.1.0] February 2018 +## [1.1.0] February 2018 (Codename Ring) ### Added diff --git a/Jenkinsfile_numerics_mpi b/Jenkinsfile_numerics_mpi index c730e4622975a28abea65fe713695870472dca87..c32ec2c1f0d461b9d60fae86c4eb96273211480a 100644 --- a/Jenkinsfile_numerics_mpi +++ b/Jenkinsfile_numerics_mpi @@ -70,12 +70,8 @@ parallel ( stage ('run mac') { - parallel ( - "1" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics"}, - "2" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics"}, - "3" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 3 0 0 numerics"}, - "4" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 4 0 0 numerics"} - ) + sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics" + sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics" sh "./success.sh 2 
sbalzarini-mac-15 openfpm_numerics" } } diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp index b96ee352117245a5c74134acacd04740de6f90a1..f1270d56353f475f54603f8fa0886157c87c0ac2 100644 --- a/example/Vector/0_simple/main.cpp +++ b/example/Vector/0_simple/main.cpp @@ -4,8 +4,10 @@ * \subpage Vector_1_celllist * \subpage Vector_1_ghost_get * \subpage Vector_1_HDF5 + * \subpage Vector_1_gpu_first_step * \subpage Vector_2_expression * \subpage Vector_3_md + * \subpage Vector_3_md_dyn_gpu * \subpage Vector_4_reo_root * \subpage Vector_4_cp * \subpage Vector_4_mp_cl @@ -14,6 +16,7 @@ * \subpage Vector_6_complex_usage * \subpage Vector_7_sph_dlb * \subpage Vector_7_sph_dlb_opt + * \subpage Vector_7_sph_dlb_gpu_opt * \subpage Vector_8_DEM * */ diff --git a/example/Vector/1_HDF5_save_load/main.cpp b/example/Vector/1_HDF5_save_load/main.cpp index 1f01e4bc402ff8d4d73b79c013967c2f634bdb80..71606b30bb5f9dac7fdc68f3f40bad16035b08a2 100644 --- a/example/Vector/1_HDF5_save_load/main.cpp +++ b/example/Vector/1_HDF5_save_load/main.cpp @@ -1,5 +1,5 @@ /*! - * \page Vector_1_HDF5 HDF5 save and load + * \page Vector_1_HDF5 Vector 1 HDF5 save and load * * * [TOC] diff --git a/example/Vector/7_SPH_dlb_gpu/main.cu b/example/Vector/7_SPH_dlb_gpu/main.cu index 70c49aa52f98fc08741c77b523aa79ae67b65ad6..4d0a58f751cc78aee628ffc74576005a04bcdf38 100644 --- a/example/Vector/7_SPH_dlb_gpu/main.cu +++ b/example/Vector/7_SPH_dlb_gpu/main.cu @@ -1,5 +1,4 @@ -/*! - * \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balacing on GPU +/*! \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balacing on Multi-GPU * * * [TOC] @@ -8,50 +7,26 @@ * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu} * * - * This example show the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. 
With - * Load balancing and Dynamic load balancing we indicate the possibility of the system to re-adapt the domain - * decomposition to keep all the processor load and reduce idle time. + * This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with + * \ref{SPH_dlb} is that here we use GPU and 1.2 million particles. * * \htmlonly * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br> * <div style="display:none" id="vector-video-3"> - * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video> + * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video> * </div> * <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br> * <div style="display:none" id="vector-video-4"> - * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video> + * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video> * </div> - * <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br> + * <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br> * <div style="display:none" id="vector-video-15"> - * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br> - * <div style="display:none" id="vector-video-16"> - * <video id="vid16" width="1200" height="576" controls> <source 
src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br> - * <div style="display:none" id="vector-video-17"> - * <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br> - * <div style="display:none" id="vector-video-18"> - * <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br> - * <div style="display:none" id="vector-video-19"> - * <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video> + * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video> * </div> * \endhtmlonly * - * \htmlonly - * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/> - * \endhtmlonly - * - * ## GPU ## {#e7_sph_inclusion} - * - * This example does not differ from the example in \ref{SPH_dlb} * - * \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion + * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion * */ @@ -550,7 +525,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number // Check if the particle go out of range in space and in density if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || - vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 
0.000263878+0.903944 || + vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) {vd.template getProp<red>(a) = 1;} else @@ -629,7 +604,7 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205) // Check if the particle go out of range in space and in density if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || - vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || + vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) {vd.template getProp<red>(a) = 1;} else diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index 95bb5db2bbdf3c8052350acbd639a424caab64ff..b0177e4612e5b1ee5f98fe17c8acbe0cfbd54ea6 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -1,57 +1,37 @@ -/*! - * \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balacing on GPU +/*! \page Vector_7_sph_dlb_gpu_opt Vector 7 SPH Dam break simulation with Dynamic load balacing on Multi-GPU (optimized version) * * * [TOC] * * - * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu} + * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu_opt} * * - * This example show the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. With - * Load balancing and Dynamic load balancing we indicate the possibility of the system to re-adapt the domain - * decomposition to keep all the processor load and reduce idle time. + * This example show the classical SPH Dam break simulation with load balancing and dynamic load balancing. 
The main difference with + * \ref{SPH_dlb} is that here we use GPU and 1.2 million particles. Simulating 1.5 seconds should be doable on a 1050Ti within a couple + * of hours. * * \htmlonly * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br> * <div style="display:none" id="vector-video-3"> - * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video> + * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video> * </div> * <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br> * <div style="display:none" id="vector-video-4"> - * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video> + * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video> * </div> - * <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br> + * <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br> * <div style="display:none" id="vector-video-15"> - * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br> - * <div style="display:none" id="vector-video-16"> - * <video id="vid16" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br> - * <div 
style="display:none" id="vector-video-17"> - * <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br> - * <div style="display:none" id="vector-video-18"> - * <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video> - * </div> - * <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br> - * <div style="display:none" id="vector-video-19"> - * <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video> + * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video> * </div> * \endhtmlonly * - * \htmlonly - * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/> - * \endhtmlonly * * ## GPU ## {#e7_sph_inclusion} * - * This example does not differ from the example in \ref{SPH_dlb} + * This example is the port on GPU of the following example \ref{SPH_dlb} * - * \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion + * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion * */ @@ -163,7 +143,7 @@ const int red = 8; const int red2 = 9; // Type of the vector containing particles -typedef vector_dist_gpu<3,real_number,aggregate<size_t,real_number, real_number, real_number, real_number, real_number[3], real_number[3], real_number[3], real_number, real_number>> particles; +typedef vector_dist_gpu<3,real_number,aggregate<unsigned int,real_number, real_number, real_number, real_number, real_number[3], real_number[3], real_number[3], real_number, real_number>> particles; // | | | | | | | | | | // | 
| | | | | | | | | // type density density Pressure delta force velocity velocity reduction another @@ -509,8 +489,8 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); // Check if the particle go out of range in space and in density - if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || - vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || + if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 || + vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) {vd.template getProp<red>(a) = 1;} else @@ -588,8 +568,8 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205) vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); // Check if the particle go out of range in space and in density - if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || - vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || + if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 || + vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) {vd.template getProp<red>(a) = 1;} else @@ -718,8 +698,8 @@ int main(int argc, char* argv[]) probes.add({0.754,0.31,0.02}); // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y - Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.5025}); - size_t sz[3] = {413,179,131}; + Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511}); + size_t sz[3] = 
{413,179,133}; // Fill W_dap W_dap = 1.0/Wab(H/1.5); @@ -939,7 +919,7 @@ int main(int argc, char* argv[]) vd.deviceToHostPos(); vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>(); - vd.write_frame("Geometry",write); + vd.write_frame("Geometry",write,VTK_WRITER | FORMAT_BINARY); write++; if (v_cl.getProcessUnitID() == 0) diff --git a/install b/install index 2cb62bd9e016043d766c6821c0b9742def07e595..2d3eb3ccd88d5f77347bda1a1adb9753dc170d2d 100755 --- a/install +++ b/install @@ -411,7 +411,7 @@ fi ### Create example.mk install_base=$(cat install_dir) -openmp_flags="$(cat openmp_flags) $(cat openfpm_flags)" +openmp_flags="$(cat openmp_flags)" if [ -d "$i_dir/HDF5/lib" ]; then hdf5_lib=$i_dir/HDF5/lib @@ -419,7 +419,7 @@ elif [ -d "$i_dir/HDF5/lib64" ]; then hdf5_lib=$i_dir/HDF5/lib64 fi -echo "INCLUDE_PATH= $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk +echo "INCLUDE_PATH=-Wno-deprecated-declarations $openmp_flags -I. 
-I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk echo "LIBS_PATH= $openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib -L$hdf5_lib -L$i_dir/LIBHILBERT/lib $lin_alg_dir" >> example.mk if [ x"$gpu_support" == x"1" ]; then echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $(cat cuda_lib) $lin_alg_lib" >> example.mk @@ -428,7 +428,7 @@ else echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk fi -echo "INCLUDE_PATH_NVCC=$(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. 
-I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk +echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk cp example.mk src/example.mk cp example.mk example/example.mk diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc index e356c1f644c459d47927b845e8cb1a8d06b43103..0c4e8ff295dd62a02fb6a5039839908f1de63d93 100644 --- a/openfpm_pdata.doc +++ b/openfpm_pdata.doc @@ -38,7 +38,7 @@ PROJECT_NAME = "OpenFPM_pdata" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.1.0 +PROJECT_NUMBER = 2.0.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -763,7 +763,8 @@ INPUT_ENCODING = UTF-8 # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, # *.qsf, *.as and *.js. 
-FILE_PATTERNS = *.hpp *.cpp *.h +FILE_PATTERNS = *.cu *.cuh *.hpp *.cpp *.h +EXTENSION_MAPPING = cu=c++ cuh=c++ # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. @@ -1927,6 +1928,7 @@ INCLUDE_PATH = INCLUDE_FILE_PATTERNS = + # The PREDEFINED tag can be used to specify one or more macro names that are # defined before the preprocessor is started (similar to the -D option of e.g. # gcc). The argument of the tag is a list of macros of the form: name or diff --git a/script/install_Parmetis.sh b/script/install_Parmetis.sh index 91b8ec0d8ecf02e65cd083ef3d3cd0cde0d31d5a..e155f7608b5b798cb418231b4717faba1939e54b 100755 --- a/script/install_Parmetis.sh +++ b/script/install_Parmetis.sh @@ -12,6 +12,7 @@ fi ## Remove old download rm -rf parmetis-4.0.3 +rm parmetis-4.0.3.tar.gz wget http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.3.tar.gz tar -xf parmetis-4.0.3.tar.gz diff --git a/src/Decomposition/Domain_NN_calculator_cart.hpp b/src/Decomposition/Domain_NN_calculator_cart.hpp index 3f2c144e16b5048768a6ff47e3230b5f50947568..d18180432cf165cea8ecc9e4d23924b8610a563c 100644 --- a/src/Decomposition/Domain_NN_calculator_cart.hpp +++ b/src/Decomposition/Domain_NN_calculator_cart.hpp @@ -217,8 +217,8 @@ class domain_nn_calculator_cart anom_lin.clear(); for (size_t i = 0 ; i < anom.size() ; i++) { - anom_lin.add(); grid_key_dx<dim> tmp = anom.get(i).subsub + shift; + anom_lin.add(); anom_lin.last().subsub = gs.LinId(tmp); long int self_cell = -1; @@ -281,7 +281,7 @@ public: dom_cells_lin.clear(); for (size_t i = 0 ; i < dom_cells.size() ; i++) { - grid_key_dx<dim> tmp = dom_cells.get(i) + shift; + grid_key_dx<dim> tmp = dom_cells.get(i) + shift; dom_cells_lin.add(gs.LinId(tmp)); } diff --git a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh index 841232874a804d72517edad36013ca6fd6d8aea6..3b6aba61774ce98cec1084beac71c3aafc904ed8 100644 --- 
a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh +++ b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh @@ -277,7 +277,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true> <<<ite.wthr,ite.thr>>> (box_f_dev.toKernel(),box_f_sv.toKernel(), v_pos.toKernel(),v_prp.toKernel(), - starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old); + starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old,g_m); #else std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl; diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index 3219e05cfa45c424e2e818a167a4d33c1a54d6bc..d78b4a294d7d3f6b7f59421893a2e4db9ecd213d 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles ) <<<ite.wthr,ite.thr>>> (box_f_dev.toKernel(),box_f_sv.toKernel(), v_pos.toKernel(),v_prp.toKernel(), - starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old); + starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,v_pos.size()); v_pos.deviceToHost<0>(); o_part_loc2.deviceToHost<0,1>(); diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index 74b42c0a827161523e21a0a89d10cc3d5f383890..76a52e4805e194cab4b6143ac262598397c278ae 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -222,12 +222,12 @@ template<unsigned int dim, typename St, __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_f_sv, vector_type_pos v_pos, vector_type_prp v_prp, start_type start, shifts_type shifts, - output_type output, unsigned int offset) + output_type output, unsigned int offset,unsigned int g_m) { unsigned int old_shift = (unsigned int)-1; int p = threadIdx.x + blockIdx.x * blockDim.x; - if (p >= 
v_pos.size()) return; + if (p >= g_m) return; Point<dim,St> xp = v_pos.template get<0>(p); @@ -251,15 +251,8 @@ __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_ v_pos.template get<0>(base+n)[j] = xp.get(j) - shifts.template get<0>(shift_actual)[j]; } - if (base_o + n < output.size()) - { - output.template get<0>(base_o+n) = p; - output.template get<1>(base_o+n) = shift_actual; - } - else - { - printf("OVERFLOW \n"); - } + output.template get<0>(base_o+n) = p; + output.template get<1>(base_o+n) = shift_actual; v_prp.set(base+n,v_prp.get(p)); @@ -290,7 +283,7 @@ struct _max_: mgpu::maximum_t<red_type> {}; template<unsigned int prp, template <typename> class op, typename vector_type> -auto reduce(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type +auto reduce_local(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type { typedef typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type reduce_type; diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index 2be62fa2334642f29e0eb0ea08cb2e3f1ea70cab..8bb80a1e34c1968498fbfa79d7db29c97e3774a9 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -736,20 +736,20 @@ BOOST_AUTO_TEST_CASE(vector_dist_reduce) vd.template hostToDeviceProp<0,1,2,3>(); - float redf = reduce<0,_add_>(vd); - double redd = reduce<1,_add_>(vd); - int redi = reduce<2,_add_>(vd); - size_t reds = reduce<3,_add_>(vd); + float redf = reduce_local<0,_add_>(vd); + double redd = reduce_local<1,_add_>(vd); + int redi = reduce_local<2,_add_>(vd); + size_t reds = reduce_local<3,_add_>(vd); BOOST_REQUIRE_EQUAL(redf,(vd.size_local()+1.0)*(vd.size_local())/2.0); BOOST_REQUIRE_EQUAL(redd,(vd.size_local()+1.0)*(vd.size_local())/2.0); BOOST_REQUIRE_EQUAL(redi,(vd.size_local()+1)*(vd.size_local())/2); 
BOOST_REQUIRE_EQUAL(reds,(vd.size_local()+1)*(vd.size_local())/2); - float redf2 = reduce<0,_max_>(vd); - double redd2 = reduce<1,_max_>(vd); - int redi2 = reduce<2,_max_>(vd); - size_t reds2 = reduce<3,_max_>(vd); + float redf2 = reduce_local<0,_max_>(vd); + double redd2 = reduce_local<1,_max_>(vd); + int redi2 = reduce_local<2,_max_>(vd); + size_t reds2 = reduce_local<3,_max_>(vd); BOOST_REQUIRE_EQUAL(redf2,vd.size_local()); BOOST_REQUIRE_EQUAL(redd2,vd.size_local()); diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 3f5a9c464175ff5cde6c9dab6cd3fa8604da9054..45fa45e77f4373024c6dcb06641cf284368211fd 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -1567,6 +1567,12 @@ public: */ template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt) { + if (opt & RUN_ON_DEVICE) + { + std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " map_list is unsupported on device (coming soon)" << std::endl; + return; + } + typedef KillParticle obp; // Processor communication size