diff --git a/configure b/configure index 0e7884f4107915ca58ece3868ed242221bed5bbc..dbbe9ce0434e465ec5400afe864970324b84e916 100755 --- a/configure +++ b/configure @@ -466,6 +466,9 @@ do boost) conf_options="$conf_options -DBOOST_ROOT=$ac_optarg" ;; + mpivendor) + conf_options="$conf_options -DMPI_VENDOR=$ac_optarg" + ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6921993394b17b3c9b2576f31e452a5def25c721..7f66b0f9d7f7f10381244c4e9bc18324ff2d3a85 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,6 +55,7 @@ target_include_directories (ofpm_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/config target_include_directories (ofpm_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../openfpm_devices/src/) target_include_directories (ofpm_pdata PUBLIC ${Boost_INCLUDE_DIRS}) +target_compile_definitions(pdata PRIVATE ${MPI_VENDOR}) if(PETSC_FOUND) target_link_libraries(pdata ${PETSC_LIBRARIES}) diff --git a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh index 3b6aba61774ce98cec1084beac71c3aafc904ed8..05ef321c10fbba0e87bbb08cb4c390cf22b7ee19 100644 --- a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh +++ b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh @@ -8,6 +8,9 @@ #ifndef VECTOR_DIST_COMM_UTIL_FUNCS_HPP_ #define VECTOR_DIST_COMM_UTIL_FUNCS_HPP_ +#define SKIP_LABELLING 512 +#define KEEP_PROPERTIES 512 + template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition, typename scan_type, bool is_ok_cuda> struct labelParticlesGhost_impl { @@ -195,7 +198,11 @@ struct local_ghost_from_opart_impl<with_pos,dim,St,prop,Memory,layout_base,true> size_t old = v_pos.size(); v_pos.resize(v_pos.size() + o_part_loc.size(),DATA_ON_DEVICE); - v_prp.resize(v_prp.size() + o_part_loc.size(),DATA_ON_DEVICE); + + if (!(opt & SKIP_LABELLING)) + { + v_prp.resize(v_prp.size() + o_part_loc.size(),DATA_ON_DEVICE); + } process_ghost_particles_local<with_pos,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())> <<<ite.wthr,ite.thr>>> diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index b3ff5dd8da87a6ab94f108c9164bf104f1dc80a3..f6c411f1f9045a82a80e0c438406ab6425be2c7f 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -970,9 +970,6 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) vd.deviceToHostPos(); vd.template deviceToHostProp<0>(); - // Get the neighborhood of each particles - - auto VV = vd.getVerlet(0.01); // store the number of neighborhood for each particles @@ -1036,12 +1033,11 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) for (size_t i = 0 ; i < 25 ; i++) { - // move particles to CPU and move the particles by 0.1 - - vd.deviceToHostPos(); - if (i % 2 == 0) { + // move particles to CPU and move the particles by 0.1 + + vd.deviceToHostPos(); auto it = vd.getDomainIterator(); @@ -1091,6 +1087,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) } else { + vd.template deviceToHostProp<0,1,2>(); auto it2 = vd.getDomainIterator(); @@ -1118,6 +1115,8 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) ++it2; } + vd.template hostToDeviceProp<0,1,2>(); + ++base; vd.template ghost_get<0,1,2>(RUN_ON_DEVICE | KEEP_PROPERTIES); diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 45fa45e77f4373024c6dcb06641cf284368211fd..15584eea2f1e391ee5b715aac3e0511eefd1a622 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -20,9 +20,6 @@ #include "Vector/util/vector_dist_funcs.hpp" #include "cuda/vector_dist_comm_util_funcs.cuh" -#define SKIP_LABELLING 512 -#define KEEP_PROPERTIES 512 - #define NO_POSITION 1 #define WITH_POSITION 2 #define NO_CHANGE_ELEMENTS 4 @@ -1493,8 +1490,16 @@ public: size_t opt_ = compute_options(opt); if (opt & SKIP_LABELLING) { - op_ssend_gg_recv_merge opm(g_m); - v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + if (opt & RUN_ON_DEVICE) + { + op_ssend_gg_recv_merge_run_device opm(g_m); + v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + } + else + { + op_ssend_gg_recv_merge opm(g_m); + v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + } } else {v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}