diff --git a/openfpm_data b/openfpm_data index 151921ecf7d72831724e2a2faed2c1a4ec1b8984..34d9f7e2d5816486416544727369312f77dad4d9 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 151921ecf7d72831724e2a2faed2c1a4ec1b8984 +Subproject commit 34d9f7e2d5816486416544727369312f77dad4d9 diff --git a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh index 1ae16b0d563a60a19de853058f069cc9af3723f5..4c1e87f4f02da17e2c293557b4f671b96632c6e1 100644 --- a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh +++ b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh @@ -8,6 +8,8 @@ #ifndef VECTOR_DIST_COMM_UTIL_FUNCS_HPP_ #define VECTOR_DIST_COMM_UTIL_FUNCS_HPP_ +#include "util/cuda/scan_ofp.cuh" + #define SKIP_LABELLING 512 #define KEEP_PROPERTIES 512 @@ -92,7 +94,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru // scan //sc.scan_(proc_id_out,starts); starts.resize(proc_id_out.size()); - mgpu::scan((unsigned int *)proc_id_out.template getDeviceBuffer<0>(), proc_id_out.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); + openfpm::scan((unsigned int *)proc_id_out.template getDeviceBuffer<0>(), proc_id_out.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); starts.template deviceToHost<0>(starts.size()-1,starts.size()-1); size_t sz = starts.template get<0>(starts.size()-1); @@ -265,7 +267,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true> box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),g_m); starts.resize(o_part_loc.size()); - mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); + openfpm::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); starts.template deviceToHost<0>(starts.size()-1,starts.size()-1); size_t total = starts.template get<0>(starts.size()-1); diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index 8d49cd59b78ed35e22f6d78c31dff433addd95c3..799cc528f1ebc58444f1c60ffe51d77dfa3153e4 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -11,6 +11,7 @@ #include "util/cuda/scan_cuda.cuh" #include "util/cuda/moderngpu/kernel_scan.hxx" #include "Vector/vector_dist.hpp" +#include "util/cuda/scan_ofp.cuh" #define SUB_UNIT_FACTOR 1024 @@ -129,7 +130,7 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles ) starts.resize(o_part_loc.size()); auto & v_cl = create_vcluster(); - mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); + openfpm::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); starts.deviceToHost<0>(starts.size()-1,starts.size()-1); size_t tot = starts.template get<0>(o_part_loc.size()-1); diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 7c30dbd3c8fe31e1d9f9361244c6931c47bea48f..050fc8af2d2123d1072c2b19955efd81e1ddfc36 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -18,6 +18,7 @@ #include "Vector/util/vector_dist_funcs.hpp" #include "cuda/vector_dist_comm_util_funcs.cuh" +#include "util/cuda/scan_ofp.cuh" #define NO_POSITION 1 #define WITH_POSITION 2 @@ -1139,7 +1140,7 @@ class vector_dist_comm #else starts.resize(v_cl.size()); - mgpu::scan((unsigned int *)prc_sz.template getDeviceBuffer<0>(), prc_sz.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); + openfpm::scan((unsigned int *)prc_sz.template getDeviceBuffer<0>(), prc_sz.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); // move prc_sz to host prc_sz.template deviceToHost<0>();