diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index 6c93494f0673132e8750ec5b09487362f10808f8..fa6c0ea2a3fd58de0fa3630b909e8f3e690dab57 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -482,7 +482,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number return; } - //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion. + //-Calculate displacement and update position real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205; real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205; real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205; diff --git a/openfpm_data b/openfpm_data index 02ef67f834e1f2b36f781f380e436f821c9a7945..f8ea1b875c24392c1a6991f4faa000e2981e96d1 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 02ef67f834e1f2b36f781f380e436f821c9a7945 +Subproject commit f8ea1b875c24392c1a6991f4faa000e2981e96d1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1b1f005cdfe2aa810ddc8fe63c8c98d82e047943..909a6a881f3e399548f8da496bd706d6b1f7d54a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -127,7 +127,8 @@ if (NOT APPLE) target_link_libraries(pdata rt) endif () -install(FILES Decomposition/CartDecomposition.hpp +install(FILES Decomposition/CartDecomposition.hpp + Decomposition/Domain_icells_cart.hpp Decomposition/shift_vect_converter.hpp Decomposition/CartDecomposition_ext.hpp Decomposition/common.hpp @@ -185,6 +186,7 @@ install(FILES Vector/util/vector_dist_funcs.hpp install(FILES Vector/cuda/vector_dist_comm_util_funcs.cuh Vector/cuda/vector_dist_cuda_funcs.cuh + Vector/cuda/vector_dist_operators_list_ker.hpp DESTINATION openfpm_pdata/include/Vector/cuda ) install(FILES Graph/ids.hpp Graph/dist_map_graph.hpp diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index 8f0a5ced7f530d9620bf1b077d14862063b3de3b..3ef536e5b9438b34bfcd161d915d78bde45f5224 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -222,7 +222,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) gdist.template deviceToHost<0>(); // gdist.write("broken"); - gdist.template ghost_get<0>(RUN_ON_DEVICE); +// gdist.template ghost_get<0>(RUN_ON_DEVICE); } diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index ea6b20f1b35179d28ce8229ce328f40f6122dccb..e986746e5e7f55afac8a81eea8d366c81e1075fb 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -14,6 +14,7 @@ #include "Decomposition/common.hpp" #include "lib/pdata.hpp" #include "util/cuda/kernels.cuh" +#include "util/cuda/scan_ofp.cuh" template<unsigned int dim, typename St, typename decomposition_type, typename vector_type, typename start_type, typename output_type> __global__ void proc_label_id_ghost(decomposition_type dec,vector_type vd, start_type starts, output_type out) @@ -401,4 +402,60 @@ void remove_marked(vector_type & vd) vd.getPropVector().swap(vd_prp_new); } +template<unsigned int prp, typename functor, typename particles_type, typename out_type> +__global__ void mark_indexes(particles_type vd, out_type out) +{ + auto a = GET_PARTICLE(vd); + + out.template getProp<0>(a) = functor::check(vd.template getProp<prp>(a)) == true; +} + +template<typename out_type, typename ids_type> +__global__ void fill_indexes(out_type scan, ids_type ids) +{ + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= scan.size()-1) {return;} + + auto sp = scan.template get<0>(p); + auto spp = scan.template get<0>(p+1); + + if (sp != spp) + ids.template get<0>(scan.template get<0>(p)) = p; +} + +/*! \brief get the particle index that satify the functor condition + * + * This function can be used to collect the indexes of the particles of a particular type. + * Write a functor that return true when a particle of a particular type is identified + * and ids will contain the indexes for which the functor return true. + * + * \tparam prp property to pass to the functor + * + * \param vd distributed vector + * + */ +template<typename functor, typename vector_type, typename ids_type> +void get_indexes_sorted(vector_type & vd, ids_type & ids, mgpu::ofp_context_t & context) +{ + // first we do a scan of the property + openfpm::vector_gpu<aggregate<unsigned int>> scan; + + scan.setMemory(mem_tmp); + scan.resize(vd.size_local_with_ghost()+1); + + auto ite = scan.getGPUIterator(); + + CUDA_LAUNCH(mark_indexes,ite,vd.toKernel(),scan.toKernel()); + + openfpm::scan(scan.template getDeviceBuffer<0>(),scan.size(),scan.template getDeviceBuffer<0>(),context); + + // get the number of marked particles + scan.template deviceToHost<0>(scan.size()-1,scan.size()-1); + size_t nf = scan.template get<0>(scan.size()-1); + ids.resize(nf); + + CUDA_LAUNCH(fill_indexes,ite,scan.toKernel(),ids.toKernel()); +} + #endif /* VECTOR_DIST_CUDA_FUNCS_CUH_ */ diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index 475bbbd446bcd5e298c47491a0995d824527cb67..daa90b634e84612172478344a6809ec7a17bee14 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -1488,6 +1488,104 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) } } +BOOST_AUTO_TEST_CASE(vector_dist_get_index_set) +{ + Box<3,double> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + Ghost<3,double> g(0.1); + size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; + + if (create_vcluster().size() >= 16) + {return;} + + vector_dist_gpu<3,double,aggregate<int,double>> vdg(10000,domain,bc,g,DEC_GRAN(128)); + + auto it = vdg.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vdg.getPos(p)[0] = (double)rand() / RAND_MAX; + vdg.getPos(p)[1] = (double)rand() / RAND_MAX; + vdg.getPos(p)[2] = (double)rand() / RAND_MAX; + + vdg.template getProp<0>(p) = (int)((double)rand() / RAND_MAX / 0.5); + + vdg.template getProp<1>(p) = (double)rand() / RAND_MAX; + + ++it; + } + + vdg.map(); + + vdg.hostToDeviceProp<0,1>(); + vdg.hostToDevicePos(); + +/* bool test = vdg.compareHostAndDevicePos(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + vdg.getPos(100)[0] = 0.99999999; + + test = vdg.compareHostAndDevicePos(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,false); + + vdg.hostToDevicePos(); + vdg.getPos(100)[0] = 0.99999999; + + test = vdg.compareHostAndDevicePos(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + ////////////////////////////////////////////////// PROP VECTOR + + test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + vdg.getProp<1>(103)[0] = 0.99999999; + + test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,false); + + vdg.hostToDeviceProp<1>(); + vdg.getProp<1>(103)[0] = 0.99999999; + + test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + ////////////////////////////////////////////////// PROP scalar + + + test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + vdg.getProp<0>(105) = 0.99999999; + + test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,false); + + vdg.hostToDeviceProp<0>(); + vdg.getProp<0>(105) = 0.99999999; + + test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + + ////////////////////////////////////////////////// PROP scalar + + + test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true); + + vdg.getProp<2>(108)[1][2] = 0.99999999; + + test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,false); + + vdg.hostToDeviceProp<2>(); + vdg.getProp<2>(108)[1][2] = 0.99999999; + + test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001); + BOOST_REQUIRE_EQUAL(test,true);*/ +} BOOST_AUTO_TEST_CASE(vector_dist_compare_host_device) { diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 730a9c752ffb63699ab02ca5ba8c98a465d4590d..73c6c4f7963e4c8a94b60a75f0fe34038a3786b9 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -2017,6 +2017,20 @@ public: return v_pos.getGPUIteratorTo(g_m,n_thr); } + /*! \brief Get an iterator that traverse the particles in the domain + * + * \return an iterator + * + */ + ite_gpu<1> getDomainAndGhostIteratorGPU(size_t n_thr = 1024) const + { +#ifdef SE_CLASS3 + se3.getIterator(); +#endif + + return v_pos.getGPUIteratorTo(size_local(),n_thr); + } + /*! \brief Merge the properties calculated on the sorted vector on the original vector * * \parameter Cell-list from which has been constructed the sorted vector