diff --git a/openfpm_data b/openfpm_data index 6f81e335b5de00f03ad517e056925ea4ec52392f..9fa3b84aa906f694ccc410f6a8536ba77a6437be 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 6f81e335b5de00f03ad517e056925ea4ec52392f +Subproject commit 9fa3b84aa906f694ccc410f6a8536ba77a6437be diff --git a/openfpm_vcluster b/openfpm_vcluster index a99918127f5835c31d2df4e9020efdeb46d07d66..faa1d114c2d13e562d200c92e98c1ed7be306eeb 160000 --- a/openfpm_vcluster +++ b/openfpm_vcluster @@ -1 +1 @@ -Subproject commit a99918127f5835c31d2df4e9020efdeb46d07d66 +Subproject commit faa1d114c2d13e562d200c92e98c1ed7be306eeb diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index fd5155bcb3a9d2e6f373758224992dd8eb8259e7..6135c43db9c9e1cbc251ad8df2fe1d37f9c036af 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -130,11 +130,17 @@ struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, S * \tparam prop properties the vector element store in OpenFPM data structure format * \tparam Decomposition Decomposition strategy to use CartDecomposition ... * \tparam Memory Memory pool where store the information HeapMemory ... + * \tparam layout_base Memory layout policy template (memory_traits_lin by default) * */ -template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> -class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory> +template<unsigned int dim, + typename St, + typename prop, + typename Decomposition = CartDecomposition<dim,St>, + typename Memory = HeapMemory, + template<typename> class layout_base = memory_traits_lin> +class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> { public: @@ -151,11 +157,11 @@ private: //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! 
the second element contain unassigned particles - openfpm::vector<Point<dim, St>> v_pos; + openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> v_pos; //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<prop> v_prp; + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> v_prp; //! Virtual cluster Vcluster & v_cl; @@ -2115,4 +2121,6 @@ public: }; +template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>> using vector_dist_gpu = vector_dist<dim,St,prop,Decomposition,CudaMemory,memory_traits_inte>; // alias: vector_dist with Memory = CudaMemory and layout_base = memory_traits_inte + #endif /* VECTOR_HPP_ */ diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 758f51c070f79820ee183fc4cc42f760f0df9d34..b6acc16b43d927fdefe64581451f9dea404dc655 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -42,7 +42,12 @@ inline static size_t compute_options(size_t opt) * */ -template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> +template<unsigned int dim, + typename St, + typename prop, + typename Decomposition = CartDecomposition<dim,St>, + typename Memory = HeapMemory, + template<typename> class layout_base = memory_traits_lin> class vector_dist_comm { //! 
Number of units for each sub-domain @@ -578,7 +583,11 @@ class vector_dist_comm * \param m_prp sending buffer for properties * */ - void fill_send_map_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos, openfpm::vector<openfpm::vector<prop>> & m_prp) + void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + openfpm::vector<size_t> & prc_sz_r, + openfpm::vector<openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>> & m_pos, + openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>> & m_prp) { m_prp.resize(prc_sz_r.size()); m_pos.resize(prc_sz_r.size()); @@ -658,7 +667,9 @@ class vector_dist_comm * \param prc_sz For each processor the number of particles to send * */ - template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<aggregate<size_t,size_t,size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz) + template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos, + openfpm::vector<aggregate<size_t,size_t,size_t>> & lbl_p, + openfpm::vector<size_t> & prc_sz) { // reset lbl_p lbl_p.clear(); @@ -969,10 +980,10 @@ public: { size_t opt_ = compute_options(opt); op_ssend_gg_recv_merge opm(g_m); - v_cl.SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + v_cl.SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); } else - 
v_cl.SSendRecvP<send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte); + v_cl.SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte); // fill g_opart_sz g_opart_sz.resize(prc_g_opart.size()); @@ -1076,7 +1087,7 @@ public: fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_sz_r, m_pos, m_prp); v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map); - v_cl.SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); + v_cl.SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); // mark the ghost part @@ -1096,7 +1107,9 @@ public: * \param g_m ghost marker * */ - template<typename obp = KillParticle> void map_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + template<typename obp = KillParticle> + void map_(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, size_t & g_m) { // Processor communication size openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); @@ -1125,14 +1138,21 @@ public: } //! position vector - openfpm::vector<openfpm::vector<Point<dim, St>>> m_pos; + openfpm::vector<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>> m_pos; //! 
properties vector - openfpm::vector<openfpm::vector<prop>> m_prp; + openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>> m_prp; fill_send_map_buf(v_pos,v_prp, prc_sz_r, m_pos, m_prp); - v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map); - v_cl.SSendRecv(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); + v_cl.SSendRecv<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>, + openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>, + layout_base> + (m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map); + + v_cl.SSendRecv<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>, + layout_base> + (m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); // mark the ghost part @@ -1215,12 +1235,12 @@ public: size_t opt_ = compute_options(opt); op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); + v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); } else { op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); + v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); } // process also the local replicated particles diff --git a/src/Vector/vector_dist_gpu_unit_tests.cpp b/src/Vector/vector_dist_gpu_unit_tests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3853ec1cd9e574fb1ed448b381df0af8e6ba02d9 --- /dev/null +++ b/src/Vector/vector_dist_gpu_unit_tests.cpp @@ -0,0 +1,43 @@ + +#define BOOST_TEST_DYN_LINK 
+#include <boost/test/unit_test.hpp> +#include "VCluster/VCluster.hpp" +#include <Vector/vector_dist.hpp> + +BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test ) + +void print_test(std::string test, size_t sz) +{ + if (create_vcluster().getProcessUnitID() == 0) + std::cout << test << " " << sz << "\n"; +} + +BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) +{ + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Ghost extension of 0.01 (particle positions below are random, so no grid spacing is involved) + Ghost<3,float> g(0.01); + + // Non-periodic boundary conditions in all three dimensions + size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + vector_dist_gpu<3,float,aggregate<float,float[3]>> vd(1000,domain,bc,g); + + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (float)rand() / RAND_MAX; + vd.getPos(p)[1] = (float)rand() / RAND_MAX; + vd.getPos(p)[2] = (float)rand() / RAND_MAX; + + ++it; + } + + vd.map(); +} + +BOOST_AUTO_TEST_SUITE_END()