diff --git a/openfpm_data b/openfpm_data index ffe4514e58db3f2102debf89fa393f19d2ec70a8..02ef67f834e1f2b36f781f380e436f821c9a7945 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit ffe4514e58db3f2102debf89fa393f19d2ec70a8 +Subproject commit 02ef67f834e1f2b36f781f380e436f821c9a7945 diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 59380a248705abd7837258e2e6ef9e31dfe98ba1..e3dcad616729c2159662ff5dc6f042a426f45adf 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -2190,7 +2190,7 @@ public: * \tparam prp... Properties to synchronize * */ - template<int... prp> void ghost_get() + template<int... prp> void ghost_get(size_t opt = 0) { #ifdef SE_CLASS2 check_valid(this,8); @@ -2217,7 +2217,8 @@ public: use_bx_def, loc_grid, ginfo_v, - g_id_to_external_ghost_box); + g_id_to_external_ghost_box, + opt); } /*! \brief It synchronize the ghost parts diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index ade70484b4c6f669e916c4f26b155c3595eb9195..c3a061d3f6f84c62daed9334927bbb18eb73f463 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -175,6 +175,13 @@ class grid_dist_id_comm //! Memory for the ghost receiving buffer Memory g_recv_prp_mem; + //! send pointers + openfpm::vector<void *> pointers; + openfpm::vector<void *> pointers2; + + //! Receiving option + size_t opt; + /*! \brief Sync the local ghost part * * \tparam prp... properties to sync @@ -339,6 +346,10 @@ class grid_dist_id_comm gd->recv_buffers.last().resize(msg_i); gd->recv_proc.add(i); + + if (gd->opt & RUN_ON_DEVICE) + {return gd->recv_buffers.last().getDevicePointer();} + return gd->recv_buffers.last().getPointer(); } @@ -463,11 +474,15 @@ class grid_dist_id_comm const openfpm::vector<ep_box_grid<dim>> & eg_box, const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box, const openfpm::vector<e_box_multi<dim>> & eb_gid_list, - Unpack_stat & ps) + Unpack_stat & ps, + size_t opt) { // Unpack the ghost box global-id size_t g_id; + // we move from device to host the gid + if (opt & RUN_ON_DEVICE) + {emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));} Unpacker<size_t,mem>::unpack(emem,g_id,ps); size_t l_id = 0; @@ -505,7 +520,7 @@ class grid_dist_id_comm // Unpack loc_grid.get(sub_id).remove(box); - Unpacker<device_grid,mem>::template unpack<decltype(sub2),prp...>(emem,sub2,loc_grid.get(sub_id),ps); + Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getmgpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getmgpuContext()); // Copy the information on the other grid for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++) @@ -533,7 +548,8 @@ class grid_dist_id_comm const std::vector<size_t> & prp_recv, ExtPreAlloc<Memory> & prRecv_prp, const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box, - const openfpm::vector<e_box_multi<dim>> & eb_gid_list) + const openfpm::vector<e_box_multi<dim>> & eb_gid_list, + size_t opt) { if (device_grid::isCompressed() == false) { @@ -555,7 +571,7 @@ class grid_dist_id_comm unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i, eg_box,g_id_to_external_ghost_box,eb_gid_list, - ps); + ps,opt); } } } @@ -576,7 +592,7 @@ class grid_dist_id_comm unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i, eg_box,g_id_to_external_ghost_box,eb_gid_list, - ps); + ps,opt); } } } @@ -952,7 +968,8 @@ public: bool use_bx_def, openfpm::vector<device_grid> & loc_grid, const grid_sm<dim,void> & ginfo, - std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box) + std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box, + size_t opt) { #ifdef PROFILE_SCOREP SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION) @@ -967,6 +984,8 @@ public: send_pointer.clear(); send_size.clear(); + this->opt = opt; + size_t req = 0; // first we initialize the pack buffer on all internal grids @@ -1002,7 +1021,7 @@ public: // Finalize calculation for (size_t i = 0 ; i < loc_grid.size() ; i++) - {loc_grid.get(i).packCalculate(req,v_cl.getmgpuContext());} + {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());} // resize the property buffer memory g_send_prp_mem.resize(req); @@ -1015,12 +1034,21 @@ public: // Pack information Pack_stat sts; + pointers.clear(); + pointers2.clear(); + // Pack the information for each processor and send it for ( size_t i = 0 ; i < ig_box.size() ; i++ ) { sts.mark(); - void * pointer = prAlloc_prp.getPointerEnd(); + + void * pointer; + + if (opt & RUN_ON_DEVICE) + {pointer = prAlloc_prp.getDevicePointerEnd();} + else + {pointer = prAlloc_prp.getPointerEnd();} // for each ghost box for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) @@ -1039,6 +1067,7 @@ public: // Pack a size_t for the internal ghost id Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts); + prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd()); // Create a sub grid iterator spanning the internal ghost layer auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2()); // and pack the internal ghost grid @@ -1046,10 +1075,24 @@ public: } // send the request - void * pointer2 = prAlloc_prp.getPointerEnd(); + void * pointer2; + if (opt & RUN_ON_DEVICE) + {pointer2 = prAlloc_prp.getDevicePointerEnd();} + else + {pointer2 = prAlloc_prp.getPointerEnd();} + + pointers.add(pointer); + pointers2.add(pointer2); + } + + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts);} + + for ( size_t i = 0 ; i < ig_box.size() ; i++ ) + { // This function send (or queue for sending) the information - send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2); + send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i)); } // Calculate the total information to receive from each processors @@ -1064,9 +1107,15 @@ public: queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp); + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).removeCopyReset();} + ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def); - merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list); + merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt); + + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).removeCopyFinalize(v_cl.getmgpuContext());} } /*! \brief It merge the information in the ghost with the diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index 53881df76d3ac5956a56663e483cce928e9e8fa2..8f0a5ced7f530d9620bf1b077d14862063b3de3b 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -207,19 +207,24 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) /////// GPU insert + flush - Box<2,size_t> box({1,1},{1,1}); + Box<2,size_t> box({1,1},{15,15}); auto it = gdist.getGridIterator(box.getKP1(),box.getKP2()); /////// GPU Run kernel - gdist.setInsertBuffer(1); + gdist.setInsertBuffer(225); float c = 5.0; gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c); gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE); -// gdist.template ghost_get<0>(); + gdist.template deviceToHost<0>(); +// gdist.write("broken"); + + gdist.template ghost_get<0>(RUN_ON_DEVICE); } + + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index b095138c1f73a1b3314ee0e1f2e8c12d8c645af5..6694ac553e107c2ca61861a46e561800acfad3fe 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -26,7 +26,6 @@ constexpr int NO_CHANGE_ELEMENTS = 4; constexpr int BIND_DEC_TO_GHOST = 1; -constexpr int RUN_ON_DEVICE = 1024; constexpr int MAP_LOCAL = 2; constexpr int GHOST_SYNC = 0; @@ -1173,6 +1172,9 @@ class vector_dist_comm } else { + // if no properties must be sent skip this step + if (sizeof...(prp) == 0) {return;} + // Fill the send buffer for (size_t i = 0; i < g_opart.size(); i++) { diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in index 3c92ef09cc9df7eb992221ea0d51147772008550..c38415e8e75b5cbc443ac8ed6eafc387f97559e2 100644 --- a/src/config/config_cmake.h.in +++ b/src/config/config_cmake.h.in @@ -163,3 +163,5 @@ ${DEFINE_TEST_COVERAGE_MODE} /* Version number of package */ #define VERSION "1.0.0" + +#define OPENFPM_PDATA