From 1c2c55e3f837948f7534899af5ac76aac282c104 Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Mon, 3 Apr 2023 00:58:46 +0200 Subject: [PATCH] Fix for non dense --- openfpm_devices | 2 +- src/Grid/grid_dist_id.hpp | 94 ++--- src/Grid/grid_dist_id_comm.hpp | 343 +++++++++++++++--- src/Grid/tests/grid_dist_id_dlb_unit_test.cpp | 6 + .../tests/sgrid_dist_id_gpu_unit_tests.cu | 164 ++++----- 5 files changed, 426 insertions(+), 183 deletions(-) diff --git a/openfpm_devices b/openfpm_devices index fc5ba1035..42f6b016d 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit fc5ba1035ac72a3c083ab1bf8a3cb60639e0c241 +Subproject commit 42f6b016d3d2045fcabf906cf8ea07c36b484fcc diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 0d24f5184..e788fe9cc 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -103,6 +103,28 @@ struct copy_all_prop_sparse } }; +template<typename T> +struct variadic_caller +{}; + +template<int ... prp> +struct variadic_caller<index_tuple_sq<prp ...>> +{ + template<typename this_type, typename dec_type, typename cd_sm_type, typename loc_grid_type, typename gdb_ext_type> + static void call(this_type * this_, + dec_type & dec, + cd_sm_type & cd_sm, + loc_grid_type & loc_grid, + loc_grid_type & loc_grid_old, + gdb_ext_type & gdb_ext, + gdb_ext_type & gdb_ext_old, + gdb_ext_type & gdb_ext_global, + size_t opt) + { + this_->template map_<prp ...>(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,opt); + } +}; + struct ids_pl { size_t id; @@ -133,61 +155,10 @@ struct device_grid_copy<true> boost::mpl::for_each_ref< boost::mpl::range_c<int,0,decltype(block_data_dst)::max_prop> >(cp); } -/* - * - * - auto & v_cl = create_vcluster(); - auto & dg = loc_grid.get(0); - - typedef typename std::remove_reference<decltype(dg)>::type sparse_grid_type; - grid_key_dx<sparse_grid_type::dims> kp1 = gdb_ext.get(0).Dbox.getKP1(); - - for (int j = 0 ; j < std::ceil(gdb_ext_old.size() / 32) + 1 ; j++) - { - int jbase = j*32; - int sz = gdb_ext_old.size() - jbase; - if (sz < 0) {break;} - else if (sz > 32) {sz = 32;} - dg.copyRemoveReset(); - - std::cout << "JBASE: " << jbase << " SZ: " << sz << std::endl; - - for (int i = 0 ; i < sz ; i++) - { - Box<sparse_grid_type::dims,long int> bx_dst; - Box<sparse_grid_type::dims,long int> bx_src; - for (int k = 0 ; k < sparse_grid_type::dims ; k++) - { - bx_dst.setLow(k,kp1.get(k) + gdb_ext_old.get(i+jbase).origin.get(k) + gdb_ext_old.get(i+jbase).Dbox.getKP1().get(k)); - bx_dst.setHigh(k,kp1.get(k) + gdb_ext_old.get(i+jbase).origin.get(k) + gdb_ext_old.get(i+jbase).Dbox.getKP2().get(k)); - - bx_src.setLow(k,gdb_ext_old.get(i+jbase).Dbox.getKP1().get(k)); - bx_src.setHigh(k,gdb_ext_old.get(i+jbase).Dbox.getKP2().get(k)); - } - - std::cout << jbase << " " << i << " SRC:" << gdb_ext_old.get(i+jbase).Dbox.toString() << " DST: " << bx_dst.toString() << " INDEX BUFFER: " << loc_grid_old.get(jbase + i).private_get_index_array().size() << std::endl; - loc_grid_old.get(jbase + i).template hostToDevice<0>(); - dg.copy_to(loc_grid_old.get(jbase + i),gdb_ext_old.get(jbase +i).Dbox,bx_dst); - } - for (int i = 0 ; i < sz ; i++) - { - std::cout << "FINALIZE1" << std::endl; - loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1); - std::cout << "FINALIZE2" << std::endl; - loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2); - std::cout << "FINALIZE3" << std::endl; - loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3); - } - } - - * - * - */ - - template<typename gdb_ext_type, typename loc_grid_type> - static void pre_load(gdb_ext_type & gdb_ext_old, loc_grid_type & loc_grid_old, gdb_ext_type & gdb_ext, loc_grid_type & loc_grid) + template<typename gdb_ext_type, typename loc_grid_type> + static void pre_load(gdb_ext_type & gdb_ext_old, loc_grid_type & loc_grid_old, gdb_ext_type & gdb_ext, loc_grid_type & loc_grid) { auto & dg = loc_grid.get(0); auto dlin = dg.getGrid(); @@ -3475,7 +3446,12 @@ public: getGlobalGridsInfo(gdb_ext_global); - this->map_(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global); + + typedef typename to_int_sequence<0,T::max_prop-1>::type result; + + //variadic_caller<result>::call(this,dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,opt); + + this->template map_<0>(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,opt); loc_grid_old.clear(); loc_grid_old.shrink_to_fit(); @@ -3513,8 +3489,14 @@ public: if (v_cl.size() != 1) { + // move information from host to device + for (int i = 0 ; i < loc_grid_old.size() ; i++) + {loc_grid_old.get(i).template hostToDevice<0>();} // Map the distributed grid - map(NO_GDB_EXT_SWITCH); + size_t opt_ = NO_GDB_EXT_SWITCH; + if (std::is_same<Memory,CudaMemory>::value == true) + {opt_ |= RUN_ON_DEVICE;} + map(opt_); } else { @@ -3544,6 +3526,8 @@ public: ++it_src; } + + dg.template hostToDevice<0>(); } } } diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index d9312ef5e..9be3e3b8d 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -13,6 +13,7 @@ #include "grid_dist_util.hpp" #include "util/common_pdata.hpp" #include "lib/pdata.hpp" +#include "Grid/grid_common.hpp" /*! \brief Unpack selector @@ -166,6 +167,7 @@ class grid_dist_id_comm struct rp_id { int p_id; + int size; int i; bool operator<(const rp_id & tmp) const @@ -182,6 +184,7 @@ class grid_dist_id_comm //! first id is grid //! second id is the processor id openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid; + openfpm::vector<int> m_oGrid_c; //! Memory for the ghost sending buffer Memory g_send_prp_mem; @@ -393,10 +396,13 @@ class grid_dist_id_comm gd->recv_buffers.last().resize(msg_i); gd->recv_proc.add(); gd->recv_proc.last().p_id = i; + gd->recv_proc.last().size = msg_i; gd->recv_proc.last().i = gd->recv_proc.size()-1; if (gd->opt & RUN_ON_DEVICE) - {return gd->recv_buffers.last().getDevicePointer();} + { + return gd->recv_buffers.last().getDevicePointer(); + } return gd->recv_buffers.last().getPointer(); } @@ -935,6 +941,26 @@ class grid_dist_id_comm } } + int find_local_sub(Box<dim, long int> & box_dst, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext) + { + Point<dim,long int> point; + for (size_t n = 0; n < dim; n++) + {point.get(n) = (box_dst.getHigh(n) + box_dst.getLow(n))/2;} + + for (size_t j = 0; j < gdb_ext.size(); j++) + { + // Local sub-domain + SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox; + sub += gdb_ext.get(j).origin; + + if (sub.isInside(point) == true) + { + return j; + } + } + return -1; + } + public: /*! \brief Reconstruct the local grids @@ -999,6 +1025,30 @@ public: } } } + + std::cout << "UNPACKING " << std::endl; + + for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++) + { + for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++) + { + m_oGrid_recv.get(i).template get<0>(j).template deviceToHost<0>(); + std::cout << "UNPACKING POINTS: " << m_oGrid_recv.get(i).template get<0>(j).size() << std::endl; + m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1); + } + } + + for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++) + { + for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++) + {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2);} + } + + for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++) + { + for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++) + {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3);} + } } /*! \brief Label intersection grids for mappings @@ -1013,19 +1063,66 @@ public: * \param prc_sz For each processor the number of grids to send to * */ - inline void labelIntersectionGridsProcessor(Decomposition & dec, + template<typename lambda_t> + inline void labelIntersectionGridsProcessor_and_pack(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global, - openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & lbl_b, - openfpm::vector<size_t> & prc_sz) + size_t p_id_cur, + lambda_t f) { - lbl_b.clear(); + // lbl_bc.clear(); + // lbl_bc.resize(v_cl.getProcessingUnits()); + + // for (int i = 0 ; i < lbl_bc.size() ; i++) + // {lbl_bc.get(i) = 0;} + + // // count + + // for (size_t i = 0; i < gdb_ext_old.size(); i++) + // { + // // Local old sub-domain in global coordinates + // SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox; + // sub_dom += gdb_ext_old.get(i).origin; + + // for (size_t j = 0; j < gdb_ext_global.size(); j++) + // { + // size_t p_id = 0; + + // // Intersection box + // SpaceBox<dim,long int> inte_box; + + // // Global new sub-domain in global coordinates + // SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox; + // sub_dom_new += gdb_ext_global.get(j).origin; + + // bool intersect = false; + + // if (sub_dom.isValid() == true && sub_dom_new.isValid() == true) + // intersect = sub_dom.Intersect(sub_dom_new, inte_box); + + // if (intersect == true) + // { + // auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box); + + // // Get processor ID that store intersection box + // Point<dim,St> p; + // for (size_t n = 0; n < dim; n++) + // p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2; + + // p_id = dec.processorID(p); + + // lbl_bc.get(p_id) += 1; + // } + // } + // } + + // // reserve + // for (int i = 0 ; i < lbl_b.size() ; i++) + // {lbl_b.get(i).reserve(lbl_bc.get(i));} - // resize the label buffer - lbl_b.resize(v_cl.getProcessingUnits()); // Label all the intersection grids with the processor id where they should go @@ -1061,7 +1158,9 @@ public: p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2; p_id = dec.processorID(p); - prc_sz.get(p_id)++; + if (p_id != p_id_cur) + {continue;} +// prc_sz.get(p_id)++; // Transform coordinates to local auto inte_box_local = inte_box; @@ -1080,8 +1179,12 @@ public: } // Grid to send - device_grid gr_send(sz); - gr_send.setMemory(); + //device_grid gr_send(sz); + //gr_send.setMemory(); + // lbl_b.get(p_id).add(); + // device_grid & gr_send = lbl_b.get(p_id).last().template get<0>(); + // SpaceBox<dim,long int> & box_send = lbl_b.get(p_id).last().template get<1>(); + // gr_send.setMemory(); // Sub iterator across intersection box inside local grid grid_key_dx<dim> start = inte_box_local.getKP1(); @@ -1094,22 +1197,76 @@ public: { box_src.setLow(i,start.get(i)); box_src.setHigh(i,stop.get(i)); - box_dst.setLow(i,0); - box_dst.setHigh(i,stop.get(i)-start.get(i)); + box_dst.setLow(i,inte_box.getLow(i)); + box_dst.setHigh(i,inte_box.getHigh(i)); } - gr_send.copy_to(gr,box_src,box_dst); + f(box_src,box_dst,gr,p_id); + } + } + } - aggregate<device_grid,SpaceBox<dim,long int>> aggr; +/* for (size_t i = 0 ; i < loc_grid_old.size() ; i++) + { + loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1); + } - aggr.template get<0>() = gr_send; - aggr.template get<1>() = inte_box; + for (size_t i = 0 ; i < loc_grid_old.size() ; i++) + { + loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2); + } - // Add to the labeling vector - lbl_b.get(p_id).add(aggr); - } + for (size_t i = 0 ; i < loc_grid_old.size() ; i++) + { + loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3); + }*/ + } + + /*! \brief Unpack + * + * + * + */ + template<int ... prp> + void unpack_buffer_to_local_grid(openfpm::vector<device_grid> & loc_grid, + openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, + ExtPreAlloc<Memory> & send_buffer, + size_t sz) + { + // unpack local + Unpack_stat ps; + + while (ps.getOffset() < sz) + { + send_buffer.reset(); + + Box<dim,long int> box_dst; + send_buffer.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(Box<dim,long int>)); + Unpacker<Box<dim,long int>,Memory>::unpack(send_buffer,box_dst,ps); + + std::cout << "BOX DEST: " << box_dst.toString() << " || " << ps.getOffset() << " " << sz << std::endl; + int s = find_local_sub(box_dst,gdb_ext); + if (s == -1) + {std::cout << __FILE__ << ":" << __LINE__ << " map, error non-local subdomain " << std::endl;} + + // convert box_dst to local + for (int d = 0 ; d < dim ; d++ ) + { + box_dst.setLow(d, box_dst.getLow(d) - gdb_ext.get(s).origin.get(d)); + box_dst.setHigh(d, box_dst.getHigh(d) - gdb_ext.get(s).origin.get(d)); } + + loc_grid.get(s).remove(box_dst); + auto sub2 = loc_grid.get(s).getIterator(box_dst.getKP1(),box_dst.getKP2(),0); + Unpacker<device_grid,Memory>::template unpack<decltype(sub2),decltype(v_cl.getmgpuContext()),prp ...>(send_buffer,sub2,loc_grid.get(s),ps,v_cl.getmgpuContext(),NONE_OPT); + + std::cout << "END OFFSET" << ps.getOffset() << " " << sz << std::endl; } + + std::cout << "FINISHING UNPACK" << std::endl; + + for (int s = 0 ; s < loc_grid.size() ; s++) + {loc_grid.get(s).template removeAddUnpackFinalize<prp ...>(v_cl.getmgpuContext(),0);} } /*! \brief Moves all the grids that does not belong to the local processor to the respective processor @@ -1125,54 +1282,148 @@ public: * \param gdb_ext_global it contain the decomposition at global level * */ + template<int ... prp> void map_(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, - openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global) + openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global, + size_t opt) { - // Processor communication size - openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); + this->opt = opt; + + openfpm::vector<size_t> send_buffer_sizes(v_cl.getProcessingUnits()); + openfpm::vector<Memory> send_buffers_; + openfpm::vector<ExtPreAlloc<Memory>> send_buffers; + send_buffers_.resize(v_cl.getProcessingUnits()); + send_buffers.resize(v_cl.getProcessingUnits()); + + send_prc_queue.clear(); + send_pointer.clear(); + send_size.clear(); + + for (int p_id = 0 ; p_id < v_cl.getProcessingUnits() ; p_id++) + { + for (int i = 0 ; i < loc_grid_old.size() ; i++) + {loc_grid_old.get(i).packReset();} + + auto l = [&](Box<dim,long int> & box_src, + Box<dim,long int> & box_dst, + device_grid & gr, + size_t p_id){ + //gr_send.copy_to(gr,box_src,box_dst); + + Packer<SpaceBox<dim,long int>,BMemory<Memory>>::packRequest(box_dst,send_buffer_sizes.get(p_id)); + + auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0); + gr.template packRequest<prp ...>(sub_it,send_buffer_sizes.get(p_id)); + + //box_send = inte_box; + }; + + // Contains the processor id of each box (basically where they have to go) + labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,l); + + for (int i = 0 ; i < loc_grid_old.size(); i++) + { + loc_grid_old.get(i).template packCalculate<prp ...>(send_buffer_sizes.get(p_id),v_cl.getmgpuContext()); + } + + std::cout << "Buffer " << p_id << " " << send_buffer_sizes.get(p_id) << std::endl; + + send_buffers_.get(p_id).resize(send_buffer_sizes.get(p_id)); + send_buffers.get(p_id).setMemory(send_buffer_sizes.get(p_id),send_buffers_.get(p_id)); + send_buffers.get(p_id).incRef(); + + // we now pack + Pack_stat sts; + + auto lp = [&](Box<dim,long int> & box_src, + Box<dim,long int> & box_dst, + device_grid & gr, + size_t p_id){ + + // Print box_dst + std::cout << "Box dst: " << box_dst.toString() << std::endl; + + size_t offset = send_buffers.get(p_id).getOffsetEnd(); + Packer<Box<dim,long int>,Memory>::pack(send_buffers.get(p_id),box_dst,sts); + size_t offset2 = send_buffers.get(p_id).getOffsetEnd(); - // Contains the processor id of each box (basically where they have to go) - labelIntersectionGridsProcessor(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,m_oGrid,prc_sz); + send_buffers.get(p_id).hostToDevice(offset,offset2); - // Calculate the sending buffer size for each processor, put this information in - // a contiguous buffer - p_map_req.resize(v_cl.getProcessingUnits()); + auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0); - // Vector of number of sending grids for each involved processor - openfpm::vector<size_t> prc_sz_r; - // Vector of ranks of involved processors - openfpm::vector<size_t> prc_r; + Packer<device_grid,Memory>::template pack<decltype(sub_it),prp ...>(send_buffers.get(p_id),gr,sub_it,sts); + + std::cout << "SB: " << send_buffers.get(p_id).getOffsetEnd() << std::endl; + }; + + // Contains the processor id of each box (basically where they have to go) + labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,lp); + + for (int i = 0 ; i < loc_grid_old.size() ; i++) + { + loc_grid_old.get(i).template packFinalize<prp ...>(send_buffers.get(p_id),sts,0,false); + } + } - for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + // std::cout << "Local buffer: " << send_buffers.get(v_cl.rank()).size() << std::endl; + // int sz = send_buffers.get(v_cl.rank()).size(); + //send_buffers.get(v_cl.rank()).reset(); + + // // Print all the byte in send_buffers_ + // for (int j = 0 ; j < 16 && j < sz ; j++) { + // std::cout << "Local buffer " << v_cl.rank() << " " << ((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << " " << &((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << std::endl; + // } + + unpack_buffer_to_local_grid(loc_grid,gdb_ext,send_buffers.get(v_cl.rank()),send_buffers.get(v_cl.rank()).size()); + + //openfpm::vector<void *> send_pointer; + //openfpm::vector<int> send_size; + for (int i = 0 ; i < send_buffers.size() ; i++) { - if (m_oGrid.get(i).size() != 0) + if (i != v_cl.rank()) { - p_map_req.get(i) = prc_r.size(); - prc_r.add(i); - prc_sz_r.add(m_oGrid.get(i).size()); + send_pointer.add(send_buffers_.get(i).getDevicePointer()); + send_size.add(send_buffers_.get(i).size()); + send_prc_queue.add(i); + + // Print sending buffer size + std::cout << "Sending to " << i << " " << send_buffers_.get(i).size() << " bytes" << std::endl; } } - decltype(m_oGrid) m_oGrid_new; - for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + size_t * send_size_ptr = NULL; + size_t * send_prc_queue_ptr = NULL; + void ** send_pointer_ptr = NULL; + + if (send_size.size() != 0) { - if (m_oGrid.get(i).size() != 0) - {m_oGrid_new.add(m_oGrid.get(i));} + send_size_ptr = &send_size.get(0); + send_pointer_ptr = &send_pointer.get(0); + send_prc_queue_ptr = &send_prc_queue.get(0); } - // Vector for receiving of intersection grids - openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid_recv; + recv_buffers.clear(); + recv_proc.clear(); + + v_cl.sendrecvMultipleMessagesNBX(send_pointer.size(),send_size_ptr, + send_prc_queue_ptr,send_pointer_ptr, + receive_dynamic,this); + - // Send and recieve intersection grids - v_cl.SSendRecv(m_oGrid_new,m_oGrid_recv,prc_r,prc_recv_map,recv_sz_map); + for (int i = 0 ; i < recv_buffers.size() ; i++) + { + ExtPreAlloc<Memory> prAlloc_; + prAlloc_.setMemory(recv_buffers.get(i).size(),recv_buffers.get(i)); + unpack_buffer_to_local_grid<prp ...>(loc_grid,gdb_ext,prAlloc_,recv_proc.get(i).size); + } - // Reconstruct the new local grids - grids_reconstruct(m_oGrid_recv,loc_grid,gdb_ext,cd_sm); + for (int i = 0 ; i < send_buffers.size() ; i++) + {send_buffers.get(i).decRef();} } /*! \brief It fill the ghost part of the grids diff --git a/src/Grid/tests/grid_dist_id_dlb_unit_test.cpp b/src/Grid/tests/grid_dist_id_dlb_unit_test.cpp index bad7d6e1b..20ab1d307 100644 --- a/src/Grid/tests/grid_dist_id_dlb_unit_test.cpp +++ b/src/Grid/tests/grid_dist_id_dlb_unit_test.cpp @@ -102,6 +102,12 @@ void test_vector_grid_dlb() check &= gdist.template get<1>(p) == gkey.get(1); check &= gdist.template get<2>(p) == gkey.get(2); + if (check == false) + { + std::cout << "Error " << gdist.template get<0>(p) << " != " << gkey.get(0) << " " << gdist.template get<1>(p) << " != " << gkey.get(1) << " " << gdist.template get<2>(p) << " != " << gkey.get(2) << std::endl; + break; + } + ++it; } diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index 830ca9bdb..e80a49d3f 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -190,6 +190,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output ) template<typename grid, typename box_type> void check_sgrid(grid & gdist2, box_type & box, float c) { + size_t n_point = 0; bool match = true; auto it2 = gdist2.getDomainIterator(); @@ -216,7 +217,7 @@ void check_sgrid(grid & gdist2, box_type & box, float c) if (match == false) { std::cout << gdist2.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl; - std::cout << "1 " << p_xp1.getKey().toPoint().to_string() << " " << &gdist2.template get<0>(p_xp1) << std::endl; + std::cout << "1 " << key_xp1.to_string() << " " << p_xp1.getKey().toPoint().to_string() << " " << &gdist2.template get<0>(p_xp1) << std::endl; break; } } @@ -257,10 +258,87 @@ void check_sgrid(grid & gdist2, box_type & box, float c) } } + n_point++; + + ++it2; + } + + auto & v_cl = create_vcluster(); + + v_cl.sum(n_point); + v_cl.execute(); + + BOOST_REQUIRE_EQUAL(match,true); + BOOST_REQUIRE_EQUAL(n_point,350*350); +} + +template<typename grid, typename box_type> +void check_sgrid_no_ghost(grid & gdist2, box_type & box, float c) +{ + size_t n_point = 0; + bool match = true; + auto it2 = gdist2.getDomainIterator(); + + while (it2.isNext()) + { + auto p = it2.get(); + + auto key = it2.getGKey(p); + + match &= gdist2.template get<0>(p) == c + key.get(0) + key.get(1); + + if (match == false) + { + std::cout << gdist2.template get<0>(p) << " " << c + key.get(0) + key.get(1) << std::endl; + std::cout << "1 " << key.to_string() << " " << p.getKey().toPoint().to_string() << " " << &gdist2.template get<0>(p) << std::endl; + break; + } + + n_point++; + ++it2; } + auto & v_cl = create_vcluster(); + + v_cl.sum(n_point); + v_cl.execute(); + BOOST_REQUIRE_EQUAL(match,true); + BOOST_REQUIRE_EQUAL(n_point,350*350); +} + +BOOST_AUTO_TEST_CASE( sgrid_gpu_test_load_from_file ) +{ + auto & v_cl = create_vcluster(); + + float c = 5.0; + + if (v_cl.size() > 8){return;} + + size_t sz[2] = {370,370}; + periodicity<2> bc = {PERIODIC,PERIODIC}; + + Ghost<2,long int> g(1); + + Box<2,float> domain({0.0,0.0},{1.0,1.0}); + + /////// GPU insert + flush + + Box<2,size_t> box({1,1},{350,350}); + + // Now load + + sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist2(sz,domain,g,bc); + + gdist2.load("test_data/sgrid_gpu_output_hdf5"); + gdist2.deviceToHost<0,1>(); + check_sgrid_no_ghost(gdist2,box,c); + gdist2.template hostToDevice<0>(); + gdist2.template ghost_get<0,1>(RUN_ON_DEVICE); + + gdist2.deviceToHost<0,1>(); + check_sgrid(gdist2,box,c); } BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) @@ -291,7 +369,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) float c = 5.0; - gdist.addPoints([] __device__ (int i, int j) + gdist.addPoints(box.getKP1(),box.getKP2(),[] __device__ (int i, int j) { return true; }, @@ -305,98 +383,22 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE); gdist.template deviceToHost<0>(); - gdist.save("sgrid_gpu_output_hdf5"); - check_sgrid(gdist,box,c); + gdist.write("sgrid_conf"); // Now load sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist2(sz,domain,g,bc); gdist2.load("sgrid_gpu_output_hdf5"); - check_sgrid(gdist2,box,c); + gdist2.deviceToHost<0,1>(); + check_sgrid_no_ghost(gdist2,box,c); gdist2.template hostToDevice<0>(); - gdist2.template ghost_get<0,1>(RUN_ON_DEVICE); gdist2.deviceToHost<0,1>(); gdist.deviceToHost<0,1>(); check_sgrid(gdist2,box,c); - -/* bool match = true; - - - auto it2 = gdist2.getDomainIterator(); - - while (it2.isNext()) - { - auto p = it2.get(); - - auto key = it2.getGKey(p); - - auto p_xp1 = p.move(0,1); - auto p_xm1 = p.move(0,-1); - auto p_yp1 = p.move(1,1); - auto p_ym1 = p.move(1,-1); - - auto key_xp1 = key.move(0,1); - auto key_xm1 = key.move(0,-1); - auto key_yp1 = key.move(1,1); - auto key_ym1 = key.move(1,-1); - - if (box.isInside(key_xp1.toPoint())) - { - match &= gdist.template get<0>(p_xp1) == c + key_xp1.get(0) + key_xp1.get(1); - - if (match == false) - { - std::cout << gdist.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl; - std::cout << key_xp1.to_string() << std::endl; - break; - } - } - - if (box.isInside(key_xm1.toPoint())) - { - match &= gdist.template get<0>(p_xm1) == c + key_xm1.get(0) + key_xm1.get(1); - - if (match == false) - { - std::cout << gdist.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl; - std::cout << key_xm1.to_string() << std::endl; - break; - } - } - - if (box.isInside(key_yp1.toPoint())) - { - match &= gdist.template get<0>(p_yp1) == c + key_yp1.get(0) + key_yp1.get(1); - - if (match == false) - { - std::cout << gdist.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl; - std::cout << key_yp1.to_string() << std::endl; - break; - } - } - - if (box.isInside(key_ym1.toPoint())) - { - match &= gdist.template get<0>(p_ym1) == c + key_ym1.get(0) + key_ym1.get(1); - - if (match == false) - { - std::cout << gdist.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl; - std::cout << key_ym1.to_string() << std::endl; - break; - } - } - - ++it2; - } - - - BOOST_REQUIRE_EQUAL(match,true);*/ } void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2]) -- GitLab