From 97cce61f3ffcd1909f2c2ff0ef9dca6009e5fae0 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <i-bird@localhost.localdomain>
Date: Thu, 14 Jul 2016 01:58:27 +0200
Subject: [PATCH] Fixing ghost unbound with dirty

---
 CHANGELOG.md                                  |   1 +
 openfpm_data                                  |   2 +-
 openfpm_devices                               |   2 +-
 script/install_LIBHILBERT.sh                  |  19 ++
 src/Decomposition/CartDecomposition.hpp       |  34 +--
 src/Decomposition/common.hpp                  |  20 +-
 src/Decomposition/ie_ghost.hpp                |  16 ++
 src/Graph/dist_map_graph.hpp                  |  16 +-
 src/Graph/dist_map_graph_unit_test.hpp        |   2 +-
 src/Grid/grid_dist_id.hpp                     | 177 +++++++++++-
 src/Grid/grid_dist_id_unit_test.hpp           |  23 ++
 src/Grid/grid_dist_id_unit_test_unb_ghost.hpp | 255 ++++++++++++++++++
 src/dec_optimizer.hpp                         |  78 ++++--
 src/dec_optimizer_unit_test.hpp               |  11 +-
 14 files changed, 577 insertions(+), 79 deletions(-)
 create mode 100755 script/install_LIBHILBERT.sh
 create mode 100644 src/Grid/grid_dist_id_unit_test_unb_ghost.hpp

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7432543a..562756fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ All notable changes to this project will be documented in this file.
 - Hilbert curve data and computation reordering for cache friendliness
 
 ### Fixed
+- Crash when using a small grid with a large number of processors
 
 ### Changed
 
diff --git a/openfpm_data b/openfpm_data
index 9d44c008..dfe00860 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 9d44c008f04e855666b1ea15d02e681f2bfcf2e9
+Subproject commit dfe008605b9167afd8339078cba5c779a38c376b
diff --git a/openfpm_devices b/openfpm_devices
index efd6339f..5868264d 160000
--- a/openfpm_devices
+++ b/openfpm_devices
@@ -1 +1 @@
-Subproject commit efd6339ff5d5d73528b07771ffde668787dee826
+Subproject commit 5868264de4c5d9a684b48f492efbfd26798f1d18
diff --git a/script/install_LIBHILBERT.sh b/script/install_LIBHILBERT.sh
new file mode 100755
index 00000000..d1562c15
--- /dev/null
+++ b/script/install_LIBHILBERT.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+
+# check if the directory $1/LIBHILBERT exists
+
+if [ -d "$1/LIBHILBERT" ]; then
+  echo "LIBHILBERT already installed"
+  exit 0
+fi
+
+wget http://ppmcore.mpi-cbg.de/upload/libhilbert-master.zip
+rm -rf libhilbert-master
+unzip libhilbert-master.zip
+cd libhilbert-master
+mkdir build
+cd build
+cmake -DCMAKE_INSTALL_PREFIX:PATH=$1/LIBHILBERT ..
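+# build the library and install it into the $1/LIBHILBERT prefix
+# configured above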
+make all +make install + diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index ded5a478..212408c8 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -243,8 +243,18 @@ public: // set of Boxes produced by the decomposition optimizer openfpm::vector<::Box<dim, size_t>> loc_box; + // Ghost + Ghost<dim,long int> ghe; + + // Set the ghost + for (size_t i = 0 ; i < dim ; i++) + { + ghe.setLow(i,static_cast<long int>(ghost.getLow(i)/spacing[i]) - 1); + ghe.setHigh(i,static_cast<long int>(ghost.getHigh(i)/spacing[i]) + 1); + } + // optimize the decomposition - d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,bc); + d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,ghe,bc); // reset ss_box ss_box = domain; @@ -486,19 +496,6 @@ public: */ void calculateGhostBoxes() { -#ifdef DEBUG - // the ghost margins are assumed to be smaller - // than one sub-domain - - for (size_t i = 0; i < dim; i++) - { - if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i)) - { - std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one sub-domain" << "\n"; - } - } -#endif - // Intersect all the local sub-domains with the sub-domains of the contiguous processors // create the internal structures that store ghost information @@ -506,15 +503,6 @@ public: ie_ghost<dim, T>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this); ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc); - - // get the smallest sub-domain dimension on each direction - for (size_t i = 0; i < dim; i++) - { - if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i)) - { - std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one sub-domain" << "\n"; - } - } } public: diff --git a/src/Decomposition/common.hpp b/src/Decomposition/common.hpp index 61984553..4278a232 100755 --- a/src/Decomposition/common.hpp +++ b/src/Decomposition/common.hpp @@ -52,28 +52,13 @@ struct Box_loc_sub }; -/*! It contain a box definition and from witch sub-domain it come from (in the local processor) +/*! It contain a box definition and from witch sub-domain it come from (in the local processor list) * and an unique if across adjacent processors (for communication) * * If the box come from the intersection of an expanded sub-domain and a sub-domain * * Assuming we are considering the near processors i (0 to getNNProcessors()) * - * ### external ghost box - * - * id = id_exp * N_non_exp + id_non_exp - * - * id_exp = the id in the vector proc_adj_box.get(i) of the expanded sub-domain (sent local sub-domains) - * - * id_non_exp = the id in the vector nn_processor_subdomains[i] of the sub-domain (received sub-domains from near processors) - * - * ### internal ghost box - * - * id = id_exp * N_non_exp + id_non_exp - * - * id_exp = the id in the vector nn_processor_subdomains[i] of the expanded sub-domain - * - * id_non_exp = the id in the vector proc_adj_box.get(i) of the sub-domain * */ template<unsigned int dim, typename T> @@ -88,6 +73,9 @@ struct Box_sub //! see ebx_ibx_form in ie_ghost for the meaning size_t id; + //! see getNearSubdomainsRealId in nn_prcs + size_t r_sub; + //! 
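sector in which this ghost box lives (one entry in {-1,0,+1} per dimension);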
see ie_ghost follow sector explanation comb<dim> cmb; }; diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp index 16eed5be..8b1923b6 100755 --- a/src/Decomposition/ie_ghost.hpp +++ b/src/Decomposition/ie_ghost.hpp @@ -378,6 +378,7 @@ protected: Box_sub<dim,T> sb; sb.bx = b_int.box; sb.sub = i; + sb.r_sub = r_sub.get(k); sb.cmb = nn_p_box_pos.get(k); size_t p_idp = nn_p.ProctoID(p_id); @@ -615,6 +616,21 @@ public: return proc_int_box.get(id).ebx.get(j).id; } + /*! \brief Get the sub-domain send-id at witch belong the internal ghost box + * + * The internal ghost box is create from the intersection a local sub-domain + * and an extended sub-domain communicated from another processor. This function + * return the id of the sub-domain in the receiving list + * + * \param id adjacent processor list id (the id go from 0 to getNNProcessor()) + * \param j box (each near processor can produce more than one internal ghost box) + * \return sub-domain at which belong the internal ghost box + * + */ + inline size_t getProcessorIGhostSSub(size_t id, size_t j) const + { + return proc_int_box.get(id).ibx.get(j).r_sub; + } /*! \brief Get the local sub-domain at witch belong the internal ghost box * diff --git a/src/Graph/dist_map_graph.hpp b/src/Graph/dist_map_graph.hpp index b8aeaca7..208ea069 100644 --- a/src/Graph/dist_map_graph.hpp +++ b/src/Graph/dist_map_graph.hpp @@ -495,6 +495,8 @@ class DistGraph_CSR } } + recv_g.fvtxdist = fvtxdist; + // Swap temporary graph with the main one swap(recv_g); @@ -648,7 +650,7 @@ class DistGraph_CSR } // Exchange informations through processors - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), gr_receive, &packs, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), gr_receive, &packs, NONE); for (size_t i = 0; i < vcl.getProcessingUnits(); i++) { @@ -802,7 +804,7 @@ class DistGraph_CSR fillSendRecvStructs<size_t>(on_info, prc, size, ptr); // Send on_info - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &on_vs, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &on_vs, NONE); // Insert in the on_toup map the received couples for (size_t i = 0; i < vcl.getProcessingUnits(); i++) @@ -870,7 +872,7 @@ class DistGraph_CSR fillSendRecvStructs<size_t>(vni, prc, size, ptr); // Send and receive requests - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &req_rmi, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &req_rmi, NONE); // Re-mapping info map openfpm::vector<openfpm::vector<size_t>> rmi(vcl.getProcessingUnits()); @@ -891,7 +893,7 @@ class DistGraph_CSR fillSendRecvStructs<size_t>(resp_rmi, prc, size, ptr); // Send responses - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &rmi, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &rmi, NONE); // Add received info into re-mapping info map for (size_t i = 0; i < rmi.size(); ++i) @@ -2197,7 +2199,7 @@ public: fillSendRecvStructs<size_t>(vr_queue, prc, size, ptr); // Send/receive requests for info about needed vertices - 
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE); // Prepare responses with the containing processors of requested vertices for (size_t i = 0; i < resp.size(); ++i) @@ -2224,7 +2226,7 @@ public: resp.resize(vcl.getProcessingUnits()); // Send/receive responses with the containing processors of requested vertices - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE); // Clear requests array reqs.clear(); @@ -2259,7 +2261,7 @@ public: resp.resize(vcl.getProcessingUnits()); // Send/receive vertices requests - vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE); + vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE); for (size_t i = 0; i < resp.size(); ++i) { diff --git a/src/Graph/dist_map_graph_unit_test.hpp b/src/Graph/dist_map_graph_unit_test.hpp index 6a4b606c..184cb4f8 100644 --- a/src/Graph/dist_map_graph_unit_test.hpp +++ b/src/Graph/dist_map_graph_unit_test.hpp @@ -415,7 +415,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) gd.sync(); if(vcl.getProcessUnitID() == 0) - BOOST_REQUIRE_EQUAL(gd.getVertexId(5), 5ul); + BOOST_REQUIRE_EQUAL(gd.getVertexId(4), 5ul); gd.deleteGhosts(); diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 358a5e59..f904ad9d 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -25,7 +25,9 @@ template<unsigned int dim> struct Box_fix { Box<dim,size_t> bx; + comb<dim> cmb; size_t g_id; + size_t r_sub; }; #define GRID_SUB_UNIT_FACTOR 64 @@ -87,6 +89,8 @@ class grid_dist_id //! It is unique across all the near processor std::unordered_map<size_t,size_t> g_id_to_external_ghost_box; + std::unordered_map<size_t,size_t> g_id_to_external_ghost_box_fix; + //! It map a global ghost id (g_id) to the internal ghost box information //! (is unique for processor), it is not unique across all the near processor openfpm::vector<std::unordered_map<size_t,size_t>> g_id_to_internal_ghost_box; @@ -131,6 +135,36 @@ class grid_dist_id return g->recv_mem_gg.get(lc_id).getPointer(); } + /*! \brief flip box just convert and internal ghost box into an external ghost box + * + * + */ + Box<dim,long int> flip_box(const Box<dim,long int> & box, const comb<dim> & cmb) + { + Box<dim,long int> flp; + + for (size_t i = 0 ; i < dim; i++) + { + if (cmb[i] == 0) + { + flp.setLow(i,box.getLow(i)); + flp.setHigh(i,box.getHigh(i)); + } + else if (cmb[i] == 1) + { + flp.setLow(i,box.getLow(i) + ginfo.size(i)); + flp.setHigh(i,box.getHigh(i) + ginfo.size(i)); + } + else if (cmb[i] == -1) + { + flp.setLow(i,box.getLow(i) - ginfo.size(i)); + flp.setHigh(i,box.getHigh(i) - ginfo.size(i)); + } + } + + return flp; + } + /*! 
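flip_box example (a sketch): with ginfo.size(0) == 100, an internal
+	 * ghost box spanning [96,99] along x in a sector with cmb[0] == -1
+	 * becomes the external ghost box [-4,-1]; the box is translated by
+	 * one full domain size along every non-zero sector direction
+	 *
+	 */
+
+	/*! 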
\brief Create per-processor internal ghost boxes list in grid units and g_id_to_external_ghost_box * */ @@ -169,6 +203,7 @@ class grid_dist_id bid_t.g_id = dec.getProcessorIGhostId(i,j); bid_t.sub = dec.getProcessorIGhostSub(i,j); bid_t.cmb = dec.getProcessorIGhostPos(i,j); + bid_t.r_sub = dec.getProcessorIGhostSSub(i,j); pib.bid.add(bid_t); g_id_to_internal_ghost_box.get(i)[bid_t.g_id] = pib.bid.size()-1; @@ -233,6 +268,97 @@ class grid_dist_id } init_e_g_box = true; + + // Communicate the ig_box calculated to the other processor + + comb<dim> zero; + zero.zero(); + + // Here we collect all the calculated internal ghost box in the sector different from 0 that this processor has + + openfpm::vector<size_t> prc; + openfpm::vector<size_t> prc_recv; + openfpm::vector<size_t> sz_recv; + openfpm::vector<openfpm::vector<Box_fix<dim>>> box_int_send(dec.getNNProcessors()); + openfpm::vector<openfpm::vector<Box_fix<dim>>> box_int_recv; + + for(size_t i = 0 ; i < dec.getNNProcessors() ; i++) + { + for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + { + box_int_send.get(i).add(); + box_int_send.get(i).last().bx = ig_box.get(i).bid.get(j).box; + box_int_send.get(i).last().g_id = ig_box.get(i).bid.get(j).g_id; + box_int_send.get(i).last().r_sub = ig_box.get(i).bid.get(j).r_sub; + box_int_send.get(i).last().cmb = ig_box.get(i).bid.get(j).cmb; + } + prc.add(dec.IDtoProc(i)); + } + + v_cl.SSendRecv(box_int_send,box_int_recv,prc,prc_recv,sz_recv); + + eg_box_tmp.resize(dec.getNNProcessors()); + + for (size_t i = 0 ; i < eg_box_tmp.size() ; i++) + eg_box_tmp.get(i).prc = dec.IDtoProc(i); + + for (size_t i = 0 ; i < box_int_recv.size() ; i++) + { + size_t p_id = dec.ProctoID(prc_recv.get(i)); + auto&& pib = eg_box_tmp.get(p_id); + pib.prc = prc_recv.get(i); + + // For each received internal ghost box + for (size_t j = 0 ; j < box_int_recv.get(i).size() ; j++) + { + size_t send_list_id = box_int_recv.get(i).get(j).r_sub; + + // Get the list of the sent sub-domains + // and recover the id of the sub-domain from + // the sent list + const openfpm::vector<size_t> & s_sub = dec.getSentSubdomains(p_id); + size_t sub_id = s_sub.get(send_list_id); + + e_box_id bid_t; + bid_t.sub = sub_id; + bid_t.cmb = box_int_recv.get(i).get(j).cmb; + bid_t.cmb.sign_flip(); + ::Box<dim,long int> ib = flip_box(box_int_recv.get(i).get(j).bx,box_int_recv.get(i).get(j).cmb); + bid_t.g_e_box = ib; + bid_t.g_id = box_int_recv.get(i).get(j).g_id; + // Translate in local coordinate + Box<dim,long int> tb = ib; + tb -= gdb_ext.get(sub_id).origin; + bid_t.l_e_box = tb; + + pib.bid.add(bid_t); + + g_id_to_external_ghost_box_fix[bid_t.g_id] = pib.bid.size()-1; + + size_t l_id = 0; + // convert the global id into local id + auto key = g_id_to_external_ghost_box.find(bid_t.g_id); + if (key != g_id_to_external_ghost_box.end()) // FOUND + l_id = key->second; + + Box<dim,long int> box_le = eg_box.get(p_id).bid.get(l_id).l_e_box; + Box<dim,long int> box_ge = eg_box.get(p_id).bid.get(l_id).g_e_box; + + if (box_le != bid_t.l_e_box || box_ge != bid_t.g_e_box || + eg_box.get(p_id).bid.get(l_id).cmb != bid_t.cmb || + eg_box.get(p_id).bid.get(l_id).sub != bid_t.sub) + { + int debug = 0; + debug++; + } + } + } + + // switch + + g_id_to_external_ghost_box = g_id_to_external_ghost_box_fix; + eg_box.clear(); + eg_box = eg_box_tmp; } bool init_local_i_g_box = false; @@ -268,6 +394,7 @@ class grid_dist_id pib.bid.last().box = ib; pib.bid.last().sub = dec.getLocalIGhostSub(i,j); pib.bid.last().k = dec.getLocalIGhostE(i,j); + pib.bid.last().cmb = 
dec.getLocalIGhostPos(i,j); } } @@ -286,7 +413,7 @@ class grid_dist_id if (init_local_e_g_box == true) return; - // Get the number of near processors + // Get the number of sub-domain for (size_t i = 0 ; i < dec.getNSubDomain() ; i++) { loc_eg_box.add(); @@ -307,11 +434,41 @@ class grid_dist_id pib.bid.last().box = ib; pib.bid.last().sub = dec.getLocalEGhostSub(i,j); pib.bid.last().cmb = dec.getLocalEGhostPos(i,j); - pib.bid.last().cmb.sign_flip(); } } init_local_e_g_box = true; + + loc_eg_box_tmp.resize(dec.getNSubDomain()); + + // Get the number of sub-domain + for (size_t i = 0 ; i < dec.getNSubDomain() ; i++) + { + for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++) + { + size_t k = loc_ig_box.get(i).bid.get(j).sub; + auto & pib = loc_eg_box_tmp.get(k); + + size_t s = loc_ig_box.get(i).bid.get(j).k; + pib.bid.resize(dec.getLocalNEGhost(k)); + + pib.bid.get(s).box = flip_box(loc_ig_box.get(i).bid.get(j).box,loc_ig_box.get(i).bid.get(j).cmb); + pib.bid.get(s).sub = dec.getLocalEGhostSub(k,s); + pib.bid.get(s).cmb = loc_ig_box.get(i).bid.get(j).cmb; + pib.bid.get(s).cmb.sign_flip(); + + if (pib.bid.get(s).box != loc_eg_box.get(k).bid.get(s).box && + pib.bid.get(s).cmb != loc_eg_box.get(k).bid.get(s).cmb && + pib.bid.get(s).sub != loc_eg_box.get(k).bid.get(s).sub) + { + std::cout << "CAZZO" << std::endl; + int debug = 0; + debug++; + } + } + } + + loc_eg_box = loc_eg_box_tmp; } /*! \brief Sync the local ghost part @@ -334,7 +491,7 @@ class grid_dist_id // sub domain connected with external box size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub; - // local external ghost box connected + // local internal ghost box connected size_t k = loc_ig_box.get(i).bid.get(j).k; Box<dim,size_t> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).box; @@ -1114,9 +1271,14 @@ public: //! id size_t g_id; + //! r_sub id of the sub-domain in the sent list + size_t r_sub; + //! Sector where it live the linked external ghost box comb<dim> cmb; + + //! sub size_t sub; }; @@ -1133,8 +1295,11 @@ public: //! sub-domain id size_t sub; - //! external box + //! external ghost box linked to this internal ghost box size_t k; + + //! combination + comb<dim> cmb; }; /*! \brief It store the information about the external ghost box @@ -1238,12 +1403,16 @@ public: //! External ghost boxes in grid units openfpm::vector<ep_box_grid> eg_box; + openfpm::vector<ep_box_grid> eg_box_tmp; + //! Local internal ghost boxes in grid units openfpm::vector<i_lbox_grid> loc_ig_box; //! Local external ghost boxes in grid units openfpm::vector<e_lbox_grid> loc_eg_box; + openfpm::vector<e_lbox_grid> loc_eg_box_tmp; + /*! \brief It synchronize the ghost parts * * \tparam prp... 
Properties to synchronize diff --git a/src/Grid/grid_dist_id_unit_test.hpp b/src/Grid/grid_dist_id_unit_test.hpp index 728b786b..4c650b91 100644 --- a/src/Grid/grid_dist_id_unit_test.hpp +++ b/src/Grid/grid_dist_id_unit_test.hpp @@ -1482,6 +1482,7 @@ void Test3D_periodic(const Box<3,float> & domain, long int k) } #include "grid_dist_id_unit_test_ext_dom.hpp" +#include "grid_dist_id_unit_test_unb_ghost.hpp" BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use) { @@ -1614,6 +1615,28 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_periodic ) Test3D_periodic(domain3,k); } +BOOST_AUTO_TEST_CASE( grid_dist_id_unbound_ghost ) +{ + // Domain + Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0}); + + long int k = 32*32*32*create_vcluster().getProcessingUnits(); + k = std::pow(k, 1/3.); + + Test3D_unb_ghost(domain3,k); +} + +BOOST_AUTO_TEST_CASE( grid_dist_id_unbound_ghost_periodic ) +{ + // Domain + Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0}); + + long int k = 32*32*32*create_vcluster().getProcessingUnits(); + k = std::pow(k, 1/3.); + + Test3D_unb_ghost_periodic(domain3,k); +} + BOOST_AUTO_TEST_SUITE_END() #endif diff --git a/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp b/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp new file mode 100644 index 00000000..3299c95a --- /dev/null +++ b/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp @@ -0,0 +1,255 @@ +/* + * grid_dist_id_unit_test_unb_ghost.hpp + * + * Created on: Jul 11, 2016 + * Author: i-bird + */ + +#ifndef SRC_GRID_GRID_DIST_ID_UNIT_TEST_UNB_GHOST_HPP_ +#define SRC_GRID_GRID_DIST_ID_UNIT_TEST_UNB_GHOST_HPP_ + +void Test3D_unb_ghost(const Box<3,float> & domain, long int k) +{ + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + if (create_vcluster().getProcessingUnits() > 48) + return; + + print_test( "Testing 3D grid unbound ghost k<=",k); + + // 3D test + for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) + { + BOOST_TEST_CHECKPOINT( "Testing 3D grid k=" << k ); + + // grid size + size_t sz[3]; + sz[0] = k; + sz[1] = k; + sz[2] = k; + + // Ghost + Ghost<3,float> g(0.49); + + // Distributed grid with id decomposition + grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + + g_dist.getDecomposition().write("no_bound_decomposition"); + + // check the consistency of the decomposition + bool val = g_dist.getDecomposition().check_consistency(); + BOOST_REQUIRE_EQUAL(val,true); + + // Grid sm + grid_sm<3,void> info(sz); + + // get the domain iterator + size_t count = 0; + + auto dom = g_dist.getDomainIterator(); + + while (dom.isNext()) + { + auto key = dom.get(); + auto key_g = g_dist.getGKey(key); + + g_dist.template get<0>(key) = info.LinId(key_g); + + // Count the point + count++; + + ++dom; + } + + // Get the virtual cluster machine + Vcluster & vcl = g_dist.getVC(); + + // reduce + vcl.sum(count); + vcl.execute(); + + // Check + BOOST_REQUIRE_EQUAL(count,(size_t)k*k*k); + + bool match = true; + + auto dom2 = g_dist.getDomainIterator(); + + // check that the grid store the correct information + while (dom2.isNext()) + { + auto key = dom2.get(); + auto key_g = g_dist.getGKey(key); + + match &= (g_dist.template get<0>(key) == info.LinId(key_g))?true:false; + + ++dom2; + } + + BOOST_REQUIRE_EQUAL(match,true); + + //! 
[Synchronize the ghost and check the information] + + g_dist.template ghost_get<0>(); + + // check that the communication is correctly completed + + auto domg = g_dist.getDomainGhostIterator(); + + // check that the grid with the ghost past store the correct information + while (domg.isNext()) + { + auto key = domg.get(); + auto key_g = g_dist.getGKey(key); + + // In this case the boundary condition are non periodic + if (g_dist.isInside(key_g)) + { + match &= (g_dist.template get<0>(key) == info.LinId(key_g))?true:false; + } + + ++domg; + } + + BOOST_REQUIRE_EQUAL(match,true); + + //! [Synchronize the ghost and check the information] + } +} + + +// Test grid periodic + +void Test3D_unb_ghost_periodic(const Box<3,float> & domain, long int k) +{ + typedef Point_test<float> p; + + Vcluster & v_cl = create_vcluster(); + + if ( v_cl.getProcessingUnits() > 48 ) + return; + + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + print_test( "Testing grid periodic unbound ghost k<=",k); + + // 3D test + for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) + { + BOOST_TEST_CHECKPOINT( "Testing grid unbound ghost periodic k<=" << k ); + + // grid size + size_t sz[3]; + sz[0] = k; + sz[1] = k; + sz[2] = k; + + // Ghost + Ghost<3,float> g(0.1); + + // periodicity + periodicity<3> pr = {{PERIODIC,PERIODIC,PERIODIC}}; + + // Distributed grid with id decomposition + grid_dist_id<3, float, aggregate<long int>, CartDecomposition<3,float>> g_dist(sz,domain,g,pr); + + // check the consistency of the decomposition + bool val = g_dist.getDecomposition().check_consistency(); + BOOST_REQUIRE_EQUAL(val,true); + + // Grid sm + grid_sm<3,void> info(sz); + + size_t count = 0; + + // Set to zero the full grid + + auto dom1 = g_dist.getDomainGhostIterator(); + + while (dom1.isNext()) + { + auto key = dom1.get(); + + g_dist.template get<0>(key) = -1; + + ++dom1; + } + + auto dom = g_dist.getDomainIterator(); + + while (dom.isNext()) + { + auto key = dom.get(); + auto key_g = g_dist.getGKey(key); + + g_dist.template get<0>(key) = info.LinId(key_g); + + // Count the points + count++; + + ++dom; + } + + // Get the virtual cluster machine + Vcluster & vcl = g_dist.getVC(); + + // reduce + vcl.sum(count); + vcl.execute(); + + // Check + BOOST_REQUIRE_EQUAL(count,(size_t)k*k*k); + + // sync the ghosts +// g_dist.write("no_bound_before"); + g_dist.ghost_get<0>(); +// g_dist.write("no_bound_after"); + + bool match = true; + + // Domain + Ghost iterator + auto dom_gi = g_dist.getDomainGhostIterator(); + + size_t out_cnt = 0; + + while (dom_gi.isNext()) + { + bool out_p = false; + + auto key = dom_gi.get(); + auto key_g = g_dist.getGKey(key); + + // transform the key to be periodic + for (size_t i = 0 ; i < 3 ; i++) + { + if (key_g.get(i) < 0) + {key_g.set_d(i,key_g.get(i) + k);out_p = true;} + else if (key_g.get(i) >= k) + {key_g.set_d(i,key_g.get(i) - k);out_p = true;} + } + + if (g_dist.template get<0>(key) != -1 && out_p == true) + out_cnt++; + + if ( /*g_dist.template get<0>(key) != -1 &&*/ info.LinId(key_g) != g_dist.template get<0>(key) ) + match &= false; + + ++dom_gi; + } + + BOOST_REQUIRE_EQUAL(match, true); + if (k > 83) + { + vcl.sum(out_cnt); + vcl.execute(); + BOOST_REQUIRE(out_cnt != 0ul); + } + } +} + + +#endif /* SRC_GRID_GRID_DIST_ID_UNIT_TEST_UNB_GHOST_HPP_ */ diff --git a/src/dec_optimizer.hpp b/src/dec_optimizer.hpp index 308468d7..737377a0 100644 --- a/src/dec_optimizer.hpp +++ b/src/dec_optimizer.hpp @@ -2,6 +2,7 @@ #define DEC_OPTIMIZER_HPP #include 
"Grid/iterators/grid_key_dx_iterator_sub.hpp" +#include "Grid/iterators/grid_skin_iterator.hpp" /*! \brief this class represent a wavefront of dimension dim * @@ -237,7 +238,7 @@ private: * \param bc Boundary conditions * */ - template<unsigned int p_sub, unsigned int p_id> void add_to_queue(openfpm::vector<size_t> & domains, openfpm::vector<wavefront<dim>> & v_w, Graph & graph, std::vector<comb<dim>> & w_comb, long int pr_id, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t(& bc)[dim]) + template<unsigned int p_sub, unsigned int p_id> void add_to_queue(openfpm::vector<size_t> & domains, openfpm::vector<wavefront<dim>> & v_w, Graph & graph, std::vector<comb<dim>> & w_comb, long int pr_id, const size_t(& bc)[dim]) { // create a new queue openfpm::vector<size_t> domains_new; @@ -284,13 +285,6 @@ private: // Get the processor id of the sub-sub-domain long int pp_id = graph.vertex(gh.LinId(gk)).template get<p_id>(); - if (pp_id != pr_id) - { - // this box is contiguous to other processors - - box_nn_processor.get(box_nn_processor.size()-1).add(pp_id); - } - // if the sub-sub-domain is not assigned if (pid < 0) { @@ -310,11 +304,6 @@ private: } } - // sort and make it unique the numbers in processor list - std::sort(box_nn_processor.get(box_nn_processor.size()-1).begin(), box_nn_processor.get(box_nn_processor.size()-1).end()); - auto last = std::unique(box_nn_processor.get(box_nn_processor.size()-1).begin(), box_nn_processor.get(box_nn_processor.size()-1).end()); - box_nn_processor.get(box_nn_processor.size()-1).erase(last, box_nn_processor.get(box_nn_processor.size()-1).end()); - // copy the new queue to the old one (it not copied, C++11 move semantic) domains.swap(domains_new); } @@ -550,6 +539,7 @@ private: * \param p_id Processor id (if p_id == -1 the optimization is done for all the processors) * \param list of sub-domain boxes produced by the algorithm * \param box_nn_processor for each box it list all the neighborhood processor + * \param ghe Ghost extension in sub-sub-domain units in each direction * \param bc Boundary condition * \param init_sub_id when true p_sub property is initial set to -1 [default true] * \param sub_id starting sub_id enumeration [default 0] @@ -557,11 +547,14 @@ private: * \return last assigned sub-id * */ - template <unsigned int p_sub, unsigned int p_id> size_t optimize(grid_key_dx<dim> & start_p, Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t (& bc)[dim], bool init_sub_id = true, size_t sub_id = 0) + template <unsigned int p_sub, unsigned int p_id> size_t optimize(grid_key_dx<dim> & start_p, Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor , const Ghost<dim,long int> & ghe ,const size_t (& bc)[dim], bool init_sub_id = true, size_t sub_id = 0) { // queue openfpm::vector<size_t> v_q; + // box list 2 + openfpm::vector< openfpm::vector<size_t> > box_nn_processor2; + // Create an hyper-cube HyperCube<dim> hyp; @@ -590,9 +583,6 @@ private: // Initialize the wavefronts from the domain start_p InitializeWavefront(start_p,v_w); - // Create a list for the box - box_nn_processor.add(); - // Create the biggest box containing the domain expand_from_point<p_sub,p_id>(v_q.get(0),graph,box,v_w,w_comb); @@ -603,15 +593,57 @@ private: fill_domain<p_sub>(graph,box,sub_id); // add the surrounding sub-domain to the queue - 
add_to_queue<p_sub,p_id>(v_q,v_w,graph,w_comb,pr_id,box_nn_processor,bc); + add_to_queue<p_sub,p_id>(v_q,v_w,graph,w_comb,pr_id,bc); // increment the sub_id sub_id++; } + // Construct box box_nn_processor from the constructed domain + construct_box_nn_processor<p_id>(graph,box_nn_processor,lb,ghe,bc,pr_id); + return sub_id; } + /*! \brief Construct the sub-domain processor list + * + * Each entry is a sub-domain, the list of numbers indicate the neighborhood processors + * + * \brief box_nn_processor + * + */ + template<unsigned int p_id> void construct_box_nn_processor(Graph & graph, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const openfpm::vector<Box<dim,size_t>> & subs, const Ghost<dim,long int> & ghe, const size_t (& bc)[dim], long int pr_id) + { + std::unordered_map<size_t,size_t> map; + + for (size_t i = 0 ; i < subs.size() ; i++) + { + map.clear(); + Box<dim,size_t> sub = subs.get(i); + sub.enlarge(ghe); + grid_skin_iterator_bc<dim> gsi(gh,subs.get(i),sub,bc); + + while (gsi.isNext()) + { + auto key = gsi.get(); + + size_t pp_id = graph.vertex(gh.LinId(key)).template get<p_id>(); + if (pr_id != (long int)pp_id) + map[pp_id] = pp_id; + + ++gsi; + } + + // Add the keys to box_nn_processors + + box_nn_processor.add(); + for ( auto it = map.begin(); it != map.end(); ++it ) + { + box_nn_processor.last().add(it->first); + } + } + } + public: /*! \brief Constructor @@ -641,7 +673,7 @@ public: * \param graph we are processing * */ - template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph, const size_t (& bc)[dim]) + template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph, const Ghost<dim,long int> & ghe , const size_t (& bc)[dim]) { // temporal vector openfpm::vector<Box<dim,size_t>> tmp; @@ -650,7 +682,7 @@ public: openfpm::vector< openfpm::vector<size_t> > box_nn_processor; // optimize - optimize<p_sub,p_id>(start_p,graph,-1,tmp, box_nn_processor,bc); + optimize<p_sub,p_id>(start_p,graph,-1,tmp, box_nn_processor,ghe,bc); } /*! 
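Usage (a sketch mirroring dec_optimizer_unit_test.hpp; assumes a
	 * filled Graph_CSR<nm_v,nm_e> g, a grid size sz and boundary
	 * conditions bc as in the tests):
	 *
	 *   dec_optimizer<3,Graph_CSR<nm_v,nm_e>> d_o(g,sz);
	 *   Ghost<3,size_t> ghe(1);        // one sub-sub-domain of ghost
	 *   grid_key_dx<3> keyZero(0,0,0);
	 *   d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g,ghe,bc);
	 *
	 */

	/*! 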
\brief optimize the graph @@ -667,7 +699,7 @@ public: * \param list of sub-domain boxes * */ - template <unsigned int p_sub, unsigned int p_id> void optimize(Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t (& bc)[dim]) + template <unsigned int p_sub, unsigned int p_id> void optimize(Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const Ghost<dim,long int> & ghe, const size_t (& bc)[dim]) { grid_key_dx<dim> key_seed; key_seed.zero(); @@ -675,7 +707,7 @@ public: // if processor is -1 call optimize with -1 to do on all processors and exit if (pr_id == -1) { - optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor,bc); + optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor,ghe,bc); return; } @@ -689,7 +721,7 @@ public: while (key_seed.isValid()) { // optimize - sub_id = optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor,bc,false,sub_id); + sub_id = optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor,ghe,bc,false,sub_id); // new seed key_seed = search_seed<p_id,p_sub>(graph,pr_id); diff --git a/src/dec_optimizer_unit_test.hpp b/src/dec_optimizer_unit_test.hpp index 6babf566..deaa903e 100644 --- a/src/dec_optimizer_unit_test.hpp +++ b/src/dec_optimizer_unit_test.hpp @@ -50,8 +50,10 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_np) // optimize dec_optimizer<3,Graph_CSR<nm_v,nm_e>> d_o(g,sz); + Ghost<3,size_t> ghe(1); + grid_key_dx<3> keyZero(0,0,0); - d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g,bc); + d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g,ghe,bc); } BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_p) @@ -111,8 +113,10 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_p) // For each sub-domain check the neighborhood processors openfpm::vector< openfpm::vector<size_t> > box_nn_processor; + Ghost<3,size_t> ghe(1); + // gp,p_id,loc_box,box_nn_processor,bc - d_o.optimize<nm_v::sub_id,nm_v::id>(g,-1,dec_o,box_nn_processor,bc); + d_o.optimize<nm_v::sub_id,nm_v::id>(g,-1,dec_o,box_nn_processor,ghe,bc); BOOST_REQUIRE_EQUAL(box_nn_processor.size(),8ul); @@ -187,7 +191,8 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_disconnected_subdomains_np) //! for each sub-domain, contain the list of the neighborhood processors openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor; - d_o.optimize<nm_v::sub_id, nm_v::proc_id>(g, vcl.getProcessUnitID(), loc_box, box_nn_processor,bc); + Ghost<2,size_t> ghe(1); + d_o.optimize<nm_v::sub_id, nm_v::proc_id>(g, vcl.getProcessUnitID(), loc_box, box_nn_processor,ghe,bc); std::stringstream str_g; str_g << "dec_optimizer_disc_graph" << vcl.getProcessUnitID() << ".vtk"; -- GitLab
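Note on the new Ghost<dim,long int> argument threaded through optimize():
it is the ghost expressed in sub-sub-domain (grid) units. A minimal sketch
of the conversion CartDecomposition performs above; the free-function form
and the name ghost_in_units are illustrative, not part of the patch:

    // convert a physical ghost into sub-sub-domain units, rounding
    // outward by one unit on each side as a safety margin
    template<unsigned int dim, typename T>
    Ghost<dim,long int> ghost_in_units(const Ghost<dim,T> & ghost,
                                       const T (& spacing)[dim])
    {
    	Ghost<dim,long int> ghe;

    	for (size_t i = 0 ; i < dim ; i++)
    	{
    		// ghost.getLow(i) is negative, so the division already
    		// points toward the lower side; -1/+1 enlarge the result
    		ghe.setLow(i,static_cast<long int>(ghost.getLow(i)/spacing[i]) - 1);
    		ghe.setHigh(i,static_cast<long int>(ghost.getHigh(i)/spacing[i]) + 1);
    	}

    	return ghe;
    }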