Commit 97cce61f authored by Pietro Incardona's avatar Pietro Incardona

Fixing ghost unbound with dirty

parent 58e62895
......@@ -14,6 +14,7 @@ All notable changes to this project will be documented in this file.
- Hilbert curve data and computation reordering for cache friendliness
### Fixed
- Fixed a crash occurring with small grids and a large number of processors
### Changed
......
openfpm_data @ dfe00860
Subproject commit 9d44c008f04e855666b1ea15d02e681f2bfcf2e9
Subproject commit dfe008605b9167afd8339078cba5c779a38c376b
openfpm_devices @ 5868264d
Subproject commit efd6339ff5d5d73528b07771ffde668787dee826
Subproject commit 5868264de4c5d9a684b48f492efbfd26798f1d18
#! /bin/bash
# check if the directory $1/LIBHILBERT exists
if [ -d "$1/LIBHILBERT" ]; then
echo "LIBHILBERT already installed"
exit 0
fi
wget http://ppmcore.mpi-cbg.de/upload/libhilbert-master.zip
rm -rf libhilbert-master
unzip libhilbert-master.zip
cd libhilbert-master
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX:PATH=$1/LIBHILBERT ..
make all
make install
......@@ -243,8 +243,18 @@ public:
// set of Boxes produced by the decomposition optimizer
openfpm::vector<::Box<dim, size_t>> loc_box;
// Ghost
Ghost<dim,long int> ghe;
// Set the ghost extension in grid units
for (size_t i = 0 ; i < dim ; i++)
{
ghe.setLow(i,static_cast<long int>(ghost.getLow(i)/spacing[i]) - 1);
ghe.setHigh(i,static_cast<long int>(ghost.getHigh(i)/spacing[i]) + 1);
}
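// Illustrative example (not from the source): with spacing[i] = 0.1 and
// ghost.getLow(i) = -0.25, the ratio -2.5 truncates to -2 and the extra -1
// margin gives -3 grid cells; the +1 on the high side is the symmetric
// safety margin, so the ghost in grid units always covers the real ghost.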
// optimize the decomposition
d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,bc);
d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,ghe,bc);
// reset ss_box
ss_box = domain;
......@@ -486,19 +496,6 @@ public:
*/
void calculateGhostBoxes()
{
#ifdef DEBUG
// the ghost margins are assumed to be smaller
// than one sub-domain
for (size_t i = 0; i < dim; i++)
{
if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i))
{
std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one sub-domain" << "\n";
}
}
#endif
// Intersect all the local sub-domains with the sub-domains of the contiguous processors
// create the internal structures that store ghost information
......@@ -506,15 +503,6 @@ public:
ie_ghost<dim, T>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this);
ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc);
// get the smallest sub-domain dimension on each direction
for (size_t i = 0; i < dim; i++)
{
if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i))
{
std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one sub-domain" << "\n";
}
}
}
public:
......
......@@ -52,28 +52,13 @@ struct Box_loc_sub
};
/*! It contains a box definition and the sub-domain it comes from (in the local processor)
/*! It contains a box definition and the sub-domain it comes from (in the local processor list)
* and a unique id across adjacent processors (for communication)
*
* If the box comes from the intersection of an expanded sub-domain and a sub-domain
*
* Assuming we are considering the near processor i (i from 0 to getNNProcessors())
*
* ### external ghost box
*
* id = id_exp * N_non_exp + id_non_exp
*
* id_exp = the id in the vector proc_adj_box.get(i) of the expanded sub-domain (sent local sub-domains)
*
* id_non_exp = the id in the vector nn_processor_subdomains[i] of the sub-domain (received sub-domains from near processors)
*
* ### internal ghost box
*
* id = id_exp * N_non_exp + id_non_exp
*
* id_exp = the id in the vector nn_processor_subdomains[i] of the expanded sub-domain
*
* id_non_exp = the id in the vector proc_adj_box.get(i) of the sub-domain
*
*/
template<unsigned int dim, typename T>
......@@ -88,6 +73,9 @@ struct Box_sub
//! see ebx_ibx_form in ie_ghost for the meaning
size_t id;
//! see getNearSubdomainsRealId in nn_prcs
size_t r_sub;
//! see the sector explanation in ie_ghost
comb<dim> cmb;
};
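// A minimal sketch of the composite-id scheme documented above: id packs an
// expanded and a non-expanded sub-domain index as id = id_exp * N_non_exp +
// id_non_exp. encode_gbox_id/decode_gbox_id are illustrative helper names,
// not part of the OpenFPM API.
inline size_t encode_gbox_id(size_t id_exp, size_t id_non_exp, size_t N_non_exp)
{
	return id_exp * N_non_exp + id_non_exp;
}

inline void decode_gbox_id(size_t id, size_t N_non_exp, size_t & id_exp, size_t & id_non_exp)
{
	id_exp = id / N_non_exp;      // index of the expanded sub-domain
	id_non_exp = id % N_non_exp;  // index of the non-expanded sub-domain
}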
......
......@@ -378,6 +378,7 @@ protected:
Box_sub<dim,T> sb;
sb.bx = b_int.box;
sb.sub = i;
sb.r_sub = r_sub.get(k);
sb.cmb = nn_p_box_pos.get(k);
size_t p_idp = nn_p.ProctoID(p_id);
......@@ -615,6 +616,21 @@ public:
return proc_int_box.get(id).ebx.get(j).id;
}
/*! \brief Get the send-list id of the sub-domain to which the internal ghost box belongs
*
* The internal ghost box is created from the intersection of a local sub-domain
* and an extended sub-domain communicated from another processor. This function
* returns the id of the sub-domain in the receiving list
*
* \param id adjacent processor list id (the id goes from 0 to getNNProcessors())
* \param j box (each near processor can produce more than one internal ghost box)
* \return the sub-domain to which the internal ghost box belongs
*
*/
inline size_t getProcessorIGhostSSub(size_t id, size_t j) const
{
return proc_int_box.get(id).ibx.get(j).r_sub;
}
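// Usage sketch (assumptions: dec is the decomposition and getProcessorNIGhost
// returns the number of internal ghost boxes of near processor i):
//
//   for (size_t i = 0 ; i < dec.getNNProcessors() ; i++)
//     for (size_t j = 0 ; j < dec.getProcessorNIGhost(i) ; j++)
//       size_t send_id = dec.getProcessorIGhostSSub(i,j); // index in the sent list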
/*! \brief Get the local sub-domain to which the internal ghost box belongs
*
......
......@@ -495,6 +495,8 @@ class DistGraph_CSR
}
}
recv_g.fvtxdist = fvtxdist;
// Swap temporary graph with the main one
swap(recv_g);
......@@ -648,7 +650,7 @@ class DistGraph_CSR
}
// Exchange information across processors
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), gr_receive, &packs, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), gr_receive, &packs, NONE);
for (size_t i = 0; i < vcl.getProcessingUnits(); i++)
{
......@@ -802,7 +804,7 @@ class DistGraph_CSR
fillSendRecvStructs<size_t>(on_info, prc, size, ptr);
// Send on_info
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &on_vs, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &on_vs, NONE);
// Insert in the on_toup map the received couples
for (size_t i = 0; i < vcl.getProcessingUnits(); i++)
......@@ -870,7 +872,7 @@ class DistGraph_CSR
fillSendRecvStructs<size_t>(vni, prc, size, ptr);
// Send and receive requests
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &req_rmi, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &req_rmi, NONE);
// Re-mapping info map
openfpm::vector<openfpm::vector<size_t>> rmi(vcl.getProcessingUnits());
......@@ -891,7 +893,7 @@ class DistGraph_CSR
fillSendRecvStructs<size_t>(resp_rmi, prc, size, ptr);
// Send responses
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &rmi, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &rmi, NONE);
// Add received info into re-mapping info map
for (size_t i = 0; i < rmi.size(); ++i)
......@@ -2197,7 +2199,7 @@ public:
fillSendRecvStructs<size_t>(vr_queue, prc, size, ptr);
// Send/receive requests for info about needed vertices
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE);
// Prepare responses with the containing processors of requested vertices
for (size_t i = 0; i < resp.size(); ++i)
......@@ -2224,7 +2226,7 @@ public:
resp.resize(vcl.getProcessingUnits());
// Send/receive responses with the containing processors of requested vertices
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE);
// Clear requests array
reqs.clear();
......@@ -2259,7 +2261,7 @@ public:
resp.resize(vcl.getProcessingUnits());
// Send/receive vertices requests
vcl.sendrecvMultipleMessagesNBX(prc.size(), &size.get(0), &prc.get(0), &ptr.get(0), on_receive, &resp, NONE);
vcl.sendrecvMultipleMessagesNBX(prc.size(), (size_t *)size.getPointer(), (size_t *)prc.getPointer(), (void **)ptr.getPointer(), on_receive, &resp, NONE);
for (size_t i = 0; i < resp.size(); ++i)
{
......
......@@ -415,7 +415,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add)
gd.sync();
if(vcl.getProcessUnitID() == 0)
BOOST_REQUIRE_EQUAL(gd.getVertexId(5), 5ul);
BOOST_REQUIRE_EQUAL(gd.getVertexId(4), 5ul);
gd.deleteGhosts();
......
......@@ -25,7 +25,9 @@ template<unsigned int dim>
struct Box_fix
{
Box<dim,size_t> bx;
comb<dim> cmb;
size_t g_id;
size_t r_sub;
};
#define GRID_SUB_UNIT_FACTOR 64
......@@ -87,6 +89,8 @@ class grid_dist_id
//! It is unique across all the near processors
std::unordered_map<size_t,size_t> g_id_to_external_ghost_box;
//! Recomputed (fixed) version of the map above; filled while recalculating the
//! external ghost boxes and then swapped in to replace g_id_to_external_ghost_box
std::unordered_map<size_t,size_t> g_id_to_external_ghost_box_fix;
//! It maps a global ghost id (g_id) to the internal ghost box information
//! (unique per processor); it is not unique across all the near processors
openfpm::vector<std::unordered_map<size_t,size_t>> g_id_to_internal_ghost_box;
......@@ -131,6 +135,36 @@ class grid_dist_id
return g->recv_mem_gg.get(lc_id).getPointer();
}
/*! \brief flip_box converts an internal ghost box into an external ghost box
*
* \param box internal ghost box to flip
* \param cmb sector (combination) in which the box lives
*
* \return the flipped box, expressed as an external ghost box
*
*/
Box<dim,long int> flip_box(const Box<dim,long int> & box, const comb<dim> & cmb)
{
Box<dim,long int> flp;
for (size_t i = 0 ; i < dim; i++)
{
if (cmb[i] == 0)
{
flp.setLow(i,box.getLow(i));
flp.setHigh(i,box.getHigh(i));
}
else if (cmb[i] == 1)
{
flp.setLow(i,box.getLow(i) + ginfo.size(i));
flp.setHigh(i,box.getHigh(i) + ginfo.size(i));
}
else if (cmb[i] == -1)
{
flp.setLow(i,box.getLow(i) - ginfo.size(i));
flp.setHigh(i,box.getHigh(i) - ginfo.size(i));
}
}
return flp;
}
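// Worked example (illustrative): in a periodic direction with
// ginfo.size(0) = 64, an internal ghost box spanning [60,63] that lives in
// the sector cmb[0] = -1 flips to the external ghost box [-4,-1]: the box is
// translated by -64 so it lands on the mirrored side of the domain.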
/*! \brief Create the per-processor internal ghost box list in grid units and fill g_id_to_external_ghost_box
*
*/
......@@ -169,6 +203,7 @@ class grid_dist_id
bid_t.g_id = dec.getProcessorIGhostId(i,j);
bid_t.sub = dec.getProcessorIGhostSub(i,j);
bid_t.cmb = dec.getProcessorIGhostPos(i,j);
bid_t.r_sub = dec.getProcessorIGhostSSub(i,j);
pib.bid.add(bid_t);
g_id_to_internal_ghost_box.get(i)[bid_t.g_id] = pib.bid.size()-1;
......@@ -233,6 +268,97 @@ class grid_dist_id
}
init_e_g_box = true;
// Communicate the calculated ig_box to the other processors
comb<dim> zero;
zero.zero();
// Here we collect all the calculated internal ghost boxes in sectors different from 0 on this processor
openfpm::vector<size_t> prc;
openfpm::vector<size_t> prc_recv;
openfpm::vector<size_t> sz_recv;
openfpm::vector<openfpm::vector<Box_fix<dim>>> box_int_send(dec.getNNProcessors());
openfpm::vector<openfpm::vector<Box_fix<dim>>> box_int_recv;
for(size_t i = 0 ; i < dec.getNNProcessors() ; i++)
{
for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
{
box_int_send.get(i).add();
box_int_send.get(i).last().bx = ig_box.get(i).bid.get(j).box;
box_int_send.get(i).last().g_id = ig_box.get(i).bid.get(j).g_id;
box_int_send.get(i).last().r_sub = ig_box.get(i).bid.get(j).r_sub;
box_int_send.get(i).last().cmb = ig_box.get(i).bid.get(j).cmb;
}
prc.add(dec.IDtoProc(i));
}
v_cl.SSendRecv(box_int_send,box_int_recv,prc,prc_recv,sz_recv);
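// SSendRecv sends box_int_send.get(i) to processor prc.get(i) and fills
// box_int_recv with the incoming lists; prc_recv and sz_recv record, for each
// received list, the source processor and its size (semantics as understood
// from the VCluster API).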
eg_box_tmp.resize(dec.getNNProcessors());
for (size_t i = 0 ; i < eg_box_tmp.size() ; i++)
eg_box_tmp.get(i).prc = dec.IDtoProc(i);
for (size_t i = 0 ; i < box_int_recv.size() ; i++)
{
size_t p_id = dec.ProctoID(prc_recv.get(i));
auto&& pib = eg_box_tmp.get(p_id);
pib.prc = prc_recv.get(i);
// For each received internal ghost box
for (size_t j = 0 ; j < box_int_recv.get(i).size() ; j++)
{
size_t send_list_id = box_int_recv.get(i).get(j).r_sub;
// Get the list of the sent sub-domains
// and recover the id of the sub-domain from
// the sent list
const openfpm::vector<size_t> & s_sub = dec.getSentSubdomains(p_id);
size_t sub_id = s_sub.get(send_list_id);
e_box_id bid_t;
bid_t.sub = sub_id;
bid_t.cmb = box_int_recv.get(i).get(j).cmb;
bid_t.cmb.sign_flip();
::Box<dim,long int> ib = flip_box(box_int_recv.get(i).get(j).bx,box_int_recv.get(i).get(j).cmb);
bid_t.g_e_box = ib;
bid_t.g_id = box_int_recv.get(i).get(j).g_id;
// Translate into local coordinates
Box<dim,long int> tb = ib;
tb -= gdb_ext.get(sub_id).origin;
bid_t.l_e_box = tb;
pib.bid.add(bid_t);
g_id_to_external_ghost_box_fix[bid_t.g_id] = pib.bid.size()-1;
size_t l_id = 0;
// convert the global id into local id
auto key = g_id_to_external_ghost_box.find(bid_t.g_id);
if (key != g_id_to_external_ghost_box.end()) // FOUND
l_id = key->second;
Box<dim,long int> box_le = eg_box.get(p_id).bid.get(l_id).l_e_box;
Box<dim,long int> box_ge = eg_box.get(p_id).bid.get(l_id).g_e_box;
if (box_le != bid_t.l_e_box || box_ge != bid_t.g_e_box ||
eg_box.get(p_id).bid.get(l_id).cmb != bid_t.cmb ||
eg_box.get(p_id).bid.get(l_id).sub != bid_t.sub)
{
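// The recomputed external ghost box disagrees with the stored one;
// this branch exists only as a breakpoint anchor for debugging.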
int debug = 0;
debug++;
}
}
}
// switch to the recomputed mapping and external ghost boxes
g_id_to_external_ghost_box = g_id_to_external_ghost_box_fix;
eg_box.clear();
eg_box = eg_box_tmp;
}
bool init_local_i_g_box = false;
......@@ -268,6 +394,7 @@ class grid_dist_id
pib.bid.last().box = ib;
pib.bid.last().sub = dec.getLocalIGhostSub(i,j);
pib.bid.last().k = dec.getLocalIGhostE(i,j);
pib.bid.last().cmb = dec.getLocalIGhostPos(i,j);
}
}
......@@ -286,7 +413,7 @@ class grid_dist_id
if (init_local_e_g_box == true) return;
// Get the number of near processors
// Get the number of sub-domains
for (size_t i = 0 ; i < dec.getNSubDomain() ; i++)
{
loc_eg_box.add();
......@@ -307,11 +434,41 @@ class grid_dist_id
pib.bid.last().box = ib;
pib.bid.last().sub = dec.getLocalEGhostSub(i,j);
pib.bid.last().cmb = dec.getLocalEGhostPos(i,j);
pib.bid.last().cmb.sign_flip();
}
}
init_local_e_g_box = true;
loc_eg_box_tmp.resize(dec.getNSubDomain());
// Get the number of sub-domains
for (size_t i = 0 ; i < dec.getNSubDomain() ; i++)
{
for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
{
size_t k = loc_ig_box.get(i).bid.get(j).sub;
auto & pib = loc_eg_box_tmp.get(k);
size_t s = loc_ig_box.get(i).bid.get(j).k;
pib.bid.resize(dec.getLocalNEGhost(k));
pib.bid.get(s).box = flip_box(loc_ig_box.get(i).bid.get(j).box,loc_ig_box.get(i).bid.get(j).cmb);
pib.bid.get(s).sub = dec.getLocalEGhostSub(k,s);
pib.bid.get(s).cmb = loc_ig_box.get(i).bid.get(j).cmb;
pib.bid.get(s).cmb.sign_flip();
// consistency check between the recomputed and the stored local external ghost boxes
if (pib.bid.get(s).box != loc_eg_box.get(k).bid.get(s).box ||
pib.bid.get(s).cmb != loc_eg_box.get(k).bid.get(s).cmb ||
pib.bid.get(s).sub != loc_eg_box.get(k).bid.get(s).sub)
{
std::cout << "Error: recomputed local external ghost box does not match the stored one" << std::endl;
int debug = 0;
debug++;
}
}
}
loc_eg_box = loc_eg_box_tmp;
}
/*! \brief Sync the local ghost part
......@@ -334,7 +491,7 @@ class grid_dist_id
// sub domain connected with external box
size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;
// local external ghost box connected
// local internal ghost box connected
size_t k = loc_ig_box.get(i).bid.get(j).k;
Box<dim,size_t> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).box;
......@@ -1114,9 +1271,14 @@ public:
//! id
size_t g_id;
//! r_sub id of the sub-domain in the sent list
size_t r_sub;
//! Sector where the linked external ghost box lives
comb<dim> cmb;
//! sub-domain id
size_t sub;
};
......@@ -1133,8 +1295,11 @@ public:
//! sub-domain id
size_t sub;
//! external box
//! external ghost box linked to this internal ghost box
size_t k;
//! combination
comb<dim> cmb;
};
/*! \brief It store the information about the external ghost box
......@@ -1238,12 +1403,16 @@ public:
//! External ghost boxes in grid units
openfpm::vector<ep_box_grid> eg_box;
//! Temporary external ghost boxes used while recomputing the mapping
openfpm::vector<ep_box_grid> eg_box_tmp;
//! Local internal ghost boxes in grid units
openfpm::vector<i_lbox_grid> loc_ig_box;
//! Local external ghost boxes in grid units
openfpm::vector<e_lbox_grid> loc_eg_box;
//! Temporary local external ghost boxes used while recomputing
openfpm::vector<e_lbox_grid> loc_eg_box_tmp;
/*! \brief It synchronize the ghost parts
*
* \tparam prp... Properties to synchronize
......
......@@ -1482,6 +1482,7 @@ void Test3D_periodic(const Box<3,float> & domain, long int k)
}
#include "grid_dist_id_unit_test_ext_dom.hpp"
#include "grid_dist_id_unit_test_unb_ghost.hpp"
BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use)
{
......@@ -1614,6 +1615,28 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_periodic )
Test3D_periodic(domain3,k);
}
BOOST_AUTO_TEST_CASE( grid_dist_id_unbound_ghost )
{
// Domain
Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
long int k = 32*32*32*create_vcluster().getProcessingUnits();
k = std::pow(k, 1/3.);
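// k is the per-side resolution: starting from 32^3 points per processing
// unit, the cube root keeps the total number of grid points roughly
// proportional to the number of processing units.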
Test3D_unb_ghost(domain3,k);
}
BOOST_AUTO_TEST_CASE( grid_dist_id_unbound_ghost_periodic )
{
// Domain
Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
long int k = 32*32*32*create_vcluster().getProcessingUnits();
k = std::pow(k, 1/3.);
Test3D_unb_ghost_periodic(domain3,k);
}
BOOST_AUTO_TEST_SUITE_END()
#endif
/*
* grid_dist_id_unit_test_unb_ghost.hpp
*
* Created on: Jul 11, 2016
* Author: i-bird
*/
#ifndef SRC_GRID_GRID_DIST_ID_UNIT_TEST_UNB_GHOST_HPP_
#define SRC_GRID_GRID_DIST_ID_UNIT_TEST_UNB_GHOST_HPP_
void Test3D_unb_ghost(const Box<3,float> & domain, long int k)
{
long int big_step = k / 30;
big_step = (big_step == 0)?1:big_step;
long int small_step = 21;
if (create_vcluster().getProcessingUnits() > 48)
return;
print_test( "Testing 3D grid unbound ghost k<=",k);
// 3D test
for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
{
BOOST_TEST_CHECKPOINT( "Testing 3D grid k=" << k );
// grid size
size_t sz[3];
sz[0] = k;
sz[1] = k;
sz[2] = k;
// Ghost
Ghost<3,float> g(0.49);
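// A ghost of 0.49 is nearly half the domain, deliberately larger than a
// single sub-domain: this exercises the unbound-ghost case this commit fixes
// (the old "Ghost are bigger than one sub-domain" checks were removed above).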
// Distributed grid with id decomposition
grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g);
g_dist.getDecomposition().write("no_bound_decomposition");
// check the consistency of the decomposition
bool val = g_dist.getDecomposition().check_consistency();
BOOST_REQUIRE_EQUAL(val,true);
// Grid sm
grid_sm<3,void> info(sz);
// get the domain iterator
size_t count = 0;
auto dom = g_dist.getDomainIterator();
while (dom.isNext())
{
auto key = dom.get();
auto key_g = g_dist.getGKey(key);
g_dist.template get<0>(key) = info.LinId(key_g);
// Count the points
count++;
++dom;
}
// Get the virtual cluster machine
Vcluster & vcl = g_dist.getVC();
// reduce
vcl.sum(count);
vcl.execute();
// Check
BOOST_REQUIRE_EQUAL(count,(size_t)k*k*k);