Newer
Older
/*
* CartDecomposition.hpp
*
* Created on: Aug 15, 2014
* Author: Pietro Incardona
*/
#ifndef CARTDECOMPOSITION_HPP
#define CARTDECOMPOSITION_HPP
#include "config.h"
#include "Decomposition.hpp"
#include <vector>
#include "global_const.hpp"
#include <initializer_list>
#include "SubdomainGraphNodes.hpp"
#include "metis_util.hpp"
#include "dec_optimizer.hpp"
#include "Space/Shape/Box.hpp"
#include <unordered_map>
#include "NN/CellList/CellList.hpp"
#include "common.hpp"
#include "ie_loc_ghost.hpp"
#include "ie_ghost.hpp"
#include "nn_processor.hpp"
/**
* \brief This class decompose a space into subspaces
*
* \tparam dim is the dimensionality of the physical domain we are going to decompose.
* \tparam T type of the space we decompose, Real, Integer, Complex ...
* \tparam layout to use
* \tparam Memory Memory factory used to allocate memory
* \tparam Domain Structure that contain the information of your physical domain
*
* Given an N-dimensional space, this class decompose the space into a Cartesian grid of small
* sub-sub-domain. At each sub-sub-domain is assigned an id that identify which processor is
* going to take care of that part of space (in general the space assigned to a processor is
* simply connected), a second step merge several sub-sub-domain with same id into bigger region
* sub-domain with the id. Each sub-domain has an extended space called ghost part
*
* Assuming that VCluster.getProcessUnitID(), equivalent to the MPI processor rank, return the processor local
* processor id, we define
*
* * local sub-domain: all the sub-domain with id == local processor
* * external ghost box: (or ghost box) are the boxes that compose the ghost space of the processor, or the
* boxes produced expanding every local sub-domain by the ghost extension and intersecting with the sub-domain
* of the other processors
* * Near processors are the processors adjacent to the local processor, where with adjacent we mean all the processor
* that has a non-zero intersection with the ghost part of the local processor, or all the processors that
* produce non-zero external boxes with the local processor, or all the processor that should communicate
* in case of ghost data synchronization
* * internal ghost box: is the part of ghost of the near processor that intersect the space of the
* processor, or the boxes produced expanding the sub-domain of the near processors with the local sub-domain
* * Near processor sub-domain: is a sub-domain that live in the a near (or contiguous) processor
* * Near processor list: the list of all the near processor of the local processor (each processor has a list
* of the near processor)
* * Local ghosts interal or external are all the ghosts that does not involve inter-processor communications
*
* \see calculateGhostBoxes() for a visualization of internal and external ghost boxes
template<unsigned int dim, typename T, template<typename> class device_l=openfpm::device_cpu, typename Memory=HeapMemory, template<unsigned int, typename> class Domain=Box>
class CartDecomposition : public ie_loc_ghost<dim,T>, public nn_prcs<dim,T> , public ie_ghost<dim,T>
//! Type of the domain we are going to decompose
typedef T domain_type;
//! It simplify to access the SpaceBox element
typedef SpaceBox<dim,T> Box;
private:
//! This is the key type to access data_s, for example in the case of vector
typedef typename openfpm::vector<SpaceBox<dim,T>,device_l<SpaceBox<dim,T>>,Memory,openfpm::vector_grow_policy_default,openfpm::vect_isel<SpaceBox<dim,T>>::value >::access_key acc_key;
openfpm::vector<SpaceBox<dim,T>> sub_domains;
//! for each sub-domain, contain the list of the neighborhood processors
openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
//! Structure that contain for each sub-sub-domain box the processor id
//! Structure that store the cartesian grid information
grid_sm<dim,void> gr;
//! Structure that decompose your structure into cell without creating them
//! useful to convert positions to CellId or sub-domain id in this case
CellDecomposer_sm<dim,T> cd;
//! rectangular domain to decompose
Domain<dim,T> domain;
//! Box Spacing
T spacing[dim];
//! Runtime virtual cluster machine
Vcluster & v_cl;
//! Cell-list that store the geometrical information of the local internal ghost boxes
CellList<dim,T,FAST> lgeo_cell;
/*! \brief Create internally the decomposition
*
* \param v_cl Virtual cluster, used internally to handle or pipeline communication
*
*/
void CreateDecomposition(Vcluster & v_cl)
{
// Calculate the total number of box and and the spacing
// on each direction
// Get the box containing the domain
SpaceBox<dim,T> bs = domain.getBox();
for (unsigned int i = 0; i < dim ; i++)
{
// Calculate the spacing
spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
}
// Here we use METIS
// Create a cartesian grid graph
CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(gr.getSize(),domain);
// Get the number of processing units
size_t Np = v_cl.getProcessingUnits();
// Get the processor id
long int p_id = v_cl.getProcessUnitID();
// Convert the graph to metis
Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);
// fill the structure that store the processor id for each sub-domain
// Optimize the decomposition creating bigger spaces
// And reducing Ghost over-stress
dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize());
// set of Boxes produced by the decomposition optimizer
openfpm::vector<::Box<dim,size_t>> loc_box;
d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor);
// Initialize ss_box and bbox
if (loc_box.size() >= 0)
{
SpaceBox<dim,size_t> sub_dc = loc_box.get(0);
SpaceBox<dim,T> sub_d(sub_dc);
sub_d.mul(spacing);
sub_d.expand(spacing);
// Fixing sub-domains to cover all the domain
// Fixing sub_d
// if (loc_box) is a the boundary we have to ensure that the box span the full
// domain (avoiding rounding off error)
for (size_t i = 0 ; i < dim ; i++)
{
if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
{
sub_d.setHigh(i,domain.getHigh(i));
}
}
// add the sub-domain
sub_domains.add(sub_d);
ss_box = sub_d;
for (size_t s = 1 ; s < loc_box.size() ; s++)
SpaceBox<dim,size_t> sub_dc = loc_box.get(s);
SpaceBox<dim,T> sub_d(sub_dc);
// re-scale and add spacing (the end is the starting point of the next domain + spacing)
sub_d.mul(spacing);
sub_d.expand(spacing);
// Fixing sub-domains to cover all the domain
// Fixing sub_d
// if (loc_box) is a the boundary we have to ensure that the box span the full
// domain (avoiding rounding off error)
for (size_t i = 0 ; i < dim ; i++)
{
if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
{
sub_d.setHigh(i,domain.getHigh(i));
}
}
// add the sub-domain
sub_domains.add(sub_d);
// Calculate the bound box
bbox.enclose(sub_d);
// Create the smallest box contained in all sub-domain
ss_box.contained(sub_d);
nn_prcs<dim,T>::create(box_nn_processor, sub_domains);
// fine_s structure contain the processor id for each sub-sub-domain
// with sub-sub-domain we mean the sub-domain decomposition before
// running dec_optimizer (before merging sub-domains)
auto it = gp.getVertexIterator();
while (it.isNext())
{
size_t key = it.get();
// fill with the fine decomposition
fine_s.get(key) = gp.template vertex_p<nm_part_v::id>(key);
++it;
}
// Get the smallest sub-division on each direction
::Box<dim,T> unit = getSmallestSubdivision();
// Get the processor bounding Box
::Box<dim,T> bound = getProcessorBounds();
size_t div[dim];
for (size_t i = 0 ; i < dim ; i++)
div[i] = (size_t)((bound.getHigh(i) - bound.getLow(i)) / unit.getHigh(i));
// Create shift
Point<dim,T> orig;
// p1 point of the Processor bound box is the shift
for (size_t i = 0 ; i < dim ; i++)
orig.get(i) = bound.getLow(i);
// Initialize the geo_cell structure
ie_ghost<dim,T>::Initialize_geo_cell(domain,div,orig);
// Save the ghost boundaries
Ghost<dim,T> ghost;
/*! \brief Create the subspaces that decompose your domain
*
* Create the subspaces that decompose your domain
*
*/
void CreateSubspaces()
{
// Create a grid where each point is a space
grid_sm<dim,void> g(div);
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
// create a grid_key_dx iterator
grid_key_dx_iterator<dim> gk_it(g);
// Divide the space into subspaces
while (gk_it.isNext())
{
//! iterate through all subspaces
grid_key_dx<dim> key = gk_it.get();
//! Create a new subspace
SpaceBox<dim,T> tmp;
//! fill with the Margin of the box
for (int i = 0 ; i < dim ; i++)
{
tmp.setHigh(i,(key.get(i)+1)*spacing[i]);
tmp.setLow(i,key.get(i)*spacing[i]);
}
//! add the space box
sub_domains.add(tmp);
// add the iterator
++gk_it;
}
}
// Heap memory receiver
HeapMemory hp_recv;
// vector v_proc
openfpm::vector<size_t> v_proc;
// Receive counter
size_t recv_cnt;
public:
/*! \brief Cartesian decomposition constructor
*
* \param v_cl Virtual cluster, used internally to handle or pipeline communication
*
*/
CartDecomposition(Vcluster & v_cl)
:nn_prcs<dim,T>(v_cl),v_cl(v_cl)
{
// Reset the box to zero
bbox.zero();
}
//! Cartesian decomposition destructor
~CartDecomposition()
{}
/*! \brief class to select the returned id by ghost_processorID
*
*/
class box_id
{
public:
/*! \brief Return the box id
*
* \param p structure containing the id informations
* \param b_id box_id
*
* \return box id
*
*/
inline static size_t id(p_box<dim,T> & p, size_t b_id)
{
return b_id;
}
};
/*! \brief class to select the returned id by ghost_processorID
*
*/
class processor_id
{
public:
/*! \brief Return the processor id
*
* \param p structure containing the id informations
* \param b_id box_id
*
* \return processor id
*
*/
inline static size_t id(p_box<dim,T> & p, size_t b_id)
{
return p.proc;
}
};
/*! \brief class to select the returned id by ghost_processorID
*
*/
class lc_processor_id
{
public:
/*! \brief Return the near processor id
*
* \param p structure containing the id informations
* \param b_id box_id
*
* \return local processor id
*
*/
inline static size_t id(p_box<dim,T> & p, size_t b_id)
{
return p.lc_proc;
}
};
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
*
* Example: Processor 10 calculate
* B8_0 B9_0 B9_1 and B5_0
*
*
+----------------------------------------------------+
| |
| Processor 8 |
| Sub-domain 0 +-----------------------------------+
| | |
| | |
++--------------+---+---------------------------+----+ Processor 9 |
| | | B8_0 | | Subdomain 0 |
| +------------------------------------+ |
| | | | | |
| | | XXXXXXXXXXXXX XX |B9_0| |
| | B | X Processor 10 X | | |
| Processor 5 | 5 | X Sub-domain 0 X | | |
| Subdomain 0 | _ | X X +----------------------------------------+
| | 0 | XXXXXXXXXXXXXXXX | | |
| | | | | |
| | | | | Processor 9 |
| | | |B9_1| Subdomain 1 |
| | | | | |
| | | | | |
| | | | | |
+--------------+---+---------------------------+----+ |
| |
+-----------------------------------+
and also
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
+----------------------------------------------------+
| |
| Processor 8 |
| Sub-domain 0 +-----------------------------------+
| +---------------------------------------------+ |
| | G8_0 | | |
++--------------+------------------------------------+ | Processor 9 |
| | | | | Subdomain 0 |
| | | |G9_0| |
| | | | | |
| | | XXXXXXXXXXXXX XX | | |
| | | X Processor 10 X | | |
| Processor|5 | X Sub-domain 0 X | | |
| Subdomain|0 | X X +-----------------------------------+
| | | XXXXXXXXXXXXXXXX | | |
| | G | | | |
| | 5 | | | Processor 9 |
| | | | | | Subdomain 1 |
| | 0 | |G9_1| |
| | | | | |
| | | | | |
+--------------+------------------------------------+ | |
| | | |
+----------------------------------------+----+------------------------------+
*
*
*
* \param ghost margins for each dimensions (p1 negative part) (p2 positive part)
*
^ p2[1]
|
|
+----+----+
| |
| |
p1[0]<-----+ +----> p2[0]
| |
| |
+----+----+
|
v p1[1]
*
*
*/
{
#ifdef DEBUG
// the ghost margins are assumed to be smaller
// than one sub-domain
for (size_t i = 0 ; i < dim ; i++)
{
if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i))
std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one domain" << "\n";
}
}
#endif
// Intersect all the local sub-domains with the sub-domains of the contiguous processors
// create the internal structures that store ghost information
ie_ghost<dim,T>::create_box_nn_processor_ext(v_cl,ghost,sub_domains,box_nn_processor,*this);
ie_ghost<dim,T>::create_box_nn_processor_int(v_cl,ghost,sub_domains,box_nn_processor,*this);
// ebox must come after ibox (in this case)
ie_loc_ghost<dim,T>::create_loc_ghost_ibox(ghost,sub_domains);
ie_loc_ghost<dim,T>::create_loc_ghost_ebox(ghost,sub_domains);
// get the smallest sub-domain dimension on each direction
for (size_t i = 0 ; i < dim ; i++)
{
if (ghost.template getLow(i) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i))
{
std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one domain" << "\n";
}
}
/*! \brief processorID return in which processor the particle should go
*
* \return processorID
*
*/
template<typename Mem> size_t inline processorID(encapc<1, Point<dim,T>, Mem> p)
// Smallest subdivision on each direction
::Box<dim,T> ss_box;
/*! \brief Get the smallest subdivision of the domain on each direction
*
* \return a box p1 is set to zero
*
*/
const ::Box<dim,T> & getSmallestSubdivision()
{
return ss_box;
}
/*! \brief processorID return in which processor the particle should go
*
* \return processorID
*
*/
size_t inline processorID(const T (&p)[dim]) const
/*! \brief Set the parameter of the decomposition
*
* \param div_ storing into how many domain to decompose on each dimension
* \param domain_ domain to decompose
*
*/
void setParameters(const size_t (& div_)[dim], Domain<dim,T> domain_, Ghost<dim,T> ghost = Ghost<dim,T>())
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
//! Create the decomposition
CreateDecomposition(v_cl);
}
/*! \brief Get the number of local local hyper-cubes or sub-domains
*
* \return the number of sub-domains
*
*/
size_t getNLocalHyperCube()
{
return sub_domains.size();
}
/*! \brief Get the number of one set of hyper-cube enclosing one particular
* subspace, the hyper-cube enclose your space, even if one box is enough
* can be more that one to increase occupancy
*
* In case of Cartesian decomposition it just return 1, each subspace
* has one hyper-cube, and occupancy 1
*
* \param id of the subspace
* \return the number of hyper-cube enclosing your space
*
*/
size_t getNHyperCube(size_t id)
{
return 1;
}
/*! \brief Get the hyper-cube margins id_c has to be 0
*
* Get the hyper-cube margins id_c has to be 0, each subspace
* has one hyper-cube
*
* \param id of the subspace
* \param id_c
* \return The specified hyper-cube space
*
*/
SpaceBox<dim,T> & getHyperCubeMargins(size_t id, size_t id_c)
{
#ifdef DEBUG
// Check if this subspace exist
{
std::cerr << "Error CartDecomposition: id > N_tot";
}
else if (id_c > 0)
{
// Each subspace is an hyper-cube so return error if id_c > 0
std::cerr << "Error CartDecomposition: id_c > 0";
}
#endif
return sub_domains.get<Object>(id);
}
/*! \brief Get the total number of sub-domain for the local processor
* \param i (each local processor can have more than one sub-domain)
* \return the sub-domain
*
*/
SpaceBox<dim,T> getLocalHyperCube(size_t lc)
{
// Create a space box
SpaceBox<dim,T> sp;
// fill the space box
for (size_t k = 0 ; k < dim ; k++)
{
// create the SpaceBox Low and High
sp.setLow(k,sub_domains.template get<Box::p1>(lc)[k]);
sp.setHigh(k,sub_domains.template get<Box::p2>(lc)[k]);
}
return sp;
}
/*! \brief Get the local sub-domain with ghost extension
*
* \param i (each local processor can have more than one sub-domain)
* \return the sub-domain
*
*/
SpaceBox<dim,T> getSubDomainWithGhost(size_t lc)
{
// Create a space box
SpaceBox<dim,T> sp = sub_domains.get(lc);
// enlarge with ghost
sp.enlarge(ghost);
return sp;
}
/*! \brief Return the structure that store the physical domain
*
* Return the structure that store the physical domain
*
* \return The physical domain
*
*/
Domain<dim,T> & getDomain()
{
return domain;
}
/*! \brief Check if the particle is local
*
* \param p object position
template<typename Mem> bool isLocal(const encapc<1, Point<dim,T>, Mem> p) const
return processorID<Mem>(p) == v_cl.getProcessUnitID();
bool isLocal(const T (&pos)[dim]) const
return processorID(pos) == v_cl.getProcessUnitID();
}
::Box<dim,T> bbox;
/*! \brief Return the bounding box containing the processor box + smallest subdomain spacing
*
* \return The bounding box
*
*/
::Box<dim,T> & getProcessorBounds()
{
return bbox;
}
/*! \brief if the point fall into the ghost of some near processor it return the processors id's in which
* it fall
*
* \param p Point
* \return iterator of the processors id's
*
*/
/* inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
{
return geo_cell.getIterator(geo_cell.getCell(p));
////////////// Functions to get decomposition information ///////////////
/*! \brief Write the decomposition as VTK file
*
* The function generate several files
*
* 1) subdomains_X.vtk domain for the local processor (X) as union of sub-domain
* 2) subdomains_adjacent_X.vtk sub-domains adjacent to the local processor (X)
* 3) internal_ghost_X.vtk Internal ghost boxes for the local processor (X)
* 4) external_ghost_X.vtk External ghost boxes for the local processor (X)
* 5) local_internal_ghost_X.vtk internal local ghost boxes for the local processor (X)
* 6) local_external_ghost_X.vtk external local ghost boxes for the local processor (X)
*
* \param output directory where to write the files
*
*/
bool write(std::string output) const
{
//! subdomains_X.vtk domain for the local processor (X) as union of sub-domain
VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1;
vtk_box1.add(sub_domains);
vtk_box1.write(output + std::string("subdomains_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
nn_prcs<dim,T>::write(output);
ie_ghost<dim,T>::write(output,v_cl.getProcessUnitID());
ie_loc_ghost<dim,T>::write(output,v_cl.getProcessUnitID());
/*! \brief function to check the consistency of the information of the decomposition
*
* \return false if is inconsistent
*
*/
bool check_consistency()
{
if (ie_loc_ghost<dim,T>::check_consistency(getNLocalHyperCube()) == false)
return false;
std::cout << "Subdomains\n";
for (size_t p = 0 ; p < sub_domains.size() ; p++)
{
std::cout << ::SpaceBox<dim,T>(sub_domains.get(p)).toString() << "\n";
}
std::cout << "External ghost box\n";
for (size_t p = 0 ; p < nn_prcs<dim,T>::getNNProcessors() ; p++)
{
for (size_t i = 0 ; i < ie_ghost<dim,T>::getProcessorNEGhost(p) ; i++)
std::cout << ie_ghost<dim,T>::getProcessorEGhostBox(p,i).toString() << " prc=" << nn_prcs<dim,T>::IDtoProc(p) << " id=" << ie_ghost<dim,T>::getProcessorEGhostId(p,i) << "\n";
std::cout << "Internal ghost box\n";
for (size_t p = 0 ; p < nn_prcs<dim,T>::getNNProcessors() ; p++)
{
for (size_t i = 0 ; i < ie_ghost<dim,T>::getProcessorNIGhost(p) ; i++)
std::cout << ie_ghost<dim,T>::getProcessorIGhostBox(p,i).toString() << " prc=" << nn_prcs<dim,T>::IDtoProc(p) << " id=" << ie_ghost<dim,T>::getProcessorIGhostId(p,i) << "\n";