Commit fd9a74fd authored by incardon

Dynamic load balancing fixed

parent 9034c2b1
......@@ -22,6 +22,9 @@
*
*/
//#define SE_CLASS1
//#define STOP_ON_ERROR
//! \cond [inclusion] \endcond
#include "Vector/vector_dist.hpp"
#include <math.h>
......@@ -1096,7 +1099,7 @@ int main(int argc, char* argv[])
//! \cond [load balancing] \endcond
// vd.addComputationCosts(md);
vd.addComputationCosts(md);
// vd.getDecomposition().getDistribution().write("AFTER_DECOMPOSE1");
// vd.getDecomposition().rebalance(1);
......@@ -1132,6 +1135,9 @@ int main(int argc, char* argv[])
//! \cond [main loop] \endcond
double time_mean = 0.0;
double time_min = 1000.0;
double time_max = 0.0;
size_t write = 0;
size_t it = 0;
size_t it_reb = 0;
......@@ -1147,16 +1153,31 @@ int main(int argc, char* argv[])
{
vd.map();
it_reb = 0;
vd.getDecomposition().write("DLB_BEFORE_");
// it_reb = 0;
ModelCustom md;
vd.addComputationCosts(md);
vd.getDecomposition().rebalance(1);
vd.getDecomposition().decompose();
if (v_cl.getProcessUnitID() == 0)
std::cout << "REBALANCED " << std::endl;
}
vd.map();
if (it_reb == 200)
{
vd.getDecomposition().write("DLB_AFTER_");
it_reb = 0;
vd.addComputationCosts(md);
std::cout << "PROCESSOR LOAD: " << vd.getDecomposition().getDistribution().getProcessorLoad() << " MEAN: " << time_mean / 200 << " MIN: " << time_min << " MAX: " << time_max << std::endl;
time_mean = 0.0;
time_min = 1000.0;
time_max = 0.0;
}
vd.ghost_get<type,rho,Pressure,velocity>();
// Calculate pressure from the density
......@@ -1173,6 +1194,9 @@ int main(int argc, char* argv[])
// Get the maximum viscosity term across processors
v_cl.max(max_visc);
v_cl.execute();
time_mean += it_time.getwct();
time_min = std::min(time_min,it_time.getwct());
time_max = std::max(time_max,it_time.getwct());
// Calculate delta t integration
double dt = calc_deltaT(vd,max_visc);
......
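Taken together, the hunks above are the dynamic load balancing cycle this commit fixes: at the rebalance trigger the particles are remapped, per-particle computation costs are recomputed through ModelCustom, the domain is re-decomposed, and every 200 iterations the wall-clock statistics of the loop body are printed and reset (the MEAN is time_mean / 200 because the window is 200 iterations). A minimal sketch of the rebalance step, using only calls visible in this diff; the trigger condition is this sketch's assumption:

// Hedged sketch of one rebalance step, as wired up in this commit
if (it_reb == 200)                     // trigger period assumed, matching the stats window
{
    it_reb = 0;
    vd.map();                          // move particles to their current owners

    ModelCustom md;                    // computation-cost model used by this commit
    vd.addComputationCosts(md);        // attach costs to the decomposition graph
    vd.getDecomposition().decompose(); // full re-decomposition (ParMetis underneath)

    vd.map();                          // migrate particles onto the new decomposition
}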
......@@ -34,6 +34,9 @@
template<unsigned int dim, typename T>
class ParMetisDistribution
{
//! Is distributed
bool is_distributed = false;
//! Vcluster
Vcluster & v_cl;
......@@ -207,6 +210,100 @@ class ParMetisDistribution
return &(v->get(i).get(0));
}
/*! \brief It updates the full decomposition
*
*
*/
void postDecomposition()
{
//! Get the processor id
size_t p_id = v_cl.getProcessUnitID();
//! Get the number of processing units
size_t Np = v_cl.getProcessingUnits();
// Number of local vertices
size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
//! Get result partition for this processors
idx_t * partition = parmetis_graph.getPartition();
//! Prepare vector of arrays to contain all partitions
partitions.get(p_id).resize(nl_vertex);
std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));
// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
for (size_t i = 0; i < Np; ++i)
{
v_per_proc.get(i).clear();
}
// Communicate the local distribution to the other processors
// so that each one can individually reconstruct the global graph
openfpm::vector<size_t> prc;
openfpm::vector<size_t> sz;
openfpm::vector<void *> ptr;
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
partitions.get(i).clear();
prc.add(i);
sz.add(nl_vertex * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
if (prc.size() == 0)
v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
else
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
// Update graphs with the received data
updateGraphs();
/////////////////////////////////////////
// Get result partition for this processor
/* idx_t * partition = parmetis_graph.getPartition();
//! Prepare vector of arrays to contain all partitions
partitions.get(p_id).resize(nl_vertex.id);
std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0));
// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
for (size_t i = 0; i < Np; ++i)
{
v_per_proc.get(i).clear();
}
openfpm::vector<size_t> prc;
openfpm::vector<size_t> sz;
openfpm::vector<void *> ptr;
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
partitions.get(i).clear();
prc.add(i);
sz.add(nl_vertex.id * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
// Exchange informations through processors
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
NONE);
// Update graphs with the new distributions
updateGraphs();*/
}
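sendrecvMultipleMessagesNBX does not receive into preallocated buffers: for every incoming message it calls an allocation callback that returns the destination pointer. The tail of that callback, message_receive, is visible near the top of this file's diff. A hedged reconstruction of the full callback, with the usual Vcluster callback signature and argument names assumed:

// Called once per incoming message; must return a writable buffer of msg_i bytes
// msg_i : size in bytes of this message
// i     : rank of the sending processor
// ptr   : user pointer passed to sendrecvMultipleMessagesNBX (&partitions here)
static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p,
                              size_t i, size_t ri, void * ptr)
{
    openfpm::vector<openfpm::vector<idx_t>> * v =
        static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);

    v->get(i).resize(msg_i / sizeof(idx_t)); // make room for the slice from rank i
    return &(v->get(i).get(0));              // receive directly into it
}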
public:
/*! Constructor of the ParMetisDistribution class
......@@ -214,7 +311,7 @@ public:
* \param v_cl Vcluster to use as communication object in this class
*/
ParMetisDistribution(Vcluster & v_cl)
:v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
:is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
{
}
......@@ -305,51 +402,18 @@ public:
*/
void decompose()
{
//! Get the processor id
size_t p_id = v_cl.getProcessUnitID();
//! Get the number of processing units
size_t Np = v_cl.getProcessingUnits();
// Number of local vertices
size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
if (is_distributed == false)
parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
else
parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);
//! Decompose
parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
//! Get result partition for this processors
idx_t *partition = parmetis_graph.getPartition();
//! Prepare vector of arrays to contain all partitions
partitions.get(p_id).resize(nl_vertex);
std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));
// Communicate the local distribution to the other processors
// to reconstruct individually the global graph
openfpm::vector<size_t> prc;
openfpm::vector<size_t> sz;
openfpm::vector<void *> ptr;
// Update the full decomposition after the partitioning
postDecomposition();
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
prc.add(i);
sz.add(nl_vertex * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
if (prc.size() == 0)
v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
else
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
// Update graphs with the received data
updateGraphs();
is_distributed = true;
}
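The new is_distributed flag is what makes decompose() safely repeatable: the first call builds the ParMetis sub-graph, every later call resets the existing one before re-partitioning. A hedged call-sequence sketch; the graph setup is omitted and the instance name is illustrative:

Vcluster & v_cl = create_vcluster();
ParMetisDistribution<3, float> dist(v_cl);
// ... graph construction omitted (e.g. a Cartesian grid over the domain) ...

dist.decompose(); // first call: initSubGraph + partition, then is_distributed = true
dist.decompose(); // later calls: reset the existing graph, then re-partition
dist.refine();    // incremental adaptation, now also routed through postDecomposition()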
/*! \brief Refine current decomposition
......@@ -360,51 +424,13 @@ public:
*/
void refine()
{
size_t Np = v_cl.getProcessingUnits();
size_t p_id = v_cl.getProcessUnitID();
// Number of local vertices
rid nl_vertex = vtxdist.get(p_id+1) - vtxdist.get(p_id);
// Reset parmetis graph and reconstruct it
parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);
// Refine
parmetis_graph.refine<nm_v::proc_id>(vtxdist);
// Get result partition for this processor
idx_t * partition = parmetis_graph.getPartition();
partitions.get(p_id).resize(nl_vertex.id);
std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0));
// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
for (size_t i = 0; i < Np; ++i)
{
v_per_proc.get(i).clear();
}
openfpm::vector<size_t> prc;
openfpm::vector<size_t> sz;
openfpm::vector<void *> ptr;
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
partitions.get(i).clear();
prc.add(i);
sz.add(nl_vertex.id * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
// Exchange information between processors
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
NONE);
// Update graphs with the new distributions
updateGraphs();
postDecomposition();
}
/*! \brief Compute the unbalance of the processor compared to the optimal balance
......@@ -574,6 +600,7 @@ public:
const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
{
is_distributed = dist.is_distributed;
gr = dist.gr;
domain = dist.domain;
gp = dist.gp;
......@@ -587,6 +614,7 @@ public:
const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
{
is_distributed = dist.is_distributed;
v_cl = dist.v_cl;
gr = dist.gr;
domain = dist.domain;
......
......@@ -45,6 +45,44 @@
#define NO_GHOST 0
#define WITH_GHOST 2
//! General function to get a cell-list
template<unsigned int dim, typename St, typename CellL, typename Vector>
struct gcl
{
/*! \brief Get the Cell list based on the type
*
* \param vd Distributed vector
* \param r_cut Cut-off radius
* \param g Ghost
*
* \return the constructed cell-list
*
*/
static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.getCellList(r_cut);
}
};
//! Specialization of gcl to get a Hilbert cell-list
template<unsigned int dim, typename St, typename Vector>
struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector>
{
/*! \brief Get the Cell list based on the type
*
* \param vd Distributed vector
* \param r_cut Cut-off radius
* \param g Ghost
*
* \return the constructed cell-list
*
*/
static inline CellList_hilb<dim, St, FAST, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.getCellList_hilb(r_cut,g);
}
};
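gcl is a small compile-time dispatcher: the primary template builds a plain cell-list through getCellList, while the partial specialization for CellList_hilb forwards to getCellList_hilb, which additionally needs the ghost. A hedged usage sketch; the 3D/float parameters and the names vd, r_cut and g are illustrative:

typedef CellList<3, float, FAST, shift<3, float>> cl_std;
typedef CellList_hilb<3, float, FAST, shift<3, float>> cl_hilb;

// Same call site, different constructor selected purely by the cell-list type
cl_std cls = gcl<3, float, cl_std, decltype(vd)>::get(vd, r_cut, g);   // -> vd.getCellList(r_cut)
cl_hilb clh = gcl<3, float, cl_hilb, decltype(vd)>::get(vd, r_cut, g); // -> vd.getCellList_hilb(r_cut, g)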
/*! \brief Distributed vector
*
* This class represents a distributed vector, the distribution of the structure
......@@ -129,6 +167,32 @@ private:
}
/*! \brief Check if the cell-list must be reconstructed because the domain has changed
* due to load balancing
*
* \param bx box on which the current cell-list was built
* \param r_cut cut-off radius
*
* \return true if the cell-list must be reconstructed
*
*/
bool check_cl_reconstruct(const Box<dim,St> & bx, St r_cut)
{
// Get the ghost and enlarge it by 1.3%
Ghost<dim,St> g = getDecomposition().getGhost();
g.magnify(1.013);
// Division array
size_t div[dim];
// get the processor bounding box
Box<dim, St> pbox = getDecomposition().getProcessorBounds();
// Compute the cell divisions; cl_param_calculate also enlarges pbox by the ghost
cl_param_calculate(pbox, div, r_cut, g);
return !bx.isContained(pbox);
}
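Note that cl_param_calculate enlarges pbox by the (magnified) ghost before the test, so the comparison is between the box the cell-list was built on and the ghost-extended processor box. A small illustration of the containment logic with hypothetical 2D boxes, reading a.isContained(b) as "b lies inside a":

Box<2, float> cl_box({0.0, 0.0}, {1.0, 1.0}); // box the cell-list was built on
Box<2, float> pbox1({0.1, 0.1}, {0.9, 0.9});  // processor box after a mild rebalance
Box<2, float> pbox2({0.1, 0.1}, {1.2, 0.9});  // processor box that outgrew the old domain

// cl_box.isContained(pbox1) == true  -> keep the cell-list, just repopulate it
// cl_box.isContained(pbox2) == false -> a fresh cell-list has to be constructed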
public:
//! space type
......@@ -137,6 +201,9 @@ public:
//! dimensions of space
static const unsigned int dims = dim;
//! Self type
typedef vector_dist<dim,St,prop,Decomposition,Memory> self;
/*! \brief Operator= for distributed vector
*
* \param v vector to copy
......@@ -446,9 +513,28 @@ public:
*/
template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellList(CellL & cell_list)
{
populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC);
// This function assumes equal spacing in all directions;
// in the worst case we take the maximum cell size as the cut-off radius
St r_cut = 0;
for (size_t i = 0 ; i < dim ; i++)
r_cut = std::max(r_cut,cell_list.getCellBox().getHigh(i));
cell_list.set_gm(g_m);
// Here we have to check that the box covered by the cell-list still contains the domain box of this
// processor; if it does not, the cell-list must be completely reconstructed from scratch
bool to_reconstruct = check_cl_reconstruct(cell_list.getDomain(),r_cut);
if (to_reconstruct == false)
{
populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC);
cell_list.set_gm(g_m);
}
else
{
CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost());
cell_list.swap(cli_tmp);
}
}
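For the caller nothing changes: updateCellList() keeps its signature and the reconstruction is transparent. A hedged sketch of the intended usage pattern under dynamic load balancing; the loop structure and variable names are assumed:

auto NN = vd.getCellList(r_cut); // built once, before the time loop

while (t < t_end)
{
    vd.map();                    // after a rebalance, particles may change processor
    vd.ghost_get<>();
    vd.updateCellList(NN);       // repopulates NN, or transparently swaps in a rebuilt
                                 // cell-list when the processor domain has changed

    // ... compute interactions using NN ...
}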
/*! \brief Update a cell list using the stored particles
......