Commit 852762d0 authored by Pietro Incardona

Start refactorization

parent ed109c36
@@ -30,6 +30,7 @@
#include "ie_ghost.hpp"
#include "nn_processor.hpp"
#include "GraphMLWriter.hpp"
#include "ParMetisDistribution.hpp"
#define BASICDEC_ERROR 2000lu
@@ -82,7 +83,7 @@
*
*/
-template<unsigned int dim, typename T, typename Memory = HeapMemory,
+template<unsigned int dim, typename T, typename Distribution = ParMetisDistribution<dim, T>, typename Memory = HeapMemory,
template<unsigned int, typename > class Domain = Box>
class BasicDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>
{
@@ -131,54 +132,6 @@ private:
//! Cell-list that store the geometrical information of the local internal ghost boxes
CellList<dim, T, FAST> lgeo_cell;
//! Convert the graph to parmetis format
Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
//! Processor sub-sub-domain graph
Graph_CSR<nm_v, nm_e> sub_g;
//! Global sub-sub-domain graph
Graph_CSR<nm_v, nm_e> gp;
//! Init vtxdist needed for Parmetis
openfpm::vector<idx_t> vtxdist;
//! Partitions computed by ParMetis, one vector per processor
openfpm::vector<openfpm::vector<idx_t>> partitions;
//! Data structure to keep track of the new vertex distribution across processors (needed to update the main graph)
openfpm::vector<openfpm::vector<size_t>> v_per_proc;
//! Number of moved vertices in all iterations
size_t g_moved = 0;
//! Max number of moved vertices in all iterations
size_t m_moved = 0;
//! Wn for SAR heuristic
float w_n = 0;
//! Computation cost for SAR heuristic
float c_c = 5;
//! Number of time-steps since the previous DLB (dynamic load balancing) step
size_t n_ts = 1;
//! Idle time accumulated so far, needed for SAR heuristic
openfpm::vector<float> i_times;
//! Vector to collect all timings
openfpm::vector<long> times;
/*! \brief Message-receive callback for sendrecvMultipleMessagesNBX: resize
* the buffer for the message of processor i and return where it must be written
*/
static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
{
openfpm::vector < openfpm::vector < idx_t >> *v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);
v->get(i).resize(msg_i / sizeof(idx_t));
return &(v->get(i).get(0));
}
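For reference, a minimal standalone sketch of the receive-callback contract used by sendrecvMultipleMessagesNBX: for every incoming message the communicator asks the callback where msg_i bytes from sender i should land, and the callback returns a buffer of exactly that size. The driver in main() is a hypothetical stand-in for the communicator, and std::vector stands in for openfpm::vector:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

using idx_t = std::int64_t; // ParMetis index type (assumption: 64-bit idx_t build)

// Same shape as message_receive above: resize the slot of sender i and
// hand back a pointer where msg_i bytes can be copied.
static void * receive_slot(size_t msg_i, size_t, size_t, size_t i, size_t, void * ptr)
{
    auto * v = static_cast<std::vector<std::vector<idx_t>> *>(ptr);
    (*v)[i].resize(msg_i / sizeof(idx_t));
    return (*v)[i].data();
}

int main()
{
    std::vector<std::vector<idx_t>> partitions(2);

    // Pretend processor 1 sent three partition ids; the communicator asks
    // the callback for a destination buffer, then copies the payload in.
    idx_t incoming[3] = {0, 1, 1};
    void * dst = receive_slot(sizeof(incoming), 1, 2, 1, 0, &partitions);
    std::memcpy(dst, incoming, sizeof(incoming));

    return partitions[1][2] == 1 ? 0 : 1;
}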
/*! \brief Constructor, it decompose and distribute the sub-domains across the processors
*
* \param v_cl Virtual cluster, used internally for communications
@@ -382,6 +335,45 @@ private:
*/
}
/*! \brief Calculate communication and migration costs
*
* \param gh_s ghost thickness
* \param ts how many timesteps have passed since last calculation, used to approximate the cost
*/
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
size_t p_id = v_cl.getProcessUnitID();
float migration;
SpaceBox<dim, T> cellBox = cd.getCellBox();
float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));
// compute the ghost volume for the 2D case
float gh_v = (gh_s * b_s);
// multiply by the sub-sub-domain side for each additional dimension
for(int i = 2 ; i < dim; i++)
gh_v *= b_s;
size_t norm = (size_t) (1.0 / gh_v);
migration = pow(b_s, dim);
size_t prev = 0;
for (size_t i = 0; i < gp.getNVertex(); i++)
{
gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();
for (size_t s = 0; s < gp.getNChilds(i); s++)
{
gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
}
prev += gp.getNChilds(i);
}
}
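As a quick sanity check of the model above: the ghost volume is gh_v = gh_s * b_s^(dim-1), the normalization is norm = 1/gh_v, each vertex gets migration cost norm * b_s^dim * computation, and each outgoing edge gets communication cost computation * ts. A standalone sketch with illustrative numbers (all input values below are assumptions, not values taken from the class):

#include <cmath>
#include <cstdio>

int main()
{
    // Illustrative inputs
    const int dim = 3;      // dimensionality of the domain
    float gh_s = 0.01f;     // ghost thickness
    float b_s = 0.1f;       // sub-sub-domain side length
    size_t ts = 5;          // time-steps since the last calculation
    size_t computation = 4; // computation weight of one vertex

    // Ghost volume gh_s * b_s^(dim-1), built incrementally as above
    float gh_v = gh_s * b_s;
    for (int i = 2; i < dim; i++)
        gh_v *= b_s;
    size_t norm = (size_t) (1.0 / gh_v);

    // Per-vertex migration cost and per-edge communication cost
    double migration = std::pow(b_s, dim);
    double m_cost = norm * migration * computation;
    size_t c_cost = computation * ts;

    std::printf("migration = %g, communication = %zu\n", m_cost, c_cost);
    return 0;
}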
// Save the ghost boundaries
Ghost<dim, T> ghost;
@@ -420,195 +412,7 @@ private:
}
}
/*! \brief Fill the graph of the processor with the first (linear) decomposition
*
* Put the vertices owned by this processor into the processor sub-graph
* (different on each processor). It uses the members sub_g (sub-graph to
* fill), gp (main graph, source of the vertices) and vtxdist (distribution
* of the vertices across the processors).
*/
void fillSubGraph()
{
int Np = v_cl.getProcessingUnits();
int p_id = v_cl.getProcessUnitID();
for (size_t j = vtxdist.get(p_id), local_j = 0; j < vtxdist.get(p_id + 1); j++, local_j++)
{
// Add vertex
nm_v pv = gp.vertexById(j);
sub_g.addVertex(pv);
// Add edges of vertex
for (size_t s = 0; s < gp.getNChilds(j); s++)
{
nm_e pe = gp.edge(j + s);
sub_g.template addEdge<NoCheck>(local_j, gp.getChild(j, s), pe);
}
}
// Just for output purposes
if (p_id == 0)
{
for (int i = 0; i < Np; i++)
{
for (size_t j = vtxdist.get(i); j < vtxdist.get(i + 1); j++)
{
gp.vertexById(j).template get<nm_v::proc_id>() = i;
}
}
}
}
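For context, vtxdist follows the ParMetis convention: processor p owns the contiguous global vertex range [vtxdist[p], vtxdist[p+1]). A minimal sketch of how such a linear first distribution can be built (the helper name linear_vtxdist is hypothetical, not part of this class):

#include <cstddef>
#include <vector>

// Build the ParMetis-style vtxdist for a linear first decomposition:
// nv vertices split as evenly as possible across Np processors.
std::vector<long> linear_vtxdist(std::size_t nv, std::size_t Np)
{
    std::vector<long> vtxdist(Np + 1);
    for (std::size_t p = 0; p <= Np; p++)
        vtxdist[p] = (long) ((nv * p) / Np); // cumulative range boundaries
    return vtxdist;
}

// Example: 10 vertices on 3 processors give {0, 3, 6, 10}, so processor 1
// owns the global vertices [3, 6).
int main()
{
    std::vector<long> d = linear_vtxdist(10, 3);
    return (d[1] == 3 && d[2] == 6 && d[3] == 10) ? 0 : 1;
}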
/*! \brief Update the main graph and the sub-graph with the newly computed
* partitions and renumber both graphs
*
* It uses the members partitions (one partition vector per processor),
* gp (main graph), sub_g (sub-graph), v_per_proc (needed to reconstruct
* the main graph) and vtxdist (distribution of the vertices across the
* processors).
*/
void updateGraphs()
{
int Np = v_cl.getProcessingUnits();
int p_id = v_cl.getProcessUnitID();
// stats info
size_t moved = 0;
// reset the sub-graph and the local sub-graph index
int local_j = 0;
sub_g.clear();
// Init n_vtxdist to gather information about the new decomposition
openfpm::vector < idx_t > n_vtxdist(Np + 1);
for (int i = 0; i <= Np; i++)
n_vtxdist.get(i) = 0;
// Update the main graph with the partitions computed by Parmetis on the other processors and with the local partition
for (int i = 0; i < Np; i++)
{
int ndata = partitions.get(i).size();
// Update the main graph with the received information
for (int k = 0, l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, l++)
{
// Create new n_vtxdist (1) (just count processors vertices)
n_vtxdist.get(partitions.get(i).get(k) + 1)++;
if (gp.vertexById(l).template get<nm_v::proc_id>() != partitions.get(i).get(k))
moved++;
// Update proc id in the vertex
gp.vertexById(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);
gp.vertex(l).template get<nm_v::global_id>() = l;
// Add vertex to temporary structure of distribution (needed to update main graph)
v_per_proc.get(partitions.get(i).get(k)).add(gp.getVertexOldId(l));
// Add vertices belonging to this processor in sub graph
if (partitions.get(i).get(k) == p_id)
{
nm_v pv = gp.vertexById(l);
sub_g.addVertex(pv);
// Add edges of vertex
for (size_t s = 0; s < gp.getNChildsByVertexId(l); s++)
{
nm_e pe = gp.edge(l + s);
sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s), pe);
}
local_j++;
}
}
}
// Create new n_vtxdist (2) (write boundaries)
for (int i = 2; i <= Np; i++)
{
n_vtxdist.get(i) += n_vtxdist.get(i - 1);
}
// Copy the new decomposition in the main vtxdist
for (int i = 0; i <= Np; i++)
{
vtxdist.get(i) = n_vtxdist.get(i);
}
// Renumbering subgraph
sub_g.reset_map_ids();
for (size_t j = vtxdist.get(p_id), i = 0; j < vtxdist.get(p_id + 1); j++, i++)
{
sub_g.set_map_ids(j, sub_g.vertex(i).template get<nm_v::global_id>());
sub_g.vertex(i).template get<nm_v::id>() = j;
}
// Renumbering main graph
for (size_t p = 0; p < Np; p++)
{
for (size_t j = vtxdist.get(p), i = 0; j < vtxdist.get(p + 1); j++, i++)
{
gp.set_map_ids(j, v_per_proc.get(p).get(i));
gp.vertex(v_per_proc.get(p).get(i)).template get<nm_v::id>() = j;
}
}
g_moved += moved;
if (moved > m_moved)
m_moved = moved;
}
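The two numbered steps above amount to a counting pass followed by a prefix sum: step (1) counts, shifted one slot to the right, how many vertices each processor receives; step (2) turns the counts into cumulative range boundaries. A standalone sketch of just that construction (the helper name rebuild_vtxdist is hypothetical):

#include <cstddef>
#include <vector>

// Two-pass reconstruction of vtxdist from a flat partition assignment,
// mirroring steps (1) and (2) in updateGraphs above.
std::vector<long> rebuild_vtxdist(const std::vector<int> & partition, std::size_t Np)
{
    std::vector<long> n_vtxdist(Np + 1, 0);

    for (int p : partition)               // (1) count vertices per processor
        n_vtxdist[p + 1]++;

    for (std::size_t i = 2; i <= Np; i++) // (2) prefix sum writes the boundaries
        n_vtxdist[i] += n_vtxdist[i - 1];

    return n_vtxdist;
}

// Example: partition {0, 1, 1, 2, 0} on 3 processors gives {0, 2, 4, 5}.
int main()
{
    std::vector<long> d = rebuild_vtxdist({0, 1, 1, 2, 0}, 3);
    return (d[1] == 2 && d[2] == 4 && d[3] == 5) ? 0 : 1;
}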
/* ! \brief Calculate communication and migration costs
*
* \param gh_s ghost thickness
* \param ts how many timesteps have passed since last calculation, used to approximate the cost
*/
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
size_t p_id = v_cl.getProcessUnitID();
float migration;
SpaceBox<dim, T> cellBox = cd.getCellBox();
float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));
// compute the gh_area for 2 dim case
float gh_v = (gh_s * b_s);
// multiply for sub-sub-domain side for each domain
for(int i = 2 ; i < dim; i++)
gh_v *= b_s;
size_t norm = (size_t) (1.0 / gh_v);
migration = pow(b_s, dim);
size_t prev = 0;
for (size_t i = 0; i < gp.getNVertex(); i++)
{
gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();
for (size_t s = 0; s < gp.getNChilds(i); s++)
{
gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
}
prev += gp.getNChilds(i);
}
}
// Heap memory receiver
HeapMemory hp_recv;
@@ -922,99 +726,6 @@ public:
CreateDecomposition(v_cl);
}
/* ! \brief Refine current decomposition
*
* It makes a refinement of the current decomposition using Parmetis function RefineKWay
* After that it also does the remapping of the graph
*
*/
void refine()
{
size_t Np = v_cl.getProcessingUnits();
size_t p_id = v_cl.getProcessUnitID();
// TODO: the ghost thickness (0.01) is a placeholder and must be given by the caller
computeCommunicationAndMigrationCosts(0.01, n_ts);
// Reset parmetis graph and reconstruct it
parmetis_graph.reset(gp, sub_g);
// Refine
parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);
// Get result partition for this processor
idx_t * partition = parmetis_graph.getPartition();
partitions.get(p_id).resize(sub_g.getNVertex());
std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));
// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
for (int i = 0; i < Np; ++i)
{
v_per_proc.get(i).clear();
}
openfpm::vector < size_t > prc;
openfpm::vector < size_t > sz;
openfpm::vector<void *> ptr;
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
partitions.get(i).clear();
prc.add(i);
sz.add(sub_g.getNVertex() * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
// Exchange information between processors
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
NONE);
// Update graphs with the new distributions
updateGraphs();
}
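A hedged sketch of the intended call pattern: time each simulation step, let balanceNeeded() (below) apply the SAR test, and call refine() when it fires. The driver, the Decomposition/Simulation template parameters, and the microsecond timing are illustrative assumptions, not part of this class:

#include <chrono>
#include <cstddef>

// Hypothetical driver loop: 'dec' is a decomposition exposing
// balanceNeeded()/refine(), 'step' runs one simulation time-step.
template<typename Decomposition, typename Simulation>
void run(Decomposition & dec, Simulation step, size_t n_steps)
{
    for (size_t t = 0; t < n_steps; t++)
    {
        auto t0 = std::chrono::steady_clock::now();
        step(); // one time-step of the simulation
        auto t1 = std::chrono::steady_clock::now();
        long us = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count();

        // SAR heuristic: refine only when the accumulated idle time
        // makes a re-decomposition cheaper than keeping the current one
        if (dec.balanceNeeded(us))
            dec.refine();
    }
}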
/*! \brief Gather timing information and decide whether a rebalance is needed
*
* It applies the SAR (stop-at-rise) heuristic to the idle times
* accumulated since the last rebalance
*
* \param t time spent in the last time-step by this processor
*
* \return true if a rebalance is needed
*/
bool balanceNeeded(long t)
{
float t_max = 0, t_avg = 0;
// Exchange timing information between processors
v_cl.allGather(t, times);
v_cl.execute();
t_max = *(std::max_element(std::begin(times), std::end(times)));
t_avg = std::accumulate(times.begin(), times.end(), 0.0f) / v_cl.getProcessingUnits();
// Accumulate the idle time of this time-step
i_times.add(t_max - t_avg);
// Compute W(n) = (sum of idle times since the last rebalance + C) / n
float it_sum = std::accumulate(std::begin(i_times), std::end(i_times), 0.0f);
float nw_n = (it_sum + c_c) / n_ts;
if (nw_n > w_n)
{
// The average cost per time-step started to rise: rebalance now
i_times.clear();
n_ts = 1;
w_n = nw_n;
return true;
}
else
{
++n_ts;
w_n = nw_n;
return false;
}
}
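To make the heuristic concrete, here is a self-contained re-statement of the SAR (stop-at-rise) rule implemented above: W(n) = (sum of idle times + C) / n, where C (c_c) approximates the cost of one rebalance and n (n_ts) counts the time-steps since the last one; rebalance as soon as W(n) rises above its previous value. The struct below is an illustrative re-implementation for clarity, not the class interface:

#include <cstddef>

// Minimal SAR (stop-at-rise) tracker mirroring balanceNeeded() above.
struct Sar
{
    float c_c = 5;      // assumed cost of one rebalance (same default as above)
    float w_n = 0;      // previous value of W(n)
    float idle_sum = 0; // idle time accumulated since the last rebalance
    size_t n_ts = 1;    // time-steps since the last rebalance

    // idle = t_max - t_avg of the current time-step; returns true to rebalance
    bool step(float idle)
    {
        idle_sum += idle;
        float nw_n = (idle_sum + c_c) / n_ts; // W(n)
        bool rebalance = nw_n > w_n;
        w_n = nw_n;
        if (rebalance) { idle_sum = 0; n_ts = 1; }
        else           { ++n_ts; }
        return rebalance;
    }
};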
/*! \brief Get the number of local sub-domains
*
* \return the number of sub-domains
@@ -1157,61 +868,6 @@ public:
return true;
}
/*! \brief Return the position of the vertex in space
*
* \param id vertex id
* \param pos vector that will contain x, y (and z in the 3D case)
*
*/
void getVertexPosition(size_t id, openfpm::vector<real_t> &pos)
{
pos.get(0) = gp.vertex(id).template get<nm_v::x>();
pos.get(1) = gp.vertex(id).template get<nm_v::y>();
if (dim == 3)
pos.get(2) = gp.vertex(id).template get<nm_v::z>();
}
/*! \brief Set the computation weight of the vertex
*
* \param id vertex id
* \param weight the new computation weight
*
*/
void setVertexWeight(size_t id, size_t weight)
{
gp.vertex(id).template get<nm_v::computation>() = weight;
}
/*! \brief Return the total number of vertices moved across all iterations so far
*
* \return the total number of moved vertices
*
*/
size_t getTotalMovedV()
{
return g_moved;
}
/*! \brief Return the maximum number of vertices moved in a single iteration
*
* \return the maximum number of moved vertices
*
*/
size_t getMaxMovedV()
{
return m_moved;
}
void debugPrint()
{
std::cout << "Subdomains\n";
......
@@ -2,17 +2,17 @@
LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS)
noinst_PROGRAMS = cart_dec metis_dec dom_box
-cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
cart_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
cart_dec_CFLAGS = $(CUDA_CFLAGS)
cart_dec_LDADD = $(LINKLIBS) -lmetis
-metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
metis_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
metis_dec_CFLAGS = $(CUDA_CFLAGS)
metis_dec_LDADD = $(LINKLIBS) -lmetis
-dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
dom_box_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
dom_box_CFLAGS = $(CUDA_CFLAGS)
dom_box_LDADD = $(LINKLIBS)
......