Commit 852762d0 authored by Pietro Incardona's avatar Pietro Incardona
Browse files

Start refactorization

parent ed109c36
......@@ -30,6 +30,7 @@
#include "ie_ghost.hpp"
#include "nn_processor.hpp"
#include "GraphMLWriter.hpp"
#include "ParMetisDistribution.hpp"
#define BASICDEC_ERROR 2000lu
......@@ -82,7 +83,7 @@
*
*/
template<unsigned int dim, typename T, typename Memory = HeapMemory,
template<unsigned int dim, typename T, typename Distribution = ParMetisDistribution<dim,T> ,typename Memory = HeapMemory,
template<unsigned int, typename > class Domain = Box>
class BasicDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>
{
......@@ -131,54 +132,6 @@ private:
//! Cell-list that store the geometrical information of the local internal ghost boxes
CellList<dim, T, FAST> lgeo_cell;
//! Convert the graph to parmetis format
Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
//! Processor sub-sub-domain graph
Graph_CSR<nm_v, nm_e> sub_g;
//! Global sub-sub-domain graph
Graph_CSR<nm_v, nm_e> gp;
//! Init vtxdist needed for Parmetis
openfpm::vector<idx_t> vtxdist;
//! partitions
openfpm::vector<openfpm::vector<idx_t>> partitions;
//! Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
openfpm::vector<openfpm::vector<size_t>> v_per_proc;
//! Number of moved vertices in all iterations
size_t g_moved = 0;
//! Max number of moved vertices in all iterations
size_t m_moved = 0;
//! Wn for SAR heuristic
float w_n = 0;
//! Computation cost for SAR heuristic
float c_c = 5;
//! Number of time-steps since the previous DLB
size_t n_ts = 1;
//! Idle time accumulated so far, needed for SAR heuristic
openfpm::vector<float> i_times;
// Vector to collect all timings
openfpm::vector<long> times;
/*! \brief Callback of sendrecvMultipleMessagesNBX: provide the receive buffer
 * for the partition message coming from processor i
 *
 * \param msg_i size in bytes of the incoming message
 * \param total_msg total size of all messages (unused here)
 * \param total_p total number of sending processors (unused here)
 * \param i rank of the sending processor
 * \param ri request id (unused here)
 * \param ptr user pointer; carries the per-processor partitions vector
 *
 * \return pointer to the start of the buffer where the message for processor i
 *         must be written
 */
static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
{
// ptr carries the partitions vector; resize slot i to hold msg_i/sizeof(idx_t) entries
openfpm::vector < openfpm::vector < idx_t >> *v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);
v->get(i).resize(msg_i / sizeof(idx_t));
return &(v->get(i).get(0));
}
/*! \brief Constructor, it decompose and distribute the sub-domains across the processors
*
* \param v_cl Virtual cluster, used internally for communications
......@@ -382,6 +335,45 @@ private:
*/
}
/*! \brief Calculate communication and migration costs
 *
 * For every vertex of the global graph gp it sets a migration cost
 * proportional to the sub-sub-domain volume, and for every edge a
 * communication cost proportional to the vertex computation cost,
 * scaled by the number of elapsed time-steps.
 *
 * \param gh_s ghost thickness
 * \param ts how many timesteps have passed since last calculation, used to approximate the cost
 */
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
	SpaceBox<dim, T> cellBox = cd.getCellBox();

	// side length of a sub-sub-domain (cells are assumed cubic: side 0 is used)
	float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));

	// ghost volume for the 2 dim case: thickness times one side
	float gh_v = (gh_s * b_s);

	// multiply by the sub-sub-domain side for each additional dimension
	for (size_t i = 2; i < dim; i++)
		gh_v *= b_s;

	// NOTE(review): if gh_v > 1 this cast truncates norm to 0 and zeroes every
	// migration cost — confirm gh_v is always < 1 here
	size_t norm = (size_t) (1.0 / gh_v);

	// migration cost is proportional to the volume of a sub-sub-domain
	float migration = pow(b_s, dim);

	size_t prev = 0;

	for (size_t i = 0; i < gp.getNVertex(); i++)
	{
		gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();

		// communication cost of each outgoing edge scales with the vertex
		// computation cost and with the elapsed time-steps
		for (size_t s = 0; s < gp.getNChilds(i); s++)
		{
			gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
		}
		prev += gp.getNChilds(i);
	}
}
// Save the ghost boundaries
Ghost<dim, T> ghost;
......@@ -420,195 +412,7 @@ private:
}
}
/* \brief Fill the local sub-graph with the first (linear) decomposition
 *
 * Copies into sub_g the vertices assigned to this processor by the initial
 * linear distribution stored in vtxdist, together with their edges.
 * On processor 0 it additionally tags every vertex of the main graph with
 * the rank that owns it (output/debug purpose only).
 */
void fillSubGraph()
{
// number of processors and rank of this processor
int Np = v_cl.getProcessingUnits();
int p_id = v_cl.getProcessUnitID();
// vertices in [vtxdist(p_id), vtxdist(p_id+1)) belong to this processor;
// local_j is the id of the vertex inside sub_g
for (size_t j = vtxdist.get(p_id), local_j = 0; j < vtxdist.get(p_id + 1); j++, local_j++)
{
// Add vertex
nm_v pv = gp.vertexById(j);
sub_g.addVertex(pv);
// Add edges of vertex
// NOTE(review): edges are read with gp.edge(j + s) — presumably an offset
// into the edge list relative to the vertex; confirm against the
// Graph_CSR API (updateGraphs uses the same pattern with gp.edge(l + s))
for (size_t s = 0; s < gp.getNChilds(j); s++)
{
nm_e pe = gp.edge(j + s);
sub_g.template addEdge<NoCheck>(local_j, gp.getChild(j, s), pe);
}
}
// Just for output purpose: processor 0 marks the owner rank of every vertex
// of the main graph, following the same linear distribution
if (p_id == 0)
{
for (int i = 0; i < Np; i++)
{
for (size_t j = vtxdist.get(i); j < vtxdist.get(i + 1); j++)
{
gp.vertexById(j).template get<nm_v::proc_id>() = i;
}
}
}
}
/* \brief Update main graph and sub-graph with the partitions received from all
 * processors, then renumber both graphs
 *
 * Reads the per-processor partition arrays (member 'partitions'), reassigns
 * each vertex of the main graph gp to its new owner, rebuilds the local
 * sub-graph sub_g with the vertices now owned by this processor, recomputes
 * vtxdist (the per-processor vertex distribution required by Parmetis) and
 * finally renumbers both graphs so ids are contiguous per processor.
 * Also updates the moved-vertices statistics g_moved / m_moved.
 */
void updateGraphs()
{
int Np = v_cl.getProcessingUnits();
int p_id = v_cl.getProcessUnitID();
// stats info: number of vertices that changed owner in this update
size_t moved = 0;
// reset sub graph and local sub-graph index
int local_j = 0;
sub_g.clear();
// Init n_vtxdist to gather informations about the new decomposition
openfpm::vector < idx_t > n_vtxdist(Np + 1);
for (int i = 0; i <= Np; i++)
n_vtxdist.get(i) = 0;
// Update main graph with other partitions made by Parmetis in other processors and the local partition
for (int i = 0; i < Np; i++)
{
int ndata = partitions.get(i).size();
// Update the main graph with received informations
// k walks the received partition array, l walks the global vertex ids
// previously owned by processor i
for (int k = 0, l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, l++)
{
// Create new n_vtxdist (1) (just count processors vertices)
n_vtxdist.get(partitions.get(i).get(k) + 1)++;
// count vertices whose owner changed (for the moved statistics)
if
( gp.vertexById(l).template get<nm_v::proc_id>() != partitions.get(i).get(k))
moved++;
// Update proc id in the vertex
gp.vertexById(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);
gp.vertex(l).template get<nm_v::global_id>() = l;
// Add vertex to temporary structure of distribution (needed to update main graph)
v_per_proc.get(partitions.get(i).get(k)).add(gp.getVertexOldId(l));
// Add vertices belonging to this processor in sub graph
if (partitions.get(i).get(k) == p_id)
{
nm_v pv = gp.vertexById(l);
sub_g.addVertex(pv);
// Add edges of vertex
for (size_t s = 0; s < gp.getNChildsByVertexId(l); s++)
{
nm_e pe = gp.edge(l + s);
sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s), pe);
}
local_j++;
}
}
}
// Create new n_vtxdist (2) (prefix sum: turn per-processor counts into boundaries)
for (int i = 2; i <= Np; i++)
{
n_vtxdist.get(i) += n_vtxdist.get(i - 1);
}
// Copy the new decomposition in the main vtxdist
for (int i = 0; i <= Np; i++)
{
vtxdist.get(i) = n_vtxdist.get(i);
}
// Renumbering sub-graph: map the new contiguous ids [vtxdist(p_id), vtxdist(p_id+1))
// onto the local vertices
sub_g.reset_map_ids();
for (size_t j = vtxdist.get(p_id), i = 0; j < vtxdist.get(p_id + 1); j++, i++)
{
sub_g.set_map_ids(j, sub_g.vertex(i).template get<nm_v::global_id>());
sub_g.vertex(i).template get<nm_v::id>() = j;
}
// Renumbering main graph: for each processor p, its vertices (recorded in
// v_per_proc) get the contiguous range [vtxdist(p), vtxdist(p+1))
for (size_t p = 0; p < Np; p++)
{
for (size_t j = vtxdist.get(p), i = 0; j < vtxdist.get(p + 1); j++, i++)
{
gp.set_map_ids(j, v_per_proc.get(p).get(i));
gp.vertex(v_per_proc.get(p).get(i)).template get<nm_v::id>() = j;
}
}
// accumulate statistics: total and maximum moved vertices across iterations
g_moved += moved;
if (moved > m_moved)
m_moved = moved;
}
/*! \brief Calculate communication and migration costs
 *
 * For every vertex of the global graph gp it sets a migration cost
 * proportional to the sub-sub-domain volume, and for every edge a
 * communication cost proportional to the vertex computation cost,
 * scaled by the number of elapsed time-steps.
 *
 * \param gh_s ghost thickness
 * \param ts how many timesteps have passed since last calculation, used to approximate the cost
 */
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
	SpaceBox<dim, T> cellBox = cd.getCellBox();

	// side length of a sub-sub-domain (cells are assumed cubic: side 0 is used)
	float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));

	// ghost volume for the 2 dim case: thickness times one side
	float gh_v = (gh_s * b_s);

	// multiply by the sub-sub-domain side for each additional dimension
	for (size_t i = 2; i < dim; i++)
		gh_v *= b_s;

	// NOTE(review): if gh_v > 1 this cast truncates norm to 0 and zeroes every
	// migration cost — confirm gh_v is always < 1 here
	size_t norm = (size_t) (1.0 / gh_v);

	// migration cost is proportional to the volume of a sub-sub-domain
	float migration = pow(b_s, dim);

	size_t prev = 0;

	for (size_t i = 0; i < gp.getNVertex(); i++)
	{
		gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();

		// communication cost of each outgoing edge scales with the vertex
		// computation cost and with the elapsed time-steps
		for (size_t s = 0; s < gp.getNChilds(i); s++)
		{
			gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
		}
		prev += gp.getNChilds(i);
	}
}
// Heap memory receiver
HeapMemory hp_recv;
......@@ -922,99 +726,6 @@ public:
CreateDecomposition(v_cl);
}
/*! \brief Refine current decomposition
 *
 * It makes a refinement of the current decomposition using the Parmetis
 * function RefineKWay, then exchanges the resulting partitions between
 * all processors and remaps the graphs accordingly (updateGraphs).
 */
void refine()
{
size_t Np = v_cl.getProcessingUnits();
size_t p_id = v_cl.getProcessUnitID();
// TODO: 0.01 (ghost thickness) should not be hard-coded; must be given by the caller
computeCommunicationAndMigrationCosts(0.01, n_ts);
// Reset parmetis graph and reconstruct it
parmetis_graph.reset(gp, sub_g);
// Refine
parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);
// Get result partition for this processor and copy it into the local slot
idx_t * partition = parmetis_graph.getPartition();
partitions.get(p_id).resize(sub_g.getNVertex());
std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));
// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
for (int i = 0; i < Np; ++i)
{
v_per_proc.get(i).clear();
}
// build the send lists: destination ranks, message sizes, send buffers
openfpm::vector < size_t > prc;
openfpm::vector < size_t > sz;
openfpm::vector<void *> ptr;
for (size_t i = 0; i < Np; i++)
{
if (i != v_cl.getProcessUnitID())
{
// clear the slot that will receive processor i's partition, and send
// this processor's own partition (slot p_id) to processor i
partitions.get(i).clear();
prc.add(i);
sz.add(sub_g.getNVertex() * sizeof(idx_t));
ptr.add(partitions.get(p_id).getPointer());
}
}
// Exchange informations through processors; message_receive provides the
// per-sender receive buffers inside 'partitions'
v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
NONE);
// Update graphs with the new distributions
updateGraphs();
}
/*! \brief Gather timing information and decide if a re-balance is needed,
 * using the SAR (Stop At Rise) heuristic
 *
 * Gathers the iteration time of every processor, accumulates the idle time
 * (t_max - t_avg) and compares the new average cost per time-step against
 * the previous one: when the cost starts to rise a re-balance is requested
 * and the heuristic state is reset.
 *
 * \param t iteration time measured on the local processor
 *
 * \return true if a dynamic load balance should be performed
 */
bool balanceNeeded(long t)
{
	float t_max = 0, t_avg = 0;

	// Exchange time informations through processors
	v_cl.allGather(t, times);
	v_cl.execute();

	t_max = *(std::max_element(std::begin(times), std::end(times)));

	// use a float initial value: with 0 (int) the long times would be
	// accumulated into an int (truncation/overflow) before the division
	t_avg = std::accumulate(times.begin(), times.end(), 0.0f) / v_cl.getProcessingUnits();

	// add idle time to vector
	i_times.add(t_max - t_avg);

	// NOTE(review): SAR normally accumulates (sums) the idle times, but this
	// takes the maximum — confirm which is intended
	double it_sum = *(std::max_element(std::begin(i_times), std::end(i_times)));

	// new average cost per time-step (c_c is the re-balance cost estimate)
	float nw_n = (it_sum + c_c) / n_ts;

	if (nw_n > w_n)
	{
		// cost started to rise: request a re-balance and reset the heuristic
		i_times.clear();
		n_ts = 1;
		w_n = nw_n;
		return true;
	}
	else
	{
		++n_ts;
		w_n = nw_n;
		return false;
	}
}
/*! \brief Get the number of local sub-domains
*
* \return the number of sub-domains
......@@ -1157,61 +868,6 @@ public:
return true;
}
/*! \brief Return the position of a vertex in space
 *
 * \param id vertex id
 * \param pos output vector receiving x, y (and z when dim == 3);
 *            it must already be sized by the caller
 */
void getVertexPosition(size_t id, openfpm::vector<real_t> &pos)
{
	auto & vrt = gp.vertex(id);

	pos.get(0) = vrt.template get<nm_v::x>();
	pos.get(1) = vrt.template get<nm_v::y>();

	if (dim == 3)
		pos.get(2) = vrt.template get<nm_v::z>();
}
/*! \brief Set the computation weight of a vertex
 *
 * \param id vertex id
 * \param weight computational cost to assign to the vertex
 */
void setVertexWeight(size_t id, size_t weight)
{
gp.vertex(id).template get<nm_v::computation>() = weight;
}
/*! \brief Return the total number of vertices moved across all iterations so far
 *
 * \return the accumulated number of moved vertices (g_moved)
 */
size_t getTotalMovedV()
{
return g_moved;
}
/*! \brief Return the maximum number of vertices moved in a single iteration so far
 *
 * \return the per-iteration maximum of moved vertices (m_moved)
 */
size_t getMaxMovedV()
{
return m_moved;
}
void debugPrint()
{
std::cout << "Subdomains\n";
......
......@@ -2,17 +2,17 @@
LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS)
noinst_PROGRAMS = cart_dec metis_dec dom_box
cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
cart_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
cart_dec_CFLAGS = $(CUDA_CFLAGS)
cart_dec_LDADD = $(LINKLIBS) -lmetis
metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
metis_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
metis_dec_CFLAGS = $(CUDA_CFLAGS)
metis_dec_LDADD = $(LINKLIBS) -lmetis
dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
dom_box_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
dom_box_CFLAGS = $(CUDA_CFLAGS)
dom_box_LDADD = $(LINKLIBS)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment