From fd9a74fdbaeca529d149566b474fef32ffdf434b Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Wed, 1 Feb 2017 22:52:45 +0100 Subject: [PATCH] Dynamic load Balancing fixed --- example/Vector/7_SPH_dlb/main.cpp | 30 ++- .../Distribution/ParMetisDistribution.hpp | 188 ++++++++++-------- src/Vector/vector_dist.hpp | 90 ++++++++- 3 files changed, 223 insertions(+), 85 deletions(-) diff --git a/example/Vector/7_SPH_dlb/main.cpp b/example/Vector/7_SPH_dlb/main.cpp index c8c204080..83c5a0a06 100644 --- a/example/Vector/7_SPH_dlb/main.cpp +++ b/example/Vector/7_SPH_dlb/main.cpp @@ -22,6 +22,9 @@ * */ +//#define SE_CLASS1 +//#define STOP_ON_ERROR + //! \cond [inclusion] \endcond #include "Vector/vector_dist.hpp" #include <math.h> @@ -1096,7 +1099,7 @@ int main(int argc, char* argv[]) //! \cond [load balancing] \endcond -// vd.addComputationCosts(md); + vd.addComputationCosts(md); // vd.getDecomposition().getDistribution().write("AFTER_DECOMPOSE1"); // vd.getDecomposition().rebalance(1); @@ -1132,6 +1135,9 @@ int main(int argc, char* argv[]) //! \cond [main loop] \endcond + double time_mean = 0.0; + double time_min = 1000.0; + double time_max = 0.0; size_t write = 0; size_t it = 0; size_t it_reb = 0; @@ -1147,16 +1153,31 @@ int main(int argc, char* argv[]) { vd.map(); - it_reb = 0; + vd.getDecomposition().write("DLB_BEFORE_"); +// it_reb = 0; ModelCustom md; vd.addComputationCosts(md); - vd.getDecomposition().rebalance(1); + vd.getDecomposition().decompose(); if (v_cl.getProcessUnitID() == 0) std::cout << "REBALANCED " << std::endl; } vd.map(); + + + if (it_reb == 200) + { + vd.getDecomposition().write("DLB_AFTER_"); + + it_reb = 0; + vd.addComputationCosts(md); + std::cout << "PROCESSOR LOAD: " << vd.getDecomposition().getDistribution().getProcessorLoad() << " MEAN: " << time_mean / 200 << " MIN: " << time_min << " MAX: " << time_max << std::endl; + time_mean = 0.0; + time_min = 1000.0; + time_max = 0.0; + } + vd.ghost_get<type,rho,Pressure,velocity>(); // Calculate pressure from the density @@ -1173,6 +1194,9 @@ int main(int argc, char* argv[]) // Get the maximum viscosity term across processors v_cl.max(max_visc); v_cl.execute(); + time_mean += it_time.getwct(); + time_min = std::min(time_min,it_time.getwct()); + time_max = std::max(time_max,it_time.getwct()); // Calculate delta t integration double dt = calc_deltaT(vd,max_visc); diff --git a/src/Decomposition/Distribution/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp index 0c3c09182..d0b9b0c24 100644 --- a/src/Decomposition/Distribution/ParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp @@ -34,6 +34,9 @@ template<unsigned int dim, typename T> class ParMetisDistribution { + //! Is distributed + bool is_distributed = false; + //! Vcluster Vcluster & v_cl; @@ -207,6 +210,100 @@ class ParMetisDistribution return &(v->get(i).get(0)); } + /*! \brief It update the full decomposition + * + * + */ + void postDecomposition() + { + //! Get the processor id + size_t p_id = v_cl.getProcessUnitID(); + + //! Get the number of processing units + size_t Np = v_cl.getProcessingUnits(); + + // Number of local vertex + size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id; + + //! Get result partition for this processors + idx_t * partition = parmetis_graph.getPartition(); + + //! Prepare vector of arrays to contain all partitions + partitions.get(p_id).resize(nl_vertex); + std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0)); + + // Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph) + for (size_t i = 0; i < Np; ++i) + { + v_per_proc.get(i).clear(); + } + + // Communicate the local distribution to the other processors + // to reconstruct individually the global graph + openfpm::vector<size_t> prc; + openfpm::vector<size_t> sz; + openfpm::vector<void *> ptr; + + for (size_t i = 0; i < Np; i++) + { + if (i != v_cl.getProcessUnitID()) + { + partitions.get(i).clear(); + prc.add(i); + sz.add(nl_vertex * sizeof(idx_t)); + ptr.add(partitions.get(p_id).getPointer()); + } + } + + if (prc.size() == 0) + v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE); + else + v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE); + + // Update graphs with the received data + updateGraphs(); + + + ///////////////////////////////////////// + + + // Get result partition for this processor +/* idx_t * partition = parmetis_graph.getPartition(); + + //! Prepare vector of arrays to contain all partitions + partitions.get(p_id).resize(nl_vertex.id); + std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0)); + + // Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph) + for (size_t i = 0; i < Np; ++i) + { + v_per_proc.get(i).clear(); + } + + openfpm::vector<size_t> prc; + openfpm::vector<size_t> sz; + openfpm::vector<void *> ptr; + + for (size_t i = 0; i < Np; i++) + { + if (i != v_cl.getProcessUnitID()) + { + partitions.get(i).clear(); + prc.add(i); + sz.add(nl_vertex.id * sizeof(idx_t)); + ptr.add(partitions.get(p_id).getPointer()); + } + } + + // Exchange informations through processors + v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions, + NONE); + + // Update graphs with the new distributions + updateGraphs();*/ + } + + public: /*! Constructor for the ParMetis class @@ -214,7 +311,7 @@ public: * \param v_cl Vcluster to use as communication object in this class */ ParMetisDistribution(Vcluster & v_cl) - :v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits()) + :is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits()) { } @@ -305,51 +402,18 @@ public: */ void decompose() { - - //! Get the processor id - size_t p_id = v_cl.getProcessUnitID(); - - //! Get the number of processing units - size_t Np = v_cl.getProcessingUnits(); - - // Number of local vertex - size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id; - - parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights); + if (is_distributed == false) + parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights); + else + parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights); //! Decompose parmetis_graph.decompose<nm_v::proc_id>(vtxdist); - //! Get result partition for this processors - idx_t *partition = parmetis_graph.getPartition(); - - //! Prepare vector of arrays to contain all partitions - partitions.get(p_id).resize(nl_vertex); - std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0)); - - // Communicate the local distribution to the other processors - // to reconstruct individually the global graph - openfpm::vector<size_t> prc; - openfpm::vector<size_t> sz; - openfpm::vector<void *> ptr; + // update after decomposition + postDecomposition(); - for (size_t i = 0; i < Np; i++) - { - if (i != v_cl.getProcessUnitID()) - { - prc.add(i); - sz.add(nl_vertex * sizeof(idx_t)); - ptr.add(partitions.get(p_id).getPointer()); - } - } - - if (prc.size() == 0) - v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE); - else - v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE); - - // Update graphs with the received data - updateGraphs(); + is_distributed = true; } /*! \brief Refine current decomposition @@ -360,51 +424,13 @@ public: */ void refine() { - size_t Np = v_cl.getProcessingUnits(); - size_t p_id = v_cl.getProcessUnitID(); - - // Number of local vertex - rid nl_vertex = vtxdist.get(p_id+1) - vtxdist.get(p_id); - // Reset parmetis graph and reconstruct it parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights); // Refine parmetis_graph.refine<nm_v::proc_id>(vtxdist); - // Get result partition for this processor - idx_t * partition = parmetis_graph.getPartition(); - - partitions.get(p_id).resize(nl_vertex.id); - std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0)); - - // Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph) - for (size_t i = 0; i < Np; ++i) - { - v_per_proc.get(i).clear(); - } - - openfpm::vector<size_t> prc; - openfpm::vector<size_t> sz; - openfpm::vector<void *> ptr; - - for (size_t i = 0; i < Np; i++) - { - if (i != v_cl.getProcessUnitID()) - { - partitions.get(i).clear(); - prc.add(i); - sz.add(nl_vertex.id * sizeof(idx_t)); - ptr.add(partitions.get(p_id).getPointer()); - } - } - - // Exchange informations through processors - v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions, - NONE); - - // Update graphs with the new distributions - updateGraphs(); + postDecomposition(); } /*! \brief Compute the unbalance of the processor compared to the optimal balance @@ -574,6 +600,7 @@ public: const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist) { + is_distributed = dist.is_distributed; gr = dist.gr; domain = dist.domain; gp = dist.gp; @@ -587,6 +614,7 @@ public: const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist) { + is_distributed = dist.is_distributed; v_cl = dist.v_cl; gr = dist.gr; domain = dist.domain; diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 4e2132e12..9bcffed3b 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -45,6 +45,44 @@ #define NO_GHOST 0 #define WITH_GHOST 2 +//! General function t get a cell-list +template<unsigned int dim, typename St, typename CellL, typename Vector> +struct gcl +{ + /*! \brief Get the Cell list based on the type + * + * \param vd Distributed vector + * \param r_cut Cut-off radius + * \param g Ghost + * + * \return the constructed cell-list + * + */ + static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) + { + return vd.getCellList(r_cut); + } +}; + +//! General function t get a cell-list +template<unsigned int dim, typename St, typename Vector> +struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector> +{ + /*! \brief Get the Cell list based on the type + * + * \param vd Distributed vector + * \param r_cut Cut-off radius + * \param g Ghost + * + * \return the constructed cell-list + * + */ + static inline CellList_hilb<dim, St, FAST, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) + { + return vd.getCellList_hilb(r_cut,g); + } +}; + /*! \brief Distributed vector * * This class reppresent a distributed vector, the distribution of the structure @@ -129,6 +167,32 @@ private: } + /*! \brief Check if the cell-list must be reconstructed because domain has changed + * because of load balancing + * + * \param bx Box to reconstruct + * + * \return true if the cell-list must be reconstructed + * + */ + bool check_cl_reconstruct(const Box<dim,St> & bx, St r_cut) + { + // Get ghost and anlarge by 1% + Ghost<dim,St> g = getDecomposition().getGhost(); + g.magnify(1.013); + + // Division array + size_t div[dim]; + + // get the processor bounding box + Box<dim, St> pbox = getDecomposition().getProcessorBounds(); + + // Processor bounding box + cl_param_calculate(pbox, div, r_cut, g); + + return !bx.isContained(pbox); + } + public: //! space type @@ -137,6 +201,9 @@ public: //! dimensions of space static const unsigned int dims = dim; + //! Self type + typedef vector_dist<dim,St,prop,Decomposition,Memory> self; + /*! \brief Operator= for distributed vector * * \param v vector to copy @@ -446,9 +513,28 @@ public: */ template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellList(CellL & cell_list) { - populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC); + // This function assume equal spacing in all directions + // but in the worst case we take the maximum + St r_cut = 0; + for (size_t i = 0 ; i < dim ; i++) + r_cut = std::max(r_cut,cell_list.getCellBox().getHigh(i)); - cell_list.set_gm(g_m); + // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this + // processor. if it is not like that we have to completely reconstruct from stratch + bool to_reconstruct = check_cl_reconstruct(cell_list.getDomain(),r_cut); + + if (to_reconstruct == false) + { + populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC); + + cell_list.set_gm(g_m); + } + else + { + CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost()); + + cell_list.swap(cli_tmp); + } } /*! \brief Update a cell list using the stored particles -- GitLab