diff --git a/src/Decomposition/BasicDecomposition.hpp b/src/Decomposition/BasicDecomposition.hpp
index e2f421cb89162f5a8c3ad1b7f26b7d1109849a64..462751f210122ac6137c6e0d5af2a9663ddd8149 100644
--- a/src/Decomposition/BasicDecomposition.hpp
+++ b/src/Decomposition/BasicDecomposition.hpp
@@ -129,6 +129,24 @@ private:
 	//! Cell-list that store the geometrical information of the local internal ghost boxes
 	CellList<dim, T, FAST> lgeo_cell;
 
+	//! Wrapper that converts the graph into the ParMETIS format
+	Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
+
+	//! Processor sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> sub_g;
+
+	//! Global sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> gp;
+
+	//! vtxdist vector needed by ParMETIS
+	openfpm::vector<idx_t> vtxdist;
+
+	//! Partition vectors, one per processor
+	openfpm::vector<openfpm::vector<idx_t>> partitions;
+
+	//! Data structure to keep track of the new vertex distribution across processors (needed to update the main graph)
+	openfpm::vector<openfpm::vector<size_t>> v_per_proc;
+
 	static void * message_receive(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
 	{
 		openfpm::vector<openfpm::vector<idx_t>> * v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);
@@ -161,7 +179,7 @@ private:
 			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
 		}
 
-		//! MPI initialization
+		//! Get the processor id
 		size_t p_id = v_cl.getProcessUnitID();
 
 		// Here we use PARMETIS
@@ -169,7 +187,7 @@ private:
 		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
 
 		// Processor graph
-		Graph_CSR<nm_v, nm_e> gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
+		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
 
 		//! Add computation information to each vertex (shape can change)
 		addWeights(gp, HYPERBOLOID);
@@ -186,9 +204,6 @@ private:
 		size_t mod_v = gp.getNVertex() % Np;
 		size_t div_v = gp.getNVertex() / Np;
 
-		//! Init vtxdist needed for Parmetis
-		openfpm::vector<idx_t> vtxdist(Np + 1);
-
 		for (int i = 0; i <= Np; i++)
 		{
 			if (i < mod_v)
@@ -204,24 +219,21 @@ private:
 		}
 
 		//TODO transform in factory
-		//! Create subgraph
-		Graph_CSR<nm_v, nm_e> sub_g;
+
 		//! Put vertices into processor graph (different for each processor)
 		fillSubGraph(sub_g, gp, vtxdist, p_id, Np);
 
-		// Convert the graph to parmetis format
-		Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph(sub_g, Np);
-
+		parmetis_graph.initSubGraph(sub_g);
+
 		// Decompose
-		parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
+		parmetis_graph.decompose<nm_v::proc_id>(vtxdist,sub_g);
 
 		// Get result partition for this processors
 		idx_t *partition = parmetis_graph.getPartition();
 
 		// Prepare vector of arrays to contain all partitions
-		openfpm::vector<openfpm::vector<idx_t>> partitions(Np);
 		partitions.get(p_id).resize(sub_g.getNVertex());
 		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
 
@@ -240,9 +252,6 @@ private:
 		}
 
 		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
-
-		// Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		openfpm::vector<openfpm::vector<size_t>> v_per_proc(Np);
 
 		// Update graphs with the new distributions
 		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
@@ -270,60 +279,13 @@ private:
 			}
 		}
 
-		if (p_id == 0) {
+		if (p_id == 0)
+		{
 			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
 			gv2.write("test_graph_3.vtk");
 		}
-
-		// Reset parmetis graph and reconstruct it
-		parmetis_graph.reset(gp);
-
-		// Refine
-		parmetis_graph.refine<nm_v::proc_id>(vtxdist);
-
-		// Get result partition for this processor
-		partition = parmetis_graph.getPartition();
-
-		partitions.get(p_id).resize(sub_g.getNVertex());
-		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
-
-		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		for (int i = 0; i < Np; ++i)
-		{
-			v_per_proc.get(i).clear();
-		}
-
-		prc.clear();
-		sz.clear();
-		ptr.clear();
-
-		for (size_t i = 0 ; i < Np ; i++)
-		{
-			if (i != v_cl.getProcessUnitID())
-			{
-				partitions.get(i).clear();
-				prc.add(i);
-				sz.add(sub_g.getNVertex() * sizeof(idx_t));
-
-//				std::cout << "sub_g: " << sub_g.getNVertex() * sizeof(idx_t) << "\n";
-
-				ptr.add(partitions.get(p_id).getPointer());
-			}
-		}
-
-		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
-
-		// Update graphs with the new distributions
-		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
-		//
-
-		if (p_id == 1) {
-			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
-			gv2.write("test_graph_4.vtk");
-			bool test = compare("test_graph_4.vtk","test_graph_test.vtk");
-			BOOST_REQUIRE_EQUAL(test,true);
-		}
+		refine();
 
 		/*
 		// fill the structure that store the processor id for each sub-domain
@@ -445,6 +407,64 @@ private:
 	// Save the ghost boundaries
 	Ghost<dim, T> ghost;
 
+	void refine()
+	{
+		size_t Np = v_cl.getProcessingUnits();
+		size_t p_id = v_cl.getProcessUnitID();
+
+		// Reset the ParMETIS graph and reconstruct it
+		parmetis_graph.reset(gp,sub_g);
+
+		// Refine
+		parmetis_graph.refine<nm_v::proc_id>(vtxdist,sub_g);
+
+		// Get result partition for this processor
+		idx_t * partition = parmetis_graph.getPartition();
+
+		partitions.get(p_id).resize(sub_g.getNVertex());
+		std::copy(partition,partition+sub_g.getNVertex(),&partitions.get(p_id).get(0));
+
+		// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
+		for (int i = 0; i < Np; ++i)
+		{
+			v_per_proc.get(i).clear();
+		}
+
+
+		openfpm::vector<size_t> prc;
+		openfpm::vector<size_t> sz;
+		openfpm::vector<void *> ptr;
+
+		for (size_t i = 0 ; i < Np ; i++)
+		{
+			if (i != v_cl.getProcessUnitID())
+			{
+				partitions.get(i).clear();
+				prc.add(i);
+				sz.add(sub_g.getNVertex() * sizeof(idx_t));
+
+//				std::cout << "sub_g: " << sub_g.getNVertex() * sizeof(idx_t) << "\n";
+
+				ptr.add(partitions.get(p_id).getPointer());
+			}
+		}
+
+		v_cl.sendrecvMultipleMessagesNBX(prc.size(),&sz.get(0),&prc.get(0),&ptr.get(0),message_receive,&partitions,NONE);
+
+		// Update graphs with the new distributions
+		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, p_id, Np);
+
+		//
+
+		if (p_id == 1)
+		{
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_4.vtk");
+			bool test = compare("test_graph_4.vtk","test_graph_test.vtk");
+			BOOST_REQUIRE_EQUAL(test,true);
+		}
+	}
+
 	/*! \brief Create the subspaces that decompose your domain
 	 *
 	 */
@@ -715,7 +735,8 @@ public:
 	 *
 	 */
 	BasicDecomposition(Vcluster & v_cl) :
-			nn_prcs<dim, T>(v_cl), v_cl(v_cl) {
+			nn_prcs<dim, T>(v_cl), v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
+	{
 		// Reset the box to zero
 		bbox.zero();
 	}
diff --git a/src/parmetis_util.hpp b/src/parmetis_util.hpp
index b9362cadd11da45c1bdf621840c26e0aca3564c4..d37b309e892e468d442f6fed2f069ee41f95eabd 100644
--- a/src/parmetis_util.hpp
+++ b/src/parmetis_util.hpp
@@ -11,6 +11,7 @@
 #include <iostream>
 #include "parmetis.h"
 #include "VTKWriter.hpp"
+#include "VCluster.hpp"
 
 /*! \brief Metis graph structure
  *
@@ -86,6 +87,8 @@ struct Parmetis_graph {
 
 /*! \brief Helper class to define Metis graph
  *
+ * TODO Transform pointer to openfpm vector
+ *
  * \tparam graph structure that store the graph
  *
 */
@@ -95,20 +98,27 @@ class Parmetis {
 	Parmetis_graph Mg;
 
 	// Original graph
-	Graph & g;
+//	Graph & g;
 
 	// Communticator for OpenMPI
 	MPI_Comm comm = NULL;
 
+	// VCluster
+	Vcluster & v_cl;
+
 	// Process rank information
-	int MPI_PROC_ID = 0;
+	int p_id = 0;
+	// Number of partitions
+	size_t nc = 0;
+
 	/*! \brief Construct Adjacency list
 	 *
 	 * \param g Reference graph to get informations
 	 *
 	 */
-	void constructAdjList(Graph &refGraph) {
+	void constructAdjList(Graph &refGraph, Graph & g)
+	{
 		// create xadj and adjlist
 		Mg.vwgt = new idx_t[g.getNVertex()];
 		Mg.xadj = new idx_t[g.getNVertex() + 1];
@@ -158,8 +168,8 @@ class Parmetis {
 		// Fill the last point
 		Mg.xadj[id] = prev;
 
-		/*
-		std::cout << MPI_PROC_ID << "\n";
+
+/*		std::cout << p_id << "\n";
 		for(int i=0; i<= g.getNVertex();i++){
 			std::cout << Mg.xadj[i] << " ";
 		}
@@ -167,8 +177,8 @@ class Parmetis {
 		for(int i=0; i< g.getNEdge();i++){
 			std::cout << Mg.adjncy[i] << " ";
 		}
-		std::cout << "\n\n";
-		*/
+		std::cout << "\n\n";*/
+
 	}
 
@@ -205,7 +215,7 @@ class Parmetis {
 				size_t child = refGraph.getChild(i, s);
 
 				// Check if child is not in this processor
-				if(child > old_vtxdist[MPI_PROC_ID+1] || child < old_vtxdist[MPI_PROC_ID])
+				if(child > old_vtxdist[p_id+1] || child < old_vtxdist[p_id])
 					Mg.adjncy[prev + s] = child;
 
 			}
@@ -220,7 +230,7 @@ class Parmetis {
 
 		Mg.xadj[id] = prev;
 
-		std::cout << MPI_PROC_ID << "\n";
+		std::cout << p_id << "\n";
 		for(int i=0; i<= g.getNVertex();i++){
 			std::cout << Mg.xadj[i] << " ";
 		}
@@ -243,84 +253,13 @@ public:
 	 * \param nc number of partitions
 	 *
 	 */
-	Parmetis(Graph & g, size_t nc) :
-			g(g) {
-		// Init OpenMPI structures
-
+	Parmetis(Vcluster & v_cl, size_t nc)
+	:v_cl(v_cl), nc(nc)
+	{
+		// TODO Move into VCluster
 		MPI_Comm_dup(MPI_COMM_WORLD, &comm);
-		MPI_Comm_rank(MPI_COMM_WORLD, &MPI_PROC_ID);
-
-		// Get the number of vertex
-
-		Mg.nvtxs = new idx_t[1];
-		Mg.nvtxs[0] = g.getNVertex();
-
-		// Set the number of constrains
-
-		Mg.ncon = new idx_t[1];
-		Mg.ncon[0] = 1;
-
-		// Set to null the weight of the vertex
-
-		Mg.vwgt = NULL;
-
-		// construct the adjacency list
-
-		constructAdjList(g);
-
-		// Put the total communication size to NULL
-
-		Mg.vsize = NULL;
-
-		// Set to null the weight of the edge
-
-		Mg.adjwgt = NULL;
-
-		// Set the total number of partitions
-
-		Mg.nparts = new idx_t[1];
-		Mg.nparts[0] = nc;
-
-		//! Set option for the graph partitioning (set as default)
-
-		Mg.options = new idx_t[4];
-		Mg.options[0] = 1;
-		Mg.options[1] = 3;
-		Mg.options[2] = 0;
-		Mg.options[3] = 0;
-
-		//! is an output vector containing the partition for each vertex
-
-		Mg.part = new idx_t[g.getNVertex()];
-		for (int i = 0; i < g.getNVertex(); i++)
-			Mg.part[i] = MPI_PROC_ID;
-
-		//! adaptiveRepart itr value
-		Mg.itr = new real_t[1];
-		Mg.itr[0] = 1000.0;
-
-		//! init tpwgts to have balanced vertices and ubvec
-
-		Mg.tpwgts = new real_t[Mg.nparts[0]];
-		Mg.ubvec = new real_t[Mg.nparts[0]];
-
-		for (int s = 0; s < Mg.nparts[0]; s++) {
-			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
-			Mg.ubvec[s] = 1.05;
-		}
-
-		Mg.edgecut = new idx_t[1];
-		Mg.edgecut[0] = 0;
-
-		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
-		Mg.numflag = new idx_t[1];
-		Mg.numflag[0] = 0;
-
-		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
-		Mg.wgtflag = new idx_t[1];
-		Mg.wgtflag[0] = 2;
 	}
-
+	// TODO Deallocate the new member structures in the destructor
 	/*! \brief destructor
 	 *
@@ -386,14 +325,96 @@
 	}
 	}
 
+	/*! \brief Set the Sub-graph
+	 *
+	 * \param sub_g Sub-graph to set
+	 *
+	 */
+	void initSubGraph(Graph & sub_g)
+	{
+		p_id = v_cl.getProcessUnitID();
+
+		// Get the number of vertices
+
+		Mg.nvtxs = new idx_t[1];
+		Mg.nvtxs[0] = sub_g.getNVertex();
+
+		// Set the number of constraints
+
+		Mg.ncon = new idx_t[1];
+		Mg.ncon[0] = 1;
+
+		// Set to null the weight of the vertex
+
+		Mg.vwgt = NULL;
+
+		// construct the adjacency list
+
+		constructAdjList(sub_g,sub_g);
+
+		// Put the total communication size to NULL
+
+		Mg.vsize = NULL;
+
+		// Set to null the weight of the edge
+
+		Mg.adjwgt = NULL;
+
+		// Set the total number of partitions
+
+		Mg.nparts = new idx_t[1];
+		Mg.nparts[0] = nc;
+
+		//! Set option for the graph partitioning (set as default)
+
+		Mg.options = new idx_t[4];
+		Mg.options[0] = 1;
+		Mg.options[1] = 3;
+		Mg.options[2] = 0;
+		Mg.options[3] = 0;
+
+		//! Output vector containing the partition for each vertex
+
+		Mg.part = new idx_t[sub_g.getNVertex()];
+		for (int i = 0; i < sub_g.getNVertex(); i++)
+			Mg.part[i] = p_id;
+
+		//! adaptiveRepart itr value
+		Mg.itr = new real_t[1];
+		Mg.itr[0] = 1000.0;
+
+		//! init tpwgts to have balanced vertices and ubvec
+
+		Mg.tpwgts = new real_t[Mg.nparts[0]];
+		Mg.ubvec = new real_t[Mg.nparts[0]];
+
+		for (int s = 0; s < Mg.nparts[0]; s++) {
+			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
+			Mg.ubvec[s] = 1.05;
+		}
+
+		Mg.edgecut = new idx_t[1];
+		Mg.edgecut[0] = 0;
+
+		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
+		Mg.numflag = new idx_t[1];
+		Mg.numflag[0] = 0;
+
+		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
+		Mg.wgtflag = new idx_t[1];
+		Mg.wgtflag[0] = 2;
+	}
+
 	/*! \brief Decompose the graph
 	 *
 	 * \tparam i which property store the decomposition
 	 *
+	 *
+	 *
 	 */
-
 	template<unsigned int i>
-	void decompose(openfpm::vector<idx_t> & vtxdist) {
+	void decompose(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
+	{
 		// Decompose
 		ParMETIS_V3_PartKway((idx_t *)vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
@@ -405,9 +426,9 @@
 		*/
 
 		// For each vertex store the processor that contain the data
-		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+		for (size_t j = 0, id = 0; j < sub_g.getNVertex() ; j++, id++) {
 
-			g.vertex(j).template get<i>() = Mg.part[id];
+			sub_g.vertex(j).template get<i>() = Mg.part[id];
 
 		}
 
@@ -420,18 +441,18 @@
 	 */
 	template<unsigned int i>
-	void refine(openfpm::vector<idx_t> & vtxdist)
+	void refine(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
 	{
 		// Refine
-
+
 		ParMETIS_V3_RefineKway((idx_t *)vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
 				Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm);
 
 		// For each vertex store the processor that contain the data
-		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+		for (size_t j = 0, id = 0; j < sub_g.getNVertex(); j++, id++) {
 
-			g.vertex(j).template get<i>() = Mg.part[id];
+			sub_g.vertex(j).template get<i>() = Mg.part[id];
 
 		}
 	}
 
@@ -446,7 +467,8 @@
 	/*! \brief Reset graph and reconstruct it
 	 *
 	 */
-	void reset(Graph & mainGraph) {
+	void reset(Graph & mainGraph, Graph & sub_g)
+	{
 		// Deallocate the graph structures
 
 		if (Mg.xadj != NULL) {
@@ -469,16 +491,15 @@
 			delete[] Mg.part;
 		}
 
-		Mg.nvtxs[0] = g.getNVertex();
+		Mg.nvtxs[0] = sub_g.getNVertex();
 
-		Mg.part = new idx_t[g.getNVertex()];
+		Mg.part = new idx_t[sub_g.getNVertex()];
 
-		for (int i = 0; i < g.getNVertex(); i++)
-			Mg.part[i] = MPI_PROC_ID;
+		for (int i = 0; i < sub_g.getNVertex(); i++)
+			Mg.part[i] = p_id;
 
 		// construct the adjacency list
-		constructAdjList(mainGraph);
-
+		constructAdjList(mainGraph,sub_g);
 	}
 
 };
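
Note for reviewers: the sketch below summarizes the call sequence this patch moves the decomposition to. It only uses names that appear in the diff (Parmetis, initSubGraph, decompose, refine, reset, fillSubGraph, updateGraphs, nm_v::proc_id); the surrounding setup that builds gp, vtxdist and sub_g is elided, so treat it as an illustration of the intended flow rather than compilable test code.

    // One Parmetis wrapper per process, now constructed from the VCluster
    // instead of from a graph (as done in the BasicDecomposition constructor)
    Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph(v_cl, v_cl.getProcessingUnits());

    // Build the local sub-graph and hand it to the wrapper; this replaces the
    // old constructor-time conversion
    fillSubGraph(sub_g, gp, vtxdist, p_id, Np);
    parmetis_graph.initSubGraph(sub_g);

    // Initial k-way decomposition: writes the target processor of each vertex
    // into the nm_v::proc_id property of sub_g
    parmetis_graph.decompose<nm_v::proc_id>(vtxdist, sub_g);

    // ... exchange the partitions with sendrecvMultipleMessagesNBX and call
    // updateGraphs(...) as in decompose() ...

    // Later passes reuse the same wrapper: rebuild the adjacency from the
    // updated graphs, then refine the existing partition instead of recomputing it
    parmetis_graph.reset(gp, sub_g);
    parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);

The main design point is that parmetis_graph, sub_g, gp, vtxdist, partitions and v_per_proc are now class members initialized in the BasicDecomposition constructor, so the refinement step can be factored into refine() and repeated without rebuilding the ParMETIS data structures from scratch.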