From 104f989e7ff687e905684b893f7099d0ce825d8a Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Mon, 2 Nov 2015 15:16:05 +0100
Subject: [PATCH] New classes

---
 src/Decomposition/BasicDecomposition.hpp           | 1125 +++++++++++++++++
 .../BasicDecomposition_unit_test.hpp               |   83 ++
 src/parmetis_util.hpp                              |  486 +++++++
 3 files changed, 1694 insertions(+)
 create mode 100644 src/Decomposition/BasicDecomposition.hpp
 create mode 100644 src/Decomposition/BasicDecomposition_unit_test.hpp
 create mode 100644 src/parmetis_util.hpp

diff --git a/src/Decomposition/BasicDecomposition.hpp b/src/Decomposition/BasicDecomposition.hpp
new file mode 100644
index 000000000..594b85df3
--- /dev/null
+++ b/src/Decomposition/BasicDecomposition.hpp
@@ -0,0 +1,1125 @@
+/*
+ * BasicDecomposition.hpp
+ *
+ * Created on: Oct 07, 2015
+ * Author: Antonio Leo
+ */
+
+#ifndef BASICDECOMPOSITION_HPP
+#define BASICDECOMPOSITION_HPP
+
+#include "config.h"
+#include <cmath>
+#include "VCluster.hpp"
+#include "Graph/CartesianGraphFactory.hpp"
+#include "Decomposition.hpp"
+#include "Vector/map_vector.hpp"
+#include <vector>
+#include <initializer_list>
+#include "SubdomainGraphNodes.hpp"
+#include "parmetis_util.hpp"
+#include "dec_optimizer.hpp"
+#include "Space/Shape/Box.hpp"
+#include "Space/Shape/Point.hpp"
+#include "NN/CellList/CellDecomposer.hpp"
+#include <unordered_map>
+#include "NN/CellList/CellList.hpp"
+#include "Space/Ghost.hpp"
+#include "common.hpp"
+#include "ie_loc_ghost.hpp"
+#include "ie_ghost.hpp"
+#include "nn_processor.hpp"
+#include "GraphMLWriter.hpp"
+
+#define BASICDEC_ERROR 2000lu
+
+// Macro that decides what to do in case of error
+#ifdef STOP_ON_ERROR
+#define ACTION_ON_ERROR() exit(1);
+#elif defined(THROW_ON_ERROR)
+#define ACTION_ON_ERROR() throw BASICDEC_ERROR;
+#else
+#define ACTION_ON_ERROR()
+#endif
+
+/**
+ * \brief This class decomposes a space into sub-domains
+ *
+ * \tparam dim dimensionality of the physical domain we are going to decompose
+ * \tparam T type of the space we decompose: Real, Integer, Complex ...
+ * \tparam Memory memory factory used to allocate memory
+ * \tparam Domain structure that contains the information about your physical domain
+ *
+ * Given an N-dimensional space, this class decomposes the space into a Cartesian grid of small
+ * sub-sub-domains. Each sub-sub-domain is assigned an id that identifies which processor is
+ * going to take care of that part of space (in general the space assigned to a processor is
+ * simply connected). A second step merges several sub-sub-domains with the same id into bigger
+ * sub-domains with that id. Each sub-domain has an extended space called ghost part.
+ *
+ * Assuming that VCluster.getProcessUnitID(), equivalent to the MPI processor rank, returns the
+ * local processor id, we define:
+ *
+ * * local processor: processor rank
+ * * local sub-domain: sub-domain given to the local processor
+ * * external ghost box (or ghost box): the boxes that compose the ghost space of the processor,
+ *   i.e. the boxes produced by expanding every local sub-domain by the ghost extension and
+ *   intersecting with the sub-domains of the other processors
+ * * near processors: the processors adjacent to the local processor, where adjacent means all
+ *   the processors that have a non-zero intersection with the ghost part of the local processor,
+ *   or all the processors that produce non-zero external boxes with the local processor, or all
+ *   the processors that have to communicate in case of ghost data synchronization
+ * * internal ghost box: the part of the ghost of a near processor that intersects the space of
+ *   the local processor, i.e. the boxes produced by expanding the sub-domains of the near
+ *   processors and intersecting them with the local sub-domains
+ * * near processor sub-domain: a sub-domain that lives on a near (or contiguous) processor
+ * * near processor list: the list of all the near processors of the local processor (each
+ *   processor has a list of its near processors)
+ * * local internal or external ghosts: all the ghosts that do not involve inter-processor
+ *   communication
+ *
+ * \see calculateGhostBoxes() for a visualization of internal and external ghost boxes
+ *
+ * ### Create a Cartesian decomposition object on a Box space, distribute, calculate internal and external ghost boxes
+ * \snippet BasicDecomposition_unit_test.hpp Create BasicDecomposition
+ *
+ */
+
+template<unsigned int dim, typename T, typename Memory = HeapMemory, template<unsigned int, typename > class Domain = Box>
+class BasicDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T> {
+
+public:
+
+	//! Type of the domain we are going to decompose
+	typedef T domain_type;
+
+	//! It simplifies the access to the SpaceBox elements
+	typedef SpaceBox<dim, T> Box;
+
+private:
+
+	//! This is the key type to access data_s; for example, in the case of a vector
+	//! acc_key is size_t
+	typedef typename openfpm::vector<SpaceBox<dim, T>, Memory, openfpm::vector_grow_policy_default,
+			openfpm::vect_isel<SpaceBox<dim, T>>::value>::access_key acc_key;
+
+	//! the set of all local sub-domains as a vector
+	openfpm::vector<SpaceBox<dim, T>> sub_domains;
+
+	//! for each sub-domain, the list of the neighborhood processors
+	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
+
+	//! Structure that contains, for each sub-sub-domain box, the processor id;
+	//! it exists for efficient global communication
+	openfpm::vector<size_t> fine_s;
+
+	//! Structure that stores the Cartesian grid information
+	grid_sm<dim, void> gr;
+
+	//! Structure that decomposes the space into cells without creating them,
+	//! useful to convert a position into a cell id (a sub-domain id in this case)
+	CellDecomposer_sm<dim, T> cd;
+
+	//! rectangular domain to decompose
+	Domain<dim, T> domain;
+
+	//! Box spacing
+	T spacing[dim];
+
+	//! Runtime virtual cluster machine
+	Vcluster & v_cl;
+
+	//! Cell-list that stores the geometrical information of the local internal ghost boxes
+	CellList<dim, T, FAST> lgeo_cell;
+
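+	// Overview of CreateDecomposition() below (summary added for clarity):
+	//  1. build a Cartesian grid graph of sub-sub-domains and weight it
+	//  2. split the vertices linearly among the processors (vtxdist)
+	//  3. partition with ParMETIS, exchange the partitions, update the graphs
+	//  4. renumber, then refine the partition with ParMETIS once more
+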
+	/*! \brief It decomposes and distributes the sub-domains across the processors
+	 *
+	 * \param v_cl Virtual cluster, used internally for communications
+	 *
+	 */
+	void CreateDecomposition(Vcluster & v_cl) {
+#ifdef SE_CLASS1
+		if (&v_cl == NULL) {
+			std::cerr << __FILE__ << ":" << __LINE__ << " error: the VCluster instance is null, check that you have initialized it\n";
+			ACTION_ON_ERROR()
+		}
+#endif
+		// Calculate the total number of boxes and the spacing
+		// on each direction
+		// Get the box containing the domain
+		SpaceBox<dim, T> bs = domain.getBox();
+
+		for (unsigned int i = 0; i < dim; i++) {
+			// Calculate the spacing
+			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
+		}
+
+		// MPI initialization
+		int MPI_PROC_ID = 0;
+		MPI_Comm_rank(MPI_COMM_WORLD, &MPI_PROC_ID);
+
+		// Here we use ParMETIS
+		// Create a cartesian grid graph
+		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
+
+		// Processor graph
+		Graph_CSR<nm_v, nm_e> gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
+
+		// Add computation information to each vertex (the shape can change)
+		addWeights(gp, HYPERBOLOID);
+
+		// Init the ids vector
+		gp.init_map_ids();
+
+		// Get the number of processing units
+		size_t Np = v_cl.getProcessingUnits();
+
+		// Division of the vertices among Np processors:
+		// put (div_v + 1) vertices in the first mod_v processors
+		// and div_v vertices in the remaining ones
+		size_t mod_v = gp.getNVertex() % Np;
+		size_t div_v = gp.getNVertex() / Np;
+
+		// Init vtxdist, needed by ParMETIS
+		idx_t *vtxdist = new idx_t[Np + 1];
+
+		for (size_t i = 0; i <= Np; i++) {
+			if (i < mod_v)
+				vtxdist[i] = (div_v + 1) * i;
+			else
+				vtxdist[i] = div_v * i + mod_v;
+		}
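+
+		// Worked example (added for clarity, not part of the original code):
+		// with gp.getNVertex() == 10 and Np == 3, mod_v = 1 and div_v = 3,
+		// so vtxdist = {0, 4, 7, 10}: processor 0 owns the vertices [0,4),
+		// processor 1 owns [4,7) and processor 2 owns [7,10). In general,
+		// processor i owns the contiguous range [vtxdist[i], vtxdist[i+1]),
+		// as ParMETIS expects.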
+
+		// Just for output purpose
+		if (MPI_PROC_ID == 0) {
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_0.vtk");
+		}
+
+		//TODO transform in factory
+		// Create the subgraph
+		Graph_CSR<nm_v, nm_e> sub_g;
+
+		// Put the vertices into the processor graph (different for each processor)
+		fillSubGraph(sub_g, gp, vtxdist, MPI_PROC_ID, Np);
+
+		// Convert the graph to parmetis format
+		Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph(sub_g, Np);
+
+		// Decompose
+		parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
+
+		// Get the resulting partition for this processor
+		idx_t *partition = parmetis_graph.getPartition();
+
+		// Prepare MPI objects for communication
+		MPI_Request *requests_recv = new MPI_Request[Np];
+		MPI_Request *requests_send = new MPI_Request[Np];
+		MPI_Status *statuses = new MPI_Status[Np];
+
+		//-----------------------------------------------
+
+		// Create an MPI datatype for struct nm_v
+		// (nine members, one block of length one each)
+		const int nitems = 9;
+		int blocklengths[9] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+		MPI_Datatype types[9] = { MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT };
+		MPI_Datatype MPI_VERTEX_TYPE;
+		MPI_Aint offsets[9];
+
+		offsets[0] = offsetof(nm_v, x);
+		offsets[1] = offsetof(nm_v, y);
+		offsets[2] = offsetof(nm_v, z);
+		offsets[3] = offsetof(nm_v, communication);
+		offsets[4] = offsetof(nm_v, computation);
+		offsets[5] = offsetof(nm_v, memory);
+		offsets[6] = offsetof(nm_v, id);
+		offsets[7] = offsetof(nm_v, sub_id);
+		offsets[8] = offsetof(nm_v, proc_id);
+
+		MPI_Type_create_struct(nitems, blocklengths, offsets, types, &MPI_VERTEX_TYPE);
+		MPI_Type_commit(&MPI_VERTEX_TYPE);
+
+		//-----------------------------------------------
+
+		// Prepare a vector of arrays to contain all the partitions
+		int **partitions = new int*[Np];
+		partitions[MPI_PROC_ID] = partition;
+		for (int i = 0; i < Np; i++) {
+			if (i != MPI_PROC_ID)
+				partitions[i] = new int[gp.getNVertex()];
+		}
+
+		// Exchange the partitions with the other processors
+		exchangePartitions(partitions, gp.getNVertex(), sub_g.getNVertex(), requests_recv, requests_send, statuses, MPI_PROC_ID, Np);
+
+		// Init the data structure that keeps track of the new vertex distribution
+		// among the processors (needed to update the main graph)
+		std::vector<size_t> *v_per_proc = new std::vector<size_t>[Np];
+
+		// Update the graphs with the new distribution
+		updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, statuses, MPI_PROC_ID, Np);
+
+		if (MPI_PROC_ID == 0) {
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_2.vtk");
+		}
+
+		// Renumber the subgraph
+		sub_g.reset_map_ids();
+		for (size_t j = vtxdist[MPI_PROC_ID], i = 0; j < vtxdist[MPI_PROC_ID + 1]; j++, i++) {
+			sub_g.set_map_ids(j, sub_g.vertex(i).template get<nm_v::id>());
+			sub_g.vertex(i).template get<nm_v::id>() = j;
+		}
+
+		gp.reset_map_ids();
+		// Renumber the main graph
+		for (size_t p = 0; p < Np; p++) {
+			for (size_t j = vtxdist[p], i = 0; j < vtxdist[p + 1]; j++, i++) {
+				gp.set_map_ids(j, gp.vertex(v_per_proc[p][i]).template get<nm_v::id>());
+				gp.vertex(v_per_proc[p][i]).template get<nm_v::id>() = j;
+			}
+		}
+
+		if (MPI_PROC_ID == 0) {
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_3.vtk");
+		}
+
+		// Reset the parmetis graph and reconstruct it
+		parmetis_graph.reset(gp);
+
+		// Refine
+		parmetis_graph.refine<nm_v::proc_id>(vtxdist);
+
+		// Get the resulting partition for this processor
+		partition = parmetis_graph.getPartition();
+
+		// Reset the partitions array
+		partitions[MPI_PROC_ID] = partition;
+		for (int i = 0; i < Np; i++) {
+			if (i != MPI_PROC_ID) {
+				delete[] partitions[i];
+				partitions[i] = new int[gp.getNVertex()];
+			}
+		}
+
+		// Reset the data structure that keeps track of the new vertex distribution
+		// among the processors (needed to update the main graph)
+		for (int i = 0; i < Np; ++i) {
+			v_per_proc[i].clear();
+ } + + // Exchange partitions with other processors + exchangePartitions(partitions, gp.getNVertex(), sub_g.getNVertex(), requests_recv, requests_send, statuses, MPI_PROC_ID, Np); + + // Update graphs with the new distributions + updateGraphs(partitions, gp, sub_g, v_per_proc, vtxdist, statuses, MPI_PROC_ID, Np); + + + if (MPI_PROC_ID == 0) { + VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp); + gv2.write("test_graph_4.vtk"); + } + + /* + // fill the structure that store the processor id for each sub-domain + fine_s.resize(gr.size()); + + // Optimize the decomposition creating bigger spaces + // And reducing Ghost over-stress + dec_optimizer<dim,Graph_CSR<nm_v,nm_e>> d_o(gp,gr.getSize()); + + // set of Boxes produced by the decomposition optimizer + openfpm::vector<::Box<dim,size_t>> loc_box; + + // optimize the decomposition + d_o.template optimize<nm_v::sub_id,nm_v::id>(gp,p_id,loc_box,box_nn_processor); + + // Initialize ss_box and bbox + if (loc_box.size() >= 0) + { + SpaceBox<dim,size_t> sub_dc = loc_box.get(0); + SpaceBox<dim,T> sub_d(sub_dc); + sub_d.mul(spacing); + sub_d.expand(spacing); + + // Fixing sub-domains to cover all the domain + + // Fixing sub_d + // if (loc_box) is a the boundary we have to ensure that the box span the full + // domain (avoiding rounding off error) + for (size_t i = 0 ; i < dim ; i++) + { + if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1) + { + sub_d.setHigh(i,domain.getHigh(i)); + } + } + + // add the sub-domain + sub_domains.add(sub_d); + + ss_box = sub_d; + ss_box -= ss_box.getP1(); + bbox = sub_d; + } + + // convert into sub-domain + for (size_t s = 1 ; s < loc_box.size() ; s++) + { + SpaceBox<dim,size_t> sub_dc = loc_box.get(s); + SpaceBox<dim,T> sub_d(sub_dc); + + // re-scale and add spacing (the end is the starting point of the next domain + spacing) + sub_d.mul(spacing); + sub_d.expand(spacing); + + // Fixing sub-domains to cover all the domain + + // Fixing sub_d + // if (loc_box) is a the boundary we have to ensure that the box span the full + // domain (avoiding rounding off error) + for (size_t i = 0 ; i < dim ; i++) + { + if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1) + { + sub_d.setHigh(i,domain.getHigh(i)); + } + } + + // add the sub-domain + sub_domains.add(sub_d); + + // Calculate the bound box + bbox.enclose(sub_d); + + // Create the smallest box contained in all sub-domain + ss_box.contained(sub_d); + } + + nn_prcs<dim,T>::create(box_nn_processor, sub_domains); + + // fill fine_s structure + // fine_s structure contain the processor id for each sub-sub-domain + // with sub-sub-domain we mean the sub-domain decomposition before + // running dec_optimizer (before merging sub-domains) + it = gp.getVertexIterator(); + + while (it.isNext()) + { + size_t key = it.get(); + + // fill with the fine decomposition + fine_s.get(key) = gp.template vertex_p<nm_v::id>(key); + + ++it; + } + + // Get the smallest sub-division on each direction + ::Box<dim,T> unit = getSmallestSubdivision(); + // Get the processor bounding Box + ::Box<dim,T> bound = getProcessorBounds(); + + // calculate the sub-divisions + size_t div[dim]; + for (size_t i = 0 ; i < dim ; i++) + div[i] = (size_t)((bound.getHigh(i) - bound.getLow(i)) / unit.getHigh(i)); + + // Create shift + Point<dim,T> orig; + + // p1 point of the Processor bound box is the shift + for (size_t i = 0 ; i < dim ; i++) + orig.get(i) = bound.getLow(i); + + // Initialize the geo_cell structure + ie_ghost<dim,T>::Initialize_geo_cell(domain,div,orig); + lgeo_cell.Initialize(domain,div,orig); + */ + } + 
+	// Save the ghost boundaries
+	Ghost<dim, T> ghost;
+
+	/*! \brief Create the subspaces that decompose your domain
+	 *
+	 */
+	void CreateSubspaces() {
+		// Create a grid where each point is a sub-space
+		// (use the grid sizes stored in gr, there is no 'div' member in this class)
+		grid_sm<dim, void> g(gr.getSize());
+
+		// create a grid_key_dx iterator
+		grid_key_dx_iterator<dim> gk_it(g);
+
+		// Divide the space into subspaces
+		while (gk_it.isNext()) {
+			//! iterate through all subspaces
+			grid_key_dx<dim> key = gk_it.get();
+
+			//! Create a new subspace
+			SpaceBox<dim, T> tmp;
+
+			//! fill with the margins of the box
+			for (int i = 0; i < dim; i++) {
+				tmp.setHigh(i, (key.get(i) + 1) * spacing[i]);
+				tmp.setLow(i, key.get(i) * spacing[i]);
+			}
+
+			//! add the space box
+			sub_domains.add(tmp);
+
+			// advance the iterator
+			++gk_it;
+		}
+	}
+
+	/*! \brief Types of weight distribution
+	 *
+	 */
+	enum weightShape {
+		UNIFORM, SPHERE, HYPERBOLOID
+	};
+
+	/*! \brief Add vertex weights to the main graph, following a shape
+	 *
+	 * * UNIFORM - weights are 1 on all vertices
+	 * * SPHERE - heavy weights are distributed inside a sphere
+	 * * HYPERBOLOID - heavy weights are distributed inside an elliptic hyperboloid
+	 *
+	 * \param gp graph to decorate with the weights
+	 * \param i id of the shape
+	 *
+	 */
+	void addWeights(Graph_CSR<nm_v, nm_e> & gp, int i)
+	{
+		float c_x = 0, c_y = 0, c_z = 0, radius2, eq;
+		float x = 0, y = 0, z = 0;
+
+		switch (i) {
+		case UNIFORM:
+
+			// Add computation information to each vertex
+			for (size_t j = 0; j < gp.getNVertex(); j++) {
+				gp.vertex(j).template get<nm_v::computation>() = 1;
+			}
+			break;
+		case SPHERE:
+
+			// Fill the vertex weights with a sphere (if dim == 2 it is a circle)
+			radius2 = pow(4, 2);
+			c_x = 2;
+			c_y = 2;
+
+			if (dim == 3)
+				c_z = 2;
+
+			for (size_t j = 0; j < gp.getNVertex(); j++) {
+				x = gp.vertex(j).template get<nm_v::x>() * 10;
+				y = gp.vertex(j).template get<nm_v::y>() * 10;
+
+				if (dim == 3)
+					z = gp.vertex(j).template get<nm_v::z>() * 10;
+
+				eq = pow((x - c_x), 2) + pow((y - c_y), 2) + pow((z - c_z), 2);
+
+				if (eq <= radius2) {
+					gp.vertex(j).template get<nm_v::computation>() = 5;
+				} else {
+					gp.vertex(j).template get<nm_v::computation>() = 1;
+				}
+			}
+			break;
+		case HYPERBOLOID:
+
+			// Fill the vertex weights with an elliptic hyperboloid (if dim == 2 it is a hyperbola)
+			c_x = 5;
+			c_y = 5;
+
+			if (dim == 3)
+				c_z = 5;
+
+			for (size_t j = 0; j < gp.getNVertex(); j++) {
+				x = gp.vertex(j).template get<nm_v::x>() * 10;
+				y = gp.vertex(j).template get<nm_v::y>() * 10;
+
+				if (dim == 3)
+					z = gp.vertex(j).template get<nm_v::z>() * 10;
+
+				eq = -pow((x - c_x), 2) / 3 - pow((y - c_y), 2) / 3 + pow((z - c_z), 2) / 2;
+
+				if (eq >= 1) {
+					gp.vertex(j).template get<nm_v::computation>() = 5;
+				} else {
+					gp.vertex(j).template get<nm_v::computation>() = 1;
+				}
+			}
+			break;
+		}
+	}
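+
+	// Worked example for the SPHERE shape above (added for clarity, not part
+	// of the original code), assuming vertex coordinates normalized in [0,1]
+	// as produced on a unit domain: after the * 10 scaling a vertex at
+	// (0.3, 0.2, 0.2) maps to (3, 2, 2); with center (2, 2, 2) and
+	// radius2 = 16 we get eq = 1 <= 16, so the vertex falls inside the
+	// sphere and receives computation weight 5 instead of 1.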
+
+	/*! \brief Fill the processor graph with the first (linear) decomposition
+	 *
+	 * Put the vertices into the processor graph (different for each processor)
+	 *
+	 * \param sub_g sub graph to fill
+	 * \param gp main graph, source of the vertices
+	 * \param vtxdist array with the distribution of the vertices among the processors
+	 * \param proc_id rank of the processor
+	 * \param Np total number of processors
+	 */
+	void fillSubGraph(Graph_CSR<nm_v, nm_e> &sub_g, Graph_CSR<nm_v, nm_e> &gp, idx_t* &vtxdist, int proc_id, int Np)
+	{
+
+		for (size_t j = vtxdist[proc_id], local_j = 0; j < vtxdist[proc_id + 1]; j++, local_j++) {
+
+			// Add the vertex
+			nm_v pv = gp.vertexById(j);
+			sub_g.addVertex(pv);
+
+			// Add the edges of the vertex
+			for (size_t s = 0; s < gp.getNChilds(j); s++) {
+				sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(j, s));
+			}
+		}
+
+		// Just for output purpose
+		if (proc_id == 0) {
+			for (int i = 0; i < Np; i++) {
+				for (size_t j = vtxdist[i]; j < vtxdist[i + 1]; j++) {
+					gp.vertexById(j).template get<nm_v::proc_id>() = i;
+				}
+			}
+			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
+			gv2.write("test_graph_1.vtk");
+		}
+	}
+
+	/*! \brief Exchange the local partition with the other processors
+	 *
+	 * \param partitions array that stores all the partitions
+	 * \param gp_nv number of vertices of the main graph
+	 * \param sub_g_nv number of vertices of the sub graph
+	 * \param requests_recv array of receive requests
+	 * \param requests_send array of send requests
+	 * \param statuses array of status objects
+	 * \param proc_id rank of the current processor
+	 * \param Np total number of processors
+	 */
+	void exchangePartitions(int** &partitions, int gp_nv, int sub_g_nv, MPI_Request* &requests_recv, MPI_Request* &requests_send,
+			MPI_Status* &statuses, int proc_id, int Np)
+	{
+
+		// Receive the other partitions, each partition can contain at most NVertex of the main graph
+		for (int i = 0; i < Np; i++) {
+			if (i != proc_id)
+				MPI_Irecv(partitions[i], gp_nv, MPI_INT, i, 0, MPI_COMM_WORLD, &requests_recv[i]);
+		}
+
+		// Send the processor partition to the other processors
+		for (int i = 0; i < Np; i++) {
+			if (i != proc_id)
+				MPI_Isend(partitions[proc_id], sub_g_nv, MPI_INT, i, 0, MPI_COMM_WORLD, &requests_send[i]);
+		}
+
+		// Wait for all the partitions from the other processors
+		for (int i = 0; i < Np; i++) {
+			if (i != proc_id)
+				MPI_Wait(&requests_recv[i], &statuses[i]);
+		}
+
+		// Also complete the sends, so the request objects can be safely
+		// reused in the next exchange
+		for (int i = 0; i < Np; i++) {
+			if (i != proc_id)
+				MPI_Wait(&requests_send[i], MPI_STATUS_IGNORE);
+		}
+	}
+
+	/*! \brief Update the main graph and the subgraph with the partition in the partitions parameter
+	 *
+	 * \param partitions array that stores all the partitions
+	 * \param gp main graph
+	 * \param sub_g sub graph
+	 * \param v_per_proc array needed to reconstruct the main graph
+	 * \param vtxdist array with the distribution of the vertices among the processors
+	 * \param statuses array of status objects
+	 * \param proc_id rank of the current processor
+	 * \param Np total number of processors
+	 */
+	void updateGraphs(int** &partitions, Graph_CSR<nm_v, nm_e> &gp, Graph_CSR<nm_v, nm_e> &sub_g, std::vector<size_t>* &v_per_proc, idx_t* &vtxdist, MPI_Status* &statuses, int proc_id, int Np) {
+
+		int size_p = sub_g.getNVertex();
+		int local_j = 0;
+		sub_g.clear();
+
+		// Init n_vtxdist to gather information about the new decomposition
+		idx_t *n_vtxdist = new idx_t[Np + 1];
+		for (int i = 0; i <= Np; i++)
+			n_vtxdist[i] = 0;
+
+		// Update the main graph with the partitions made by ParMETIS on the other
+		// processors and with the local partition
+		for (int i = 0; i < Np; i++) {
+
+			int ndata = 0;
+			MPI_Get_count(&statuses[i], MPI_INT, &ndata);
+
+			if (i == proc_id)
+				ndata = size_p;
+
+			// Update the main graph with the received information
+			for (int k = 0, l = vtxdist[i]; k < ndata && l < vtxdist[i + 1]; k++, l++) {
+
+				// Create the new n_vtxdist (1) (just count the vertices of each processor)
+				n_vtxdist[partitions[i][k] + 1]++;
+
+				// Update the proc id in the vertex
+				gp.vertexById(l).template get<nm_v::proc_id>() = partitions[i][k];
+
+				// Add the vertex to the temporary distribution structure (needed to update the main graph)
+				v_per_proc[partitions[i][k]].push_back(gp.vertexById(l).template get<nm_v::id>());
+
+				// Add the vertices belonging to this processor to the sub graph
+				if (partitions[i][k] == proc_id) {
+
+					nm_v pv = gp.vertexById(l);
+					sub_g.addVertex(pv);
+
+					// Add the edges of the vertex
+					for (size_t s = 0; s < gp.getNChildsByVertexId(l); s++) {
+						sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s));
+					}
+
+					local_j++;
+				}
+			}
+		}
+
+		// Create the new n_vtxdist (2) (write the boundaries with a prefix sum)
+		for (int i = 2; i <= Np; i++) {
+			n_vtxdist[i] += n_vtxdist[i - 1];
+		}
+
+		// Copy the
new decomposition in the main vtxdist + for (int i = 0; i <= Np; i++) { + vtxdist[i] = n_vtxdist[i]; + } + } + + // Heap memory receiver + HeapMemory hp_recv; + + // vector v_proc + openfpm::vector<size_t> v_proc; + + // Receive counter + size_t recv_cnt; + +public: + + /*! \brief Basic decomposition constructor + * + * \param v_cl Virtual cluster, used internally to handle or pipeline communication + * + */ + BasicDecomposition(Vcluster & v_cl) : + nn_prcs<dim, T>(v_cl), v_cl(v_cl) { + // Reset the box to zero + bbox.zero(); + } + + //! Basic decomposition destructor + ~BasicDecomposition() { + } + + // openfpm::vector<size_t> ids; + + /*! \brief class to select the returned id by ghost_processorID + * + */ + class box_id { + public: + /*! \brief Return the box id + * + * \param p structure containing the id informations + * \param b_id box_id + * + * \return box id + * + */ + inline static size_t id(p_box<dim, T> & p, size_t b_id) { + return b_id; + } + }; + + /*! \brief class to select the returned id by ghost_processorID + * + */ + class processor_id { + public: + /*! \brief Return the processor id + * + * \param p structure containing the id informations + * \param b_id box_id + * + * \return processor id + * + */ + inline static size_t id(p_box<dim, T> & p, size_t b_id) { + return p.proc; + } + }; + + /*! \brief class to select the returned id by ghost_processorID + * + */ + class lc_processor_id { + public: + /*! \brief Return the near processor id + * + * \param p structure containing the id informations + * \param b_id box_id + * + * \return local processor id + * + */ + inline static size_t id(p_box<dim, T> & p, size_t b_id) { + return p.lc_proc; + } + }; + + /*! It calculate the internal ghost boxes + * + * Example: Processor 10 calculate + * B8_0 B9_0 B9_1 and B5_0 + * + * + * + \verbatim + + +----------------------------------------------------+ + | | + | Processor 8 | + | Sub-domain 0 +-----------------------------------+ + | | | + | | | + ++--------------+---+---------------------------+----+ Processor 9 | + | | | B8_0 | | Subdomain 0 | + | +------------------------------------+ | + | | | | | | + | | | XXXXXXXXXXXXX XX |B9_0| | + | | B | X Processor 10 X | | | + | Processor 5 | 5 | X Sub-domain 0 X | | | + | Subdomain 0 | _ | X X +----------------------------------------+ + | | 0 | XXXXXXXXXXXXXXXX | | | + | | | | | | + | | | | | Processor 9 | + | | | |B9_1| Subdomain 1 | + | | | | | | + | | | | | | + | | | | | | + +--------------+---+---------------------------+----+ | + | | + +-----------------------------------+ + + \endverbatim + + and also + G8_0 G9_0 G9_1 G5_0 (External ghost boxes) + + \verbatim + + +----------------------------------------------------+ + | | + | Processor 8 | + | Sub-domain 0 +-----------------------------------+ + | +---------------------------------------------+ | + | | G8_0 | | | + ++--------------+------------------------------------+ | Processor 9 | + | | | | | Subdomain 0 | + | | | |G9_0| | + | | | | | | + | | | XXXXXXXXXXXXX XX | | | + | | | X Processor 10 X | | | + | Processor|5 | X Sub-domain 0 X | | | + | Subdomain|0 | X X +-----------------------------------+ + | | | XXXXXXXXXXXXXXXX | | | + | | G | | | | + | | 5 | | | Processor 9 | + | | | | | | Subdomain 1 | + | | 0 | |G9_1| | + | | | | | | + | | | | | | + +--------------+------------------------------------+ | | + | | | | + +----------------------------------------+----+------------------------------+ + + + \endverbatim + + * + * + * + * \param ghost margins for each dimensions (p1 
negative part) (p2 positive part)
+	 *
+	 *
+	 \verbatim
+
+	           ^ p2[1]
+	           |
+	           |
+	      +----+----+
+	      |         |
+	      |         |
+	 p1[0]<-----+   +----> p2[0]
+	      |         |
+	      |         |
+	      +----+----+
+	           |
+	           v p1[1]
+
+	 \endverbatim
+
+	 *
+	 *
+	 */
+	void calculateGhostBoxes() {
+#ifdef DEBUG
+		// the ghost margins are assumed to be smaller
+		// than one sub-domain
+
+		for (size_t i = 0; i < dim; i++) {
+			if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i)
+					|| ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i)) {
+				std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : the ghost is bigger than one sub-domain" << "\n";
+			}
+		}
+#endif
+
+		// Intersect all the local sub-domains with the sub-domains of the contiguous processors
+
+		// create the internal structures that store ghost information
+		ie_ghost<dim, T>::create_box_nn_processor_ext(v_cl, ghost, sub_domains, box_nn_processor, *this);
+		ie_ghost<dim, T>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this);
+
+		// ebox must come after ibox (in this case)
+
+		ie_loc_ghost<dim, T>::create_loc_ghost_ibox(ghost, sub_domains);
+		ie_loc_ghost<dim, T>::create_loc_ghost_ebox(ghost, sub_domains);
+
+		// check that the ghost is not bigger than the smallest sub-domain on each direction
+		for (size_t i = 0; i < dim; i++) {
+			if (ghost.template getLow(i) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i)) {
+				std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : the ghost is bigger than one sub-domain" << "\n";
+			}
+		}
+	}
+
+	/*! \brief The default grid size
+	 *
+	 * The default grid is always an isotropic grid that adapts to the number of processors;
+	 * it defines into how many cells the space is divided for a given required minimum
+	 * number of sub-domains
+	 *
+	 */
+	static size_t getDefaultGrid(size_t n_sub) {
+		// Calculate the number of sub-sub-domains on
+		// each dimension
+		return openfpm::math::round_big_2(pow(n_sub, 1.0 / dim));
+	}
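+
+	// Worked example (added for clarity, not part of the original code),
+	// assuming round_big_2() rounds up to the next power of two, as the name
+	// suggests: with n_sub = 1024 required sub-domains and dim = 3,
+	// pow(1024, 1.0/3) is about 10.08, rounded up to 16 cells per dimension,
+	// i.e. 16^3 = 4096 sub-sub-domains in total.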
+
+	/*! \brief Given a point, return the processor to which the particle should go
+	 *
+	 * \return processorID
+	 *
+	 */
+	template<typename Mem> size_t inline processorID(encapc<1, Point<dim, T>, Mem> p) {
+		return fine_s.get(cd.getCell(p));
+	}
+
+	// Smallest subdivision on each direction
+	::Box<dim, T> ss_box;
+
+	/*! \brief Get the smallest subdivision of the domain on each direction
+	 *
+	 * \return a box, p1 is set to zero
+	 *
+	 */
+	const ::Box<dim, T> & getSmallestSubdivision() {
+		return ss_box;
+	}
+
+	/*! \brief Given a point, return the processor to which the particle should go
+	 *
+	 * \return processorID
+	 *
+	 */
+	size_t inline processorID(const T (&p)[dim]) const {
+		return fine_s.get(cd.getCell(p));
+	}
+
+	/*! \brief Set the parameters of the decomposition
+	 *
+	 * \param div_ number of sub-domains on each dimension
+	 * \param domain_ domain to decompose
+	 * \param ghost ghost extension
+	 *
+	 */
+	void setParameters(const size_t (&div_)[dim], Domain<dim, T> domain_, Ghost<dim, T> ghost = Ghost<dim, T>()) {
+		// set the ghost
+		this->ghost = ghost;
+
+		// Set the decomposition parameters
+		gr.setDimensions(div_);
+		domain = domain_;
+		cd.setDimensions(domain, div_, 0);
+
+		// Create the decomposition
+		CreateDecomposition(v_cl);
+	}
+
+	/*! \brief Get the number of local sub-domains
+	 *
+	 * \return the number of sub-domains
+	 *
+	 */
+	size_t getNLocalHyperCube() {
+		return sub_domains.size();
+	}
+
+	/*! \brief Get the local sub-domain
+	 *
+	 * \param lc sub-domain index (each local processor can have more than one sub-domain)
+	 * \return the sub-domain
+	 *
+	 */
+	SpaceBox<dim, T> getLocalHyperCube(size_t lc) {
+		// Create a space box
+		SpaceBox<dim, T> sp;
+
+		// fill the space box
+		for (size_t k = 0; k < dim; k++) {
+			// create the SpaceBox Low and High
+			sp.setLow(k, sub_domains.template get<Box::p1>(lc)[k]);
+			sp.setHigh(k, sub_domains.template get<Box::p2>(lc)[k]);
+		}
+
+		return sp;
+	}
+
+	/*! \brief Get the local sub-domain enlarged with the ghost extension
+	 *
+	 * \param lc sub-domain index (each local processor can have more than one sub-domain)
+	 * \return the sub-domain
+	 *
+	 */
+	SpaceBox<dim, T> getSubDomainWithGhost(size_t lc) {
+		// Create a space box
+		SpaceBox<dim, T> sp = sub_domains.get(lc);
+
+		// enlarge with ghost
+		sp.enlarge(ghost);
+
+		return sp;
+	}
+
+	/*! \brief Return the structure that stores the physical domain
+	 *
+	 * \return The physical domain
+	 *
+	 */
+	Domain<dim, T> & getDomain() {
+		return domain;
+	}
+
+	/*! \brief Check if the particle is local
+	 *
+	 * \param p object position
+	 *
+	 * \return true if it is local
+	 *
+	 */
+	template<typename Mem> bool isLocal(const encapc<1, Point<dim, T>, Mem> p) const {
+		return processorID<Mem>(p) == v_cl.getProcessUnitID();
+	}
+
+	/*! \brief Check if the particle is local
+	 *
+	 * \param pos object position
+	 *
+	 * \return true if it is local
+	 *
+	 */
+	bool isLocal(const T (&pos)[dim]) const {
+		return processorID(pos) == v_cl.getProcessUnitID();
+	}
+
+	::Box<dim, T> bbox;
+
+	/*! \brief Return the bounding box containing the union of all the sub-domains of the local processor
+	 *
+	 * \return The bounding box
+	 *
+	 */
+	::Box<dim, T> & getProcessorBounds() {
+		return bbox;
+	}
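+
+	// Minimal usage sketch of the queries above (added for illustration,
+	// the values are hypothetical):
+	//
+	//   BasicDecomposition<3, float> dec(v_cl);
+	//   // ... setParameters() + calculateGhostBoxes() ...
+	//   float pos[3] = { 0.5, 0.5, 0.5 };
+	//   size_t prc = dec.processorID(pos);   // owner of this position
+	//   bool loc = dec.isLocal(pos);         // true if prc is the local rank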
+
+	////////////// Functions to get decomposition information ///////////////
+
+	/*! \brief Write the decomposition as VTK files
+	 *
+	 * The function generates several files
+	 *
+	 * * subdomains_X.vtk domain for the local processor (X) as union of sub-domains
+	 * * subdomains_adjacent_X.vtk sub-domains adjacent to the local processor (X)
+	 * * internal_ghost_X.vtk internal ghost boxes for the local processor (X)
+	 * * external_ghost_X.vtk external ghost boxes for the local processor (X)
+	 * * local_internal_ghost_X.vtk internal local ghost boxes for the local processor (X)
+	 * * local_external_ghost_X.vtk external local ghost boxes for the local processor (X)
+	 *
+	 * where X is the local processor rank
+	 *
+	 * \param output directory where to write the files
+	 *
+	 */
+	bool write(std::string output) const {
+		//! subdomains_X.vtk domain for the local processor (X) as union of sub-domains
+		VTKWriter<openfpm::vector<::SpaceBox<dim, T>>, VECTOR_BOX> vtk_box1;
+		vtk_box1.add(sub_domains);
+		vtk_box1.write(output + std::string("subdomains_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
+
+		nn_prcs<dim, T>::write(output);
+		ie_ghost<dim, T>::write(output, v_cl.getProcessUnitID());
+		ie_loc_ghost<dim, T>::write(output, v_cl.getProcessUnitID());
+
+		return true;
+	}
+
+	/*! \brief Check the consistency of the information of the decomposition
+	 *
+	 * \return false if it is inconsistent
+	 *
+	 */
+	bool check_consistency() {
+		if (ie_loc_ghost<dim, T>::check_consistency(getNLocalHyperCube()) == false)
+			return false;
+
+		return true;
+	}
+
+	void debugPrint() {
+		std::cout << "Subdomains\n";
+		for (size_t p = 0; p < sub_domains.size(); p++) {
+			std::cout << ::SpaceBox<dim, T>(sub_domains.get(p)).toString() << "\n";
+		}
+
+		std::cout << "External ghost box\n";
+
+		for (size_t p = 0; p < nn_prcs<dim, T>::getNNProcessors(); p++) {
+			for (size_t i = 0; i < ie_ghost<dim, T>::getProcessorNEGhost(p); i++) {
+				std::cout << ie_ghost<dim, T>::getProcessorEGhostBox(p, i).toString() << "   prc=" << nn_prcs<dim, T>::IDtoProc(p)
+						<< "   id=" << ie_ghost<dim, T>::getProcessorEGhostId(p, i) << "\n";
+			}
+		}
+
+		std::cout << "Internal ghost box\n";
+
+		for (size_t p = 0; p < nn_prcs<dim, T>::getNNProcessors(); p++) {
+			for (size_t i = 0; i < ie_ghost<dim, T>::getProcessorNIGhost(p); i++) {
+				std::cout << ie_ghost<dim, T>::getProcessorIGhostBox(p, i).toString() << "   prc=" << nn_prcs<dim, T>::IDtoProc(p)
+						<< "   id=" << ie_ghost<dim, T>::getProcessorIGhostId(p, i) << "\n";
+			}
+		}
+	}
+
+};
+
+#endif
diff --git a/src/Decomposition/BasicDecomposition_unit_test.hpp b/src/Decomposition/BasicDecomposition_unit_test.hpp
new file mode 100644
index 000000000..9ae1a7d94
--- /dev/null
+++ b/src/Decomposition/BasicDecomposition_unit_test.hpp
@@ -0,0 +1,83 @@
+#ifndef BASICDECOMPOSITION_UNIT_TEST_HPP
+#define BASICDECOMPOSITION_UNIT_TEST_HPP
+
+#include "BasicDecomposition.hpp"
+#include "util/mathutil.hpp"
+
+BOOST_AUTO_TEST_SUITE( BasicDecomposition_test )
+
+#define SUB_UNIT_FACTOR 1024
+
+BOOST_AUTO_TEST_CASE( BasicDecomposition_test_use)
+{
+	// Initialize the global VCluster (must be done before taking a reference to it)
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	// Vcluster
+	Vcluster & vcl = *global_v_cluster;
+
+	//! [Create BasicDecomposition]
+	BasicDecomposition<3,float> dec(vcl);
+
+	// Physical domain
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+	size_t div[3];
+
+	// Get the number of processors and calculate the number of sub-domains
+	// for each processor (SUB_UNIT_FACTOR = 1024)
+	size_t n_proc = vcl.getProcessingUnits();
+	size_t n_sub = n_proc * SUB_UNIT_FACTOR;
+
+	// Set the number of sub-domains on each dimension (in a scalable way)
+	for (int i = 0 ; i < 3 ; i++)
+	{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
+
+	// Define ghost
+	Ghost<3,float> g(0.01);
+
+	// Decompose
+	dec.setParameters(div,box,g);
+
+	// create a ghost border
+	dec.calculateGhostBoxes();
+
+	//! [Create BasicDecomposition]
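+
+	// What the loop below checks (note added for clarity): every internal
+	// ghost box of the local processor was produced by intersecting a
+	// neighbour processor's ghost with a local sub-domain, so a random point
+	// sampled inside it must list that neighbour among the owners returned
+	// by ghost_processorID().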
+
+	// For each calculated ghost box
+	for (size_t i = 0 ; i < dec.getNIGhostBox() ; i++)
+	{
+		SpaceBox<3,float> b = dec.getIGhostBox(i);
+		size_t proc = dec.getIGhostBoxProcessor(i);
+
+		// sample one point inside the box
+		Point<3,float> p = b.rnd();
+
+		// Check that ghost_processorID returns that processor number
+		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<BasicDecomposition<3,float>::processor_id>(p);
+
+		bool found = false;
+
+		for (size_t j = 0; j < pr.size() ; j++)
+		{
+			if (pr.get(j) == proc)
+			{found = true; break;}
+		}
+
+		if (found == false)
+		{
+			// repeat the query, useful to step into it with a debugger
+			const openfpm::vector<size_t> pr3 = dec.template ghost_processorID<BasicDecomposition<3,float>::processor_id>(p);
+		}
+
+		BOOST_REQUIRE_EQUAL(found,true);
+	}
+
+	// Check the consistency
+	bool val = dec.check_consistency();
+	BOOST_REQUIRE_EQUAL(val,true);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+
+#endif
diff --git a/src/parmetis_util.hpp b/src/parmetis_util.hpp
new file mode 100644
index 000000000..83bf2bca1
--- /dev/null
+++ b/src/parmetis_util.hpp
@@ -0,0 +1,486 @@
+/*
+ * parmetis_util.hpp
+ *
+ * Created on: Oct 07, 2015
+ * Author: Antonio Leo
+ */
+
+#ifndef PARMETIS_UTIL_HPP
+#define PARMETIS_UTIL_HPP
+
+#include <iostream>
+#include "parmetis.h"
+#include "VTKWriter.hpp"
+
+/*! \brief Parmetis graph structure
+ *
+ * Parmetis graph structure
+ *
+ */
+struct Parmetis_graph {
+	//! The number of vertices in the graph
+	idx_t * nvtxs;
+
+	//! Number of balancing constraints;
+	//! in practice, the number of weights for each vertex.
+	//! Even if you specify vwgt == NULL, ncon must be set at least to one
+	idx_t * ncon;
+
+	//! For each vertex i it stores the start of the adjacency list of i
+	idx_t * xadj;
+
+	//! For each vertex it stores the list of all the neighborhood vertices
+	idx_t * adjncy;
+
+	//! Array that stores the weight of each vertex
+	idx_t * vwgt;
+
+	//! Array of the vertex sizes, basically the total communication amount
+	idx_t * vsize;
+
+	//! The weight of the edges
+	idx_t * adjwgt;
+
+	//! Number of parts in which to partition the graph
+	idx_t * nparts;
+
+	//! Desired weight for each partition (one for each constraint)
+	real_t * tpwgts;
+
+	//! For each partition, the tolerated load imbalance
+	real_t * ubvec;
+
+	//! Additional options for the graph partitioning
+	idx_t * options;
+
+	//! Returns the total communication cost for each partition
+	idx_t * objval;
+
+	//! Output vector containing the partition for each vertex
+	idx_t * part;
+
+	//! Upon successful completion, the number of edges that are cut by the partitioning is written to this parameter
+	idx_t * edgecut;
+
+	//! Ratio of inter-processor communication time compared to data redistribution time.
+	//! It should be set between 0.000001 and 1000000.0. If ITR is set high, a repartitioning
+	//! with a low edge-cut will be computed. If it is set low, a repartitioning that requires
+	//! little data redistribution will be computed. Good values for this parameter can be
+	//! obtained by dividing inter-processor communication time by data redistribution time.
+	//! Otherwise, a value of 1000.0 is recommended.
+	real_t * itr;
+
+	//! Indicates the numbering scheme used for the vtxdist, xadj, adjncy, and part arrays
+	//! (0 for C-style, starting from index 0)
+	idx_t * numflag;
+
+	//! Indicates if the graph is weighted. wgtflag can take one of four values:
+	//! 0  No weights (vwgt and adjwgt are both NULL)
+	//! 1  Weights on the edges only (vwgt is NULL)
+	//! 2  Weights on the vertices only (adjwgt is NULL)
+	//! 3  Weights on both the vertices and edges
+	idx_t * wgtflag;
+};
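+
+// Worked example of the CSR layout used by xadj/adjncy (added for clarity,
+// not part of the original code): for a triangle graph with vertices
+// {0, 1, 2} all connected to each other,
+//
+//   xadj   = { 0, 2, 4, 6 }          // the neighbours of vertex i live in
+//   adjncy = { 1, 2, 0, 2, 0, 1 }    // adjncy[xadj[i]] .. adjncy[xadj[i+1]-1]
+//
+// so vertex 0 has the neighbours adjncy[0..1] = {1, 2}, and so on.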
+
+//! Balance communication and computation
+#define BALANCE_CC 1
+//! Balance communication, computation and memory
+#define BALANCE_CCM 2
+//! Balance computation, communication and others
+#define BALANCE_CC_O(c) c+1
+
+/*! \brief Helper class to define a Parmetis graph
+ *
+ * \tparam Graph structure that stores the graph
+ *
+ */
+template<typename Graph>
+class Parmetis {
+	// Graph in parmetis representation
+	Parmetis_graph Mg;
+
+	// Original graph
+	Graph & g;
+
+	// Communicator for MPI
+	MPI_Comm comm = MPI_COMM_NULL;
+
+	// Process rank information
+	int MPI_PROC_ID = 0;
+
+	/*! \brief Construct the adjacency list
+	 *
+	 * \param refGraph reference graph from which to get the information
+	 *
+	 */
+	void constructAdjList(Graph &refGraph) {
+		// create xadj and the adjacency list
+		Mg.vwgt = new idx_t[g.getNVertex()];
+		Mg.xadj = new idx_t[g.getNVertex() + 1];
+		Mg.adjncy = new idx_t[g.getNEdge()];
+
+		// starting point in the adjacency list
+		size_t prev = 0;
+
+		// actual position
+		size_t id = 0;
+
+		// property id
+		size_t real_id;
+
+		// boolean to check whether refGraph is the main graph (and g the sub graph)
+		bool main = refGraph.getNVertex() != g.getNVertex();
+
+		// for each vertex calculate the position of the starting point in the adjacency list
+		for (size_t i = 0; i < g.getNVertex(); i++) {
+			// Add the weight to the vertex
+			Mg.vwgt[i] = g.vertex(i).template get<nm_v::computation>();
+
+			// Calculate the starting point in the adjacency list
+			Mg.xadj[id] = prev;
+
+			if (main)
+				real_id = g.get_real_id(g.vertex(i).template get<nm_v::id>());
+			else
+				real_id = i;
+
+			// Create the adjacency list and the weights of the edges
+			for (size_t s = 0; s < refGraph.getNChilds(real_id); s++) {
+				size_t child = refGraph.getChild(real_id, s);
+
+				if (main)
+					Mg.adjncy[prev + s] = refGraph.vertex(child).template get<nm_v::id>();
+				else
+					Mg.adjncy[prev + s] = child;
+			}
+
+			// update the position for the next vertex
+			prev += refGraph.getNChilds(real_id);
+
+			id++;
+		}
+
+		// Fill the last point
+		Mg.xadj[id] = prev;
+
+		/*
+		 std::cout << MPI_PROC_ID << "\n";
+		 for(int i=0; i<= g.getNVertex();i++){
+		 std::cout << Mg.xadj[i] << " ";
+		 }
+		 std::cout << "\n\n";
+		 for(int i=0; i< g.getNEdge();i++){
+		 std::cout << Mg.adjncy[i] << " ";
+		 }
+		 std::cout << "\n\n";
+		 */
+	}
+
+	/*! \brief Construct the adjacency list
+	 *
+	 * \param refGraph reference graph from which to get the information
+	 *
+	 */
+	/*
+	 void constructAdjList(Graph &refGraph, idx_t* &old_vtxdist ) {
+	 // create xadj and adjlist
+	 Mg.vwgt = new idx_t[g.getNVertex()];
+	 Mg.xadj = new idx_t[g.getNVertex() + 1];
+	 Mg.adjncy = new idx_t[g.getNEdge()];
+
+	 //!
starting point in the adjacency list + size_t prev = 0; + + // actual position + size_t id = 0; + + // for each vertex calculate the position of the starting point in the adjacency list + for (size_t i = 0; i < g.getNVertex(); i++) { + + // Add weight to vertex + Mg.vwgt[i] = g.vertex(i).template get<nm_v::computation>(); + + // Calculate the starting point in the adjacency list + Mg.xadj[id] = prev; + + // Create the adjacency list and the weights for edges + for (size_t s = 0; s < refGraph.getNChilds(i); s++) { + + size_t child = refGraph.getChild(i, s); + + // Check if child is not in this processor + if(child > old_vtxdist[MPI_PROC_ID+1] || child < old_vtxdist[MPI_PROC_ID]) + + Mg.adjncy[prev + s] = child; + } + + // update the position for the next vertex + prev += refGraph.getNChilds(i); + + id++; + } + + // Fill the last point + Mg.xadj[id] = prev; + + + std::cout << MPI_PROC_ID << "\n"; + for(int i=0; i<= g.getNVertex();i++){ + std::cout << Mg.xadj[i] << " "; + } + std::cout << "\n\n"; + for(int i=0; i< g.getNEdge();i++){ + std::cout << Mg.adjncy[i] << " "; + } + std::cout << "\n\n"; + + + } + */ +public: + + /*! \brief Constructor + * + * Construct a metis graph from Graph_CSR + * + * \param g Graph we want to convert to decompose + * \param nc number of partitions + * + */ + Parmetis(Graph & g, size_t nc) : + g(g) { + // Init OpenMPI structures + + MPI_Comm_dup(MPI_COMM_WORLD, &comm); + MPI_Comm_rank(MPI_COMM_WORLD, &MPI_PROC_ID); + + // Get the number of vertex + + Mg.nvtxs = new idx_t[1]; + Mg.nvtxs[0] = g.getNVertex(); + + // Set the number of constrains + + Mg.ncon = new idx_t[1]; + Mg.ncon[0] = 1; + + // Set to null the weight of the vertex + + Mg.vwgt = NULL; + + // construct the adjacency list + + constructAdjList(g); + + // Put the total communication size to NULL + + Mg.vsize = NULL; + + // Set to null the weight of the edge + + Mg.adjwgt = NULL; + + // Set the total number of partitions + + Mg.nparts = new idx_t[1]; + Mg.nparts[0] = nc; + + //! Set option for the graph partitioning (set as default) + + Mg.options = new idx_t[4]; + Mg.options[0] = 1; + Mg.options[1] = 3; + Mg.options[2] = 0; + Mg.options[3] = 0; + + //! is an output vector containing the partition for each vertex + + Mg.part = new idx_t[g.getNVertex()]; + for (int i = 0; i < g.getNVertex(); i++) + Mg.part[i] = MPI_PROC_ID; + + //! adaptiveRepart itr value + Mg.itr = new real_t[1]; + Mg.itr[0] = 1000.0; + + //! init tpwgts to have balanced vertices and ubvec + + Mg.tpwgts = new real_t[Mg.nparts[0]]; + Mg.ubvec = new real_t[Mg.nparts[0]]; + + for (int s = 0; s < Mg.nparts[0]; s++) { + Mg.tpwgts[s] = 1.0 / Mg.nparts[0]; + Mg.ubvec[s] = 1.05; + } + + Mg.edgecut = new idx_t[1]; + Mg.edgecut[0] = 0; + + //! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index) + Mg.numflag = new idx_t[1]; + Mg.numflag[0] = 0; + + //! This is used to indicate if the graph is weighted. wgtflag can take one of four values: + Mg.wgtflag = new idx_t[1]; + Mg.wgtflag[0] = 2; + } + + //TODO deconstruct new variables + /*! 
\brief Destructor
+	 *
+	 * It deallocates all the memory allocated by this class
+	 *
+	 */
+	~Parmetis() {
+		// Deallocate the Mg structure
+		if (Mg.nvtxs != NULL) {
+			delete[] Mg.nvtxs;
+		}
+
+		if (Mg.ncon != NULL) {
+			delete[] Mg.ncon;
+		}
+
+		if (Mg.xadj != NULL) {
+			delete[] Mg.xadj;
+		}
+
+		if (Mg.adjncy != NULL) {
+			delete[] Mg.adjncy;
+		}
+
+		if (Mg.vwgt != NULL) {
+			delete[] Mg.vwgt;
+		}
+
+		if (Mg.adjwgt != NULL) {
+			delete[] Mg.adjwgt;
+		}
+
+		if (Mg.nparts != NULL) {
+			delete[] Mg.nparts;
+		}
+
+		if (Mg.tpwgts != NULL) {
+			delete[] Mg.tpwgts;
+		}
+
+		if (Mg.ubvec != NULL) {
+			delete[] Mg.ubvec;
+		}
+
+		if (Mg.options != NULL) {
+			delete[] Mg.options;
+		}
+
+		if (Mg.part != NULL) {
+			delete[] Mg.part;
+		}
+
+		if (Mg.edgecut != NULL) {
+			delete[] Mg.edgecut;
+		}
+
+		if (Mg.numflag != NULL) {
+			delete[] Mg.numflag;
+		}
+
+		if (Mg.wgtflag != NULL) {
+			delete[] Mg.wgtflag;
+		}
+
+		// itr is allocated in the constructor as well
+		if (Mg.itr != NULL) {
+			delete[] Mg.itr;
+		}
+
+		// Free the duplicated communicator
+		if (comm != MPI_COMM_NULL)
+			MPI_Comm_free(&comm);
+	}
+
+	/*! \brief Decompose the graph
+	 *
+	 * \tparam i which property stores the decomposition
+	 *
+	 * \param vtxdist distribution of the vertices among the processors
+	 *
+	 */
+	template<unsigned int i>
+	void decompose(idx_t *vtxdist) {
+
+		// Decompose
+		ParMETIS_V3_PartKway(vtxdist, Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
+				Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm);
+		/*
+		 ParMETIS_V3_AdaptiveRepart( vtxdist, Mg.xadj,Mg.adjncy,Mg.vwgt,Mg.vsize,Mg.adjwgt, &(Mg.wgtflag), &(Mg.numflag),
+		 Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, &(Mg.itr), Mg.options, &(Mg.edgecut),
+		 Mg.part, &comm );
+		 */
+
+		// For each vertex, store the processor that contains its data
+		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+			g.vertex(j).template get<i>() = Mg.part[id];
+		}
+	}
+
+	/*! \brief Refine the partitioning of the graph
+	 *
+	 * \tparam i which property stores the refined decomposition
+	 *
+	 * \param vtxdist distribution of the vertices among the processors
+	 *
+	 */
+	template<unsigned int i>
+	void refine(idx_t *vtxdist) {
+
+		// Refine
+		ParMETIS_V3_RefineKway(vtxdist, Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts,
+				Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm);
+
+		// For each vertex, store the processor that contains its data
+		for (size_t j = 0, id = 0; j < g.getNVertex(); j++, id++) {
+			g.vertex(j).template get<i>() = Mg.part[id];
+		}
+	}
+
+	/*! \brief Get the graph partition vector
+	 *
+	 */
+	idx_t* getPartition() {
+		return Mg.part;
+	}
+
+	/*! \brief Reset the graph and reconstruct it
+	 *
+	 */
+	void reset(Graph & mainGraph) {
+		// Deallocate the graph structures
+		if (Mg.xadj != NULL) {
+			delete[] Mg.xadj;
+		}
+
+		if (Mg.adjncy != NULL) {
+			delete[] Mg.adjncy;
+		}
+
+		if (Mg.vwgt != NULL) {
+			delete[] Mg.vwgt;
+		}
+
+		if (Mg.adjwgt != NULL) {
+			delete[] Mg.adjwgt;
+		}
+
+		if (Mg.part != NULL) {
+			delete[] Mg.part;
+		}
+
+		Mg.nvtxs[0] = g.getNVertex();
+
+		Mg.part = new idx_t[g.getNVertex()];
+
+		for (size_t i = 0; i < g.getNVertex(); i++)
+			Mg.part[i] = MPI_PROC_ID;
+
+		// construct the adjacency list
+		constructAdjList(mainGraph);
+	}
+
+};
+
+#endif
-- 
GitLab