From 66bf135067d6810040c5e8bf679477b53c79e30e Mon Sep 17 00:00:00 2001 From: tonynsyde <antonio.leo.polito@gmail.com> Date: Wed, 9 Mar 2016 17:09:32 +0100 Subject: [PATCH] Parmetis test with DLB and vector_dist, random walk --- src/DLB/DLB.hpp | 2 +- src/Decomposition/CartDecomposition.hpp | 59 ++- .../CartDecomposition_unit_test.hpp | 82 ++- .../Distribution/DistParMetisDistribution.hpp | 85 ++-- .../Distribution/Distribution_unit_tests.hpp | 232 +++++++++ .../Distribution/ParMetisDistribution.hpp | 24 +- .../Distribution/parmetis_util.hpp | 46 +- src/Decomposition/ie_ghost.hpp | 15 + src/Decomposition/ie_loc_ghost.hpp | 9 + src/Decomposition/nn_processor.hpp | 14 + src/Graph/dist_map_graph.hpp | 99 ++-- src/Graph/dist_map_graph_unit_test.hpp | 156 ++++-- src/Grid/grid_dist_id.hpp | 3 - src/Vector/vector_dist.hpp | 465 ++++++++++-------- 14 files changed, 866 insertions(+), 425 deletions(-) diff --git a/src/DLB/DLB.hpp b/src/DLB/DLB.hpp index 848825d7..e7e79552 100644 --- a/src/DLB/DLB.hpp +++ b/src/DLB/DLB.hpp @@ -52,7 +52,7 @@ public: //! Level of un-balance needed to trigger the re-balance enum ThresholdLevel { - THRLD_LOW = 5, THRLD_MEDIUM = 7, THRLD_HIGH = 10 + THRLD_LOW = 10, THRLD_MEDIUM = 20, THRLD_HIGH = 30 }; private: diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index ec3bbf34..db0d57fa 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -143,9 +143,6 @@ private: // Heap memory receiver HeapMemory hp_recv; - // vector v_proc - openfpm::vector<size_t> v_proc; - // Receive counter size_t recv_cnt; @@ -345,11 +342,11 @@ private: for (size_t i = 0; i < dist.getNSubSubDomains(); i++) { - dist.setMigrationCost(i, norm * migration * dist.getVertexWeight(i)); + dist.setMigrationCost(i, norm * migration * dist.getSubSubDomainComputationCost(i)); for (size_t s = 0; s < dist.getNSubSubDomainNeighbors(i); s++) { - dist.setCommunicationCost(i, s, 1 * dist.getVertexWeight(i) * ts); + dist.setCommunicationCost(i, s, 1 * dist.getSubSubDomainComputationCost(i) * ts); } prev += dist.getNSubSubDomainNeighbors(i); } @@ -1169,26 +1166,46 @@ public: } + void reset() + { + sub_domains.clear(); + box_nn_processor.clear(); + fine_s.clear(); + nn_prcs<dim, T>::reset(); + ie_ghost<dim, T>::reset(); + ie_loc_ghost<dim, T>::reset(); + } + /*! \brief Start decomposition * */ void decompose() { + reset(); + computeCommunicationAndMigrationCosts(1); dist.decompose(); createSubdomains(v_cl,bc); + + calculateGhostBoxes(); } /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call * */ - void rebalance() + void rebalance(size_t ts) { - computeCommunicationAndMigrationCosts(1); + reset(); + + computeCommunicationAndMigrationCosts(ts); dist.refine(); + + createSubdomains(v_cl,bc); + + calculateGhostBoxes(); } /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call @@ -1210,8 +1227,8 @@ public: if (dlb.rebalanceNeeded()) { - computeCommunicationAndMigrationCosts(dlb.getNTimeStepSinceDLB()); - dist.refine(); + rebalance(dlb.getNTimeStepSinceDLB()); + return true; } return false; @@ -1246,9 +1263,10 @@ public: dist.getSubSubDomainPosition(id, pos); } + //TODO fix in Parmetis distribution to get only the right amount of vertices /*! \brief Get the number of sub-sub-domains in this sub-graph * - * @return number of sub-sub-domains in this sub-graph + * \return number of sub-sub-domains in this sub-graph */ size_t getNSubSubDomains() { @@ -1437,6 +1455,15 @@ public: return ghost; } + /*! \brief Method to access to the grid information of the decomposition + * + * \return the grid + */ + const grid_sm<dim,void> getGrid() + { + return gr; + } + ////////////// Functions to get decomposition information /////////////// /*! \brief Write the decomposition as VTK file @@ -1620,6 +1647,18 @@ public: { return dist; } + + /*! \brief Add computation cost i to the subsubdomain with global id gid + * + * \param gid global id of the subsubdomain to update + * \param i Cost increment + */ + inline void addComputationCost(size_t gid, size_t i) + { + size_t c = dist.getSubSubDomainComputationCost(gid); + + dist.setComputationCost(gid, c + i); + } }; diff --git a/src/Decomposition/CartDecomposition_unit_test.hpp b/src/Decomposition/CartDecomposition_unit_test.hpp index a7cbb140..63f1c016 100755 --- a/src/Decomposition/CartDecomposition_unit_test.hpp +++ b/src/Decomposition/CartDecomposition_unit_test.hpp @@ -4,7 +4,7 @@ #include "CartDecomposition.hpp" #include "util/mathutil.hpp" -BOOST_AUTO_TEST_SUITE( CartDecomposition_test ) +BOOST_AUTO_TEST_SUITE (CartDecomposition_test) #define SUB_UNIT_FACTOR 1024 @@ -58,7 +58,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D ) Vcluster & vcl = *global_v_cluster; // non-periodic boundary condition - size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; + size_t bc[2] = { NON_PERIODIC, NON_PERIODIC }; // Initialize the global VCluster init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); @@ -133,16 +133,13 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D ) dlb.endIteration(); - dec.rebalance(dlb); + if(dec.rebalance(dlb)) + dec.write("DLB_test_graph_cart_" + std::to_string(i+1) + "_"); std::stringstream str; str << "DLB_test_graph_" << i + 1 << ".vtk"; dec.getDistribution().write(str.str()); - - } - // create a ghost border - dec.calculateGhostBoxes(); // For each calculated ghost box for (size_t i = 0; i < dec.getNIGhostBox(); i++) @@ -184,7 +181,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar) Vcluster & vcl = *global_v_cluster; // non-periodic boundary condition - size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; + size_t bc[2] = { NON_PERIODIC, NON_PERIODIC }; // Initialize the global VCluster init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); @@ -264,16 +261,16 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar) dlb.endIteration(); - dec.rebalance(dlb); + if(dec.rebalance(dlb)) + { + dec.write("DLB_test_graph_cart_" + std::to_string(i) + "_"); + } std::stringstream str; str << "DLB_test_graph_" << i << ".vtk"; dec.getDistribution().write(str.str()); } - // create a ghost border - dec.calculateGhostBoxes(); - // For each calculated ghost box for (size_t i = 0; i < dec.getNIGhostBox(); i++) { @@ -314,7 +311,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D) Vcluster & vcl = *global_v_cluster; // non-periodic boundary condition - size_t bc[3] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + size_t bc[3] = { NON_PERIODIC, NON_PERIODIC, NON_PERIODIC }; // Initialize the global VCluster init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); @@ -398,9 +395,6 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D) dec.getDistribution().write(str.str()); } - // create a ghost border - dec.calculateGhostBoxes(); - // For each calculated ghost box for (size_t i = 0; i < dec.getNIGhostBox(); i++) { @@ -444,10 +438,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); //! [Create CartDecomposition] - CartDecomposition<3,float> dec(vcl); + CartDecomposition<3, float> dec(vcl); // Physical domain - Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 }); size_t div[3]; // Get the number of processor and calculate the number of sub-domain @@ -456,26 +450,23 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) size_t n_sub = n_proc * SUB_UNIT_FACTOR; // Set the number of sub-domains on each dimension (in a scalable way) - for (int i = 0 ; i < 3 ; i++) - {div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} + for (int i = 0; i < 3; i++) + { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} // Define ghost - Ghost<3,float> g(0.01); + Ghost<3, float> g(0.01); // Boundary conditions - size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + size_t bc[] = { NON_PERIODIC, NON_PERIODIC, NON_PERIODIC }; // Decompose dec.setParameters(div,box,bc,g); dec.decompose(); - // create a ghost border - dec.calculateGhostBoxes(); - //! [Create CartDecomposition] // For each calculated ghost box - for (size_t i = 0 ; i < dec.getNIGhostBox() ; i++) + for (size_t i = 0; i < dec.getNIGhostBox(); i++) { SpaceBox<3,float> b = dec.getIGhostBox(i); size_t proc = dec.getIGhostBoxProcessor(i); @@ -488,10 +479,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) bool found = false; - for (size_t j = 0; j < pr.size() ; j++) + for (size_t j = 0; j < pr.size(); j++) { if (pr.get(j) == proc) - {found = true; break;} + { found = true; break;} } if (found == false) @@ -508,7 +499,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) BOOST_REQUIRE_EQUAL(val,true); // We duplicate the decomposition - CartDecomposition<3,float> dec2 = dec.duplicate(); + CartDecomposition<3, float> dec2 = dec.duplicate(); dec2.check_consistency(); // check that dec and dec2 contain the same information @@ -520,10 +511,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) // We duplicate the decomposition redefining the ghost // Define ghost - Ghost<3,float> g3(0.005); + Ghost<3, float> g3(0.005); // We duplicate the decomposition redefining the ghost - CartDecomposition<3,float> dec3 = dec.duplicate(g3); + CartDecomposition<3, float> dec3 = dec.duplicate(g3); ret = dec3.check_consistency(); BOOST_REQUIRE_EQUAL(ret,true); @@ -533,7 +524,6 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) BOOST_REQUIRE_EQUAL(ret,true); } - BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) { // Vcluster @@ -543,10 +533,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); //! [Create CartDecomposition] - CartDecomposition<3,float> dec(vcl); + CartDecomposition<3, float> dec(vcl); // Physical domain - Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 }); size_t div[3]; // Get the number of processor and calculate the number of sub-domain @@ -555,26 +545,23 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) size_t n_sub = n_proc * SUB_UNIT_FACTOR; // Set the number of sub-domains on each dimension (in a scalable way) - for (int i = 0 ; i < 3 ; i++) - {div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} + for (int i = 0; i < 3; i++) + { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} // Define ghost - Ghost<3,float> g(0.01); + Ghost<3, float> g(0.01); // Boundary conditions - size_t bc[] = {PERIODIC,PERIODIC,PERIODIC}; + size_t bc[] = { PERIODIC, PERIODIC, PERIODIC }; // Decompose dec.setParameters(div,box,bc,g); dec.decompose(); - // create a ghost border - dec.calculateGhostBoxes(); - //! [Create CartDecomposition] // For each calculated ghost box - for (size_t i = 0 ; i < dec.getNIGhostBox() ; i++) + for (size_t i = 0; i < dec.getNIGhostBox(); i++) { SpaceBox<3,float> b = dec.getIGhostBox(i); size_t proc = dec.getIGhostBoxProcessor(i); @@ -587,10 +574,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) bool found = false; - for (size_t j = 0; j < pr.size() ; j++) + for (size_t j = 0; j < pr.size(); j++) { if (pr.get(j) == proc) - {found = true; break;} + { found = true; break;} } if (found == false) @@ -606,7 +593,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) BOOST_REQUIRE_EQUAL(val,true); // We duplicate the decomposition - CartDecomposition<3,float> dec2 = dec.duplicate(); + CartDecomposition<3, float> dec2 = dec.duplicate(); dec2.check_consistency(); bool ret = dec.is_equal(dec2); @@ -619,10 +606,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) // We duplicate the decomposition redefining the ghost // Define ghost - Ghost<3,float> g3(0.005); + Ghost<3, float> g3(0.005); // We duplicate the decomposition refefining the ghost - CartDecomposition<3,float> dec3 = dec.duplicate(g3); + CartDecomposition<3, float> dec3 = dec.duplicate(g3); ret = dec3.check_consistency(); BOOST_REQUIRE_EQUAL(ret,true); @@ -634,5 +621,4 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) BOOST_AUTO_TEST_SUITE_END() - #endif diff --git a/src/Decomposition/Distribution/DistParMetisDistribution.hpp b/src/Decomposition/Distribution/DistParMetisDistribution.hpp index 92122fa6..5b08b345 100644 --- a/src/Decomposition/Distribution/DistParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/DistParMetisDistribution.hpp @@ -5,12 +5,13 @@ * Author: Antonio Leo */ +#ifndef SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_ +#define SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_ + #include "SubdomainGraphNodes.hpp" #include "parmetis_dist_util.hpp" #include "Graph/dist_map_graph.hpp" #include "Graph/DistGraphFactory.hpp" -#ifndef SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_ -#define SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_ template<unsigned int dim, typename T> class DistParMetisDistribution @@ -25,7 +26,7 @@ class DistParMetisDistribution Box<dim, T> domain; //! Processor sub-sub-domain graph - DistGraph_CSR<nm_v, nm_e> sub_g; + DistGraph_CSR<nm_v, nm_e> g; //! Convert the graph to parmetis format DistParmetis<DistGraph_CSR<nm_v, nm_e>> parmetis_graph; @@ -91,12 +92,12 @@ public: //! Create sub graph DistGraphFactory<dim, DistGraph_CSR<nm_v, nm_e>> dist_g_factory; - sub_g = dist_g_factory.template construct<NO_EDGE, T, dim - 1, 0, 1, 2>(gr.getSize(), domain); - sub_g.getDecompositionVector(vtxdist); + g = dist_g_factory.template construct<NO_EDGE, T, dim - 1, 0, 1, 2>(gr.getSize(), domain); + g.getDecompositionVector(vtxdist); if (dim == 2) - for (size_t i = 0; i < sub_g.getNVertex(); i++) - sub_g.vertex(i).template get<nm_v::x>()[2] = 0; + for (size_t i = 0; i < g.getNVertex(); i++) + g.vertex(i).template get<nm_v::x>()[2] = 0; } @@ -105,7 +106,7 @@ public: */ DistGraph_CSR<nm_v, nm_e> & getGraph() { - return sub_g; + return g; } /*! \brief Create first decomposition, it divides the graph in slices and give each slice to a processor @@ -114,20 +115,20 @@ public: void decompose() { //! Init sub graph in parmetis format - parmetis_graph.initSubGraph(sub_g); + parmetis_graph.initSubGraph(g); //! Decompose - parmetis_graph.decompose<nm_v::proc_id>(sub_g); + parmetis_graph.decompose<nm_v::proc_id>(g); //! Get result partition for this processors idx_t *partition = parmetis_graph.getPartition(); - for (size_t i = 0, j = sub_g.firstId(); i < sub_g.getNVertex() && j <= sub_g.lastId(); i++, j++) + for (size_t i = 0, j = g.firstId(); i < g.getNVertex() && j <= g.lastId(); i++, j++) { if (partition[i] != v_cl.getProcessUnitID()) - sub_g.q_move(sub_g.nodeById(j), partition[i]); + g.q_move(g.nodeById(j), partition[i]); } - sub_g.redistribute(); + g.redistribute(); } /*! \brief Refine current decomposition @@ -139,20 +140,20 @@ public: void refine() { //! Reset parmetis graph and reconstruct it - parmetis_graph.reset(sub_g); + parmetis_graph.reset(g); //! Refine - parmetis_graph.refine<nm_v::proc_id>(sub_g); + parmetis_graph.refine<nm_v::proc_id>(g); //! Get result partition for this processors idx_t *partition = parmetis_graph.getPartition(); - for (size_t i = 0, j = sub_g.firstId(); i < sub_g.getNVertex() && j <= sub_g.lastId(); i++, j++) + for (size_t i = 0, j = g.firstId(); i < g.getNVertex() && j <= g.lastId(); i++, j++) { if (partition[i] != v_cl.getProcessUnitID()) - sub_g.q_move(sub_g.nodeById(j), partition[i]); + g.q_move(g.nodeById(j), partition[i]); } - sub_g.redistribute(); + g.redistribute(); } /*! \brief Compute the unbalance value @@ -188,13 +189,13 @@ public: */ void getSubSubDomainPosition(size_t id, T (&pos)[dim]) { - if (id >= sub_g.getNVertex()) - std::cerr << "Position - Such vertex doesn't exist (id = " << id << ", " << "total size = " << sub_g.getNVertex() << ")\n"; + if (id >= g.getNVertex()) + std::cerr << "Position - Such vertex doesn't exist (id = " << id << ", " << "total size = " << g.getNVertex() << ")\n"; - pos[0] = sub_g.vertex(id).template get<nm_v::x>()[0]; - pos[1] = sub_g.vertex(id).template get<nm_v::x>()[1]; + pos[0] = g.vertex(id).template get<nm_v::x>()[0]; + pos[1] = g.vertex(id).template get<nm_v::x>()[1]; if (dim == 3) - pos[2] = sub_g.vertex(id).template get<nm_v::x>()[2]; + pos[2] = g.vertex(id).template get<nm_v::x>()[2]; } /*! \brief Function that set the weight of the vertex @@ -207,11 +208,11 @@ public: { verticesGotWeights = true; - if (id >= sub_g.getNVertex()) - std::cerr << "Weight - Such vertex doesn't exist (id = " << id << ", " << "total size = " << sub_g.getNVertex() << ")\n"; + if (id >= g.getNVertex()) + std::cerr << "Weight - Such vertex doesn't exist (id = " << id << ", " << "total size = " << g.getNVertex() << ")\n"; // If the vertex is inside this processor update the value - sub_g.vertex(id).template get<nm_v::computation>() = weight; + g.vertex(id).template get<nm_v::computation>() = weight; } @@ -231,10 +232,10 @@ public: */ size_t getVertexWeight(size_t id) { - if (id >= sub_g.getNVertex()) - std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << sub_g.getTotNVertex() << ")\n"; + if (id >= g.getNVertex()) + std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << g.getTotNVertex() << ")\n"; - return sub_g.vertex(id).template get<nm_v::computation>(); + return g.vertex(id).template get<nm_v::computation>(); } /*! \brief Compute the processor load counting the total weights of its vertices @@ -245,9 +246,9 @@ public: { size_t load = 0; - for (size_t i = 0; i < sub_g.getNVertex(); i++) + for (size_t i = 0; i < g.getNVertex(); i++) { - load += sub_g.vertex(i).template get<nm_v::computation>(); + load += g.vertex(i).template get<nm_v::computation>(); } return load; } @@ -283,10 +284,10 @@ public: */ void setMigrationCost(size_t id, size_t migration) { - if (id >= sub_g.getNVertex()) - std::cerr << "Migration - Such vertex doesn't exist (id = " << id << ", " << "total size = " << sub_g.getNVertex() << ")\n"; + if (id >= g.getNVertex()) + std::cerr << "Migration - Such vertex doesn't exist (id = " << id << ", " << "total size = " << g.getNVertex() << ")\n"; - sub_g.vertex(id).template get<nm_v::migration>() = migration; + g.vertex(id).template get<nm_v::migration>() = migration; } /*! \brief Set communication cost of the edge id @@ -297,7 +298,7 @@ public: */ void setCommunicationCost(size_t v_id, size_t e, size_t communication) { - sub_g.getChildEdge(v_id, e).template get<nm_e::communication>() = communication; + g.getChildEdge(v_id, e).template get<nm_e::communication>() = communication; } /*! \brief Returns total number of sub-sub-domains in the distribution graph @@ -305,7 +306,7 @@ public: */ size_t getNSubSubDomains() { - return sub_g.getNVertex(); + return g.getNVertex(); } /*! \brief Returns total number of neighbors of the sub-sub-domain id @@ -314,10 +315,10 @@ public: */ size_t getNSubSubDomainNeighbors(size_t id) { - if (id >= sub_g.getNVertex()) - std::cerr << "Neighbors - Such vertex doesn't exist (id = " << id << ", " << "total size = " << sub_g.getNVertex() << ")\n"; + if (id >= g.getNVertex()) + std::cerr << "Neighbors - Such vertex doesn't exist (id = " << id << ", " << "total size = " << g.getNVertex() << ")\n"; - return sub_g.getNChilds(id); + return g.getNChilds(id); } /*! \brief Print current graph and save it to file with name test_graph_[id] @@ -327,7 +328,7 @@ public: */ void write(const std::string & file) { - VTKWriter<DistGraph_CSR<nm_v, nm_e>, DIST_GRAPH> gv2(sub_g); + VTKWriter<DistGraph_CSR<nm_v, nm_e>, DIST_GRAPH> gv2(g); gv2.write(file); } @@ -336,7 +337,7 @@ public: v_cl = dist.v_cl; gr = dist.gr; domain = dist.domain; - sub_g = dist.sub_g; + g = dist.g; vtxdist = dist.vtxdist; partitions = dist.partitions; v_per_proc = dist.v_per_proc; @@ -350,7 +351,7 @@ public: v_cl = dist.v_cl; gr = dist.gr; domain = dist.domain; - sub_g.swap(dist.sub_g); + g.swap(dist.g); vtxdist.swap(dist.vtxdist); partitions.swap(dist.partitions); v_per_proc.swap(dist.v_per_proc); diff --git a/src/Decomposition/Distribution/Distribution_unit_tests.hpp b/src/Decomposition/Distribution/Distribution_unit_tests.hpp index 2ae41af6..e7a393df 100644 --- a/src/Decomposition/Distribution/Distribution_unit_tests.hpp +++ b/src/Decomposition/Distribution/Distribution_unit_tests.hpp @@ -316,6 +316,238 @@ BOOST_AUTO_TEST_CASE( DistParmetis_distribution_test) BOOST_REQUIRE_EQUAL(sizeof(DistParMetisDistribution<3,float>),1440ul); } +void print_test_v(std::string test, size_t sz) +{ + if (global_v_cluster->getProcessUnitID() == 0) + std::cout << test << " " << sz << "\n"; +} + +BOOST_AUTO_TEST_CASE( Parmetis_distribution_test_random_walk ) +{ + typedef Point<3,float> s; + + Vcluster & v_cl = *global_v_cluster; + + // set the seed + // create the random generator engine + std::srand(v_cl.getProcessUnitID()); + std::default_random_engine eg; + std::uniform_real_distribution<float> ud(0.0f, 1.0f); + + size_t nsz[] = { 0, 32, 4 }; + nsz[0] = 65536 * v_cl.getProcessingUnits(); + + print_test_v( "Testing 3D random walk vector k<=",nsz[0]); + + // 3D test + for (size_t i = 0; i < 3; i++ ) + { + size_t k = nsz[i]; + + BOOST_TEST_CHECKPOINT( "Testing 3D random walk k=" << k ); + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Grid info + grid_sm<3, void> info( { GS_SIZE, GS_SIZE, GS_SIZE }); + + // Boundary conditions + size_t bc[3] = { NON_PERIODIC,NON_PERIODIC,NON_PERIODIC }; + + // factor + float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f); + + // ghost + Ghost<3,float> ghost(0.01 / factor); + + // Distributed vector + vector_dist<3,float, Point_test<float>, CartDecomposition<3, float, HeapMemory, ParMetisDistribution<3, float>>> vd(k,box,bc,ghost); + + auto it = vd.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.template getPos<s::x>(key)[0] = ud(eg); + vd.template getPos<s::x>(key)[1] = ud(eg); + vd.template getPos<s::x>(key)[2] = ud(eg); + + ++it; + } + + vd.map(); + + vd.addComputationCosts(); + + vd.getDecomposition().getDistribution().write("parmetis_random_walk_" + std::to_string(0) + ".vtk"); + + // 10 step random walk + + for (size_t j = 0; j < 10; j++) + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.template getPos<s::x>(key)[0] += 0.01 * ud(eg); + vd.template getPos<s::x>(key)[1] += 0.01 * ud(eg); + vd.template getPos<s::x>(key)[2] += 0.01 * ud(eg); + + ++it; + } + + vd.map(); + + /////// Interactions /// + + //vd.ghost_get<>(); + //vd.getDomainIterator; + + //////////////////////// + + vd.addComputationCosts(); + + vd.getDecomposition().rebalance(10); + + vd.map(); + + vd.getDecomposition().getDistribution().write("parmetis_random_walk_" + std::to_string(j+1) + ".vtk"); + + size_t l = vd.size_local(); + v_cl.sum(l); + v_cl.execute(); + + // Count the local particles and check that the total number is consistent + size_t cnt = total_n_part_lc(vd,bc); + + //BOOST_REQUIRE_EQUAL((size_t)k,cnt); + } + } +} + +BOOST_AUTO_TEST_CASE( Parmetis_distribution_test_random_walk_2D ) +{ + typedef Point<2,float> s; + + Vcluster & v_cl = *global_v_cluster; + + // set the seed + // create the random generator engine + std::srand(v_cl.getProcessUnitID()); + std::default_random_engine eg; + std::uniform_real_distribution<float> ud(0.0f, 0.5f); + + size_t k = 100000; + + print_test_v( "Testing 2D random walk vector k<=",k); + + BOOST_TEST_CHECKPOINT( "Testing 2D random walk k=" << k ); + + Box<2,float> box({0.0,0.0},{1.0,1.0}); + + // Grid info + grid_sm<2, void> info( { GS_SIZE, GS_SIZE }); + + // Boundary conditions + size_t bc[2] = { PERIODIC, PERIODIC }; + + // factor + float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f); + + // ghost + Ghost<2,float> ghost(0.01 / factor); + + // Distributed vector + vector_dist<2,float, Point_test<float>, CartDecomposition<2, float, HeapMemory, ParMetisDistribution<2, float>>> vd(k,box,bc,ghost); + + // Init DLB tool + DLB dlb(v_cl); + + // Set unbalance threshold + dlb.setHeurisitc(DLB::Heuristic::UNBALANCE_THRLD); + dlb.setThresholdLevel(DLB::ThresholdLevel::THRLD_MEDIUM); + + auto it = vd.getIterator(); + + size_t c = 0; + while (it.isNext()) + { + auto key = it.get(); + if(c % 5) + { + vd.template getPos<s::x>(key)[0] = ud(eg); + vd.template getPos<s::x>(key)[1] = ud(eg); + }else{ + vd.template getPos<s::x>(key)[0] = ud(eg)*2; + vd.template getPos<s::x>(key)[1] = ud(eg)*2; + } + ++it; + ++c; + } + + vd.map(); + + vd.addComputationCosts(); + + vd.getDecomposition().rebalance(dlb); + + vd.map(); + + vd.getDecomposition().write("dec_init"); + vd.getDecomposition().getDistribution().write("parmetis_random_walk_" + std::to_string(0) + ".vtk"); + vd.write("particles_", 0, NO_GHOST); + + // 10 step random walk + for (size_t j = 0; j < 50; j++) + { + std::cout << "Iteration " << (j+1) << "\n"; + + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.template getPos<s::x>(key)[0] += 0.01 * ud(eg); + vd.template getPos<s::x>(key)[1] += 0.01 * ud(eg); + + ++it; + } + + vd.map(); + + /////// Interactions /// + + //vd.ghost_get<>(); + //vd.getDomainIterator; + + //////////////////////// + + vd.addComputationCosts(); + + vd.getDecomposition().rebalance(dlb); + + vd.map(); + + vd.getDecomposition().getDistribution().write("parmetis_random_walk_" + std::to_string(j+1) + ".vtk"); + vd.write("particles_", j+1, NO_GHOST); + vd.getDecomposition().write("dec_"); + + size_t l = vd.size_local(); + v_cl.sum(l); + v_cl.execute(); + + // Count the local particles and check that the total number is consistent + //size_t cnt = total_n_part_lc(vd,bc); + + //BOOST_REQUIRE_EQUAL((size_t)k,cnt); + } + +} + BOOST_AUTO_TEST_SUITE_END() #endif /* SRC_DECOMPOSITION_DISTRIBUTION_DISTRIBUTION_UNIT_TESTS_HPP_ */ diff --git a/src/Decomposition/Distribution/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp index b85d9cbc..69ba63ae 100644 --- a/src/Decomposition/Distribution/ParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp @@ -12,7 +12,6 @@ #include "SubdomainGraphNodes.hpp" #include "parmetis_util.hpp" -#include "Graph/dist_map_graph.hpp" #include "Graph/ids.hpp" #define PARMETIS_DISTRIBUTION_ERROR 100002 @@ -373,7 +372,7 @@ public: rid nl_vertex = vtxdist.get(p_id+1) - vtxdist.get(p_id); // Reset parmetis graph and reconstruct it - parmetis_graph.reset(gp, vtxdist, m2g); + parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights); // Refine parmetis_graph.refine<nm_v::proc_id>(vtxdist); @@ -451,16 +450,6 @@ public: if (id >= gp.getNVertex()) std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n"; - //TOACTIVATE when move to the distributed graph - //Return the pos object only if the vertex is in this graph - /* - if(sub_g.vertexIsInThisGraph(id)){ - pos[0] = sub_g.vertex(id).template get<nm_v::x>()[0]; - pos[1] = sub_g.vertex(id).template get<nm_v::x>()[1]; - if (dim == 3) - pos[2] = sub_g.vertex(id).template get<nm_v::x>()[2]; - }*/ - // Copy the geometrical informations inside the pos vector pos[0] = gp.vertex(id).template get<nm_v::x>()[0]; pos[1] = gp.vertex(id).template get<nm_v::x>()[1]; @@ -500,7 +489,7 @@ public: * \param id vertex id * */ - size_t getVertexWeight(size_t id) + size_t getSubSubDomainComputationCost(size_t id) { if (id >= gp.getNVertex()) std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n"; @@ -584,12 +573,11 @@ public: */ void write(const std::string & file) { - if (v_cl.getProcessUnitID() == 0) - { + //f (v_cl.getProcessUnitID() == 0) + //{ VTKWriter<Graph_CSR<nm_v, nm_e>, VTK_GRAPH> gv2(gp); - gv2.write(file); - } - + gv2.write(std::to_string(v_cl.getProcessUnitID()) + file); + //} } const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist) diff --git a/src/Decomposition/Distribution/parmetis_util.hpp b/src/Decomposition/Distribution/parmetis_util.hpp index de3b9125..f89378da 100755 --- a/src/Decomposition/Distribution/parmetis_util.hpp +++ b/src/Decomposition/Distribution/parmetis_util.hpp @@ -129,7 +129,7 @@ class Parmetis * \param g Global graph * */ - void constructAdjList(Graph &g, const std::unordered_map<rid,gid> & m2g) + void constructAdjList(Graph &g, const std::unordered_map<rid, gid> & m2g) { // init basic graph informations and part vector // Put the total communication size to NULL @@ -139,7 +139,7 @@ class Parmetis size_t nedge = 0; size_t i = 0; - for (rid j = first; i < nvertex ; i++, ++j) + for (rid j = first; i < nvertex; i++, ++j) { Mg.part[i] = p_id; nedge += g.getNChilds(m2g.find(j)->second.id); @@ -161,13 +161,14 @@ class Parmetis size_t j = 0; // for each vertex calculate the position of the starting point in the adjacency list - for (rid i = first ; i <= last; ++i, j++) + for (rid i = first; i <= last; ++i, j++) { gid idx = m2g.find(i)->second; // Add weight to vertex and migration cost Mg.vwgt[j] = g.vertex(idx.id).template get<nm_v::computation>(); - Mg.vsize[j] = g.vertex(idx.id).template get<nm_v::migration>();; + Mg.vsize[j] = g.vertex(idx.id).template get<nm_v::migration>(); + ; // Calculate the starting point in the adjacency list Mg.xadj[id] = prev; @@ -179,7 +180,7 @@ class Parmetis size_t child = g.getChild(idx.id, s); Mg.adjncy[prev + s] = g.vertex(child).template get<nm_v::id>(); - Mg.adjwgt[prev + s] = g.getChildEdge(idx.id,s).template get<nm_e::communication>(); + Mg.adjwgt[prev + s] = g.getChildEdge(idx.id, s).template get<nm_e::communication>(); } // update the position for the next vertex @@ -202,8 +203,8 @@ public: * \param nc number of partitions * */ - Parmetis(Vcluster & v_cl, size_t nc) - :v_cl(v_cl), nc(nc) + Parmetis(Vcluster & v_cl, size_t nc) : + v_cl(v_cl), nc(nc) { // TODO Move into VCluster MPI_Comm_dup(MPI_COMM_WORLD, &comm); @@ -313,18 +314,20 @@ public: * \param g Global graph to set * \param w true if vertices have weights */ - void initSubGraph(Graph & g, const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid,gid> & m2g, bool w) + void initSubGraph(Graph & g, const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid, gid> & m2g, bool w) { p_id = v_cl.getProcessUnitID(); first = vtxdist.get(p_id); - last = vtxdist.get(p_id+1)-1; + last = vtxdist.get(p_id + 1) - 1; nvertex = last.id - first.id + 1; setDefaultParameters(w); // construct the adjacency list constructAdjList(g, m2g); + + //reset(g, vtxdist, m2g, w); } /*! \brief Decompose the graph @@ -337,8 +340,7 @@ public: { // Decompose - ParMETIS_V3_PartKway((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, - Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm); + ParMETIS_V3_PartKway((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm); } /*! \brief Refine the graph @@ -352,9 +354,7 @@ public: { // Refine - ParMETIS_V3_AdaptiveRepart((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.vsize, Mg.adjwgt, - Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.itr, Mg.options, Mg.edgecut, - Mg.part, &comm); + ParMETIS_V3_AdaptiveRepart((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.vsize, Mg.adjwgt, Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.itr, Mg.options, Mg.edgecut, Mg.part, &comm); } /*! \brief Get graph partition vector @@ -367,13 +367,15 @@ public: /*! \brief Reset graph and reconstruct it * - * \param Global graph - * + * \param g Global graph + * \param vtxdist Distribution vector + * \param m2g Mapped id to global id map + * \param vgw Using weights on vertices */ - void reset(Graph & g,const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid,gid> & m2g) + void reset(Graph & g, const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid, gid> & m2g, bool vgw) { first = vtxdist.get(p_id); - last = vtxdist.get(p_id+1)-1; + last = vtxdist.get(p_id + 1) - 1; nvertex = last.id - first.id + 1; // Deallocate the graph structures @@ -403,8 +405,10 @@ public: delete[] Mg.part; } + setDefaultParameters(vgw); + // construct the adjacency list - constructAdjList(g,m2g); + constructAdjList(g, m2g); } /*! \brief Seth the default parameters for parmetis @@ -448,7 +452,7 @@ public: Mg.tpwgts = new real_t[Mg.nparts[0]]; Mg.ubvec = new real_t[Mg.nparts[0]]; - for (size_t s = 0; s < (size_t)Mg.nparts[0]; s++) + for (size_t s = 0; s < (size_t) Mg.nparts[0]; s++) { Mg.tpwgts[s] = 1.0 / Mg.nparts[0]; Mg.ubvec[s] = 1.05; @@ -464,7 +468,7 @@ public: //! This is used to indicate if the graph is weighted. wgtflag can take one of four values: Mg.wgtflag = new idx_t[1]; - if(w) + if (w) Mg.wgtflag[0] = 3; else Mg.wgtflag[0] = 0; diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp index d06c6134..b807e53f 100755 --- a/src/Decomposition/ie_ghost.hpp +++ b/src/Decomposition/ie_ghost.hpp @@ -1003,6 +1003,21 @@ public: return true; } + + /*! \brief Reset the nn_prcs structure + * + */ + void reset() + { + box_nn_processor_int.clear(); + proc_int_box.clear(); + vb_ext.clear(); + vb_int.clear(); + geo_cell.clear(); + shifts.clear(); + ids_p.clear(); + ids.clear(); + } }; diff --git a/src/Decomposition/ie_loc_ghost.hpp b/src/Decomposition/ie_loc_ghost.hpp index 3772b988..1021c633 100755 --- a/src/Decomposition/ie_loc_ghost.hpp +++ b/src/Decomposition/ie_loc_ghost.hpp @@ -639,6 +639,15 @@ public: return true; } + + /*! \brief Reset the ie_loc_ghost + * + */ + void reset() + { + loc_ghost_box.clear(); + sub_domains_tmp.clear(); + } }; diff --git a/src/Decomposition/nn_processor.hpp b/src/Decomposition/nn_processor.hpp index ddda06c4..633fbc3b 100755 --- a/src/Decomposition/nn_processor.hpp +++ b/src/Decomposition/nn_processor.hpp @@ -539,6 +539,20 @@ public: return true; } + /*! \brief Reset the nn_prcs structure + * + */ + void reset() + { + nn_processors.clear(); + nn_processor_subdomains.clear(); + nn_processor_subdomains_tmp.clear(); + proc_adj_box.clear(); + boxes.clear(); + recv_cnt = 0; + aBC = false; + } + //! Used for testing porpose do not use std::unordered_map<size_t, N_box<dim,T>> & get_nn_processor_subdomains() { diff --git a/src/Graph/dist_map_graph.hpp b/src/Graph/dist_map_graph.hpp index 0ac39268..0a9186e6 100644 --- a/src/Graph/dist_map_graph.hpp +++ b/src/Graph/dist_map_graph.hpp @@ -300,6 +300,41 @@ class DistGraph_CSR // Queue of requests to add edges openfpm::vector<EdgeReq> e_queue; + /*! \brief Add vertex vrt with global id and id properties + * + * \param vrt vertex object to add + * \param gid global id, unique in global graph + * \param id id, unique n global graph + */ + inline void add_vertex(const V & vrt, size_t id, size_t gid) + { + // Create vertex info object + v_info vm; + vm.template get<v_info::id>() = id; + vm.template get<v_info::gid>() = gid; + + // Add the vertex info + v_m.add(vm); + + // Add the vertex + v.add(vrt); + + // Update id to global map + id2glb.insert( { id, gid }); + + // Update global id to local index + glb2loc.insert( { gid, v.size() - 1 }); + + // Update global id to id + glb2id.insert( { gid, id }); + + // Set the number of adjacent vertex for this vertex to 0 + v_l.add(0ul); + + // Add a slot for the vertex adjacency list + e_l.resize(e_l.size() + v_slot); + } + /*! \brief add edge on the graph * * add edge on the graph @@ -308,7 +343,6 @@ class DistGraph_CSR * \param v2 end vertex * */ - template<typename CheckPolicy = NoCheck> inline size_t addEdge_(size_t v1, size_t v2) { // If v1 and v2 does not satisfy some criteria return @@ -708,11 +742,16 @@ class DistGraph_CSR */ void updateVtxdist() { + // Prepare vtxdist vtxdist.resize(vcl.getProcessingUnits() + 1); + // Set first element to 0, always the same vtxdist.get(0) = 0; + // Prepare array that will contain the sizes of all the graphs openfpm::vector<size_t> recv(vcl.getProcessingUnits()); + + // Set the local size size_t tv = getNVertex(); // Sent and receive the size of each subgraph @@ -1280,15 +1319,18 @@ public: * \param id GLOBAL id of the vertex to access * */ - auto getVertex(size_t id) const -> const decltype( v.get(id) ) + auto getVertex(size_t id) const -> const decltype( v.get(0) ) { try { return v.get(glb2loc.at(id)); - } catch (const std::out_of_range& oor) + } + catch (const std::out_of_range& oor) { std::cerr << "The vertex with global id " << id << " is not in this sub-graph. Try to call reqVertex(" << id << ") and sync() first.\n"; } + + return v.get(0); } /*! \brief operator to access the vertex position index by id property @@ -1306,8 +1348,10 @@ public: } catch (const std::out_of_range& oor) { - std::cout << "Node not found by glb: "<< id <<std::endl; + std::cout << "Node not found by glb: " << id << std::endl; } + + return 0; } /*! /brief Get the first id of the graph @@ -1630,36 +1674,10 @@ public: * * \param vrt vertex object to add * \param gid global id, unique in global graph - * \param id id, unique n global graph */ - inline void add_vertex(const V & vrt, size_t id, size_t gid) + inline void add_vertex(const V & vrt, size_t gid) { - - // Create vertex info object - v_info vm; - vm.template get<v_info::id>() = id; - vm.template get<v_info::gid>() = gid; - - // Add the vertex info - v_m.add(vm); - - // Add the vertex - v.add(vrt); - - // Update id to global map - id2glb.insert( { id, gid }); - - // Update global id to local index - glb2loc.insert( { gid, v.size() - 1 }); - - // Update global id to id - glb2id.insert( { gid, id }); - - // Set the number of adjacent vertex for this vertex to 0 - v_l.add(0ul); - - // Add a slot for the vertex adjacency list - e_l.resize(e_l.size() + v_slot); + add_vertex(vrt, gid, gid); } /*! \brief map global id to local id @@ -1735,12 +1753,24 @@ public: return e.get(id_x_end); } + /*! \brief Get the global id of edge's source vertex + * + * \param v1 source vertex of the edge + * \param s n child of vertex v1 + * \return global id of the source vertex + */ size_t getChildSrcGid(size_t v1, size_t s) { size_t eid = e_l.template get<e_map::eid>(v1 * v_slot + s); return e_m.template get<e_info::sgid>(eid); } + /*! \brief Get the global id of edge's destination vertex + * + * \param v1 source vertex of the edge + * \param s n child of vertex v1 + * \return global id of the destination vertex + */ size_t getChildDstGid(size_t v1, size_t s) { size_t eid = e_l.template get<e_map::eid>(v1 * v_slot + s); @@ -1780,7 +1810,6 @@ public: // set source and destination ids of the edge e_m.get(id_x_end).template get<e_info::sgid>() = v1; e_m.get(id_x_end).template get<e_info::dgid>() = v2; - } /*! \brief Execute a synchronization through processor to finalize the add of the edges requested in the e_queue @@ -1828,7 +1857,6 @@ public: inline void swap(DistGraph_CSR<V, E, VertexList, EdgeList> & g) { // switch the memory - v.swap(g.v); v_m.swap(g.v_m); e.swap(g.e); @@ -1854,7 +1882,6 @@ public: inline void swap(DistGraph_CSR<V, E, VertexList, EdgeList> && g) { // switch the memory - v.swap(g.v); v_m.swap(g.v_m); e.swap(g.e); @@ -1927,7 +1954,7 @@ public: /*! \brief Once added all the vertices this function must be called to initialize all the properties, useless if a graph factory is used * */ - void initProperties() + void init() { initDistributionVector(); diff --git a/src/Graph/dist_map_graph_unit_test.hpp b/src/Graph/dist_map_graph_unit_test.hpp index a3bf3daa..a8d9461c 100644 --- a/src/Graph/dist_map_graph_unit_test.hpp +++ b/src/Graph/dist_map_graph_unit_test.hpp @@ -331,60 +331,79 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_redistribution) BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) { - //! Vcluster + // Vcluster Vcluster & vcl = *global_v_cluster; if(vcl.getProcessingUnits() != 4) return; - //! Initialize the global VCluster + // Initialize the global VCluster init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); - //! Box - Box<2, float> box( { 0.0, 0.0 }, { 10.0, 10.0 }); + // [create graph adding freely the vertices and the edges ] - //! Distributed graph + // Distributed graph DistGraph_CSR<vx, ed> gd; + // Add vertices for (size_t i = 0; i < 4; ++i) { vx v; v.template get<vx::x>()[0] = vcl.getProcessUnitID(); v.template get<vx::x>()[1] = i; v.template get<vx::x>()[2] = 0; - size_t id = vcl.getProcessUnitID()*4 + i; - gd.add_vertex(v, id, id); - } - - gd.initProperties(); - - gd.add_edge(0,1); - gd.add_edge(1,2); - gd.add_edge(2,3); - gd.add_edge(0,4); - gd.add_edge(1,5); - gd.add_edge(2,6); - gd.add_edge(3,7); - gd.add_edge(4,5); - gd.add_edge(5,6); - gd.add_edge(6,7); - gd.add_edge(4,8); - gd.add_edge(5,9); - gd.add_edge(6,10); - gd.add_edge(7,11); - gd.add_edge(8,9); - gd.add_edge(9,10); - gd.add_edge(10,11); - gd.add_edge(8,12); - gd.add_edge(9,13); - gd.add_edge(10,14); - gd.add_edge(11,15); - gd.add_edge(12,13); - gd.add_edge(13,14); - gd.add_edge(14,15); + size_t gid = vcl.getProcessUnitID()*4 + i; + gd.add_vertex(v, gid); + } + + // This method must be called after adding vertices + gd.init(); + + // Add edges + if(vcl.getProcessUnitID()==0) + { + gd.add_edge(0,1); + gd.add_edge(1,2); + gd.add_edge(2,3); + gd.add_edge(0,4); + gd.add_edge(1,5); + gd.add_edge(2,6); + gd.add_edge(3,7); + } + + if(vcl.getProcessUnitID()==1) + { + gd.add_edge(4,5); + gd.add_edge(5,6); + gd.add_edge(6,7); + gd.add_edge(4,8); + gd.add_edge(5,9); + gd.add_edge(6,10); + gd.add_edge(7,11); + } + + if(vcl.getProcessUnitID()==2) + { + gd.add_edge(8,9); + gd.add_edge(9,10); + gd.add_edge(10,11); + gd.add_edge(8,12); + gd.add_edge(9,13); + gd.add_edge(10,14); + gd.add_edge(11,15); + } + + if(vcl.getProcessUnitID()==3) + { + gd.add_edge(12,13); + gd.add_edge(13,14); + gd.add_edge(14,15); + } gd.syncEdge(); + //! [create graph adding freely the vertices and the edges ] + if(vcl.getProcessUnitID() == 0) { for (size_t i = 0; i < 4; ++i) @@ -406,6 +425,9 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) gd.sync(); + if(vcl.getProcessUnitID() == 0) + BOOST_REQUIRE_EQUAL(gd.getVertexId(5), 5); + gd.deleteGhosts(); if (vcl.getProcessUnitID() == 0) @@ -466,6 +488,70 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) } } +BOOST_AUTO_TEST_CASE( dist_map_graph_use_multi_free_add) +{ + // Vcluster + Vcluster & vcl = *global_v_cluster; + + if(vcl.getProcessingUnits() != 4) + return; + + // Initialize the global VCluster + init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc, &boost::unit_test::framework::master_test_suite().argv); + + // Distributed graph + DistGraph_CSR<vx, ed> gd; + + // Add vertices + if(vcl.getProcessUnitID()==0) + { + vx v; + gd.add_vertex(v, 0); + gd.add_vertex(v, 1); + gd.add_vertex(v, 2); + gd.add_vertex(v, 3); + } + + if(vcl.getProcessUnitID()==1) + { + vx v; + gd.add_vertex(v, 4); + gd.add_vertex(v, 5); + gd.add_vertex(v, 6); + gd.add_vertex(v, 7); + } + + // This method must be called ALWAYS after adding vertices + gd.init(); + + if(vcl.getProcessUnitID()==2) + { + vx v; + gd.add_vertex(v, 8); + gd.add_vertex(v, 9); + gd.add_vertex(v, 10); + gd.add_vertex(v, 11); + } + + if(vcl.getProcessUnitID()==3) + { + vx v; + gd.add_vertex(v, 12); + gd.add_vertex(v, 13); + gd.add_vertex(v, 14); + gd.add_vertex(v, 15); + } + + // This method must be called ALWAYS after adding vertices + gd.init(); + + gd.reqVertex(15); + + gd.sync(); + + BOOST_REQUIRE_EQUAL(gd.getVertexId(gd.getNVertex()-1), 15); +} + BOOST_AUTO_TEST_SUITE_END() #endif diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 0bcfbc1a..f16882b3 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -492,9 +492,6 @@ class grid_dist_id // Create the sub-domains dec.setParameters(div,domain,bc,ghost); dec.decompose(); - - // Calculate ghost boxes - dec.calculateGhostBoxes(); } /*! \brief Initialize the grid diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 85a333f4..24aa9c53 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -65,7 +65,7 @@ * */ -template<unsigned int dim, typename St, typename prop, typename Decomposition , typename Memory=HeapMemory> +template<unsigned int dim, typename St, typename prop, typename Decomposition, typename Memory = HeapMemory> class vector_dist { private: @@ -78,7 +78,7 @@ private: //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<Point<dim,St>> v_pos; + openfpm::vector<Point<dim, St>> v_pos; //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles @@ -88,7 +88,7 @@ private: Vcluster & v_cl; // definition of the send vector for position - typedef openfpm::vector<Point<dim,St>,ExtPreAlloc<Memory>,openfpm::grow_policy_identity> send_pos_vector; + typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, openfpm::grow_policy_identity> send_pos_vector; ////////////////////////////// // COMMUNICATION variables @@ -128,9 +128,9 @@ private: struct pos_prop { //! position vector - openfpm::vector<Point<dim,St>,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> pos; + openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, openfpm::grow_policy_identity> pos; //! properties vector - openfpm::vector<prop,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> prp; + openfpm::vector<prop, PreAllocHeapMemory<2>, openfpm::grow_policy_identity> prp; }; /*! \brief Label particles for mappings @@ -144,7 +144,7 @@ private: { // reset lbl_p lbl_p.resize(v_cl.getProcessingUnits()); - for (size_t i = 0 ; i < lbl_p.size() ; i++) + for (size_t i = 0; i < lbl_p.size(); i++) lbl_p.get(i).clear(); // resize the label buffer @@ -166,16 +166,15 @@ private: if (dec.getDomain().isInside(v_pos.get(key)) == true) p_id = dec.processorIDBC(v_pos.get(key)); else - p_id = obp::out(key,v_cl.getProcessUnitID()); - + p_id = obp::out(key, v_cl.getProcessUnitID()); // Particle to move if (p_id != v_cl.getProcessUnitID()) { - if ((long int)p_id != -1) + if ((long int) p_id != -1) { - prc_sz.get(p_id)++; - lbl_p.get(p_id).add(key); + prc_sz.get(p_id)++;lbl_p + .get(p_id).add(key); opart.add(key); } else @@ -223,16 +222,16 @@ private: // Given a particle, it return which processor require it (first id) and shift id, second id // For an explanation about shifts vectors please consult getShiftVector in ie_ghost - const openfpm::vector<std::pair<size_t,size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key),UNIQUE); + const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE); - for (size_t i = 0 ; i < vp_id.size() ; i++) + for (size_t i = 0; i < vp_id.size(); i++) { // processor id size_t p_id = vp_id.get(i).first; // add particle to communicate - ghost_prc_sz.get(p_id)++; - opart.get(p_id).add(key); + ghost_prc_sz.get(p_id)++;opart + .get(p_id).add(key); oshift.get(p_id).add(vp_id.get(i).second); } @@ -244,37 +243,37 @@ private: * * In order to understand what this function use the following * - \verbatim - - [1,1] - +---------+------------------------+---------+ - | (1,-1) | | (1,1) | - | | | (1,0) --> 7 | | | - | v | | v | - | 6 | | 8 | - +--------------------------------------------+ - | | | | - | | | | - | | | | - | (-1,0) | | (1,0) | - | | | | | | - | v | (0,0) --> 4 | v | - | 3 | | 5 | - | | | | - B | | | A | - * | | | * | - | | | | - | | | | - | | | | - +--------------------------------------------+ - | (-1,-1) | | (-1,1) | - | | | (-1,0) --> 1 | | | - | v | | v | - | 0 | | 2 | - +---------+------------------------+---------+ - - - \endverbatim + \verbatim + + [1,1] + +---------+------------------------+---------+ + | (1,-1) | | (1,1) | + | | | (1,0) --> 7 | | | + | v | | v | + | 6 | | 8 | + +--------------------------------------------+ + | | | | + | | | | + | | | | + | (-1,0) | | (1,0) | + | | | | | | + | v | (0,0) --> 4 | v | + | 3 | | 5 | + | | | | + B | | | A | + * | | | * | + | | | | + | | | | + | | | | + +--------------------------------------------+ + | (-1,-1) | | (-1,1) | + | | | (-1,0) --> 1 | | | + | v | | v | + | 0 | | 2 | + +---------+------------------------+---------+ + + + \endverbatim * * The box is the domain, while all boxes at the border (so not (0,0) ) are the @@ -286,7 +285,7 @@ private: void add_loc_particles_bc() { // get the shift vectors - const openfpm::vector<Point<dim,St>> & shifts = dec.getShiftVectors(); + const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); // this map is used to check if a combination is already present std::unordered_map<size_t, size_t> map_cmb; @@ -296,41 +295,39 @@ private: // The boxes touching the border of the domain are divided in groups (first vector) // each group contain internal ghost coming from sub-domain of the same section - openfpm::vector_std<openfpm::vector_std<Box<dim,St>>> box_f; + openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f; openfpm::vector_std<comb<dim>> box_cmb; - for (size_t i = 0 ; i < dec.getNLocalSub() ; i++) + for (size_t i = 0; i < dec.getNLocalSub(); i++) { size_t Nl = dec.getLocalNIGhost(i); - for (size_t j = 0 ; j < Nl ; j++) + for (size_t j = 0; j < Nl; j++) { // If the ghost does not come from the intersection with an out of // border sub-domain the combination is all zero and n_zero return dim - if (dec.getLocalIGhostPos(i,j).n_zero() == dim) + if (dec.getLocalIGhostPos(i, j).n_zero() == dim) continue; // Check if we already have boxes with such combination - auto it = map_cmb.find(dec.getLocalIGhostPos(i,j).lin()); + auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin()); if (it == map_cmb.end()) { // we do not have it box_f.add(); - box_f.last().add(dec.getLocalIGhostBox(i,j)); - box_cmb.add(dec.getLocalIGhostPos(i,j)); - map_cmb[dec.getLocalIGhostPos(i,j).lin()] = box_f.size()-1; + box_f.last().add(dec.getLocalIGhostBox(i, j)); + box_cmb.add(dec.getLocalIGhostPos(i, j)); + map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1; } else { // we have it - box_f.get(it->second).add(dec.getLocalIGhostBox(i,j)); + box_f.get(it->second).add(dec.getLocalIGhostBox(i, j)); } - } } - if (box_f.size() == 0) return; else @@ -343,13 +340,13 @@ private: auto key = it.get(); // If particles are inside these boxes - for (size_t i = 0 ; i < box_f.size() ; i++) + for (size_t i = 0; i < box_f.size(); i++) { - for (size_t j = 0 ; j < box_f.get(i).size() ; j++) + for (size_t j = 0; j < box_f.get(i).size(); j++) { if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true) { - Point<dim,St> p = v_pos.get(key); + Point<dim, St> p = v_pos.get(key); // shift p -= shifts.get(box_cmb.get(i).lin()); @@ -385,11 +382,11 @@ private: void fill_send_ghost_pos_buf(openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos) { // get the shift vectors - const openfpm::vector<Point<dim,St>> & shifts = dec.getShiftVectors(); + const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); // create a number of send buffers equal to the near processors g_pos_send.resize(ghost_prc_sz.size()); - for (size_t i = 0 ; i < g_pos_send.size() ; i++) + for (size_t i = 0; i < g_pos_send.size(); i++) { // set the preallocated memory to ensure contiguity g_pos_send.get(i).setMemory(*prAlloc_pos); @@ -399,13 +396,13 @@ private: } // Fill the send buffer - for ( size_t i = 0 ; i < opart.size() ; i++ ) + for (size_t i = 0; i < opart.size(); i++) { - for (size_t j = 0 ; j < opart.get(i).size() ; j++) + for (size_t j = 0; j < opart.get(i).size(); j++) { - Point<dim,St> s = v_pos.get(opart.get(i).get(j)); + Point<dim, St> s = v_pos.get(opart.get(i).get(j)); s -= shifts.get(oshift.get(i).get(j)); - g_pos_send.get(i).set(j,s); + g_pos_send.get(i).set(j, s); } } } @@ -420,11 +417,11 @@ private: * \param prAlloc_prp Memory object for the send buffer * */ - template<typename send_vector,typename prp_object, int... prp> void fill_send_ghost_prp_buf(openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp) + template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp) { // create a number of send buffers equal to the near processors g_send_prp.resize(ghost_prc_sz.size()); - for (size_t i = 0 ; i < g_send_prp.size() ; i++) + for (size_t i = 0; i < g_send_prp.size(); i++) { // set the preallocated memory to ensure contiguity g_send_prp.get(i).setMemory(*prAlloc_prp); @@ -434,17 +431,17 @@ private: } // Fill the send buffer - for ( size_t i = 0 ; i < opart.size() ; i++ ) + for (size_t i = 0; i < opart.size(); i++) { - for (size_t j = 0 ; j < opart.get(i).size() ; j++) + for (size_t j = 0; j < opart.get(i).size(); j++) { // source object type - typedef encapc<1,prop,typename openfpm::vector<prop>::memory_conf> encap_src; + typedef encapc<1, prop, typename openfpm::vector<prop>::memory_conf> encap_src; // destination object type - typedef encapc<1,prp_object,typename openfpm::vector<prp_object>::memory_conf> encap_dst; + typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::memory_conf> encap_dst; // Copy only the selected properties - object_si_d<encap_src,encap_dst,OBJ_ENCAP,prp...>(v_prp.get(opart.get(i).get(j)),g_send_prp.get(i).get(j)); + object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(opart.get(i).get(j)), g_send_prp.get(i).get(j)); } } } @@ -460,14 +457,14 @@ private: { pb.resize(prc_r.size()); - for (size_t i = 0 ; i < prc_r.size() ; i++) + for (size_t i = 0; i < prc_r.size(); i++) { // Create the size required to store the particles position and properties to communicate - size_t s1 = openfpm::vector<Point<dim,St>,HeapMemory,openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i),0); - size_t s2 = openfpm::vector<prop,HeapMemory,openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i),0); + size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); + size_t s2 = openfpm::vector<prop, HeapMemory, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); // Preallocate the memory - size_t sz[2] = {s1,s2}; + size_t sz[2] = { s1, s2 }; PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz); // Set the memory allocator @@ -479,10 +476,9 @@ private: pb.get(i).prp.resize(prc_sz_r.get(i)); } - // Run through all the particles and fill the sending buffer - for (size_t i = 0 ; i < opart.size() ; i++) + for (size_t i = 0; i < opart.size(); i++) { auto it = opart.get(i).getIterator(); size_t lbl = p_map_req.get(i); @@ -492,8 +488,8 @@ private: size_t key = it.get(); size_t id = opart.get(i).get(key); - pb.get(lbl).pos.set(key,v_pos.get(id)); - pb.get(lbl).prp.set(key,v_prp.get(id)); + pb.get(lbl).pos.set(key, v_pos.get(id)); + pb.get(lbl).prp.set(key, v_prp.get(id)); ++it; } @@ -507,22 +503,22 @@ private: * \tparam prp set of properties to send * */ - template<typename send_vector,typename prp_object, int... prp> void process_received_ghost_prp() + template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp() { // Mark the ghost part g_m = v_prp.size(); // Process the received data (recv_mem_gg != 0 if you have data) - for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++) + for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) { // calculate the number of received elements size_t n_ele = recv_sz.get(i) / sizeof(prp_object); // add the received particles to the vector - PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i)); + PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); // create vector representation to a piece of memory already allocated - openfpm::vector<prp_object,PtrMemory,openfpm::grow_policy_identity> v2; + openfpm::vector<prp_object, PtrMemory, openfpm::grow_policy_identity> v2; v2.setMemory(*ptr1); @@ -530,7 +526,7 @@ private: v2.resize(n_ele); // Add the ghost particle - v_prp.template add_prp<prp_object,PtrMemory,openfpm::grow_policy_identity,prp...>(v2); + v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2); } } @@ -540,17 +536,17 @@ private: void process_received_ghost_pos() { // Process the received data (recv_mem_gg != 0 if you have data) - for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++) + for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) { // calculate the number of received elements - size_t n_ele = recv_sz.get(i) / sizeof(Point<dim,St>); + size_t n_ele = recv_sz.get(i) / sizeof(Point<dim, St> ); // add the received particles to the vector - PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i)); + PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); // create vector representation to a piece of memory already allocated - openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> v2; + openfpm::vector<Point<dim, St>, PtrMemory, openfpm::grow_policy_identity> v2; v2.setMemory(*ptr1); @@ -558,7 +554,7 @@ private: v2.resize(n_ele); // Add the ghost particle - v_pos.template add<PtrMemory,openfpm::grow_policy_identity>(v2); + v_pos.template add<PtrMemory, openfpm::grow_policy_identity>(v2); } } @@ -571,24 +567,24 @@ private: { size_t o_p_id = 0; - for (size_t i = 0 ; i < recv_mem_gm.size() ; i++) + for (size_t i = 0; i < recv_mem_gm.size(); i++) { // Get the number of elements - size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim,St>) + sizeof(prop)); + size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prop)); // Pointer of the received positions for each near processor - void * ptr_pos = (unsigned char *)recv_mem_gm.get(i).getPointer(); + void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer(); // Pointer of the received properties for each near processor - void * ptr_prp = (unsigned char *)recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim,St>); + void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> ); - PtrMemory * ptr1 = new PtrMemory(ptr_pos,n_ele * sizeof(Point<dim,St>)); - PtrMemory * ptr2 = new PtrMemory(ptr_prp,n_ele * sizeof(prop)); + PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> )); + PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prop)); // create vector representation to a piece of memory already allocated - openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> vpos; - openfpm::vector<prop,PtrMemory,openfpm::grow_policy_identity> vprp; + openfpm::vector<Point<dim, St>, PtrMemory, openfpm::grow_policy_identity> vpos; + openfpm::vector<prop, PtrMemory, openfpm::grow_policy_identity> vprp; vpos.setMemory(*ptr1); vprp.setMemory(*ptr2); @@ -599,25 +595,25 @@ private: // Add the received particles to v_pos and v_prp size_t j = 0; - for ( ; j < vpos.size() && o_p_id < out_part.size() ; j++, o_p_id++) + for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++) { - v_pos.set(out_part.get(o_p_id),vpos.get(j)); - v_prp.set(out_part.get(o_p_id),vprp.get(j)); + v_pos.set(out_part.get(o_p_id), vpos.get(j)); + v_prp.set(out_part.get(o_p_id), vprp.get(j)); } - for ( ; j < vpos.size(); j++) + for (; j < vpos.size(); j++) { v_pos.add(); - v_pos.set(v_pos.size()-1,vpos.get(j)); + v_pos.set(v_pos.size() - 1, vpos.get(j)); v_prp.add(); - v_prp.set(v_prp.size()-1,vprp.get(j)); + v_prp.set(v_prp.size() - 1, vprp.get(j)); } } // remove the (out-going particles) in the vector - v_pos.remove(out_part,o_p_id); - v_prp.remove(out_part,o_p_id); + v_pos.remove(out_part, o_p_id); + v_prp.remove(out_part, o_p_id); } /*! \brief Calculate send buffers total size and allocation @@ -633,13 +629,13 @@ private: template<typename prp_object> void calc_send_ghost_buf(size_t & size_byte_prp, size_t & size_byte_pos, std::vector<size_t> & pap_prp, std::vector<size_t> & pap_pos) { // Calculate the total size required for the sending buffer - for ( size_t i = 0 ; i < ghost_prc_sz.size() ; i++ ) + for (size_t i = 0; i < ghost_prc_sz.size(); i++) { - size_t alloc_ele = openfpm::vector<prp_object,HeapMemory,openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i),0); + size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); pap_prp.push_back(alloc_ele); size_byte_prp += alloc_ele; - alloc_ele = openfpm::vector<Point<dim,St>,HeapMemory,openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i),0); + alloc_ele = openfpm::vector<Point<dim, St>, HeapMemory, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); pap_pos.push_back(alloc_ele); size_byte_pos += alloc_ele; } @@ -655,8 +651,8 @@ public: * \param g Ghost margins * */ - vector_dist(size_t np, Box<dim,St> box, const size_t (& bc)[dim] ,const Ghost<dim,St> & g) - :dec(*global_v_cluster),v_cl(*global_v_cluster) + vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g) : + dec(*global_v_cluster), v_cl(*global_v_cluster) { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -689,15 +685,14 @@ public: // Calculate the maximum number (before merging) of sub-domain on // each dimension size_t div[dim]; - for (size_t i = 0 ; i < dim ; i++) - {div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/dim));} + for (size_t i = 0; i < dim; i++) + { + div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim)); + } // Create the sub-domains - dec.setParameters(div,box,bc,g); + dec.setParameters(div, box, bc, g); dec.decompose(); - - // and create the ghost boxes - dec.calculateGhostBoxes(); } ~vector_dist() @@ -714,7 +709,7 @@ public: */ static size_t getDefaultNsubsub() { - return V_SUB_UNIT_FACTOR; + return V_SUB_UNIT_FACTOR; } /*! \brief return the local size of the vector @@ -765,7 +760,7 @@ public: * contain non local particles * */ - template<typename obp=KillParticle> void map() + template<typename obp = KillParticle> void map() { // outgoing particles-id openfpm::vector<size_t> out_part; @@ -781,7 +776,7 @@ public: v_prp.resize(g_m); // Contain the processor id of each particle (basically where they have to go) - labelParticleProcessor<obp>(opart,prc_sz,out_part); + labelParticleProcessor<obp>(opart, prc_sz, out_part); // Calculate the sending buffer size for each processor, put this information in // a contiguous buffer @@ -789,7 +784,7 @@ public: openfpm::vector<size_t> prc_sz_r; openfpm::vector<size_t> prc_r; - for (size_t i = 0 ; i < v_cl.getProcessingUnits() ; i++) + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) { if (prc_sz.get(i) != 0) { @@ -804,25 +799,25 @@ public: openfpm::vector<pos_prop> pb; // fill the send buffers - fill_send_map_buf(prc_r,prc_sz_r,pb); + fill_send_map_buf(prc_r, prc_sz_r, pb); // Create the set of pointers openfpm::vector<void *> ptr(prc_r.size()); - for (size_t i = 0 ; i < prc_r.size() ; i++) + for (size_t i = 0; i < prc_r.size(); i++) { ptr.get(i) = pb.get(i).pos.getPointer(); } // convert the particle number to buffer size - for (size_t i = 0 ; i < prc_sz_r.size() ; i++) + for (size_t i = 0; i < prc_sz_r.size(); i++) { - prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(Point<dim,St>)); + prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prop) + sizeof(Point<dim, St> )); } // Send and receive the particles recv_mem_gm.clear(); - v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *)prc_sz_r.getPointer(), (size_t *)prc_r.getPointer() , (void **)ptr.getPointer() , vector_dist::message_alloc_map, this ,NEED_ALL_SIZE); + v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist::message_alloc_map, this, NEED_ALL_SIZE); // Process the incoming particles @@ -839,13 +834,13 @@ public: * \param opt options WITH_POSITION, it send also the positional information of the particles * */ - template<int... prp> void ghost_get(size_t opt = WITH_POSITION) + template<int ... prp> void ghost_get(size_t opt = WITH_POSITION) { // Sending property object - typedef object<typename object_creator<typename prop::type,prp...>::type> prp_object; + typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; // send vector for each processor - typedef openfpm::vector<prp_object,ExtPreAlloc<Memory>,openfpm::grow_policy_identity> send_vector; + typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, openfpm::grow_policy_identity> send_vector; // reset the ghost part v_pos.resize(g_m); @@ -864,18 +859,19 @@ public: std::vector<size_t> pap_prp; std::vector<size_t> pap_pos; - calc_send_ghost_buf<prp_object>(size_byte_prp,size_byte_pos,pap_prp,pap_pos); + calc_send_ghost_buf<prp_object>(size_byte_prp, size_byte_pos, pap_prp, pap_pos); // Create memory for the send buffer g_prp_mem.resize(size_byte_prp); - if (opt != NO_POSITION) g_pos_mem.resize(size_byte_pos); + if (opt != NO_POSITION) + g_pos_mem.resize(size_byte_pos); // Create and fill send buffer for particle properties - ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(pap_prp,g_prp_mem); + ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(pap_prp, g_prp_mem); openfpm::vector<send_vector> g_send_prp; - fill_send_ghost_prp_buf<send_vector,prp_object,prp...>(g_send_prp,prAlloc_prp); + fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(g_send_prp, prAlloc_prp); // Create and fill the send buffer for the particle position @@ -883,24 +879,23 @@ public: openfpm::vector<send_pos_vector> g_pos_send; if (opt != NO_POSITION) { - prAlloc_pos = new ExtPreAlloc<Memory>(pap_pos,g_pos_mem); - fill_send_ghost_pos_buf(g_pos_send,prAlloc_pos); + prAlloc_pos = new ExtPreAlloc<Memory>(pap_pos, g_pos_mem); + fill_send_ghost_pos_buf(g_pos_send, prAlloc_pos); } // Create processor list openfpm::vector<size_t> prc; - for (size_t i = 0 ; i < opart.size() ; i++) + for (size_t i = 0; i < opart.size(); i++) prc.add(dec.IDtoProc(i)); // Send/receive the particle properties information - v_cl.sendrecvMultipleMessagesNBX(prc,g_send_prp,msg_alloc_ghost_get,this); - process_received_ghost_prp<send_vector,prp_object,prp...>(); - + v_cl.sendrecvMultipleMessagesNBX(prc, g_send_prp, msg_alloc_ghost_get, this); + process_received_ghost_prp<send_vector, prp_object, prp...>(); if (opt != NO_POSITION) { // Send/receive the particle properties information - v_cl.sendrecvMultipleMessagesNBX(prc,g_pos_send,msg_alloc_ghost_get,this); + v_cl.sendrecvMultipleMessagesNBX(prc, g_pos_send, msg_alloc_ghost_get, this); process_received_ghost_pos(); } @@ -918,9 +913,9 @@ public: * \param ptr void pointer parameter for additional data to pass to the call-back * */ - static void * msg_alloc_ghost_get(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) + static void * msg_alloc_ghost_get(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) { - vector_dist<dim,St,prop,Decomposition,Memory> * v = static_cast<vector_dist<dim,St,prop,Decomposition,Memory> *>(ptr); + vector_dist<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr); v->recv_sz.resize(v->dec.getNNProcessors()); v->recv_mem_gg.resize(v->dec.getNNProcessors()); @@ -948,10 +943,10 @@ public: * \return the pointer where to store the message for the processor i * */ - static void * message_alloc_map(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) + static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) { // cast the pointer - vector_dist<dim,St,prop,Decomposition,Memory> * vd = static_cast<vector_dist<dim,St,prop,Decomposition,Memory> *>(ptr); + vector_dist<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr); vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits()); vd->recv_mem_gm.get(i).resize(msg_i); @@ -982,7 +977,7 @@ public: */ template<unsigned int id> inline auto getLastPos() -> decltype(v_pos.template get<id>(0)) { - return v_pos.template get<id>(g_m-1); + return v_pos.template get<id>(g_m - 1); } /*! \brief Get the property of the last element @@ -997,7 +992,7 @@ public: */ template<unsigned int id> inline auto getLastProp() -> decltype(v_prp.template get<id>(0)) { - return v_prp.template get<id>(g_m-1); + return v_prp.template get<id>(g_m - 1); } /*! \brief Construct a cell list starting from the stored particles @@ -1009,9 +1004,9 @@ public: * \return the Cell list * */ - template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > CellL getCellList(St r_cut) + template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut) { - return getCellList(r_cut,dec.getGhost()); + return getCellList(r_cut, dec.getGhost()); } /*! \brief Construct a cell list starting from the stored particles @@ -1027,32 +1022,32 @@ public: * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged * */ - template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > CellL getCellList(St r_cut, const Ghost<dim,St> & enlarge) + template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge) { CellL cell_list; // calculate the parameters of the cell list // get the processor bounding box - Box<dim,St> pbox = dec.getProcessorBounds(); + Box<dim, St> pbox = dec.getProcessorBounds(); // extend by the ghost pbox.enlarge(enlarge); - Box<dim,St> cell_box; + Box<dim, St> cell_box; size_t div[dim]; // Calculate the division array and the cell box - for (size_t i = 0 ; i < dim ; i++) + for (size_t i = 0; i < dim; i++) { - div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i))/ r_cut); + div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut); div[i]++; - cell_box.setLow(i,0.0); - cell_box.setHigh(i,div[i]*r_cut); + cell_box.setLow(i, 0.0); + cell_box.setHigh(i, div[i] * r_cut); } - cell_list.Initialize(cell_box,div,pbox.getP1()); + cell_list.Initialize(cell_box, div, pbox.getP1()); // for each particle add the particle to the cell list @@ -1062,7 +1057,7 @@ public: { auto key = it.get(); - cell_list.add(this->template getPos<0>(key),key.getKey()); + cell_list.add(this->template getPos<0>(key), key.getKey()); ++it; } @@ -1085,46 +1080,46 @@ public: auto cl = getCellList(r_cut); // square of the cutting radius - St r_cut2 = r_cut*r_cut; + St r_cut2 = r_cut * r_cut; // iterate the particles - auto it_p = this->getDomainIterator(); - while (it_p.isNext()) - { - // key - vect_dist_key_dx key = it_p.get(); - - // Get the position of the particles - Point<dim,St> p = this->template getPos<0>(key); - - // Clear the neighborhood of the particle - verlet.get(key.getKey()).clear(); - - // Get the neighborhood of the particle - auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p)); - while(NN.isNext()) - { - auto nnp = NN.get(); - - // p != q - if (nnp == key.getKey()) - { - ++NN; - continue; - } - - Point<dim,St> q = this->template getPos<0>(nnp); - - if (p.distance2(q) < r_cut2) - verlet.get(key.getKey()).add(nnp); - - // Next particle - ++NN; - } - - // next particle - ++it_p; - } + auto it_p = this->getDomainIterator(); + while (it_p.isNext()) + { + // key + vect_dist_key_dx key = it_p.get(); + + // Get the position of the particles + Point<dim, St> p = this->template getPos<0>(key); + + // Clear the neighborhood of the particle + verlet.get(key.getKey()).clear(); + + // Get the neighborhood of the particle + auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p)); + while (NN.isNext()) + { + auto nnp = NN.get(); + + // p != q + if (nnp == key.getKey()) + { + ++NN; + continue; + } + + Point<dim, St> q = this->template getPos<0>(nnp); + + if (p.distance2(q) < r_cut2) + verlet.get(key.getKey()).add(nnp); + + // Next particle + ++NN; + } + + // next particle + ++it_p; + } } /*! \brief It return the number of particles contained by the previous processors @@ -1168,7 +1163,7 @@ public: */ vector_dist_iterator getIterator() { - return vector_dist_iterator(0,v_pos.size()); + return vector_dist_iterator(0, v_pos.size()); } /*! /brief Get a grid Iterator @@ -1178,30 +1173,35 @@ public: * \return a Grid iterator * */ - inline grid_dist_id_iterator_dec<Decomposition> getGridIterator(const size_t (& sz)[dim]) + inline grid_dist_id_iterator_dec<Decomposition> getGridIterator(const size_t (&sz)[dim]) { size_t sz_g[dim]; grid_key_dx<dim> start; grid_key_dx<dim> stop; - for (size_t i = 0 ; i < dim ; i++) + for (size_t i = 0; i < dim; i++) { - start.set_d(i,0); + start.set_d(i, 0); if (dec.isPeriodic(i) == PERIODIC) { sz_g[i] = sz[i]; - stop.set_d(i,sz_g[i]-2); + stop.set_d(i, sz_g[i] - 2); } else { sz_g[i] = sz[i]; - stop.set_d(i,sz_g[i]-1); + stop.set_d(i, sz_g[i] - 1); } } - grid_dist_id_iterator_dec<Decomposition> it_dec(dec,sz_g,start,stop); + grid_dist_id_iterator_dec<Decomposition> it_dec(dec, sz_g, start, stop); return it_dec; } + void calculateComputationCosts() + { + + } + /*! \brief Get the iterator across the position of the ghost particles * * \return an iterator @@ -1209,10 +1209,9 @@ public: */ vector_dist_iterator getGhostIterator() { - return vector_dist_iterator(g_m,v_pos.size()); + return vector_dist_iterator(g_m, v_pos.size()); } - /*! \brief Get an iterator that traverse the particles in the domain * * \return an iterator @@ -1220,7 +1219,7 @@ public: */ vector_dist_iterator getDomainIterator() { - return vector_dist_iterator(0,g_m); + return vector_dist_iterator(0, g_m); } /*! \brief Get the decomposition @@ -1228,11 +1227,37 @@ public: * \return * */ - const Decomposition & getDecomposition() + Decomposition & getDecomposition() { return dec; } + inline void addComputationCosts() + { + CellDecomposer_sm<dim, St> cdsm; + + cdsm.setDimensions(dec.getDomain(), dec.getGrid().getSize(), 0); + + auto it = getIterator(); + + for (size_t i = 0; i < dec.getNSubSubDomains(); i++) + { + dec.setSubSubDomainComputationCost(i, 0); + } + + while (it.isNext()) + { + size_t v = cdsm.getCell(this->template getPos<0>(it.get())); + + dec.addComputationCost(v, 1); + + //std::cout << v_cl.getProcessUnitID() << " -> " << v << " : " << dec.getDistribution().getSubSubDomainComputationCost(v) << "\n"; + + ++it; + } + + } + /*! \brief Output particle position and properties * * \param out output @@ -1244,12 +1269,31 @@ public: inline bool write(std::string out, int opt = NO_GHOST) { // CSVWriter test - CSVWriter<openfpm::vector<Point<dim,St>>, openfpm::vector<prop> > csv_writer; + CSVWriter<openfpm::vector<Point<dim, St>>, openfpm::vector<prop> > csv_writer; std::string output = std::to_string(out + std::to_string(v_cl.getProcessUnitID()) + std::to_string(".csv")); // Write the CSV - return csv_writer.write(output,v_pos,v_prp); + return csv_writer.write(output, v_pos, v_prp); + } + + /*! \brief Output particle position and properties + * + * \param out output + * \param opt NO_GHOST or WITH_GHOST + * + * \return if the file has been written correctly + * + */ + inline bool write(std::string out, size_t iteration, int opt = NO_GHOST) + { + // CSVWriter test + CSVWriter<openfpm::vector<Point<dim, St>>, openfpm::vector<prop> > csv_writer; + + std::string output = std::to_string(out + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".csv")); + + // Write the CSV + return csv_writer.write(output, v_pos, v_prp); } /* \brief It return the id of structure in the allocation list @@ -1281,5 +1325,4 @@ public: } }; - #endif /* VECTOR_HPP_ */ -- GitLab