From 078e6b8a4754efec6ea4d7d676d851c837602758 Mon Sep 17 00:00:00 2001 From: Yaroslav <beorn.90@gmail.com> Date: Tue, 14 Jun 2016 15:20:17 +0200 Subject: [PATCH] Reordering + iterator good performance --- images/Makefile.am | 8 +- openfpm_data | 2 +- openfpm_devices | 2 +- openfpm_io | 2 +- openfpm_numerics | 2 +- src/Makefile.am | 2 +- src/Vector/vector_dist.hpp | 132 ++++++++++++++++--- src/Vector/vector_dist_unit_test.hpp | 181 ++++++++++++++++++++++++++- src/main.cpp | 1 + 9 files changed, 300 insertions(+), 32 deletions(-) diff --git a/images/Makefile.am b/images/Makefile.am index 4634f73c3..41ca924f1 100644 --- a/images/Makefile.am +++ b/images/Makefile.am @@ -3,22 +3,22 @@ LINKLIBS = $(METIS_LIB) $(PARMETIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDF noinst_PROGRAMS = cart_dec metis_dec dom_box vector_dist cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp -cart_dec_CXXFLAGS = $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs +cart_dec_CXXFLAGS = $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -I/usr/local/libhilbert/include -Wno-unused-function -Wno-unused-local-typedefs cart_dec_CFLAGS = $(CUDA_CFLAGS) cart_dec_LDADD = $(LINKLIBS) -lparmetis -lmetis metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp -metis_dec_CXXFLAGS = $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs +metis_dec_CXXFLAGS = $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -I/usr/local/libhilbert/include -Wno-unused-function -Wno-unused-local-typedefs metis_dec_CFLAGS = $(CUDA_CFLAGS) metis_dec_LDADD = $(LINKLIBS) -lmetis dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp -dom_box_CXXFLAGS = $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs +dom_box_CXXFLAGS = $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -I/usr/local/libhilbert/include -Wno-unused-function -Wno-unused-local-typedefs dom_box_CFLAGS = $(CUDA_CFLAGS) dom_box_LDADD = $(LINKLIBS) vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp -vector_dist_CXXFLAGS = $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs +vector_dist_CXXFLAGS = $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -I/usr/local/libhilbert/include -Wno-unused-function -Wno-unused-local-typedefs vector_dist_CFLAGS = $(CUDA_CFLAGS) vector_dist_LDADD = $(LINKLIBS) -lparmetis -lmetis diff --git a/openfpm_data b/openfpm_data index 21d50e7f0..82bddfe80 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 21d50e7f0f796ceb5699dcf6939183572aba2ba0 +Subproject commit 82bddfe80a08243f00acae696b42920c439f2bb3 diff --git a/openfpm_devices b/openfpm_devices index 68d2e016d..d46372d3d 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit 68d2e016d10ddcdf201bbc6cd4270ff0e4623d02 +Subproject commit d46372d3db114dd2dc95b3b03d7d9b906287d253 diff --git a/openfpm_io b/openfpm_io index d07c3c784..78f8cff03 160000 --- a/openfpm_io +++ b/openfpm_io @@ -1 +1 @@ -Subproject commit d07c3c7848e446437526d0bbda0843c18ab6a925 +Subproject commit 78f8cff03f684d2ddebed35b11c33bd0d9672a75 diff --git a/openfpm_numerics b/openfpm_numerics index 759fb6002..cd85b0454 160000 --- a/openfpm_numerics +++ b/openfpm_numerics @@ -1 +1 @@ -Subproject commit 759fb600260b785c9d282c10a9f1d6596767ec78 +Subproject commit cd85b04542371e1ed08507f52172b10e0310a35a diff --git a/src/Makefile.am b/src/Makefile.am index afc0ff2e3..148b3fd52 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,7 +2,7 @@ LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOS noinst_PROGRAMS = pdata pdata_SOURCES = main.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp -pdata_CXXFLAGS = $(HDF5_CPPFLAGS) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) (H5PART_INCLUDE) -DPARALLEL_IO -I/usr/local/libhilbert/include -Wno-unused-local-typedefs +pdata_CXXFLAGS = $(HDF5_CPPFLAGS) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) $(H5PART_INCLUDE) -DPARALLEL_IO -I/usr/local/libhilbert/include -Wno-unused-local-typedefs pdata_CFLAGS = $(CUDA_CFLAGS) pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis -L/usr/local/libhilbert/lib -lhilbert nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/common.hpp Decomposition/Decomposition.hpp Decomposition/ie_ghost.hpp \ diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index b340e276d..30b8cf298 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -17,6 +17,7 @@ #include "memory/PreAllocHeapMemory.hpp" #include "memory/PtrMemory.hpp" #include "NN/CellList/CellList.hpp" +#include "NN/CellList/CellListFast_hilb.hpp" #include "util/common.hpp" #include "util/object_util.hpp" #include "memory/ExtPreAlloc.hpp" @@ -660,6 +661,35 @@ private: } } + /*! \brief Calculate parameters for the cell list + * + * \param div Division array + * \param r_cut interation radius or size of each cell + * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost. This parameter says how much must be enlarged + * + * \return the processor bounding box + */ + inline Box<dim, St> cl_param_calculate(size_t (&div)[dim], St r_cut, const Ghost<dim, St> & enlarge) + + { + // calculate the parameters of the cell list + + // get the processor bounding box + Box<dim, St> pbox = dec.getProcessorBounds(); + + // extend by the ghost + pbox.enlarge(enlarge); + + // Calculate the division array and the cell box + for (size_t i = 0; i < dim; i++) + { + div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut); + div[i]++; + pbox.setHigh(i,pbox.getLow(i) + div[i]*r_cut); + } + return pbox; + } + public: /*! \brief Constructor @@ -1036,6 +1066,24 @@ public: return getCellList(r_cut, g); } + /*! \brief Construct an hilbert cell list starting from the stored particles + * + * \tparam CellL CellList type to construct + * + * \param r_cut interation radius, or size of each cell + * + * \return the Cell list + * + */ + template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut) + { + // Get ghost and anlarge by 1% + Ghost<dim,St> g = dec.getGhost(); + g.magnify(1.01); + + return getCellList_hilb(r_cut, g); + } + /*! \brief Update a cell list using the stored particles * * \tparam CellL CellList type to construct @@ -1062,6 +1110,37 @@ public: } } + /*template<typename CellL> void celllist_initialize(CellL & cell_list, St r_cut, const Ghost<dim, St> & enlarge) + { + // calculate the parameters of the cell list + + // get the processor bounding box + Box<dim, St> pbox = dec.getProcessorBounds(); + // extend by the ghost + pbox.enlarge(enlarge); + + size_t div[dim]; + + // Calculate the division array and the cell box + for (size_t i = 0; i < dim; i++) + { + div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut); + div[i]++; + pbox.setHigh(i,pbox.getLow(i) + div[i]*r_cut); + } + + if(is_Hilbert<CellL>::type::value == true) + { + std::cout << "Case 1: " << demangle(typeid(CellL).name()) << std::endl; + cell_list.Initialize(pbox, div, g_m); + } + else + { + std::cout << "Case 2: " << demangle(typeid(CellL).name()) << std::endl; + cell_list.Initialize(pbox, div); + } + }*/ + /*! \brief Construct a cell list starting from the stored particles * * It differ from the get getCellList for an additional parameter, in case the @@ -1079,23 +1158,43 @@ public: { CellL cell_list; - // calculate the parameters of the cell list + // Division array + size_t div[dim]; - // get the processor bounding box - Box<dim, St> pbox = dec.getProcessorBounds(); - // extend by the ghost - pbox.enlarge(enlarge); + // Processor bounding box + auto pbox = cl_param_calculate(div, r_cut, enlarge); + + cell_list.Initialize(pbox, div); + + updateCellList(cell_list); + + return cell_list; + } + /*! \brief Construct an hilbert cell list starting from the stored particles + * + * It differ from the get getCellList for an additional parameter, in case the + * domain + ghost is not big enough to contain additional padding particles, a Cell list + * with bigger space can be created + * (padding particles in general are particles added by the user out of the domains) + * + * \tparam CellL CellList type to construct + * + * \param r_cut interation radius, or size of each cell + * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged + * + */ + template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge) + { + CellL cell_list; + + // Division array size_t div[dim]; - // Calculate the division array and the cell box - for (size_t i = 0; i < dim; i++) - { - div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut); - div[i]++; - } + // Processor bounding box + auto pbox = cl_param_calculate(div, r_cut, enlarge); - cell_list.Initialize(pbox, div); + cell_list.Initialize(pbox, div, g_m); updateCellList(cell_list); @@ -1202,20 +1301,15 @@ public: // extend by the ghost pbox.enlarge(enlarge); - Box<dim,St> cell_box; - size_t div[dim]; // Calculate the division array and the cell box for (size_t i = 0 ; i < dim ; i++) { div[i] = 1 << m; - - cell_box.setLow(i,0.0); - cell_box.setHigh(i,pbox.getP2().get(i) - pbox.getP1().get(i)); } - cell_list.Initialize(cell_box,div,pbox.getP1()); + cell_list.Initialize(pbox,div); // for each particle add the particle to the cell list @@ -1225,7 +1319,7 @@ public: { auto key = it.get(); - cell_list.add(this->template getPos<0>(key),key.getKey()); + cell_list.add(this->getPos(key),key.getKey()); ++it; } diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp index 43e23886e..3f1007dcc 100644 --- a/src/Vector/vector_dist_unit_test.hpp +++ b/src/Vector/vector_dist_unit_test.hpp @@ -11,6 +11,7 @@ #include <random> #include "Vector/vector_dist.hpp" #include "data_type/aggregate.hpp" +#include "Vector/vector_dist_cl_hilb_performance_tests.hpp" /*! \brief Count the total number of particles * @@ -1161,11 +1162,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test ) } } -BOOST_AUTO_TEST_CASE( vector_dist_hilbert_timer_test ) +BOOST_AUTO_TEST_CASE( vector_dist_hilbert_2d_benchmark_test ) { typedef Point<2,float> s; - Vcluster & v_cl = *global_v_cluster; + Vcluster & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -1200,8 +1201,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_hilbert_timer_test ) { auto key = it.get(); - vd.template getPos<s::x>(key)[0] = ud(eg); - vd.template getPos<s::x>(key)[1] = ud(eg); + vd.getPos(key)[0] = ud(eg); + vd.getPos(key)[1] = ud(eg); ++it; } @@ -1236,6 +1237,178 @@ BOOST_AUTO_TEST_CASE( vector_dist_hilbert_timer_test ) } } +BOOST_AUTO_TEST_CASE( vector_dist_cl_hilb_forces_test ) +{ + ///////// INPUT DATA ////////// + + // Dimensionality of the space + const size_t dim = 2; + // Cut-off radiuses. Can be put different number of values + openfpm::vector<float> cl_r_cutoff {0.05}; + // The starting amount of particles (remember that this number is multiplied by number of processors you use for testing) + size_t cl_k_start = 100000; + // The lower threshold for number of particles + size_t cl_k_min = 10000; + // Ghost part of distributed vector + double ghost_part = 0.05; + + /////////////////////////////// + + //For different r_cut + for (size_t r = 0; r < cl_r_cutoff.size(); r++ ) + { + Vcluster & v_cl = create_vcluster(); + + //Cut-off radius + float r_cut = cl_r_cutoff.get(r); + + //Number of particles + size_t k = cl_k_start * v_cl.getProcessingUnits(); + + std::string str("Testing " + std::to_string(dim) + "D vector's forces k<="); + + vector_dist_test::print_test_v(str,k); + + std::cout << std::endl << "r_cut is " << r_cut << std::endl << std::endl; + + //For different number of particles + for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) + { + BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces k<=" << k_int ); + + std::cout << "k_int: " << k_int << std::endl; + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + for (size_t i = 0; i < dim; i++) + bc[i] = PERIODIC; + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + // Initialize dist vectors + vd_initialize_double<dim>(vd, vd2, v_cl, k_int); + + vd.template ghost_get<0>(); + vd2.template ghost_get<0>(); + + //Get a cell list + + auto NN = vd.getCellList(r_cut); + + //Calculate forces + + auto t_forces = vector_dist_cl_perf_test::calculate_forces<dim>(NN,vd,r_cut); + + //Get a cell list hilb + + auto NN_hilb = vd2.getCellList_hilb(r_cut); + + //Calculate forces + auto t_forces_hilb = calculate_forces_hilb<dim>(NN_hilb,vd2,r_cut); + + auto it_v = vd.getIterator(); + + while (it_v.isNext()) + { + //key + vect_dist_key_dx key = it_v.get(); + + for (size_t i = 0; i < dim; i++) + { + auto a1 = vd.template getProp<0>(key)[i]; + auto a2 = vd2.template getProp<0>(key)[i]; + //Check that the forces are equal + BOOST_REQUIRE_EQUAL(a1,a2); + } + + ++it_v; + } + } + } +} + +BOOST_AUTO_TEST_CASE( stupid_test ) +{ + ///////// INPUT DATA ////////// + + // Dimensionality of the space + const size_t dim = 2; + // Cut-off radiuses. Can be put different number of values + openfpm::vector<float> cl_r_cutoff {0.05}; + // The starting amount of particles (remember that this number is multiplied by number of processors you use for testing) + size_t cl_k_start = 100000; + // The lower threshold for number of particles + size_t cl_k_min = 10000; + // Ghost part of distributed vector + double ghost_part = 0.05; + + /////////////////////////////// + + //For different r_cut + for (size_t r = 0; r < cl_r_cutoff.size(); r++ ) + { + Vcluster & v_cl = create_vcluster(); + + //Cut-off radius + float r_cut = cl_r_cutoff.get(r); + + //Number of particles + size_t k = cl_k_start * v_cl.getProcessingUnits(); + + std::string str("Testing " + std::to_string(dim) + "D vector's forces k<="); + + vector_dist_test::print_test_v(str,k); + + std::cout << std::endl << "r_cut is " << r_cut << std::endl << std::endl; + + //For different number of particles + for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) + { + BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces k<=" << k_int ); + + std::cout << "k_int: " << k_int << std::endl; + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + for (size_t i = 0; i < dim; i++) + bc[i] = PERIODIC; + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + // Initialize dist vectors + vd_initialize_double<dim>(vd, vd2, v_cl, k_int); + + vd.template ghost_get<0>(); + vd2.template ghost_get<0>(); + + //Get a cell list + + auto NN = vd.getCellList(r_cut); + } + } +} BOOST_AUTO_TEST_SUITE_END() #endif /* VECTOR_DIST_UNIT_TEST_HPP_ */ diff --git a/src/main.cpp b/src/main.cpp index 4a178acf8..01ef322b5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -26,6 +26,7 @@ #include "dec_optimizer_unit_test.hpp" #include "Grid/grid_dist_id_unit_test.hpp" #include "Vector/vector_dist_unit_test.hpp" +#include "Vector/vector_dist_HDF5_save_test.hpp" //#ifdef PERFORMANCE_TEST #include "pdata_performance.hpp" //#endif -- GitLab