diff --git a/configure.ac b/configure.ac index cd1ce4486715c633a72f81e09e6eedaf0e048418..08578e10db2a274fc0219f4bdb0afc21ad4c7c01 100755 --- a/configure.ac +++ b/configure.ac @@ -9,10 +9,7 @@ AC_CONFIG_SRCDIR([src/main.cpp]) AM_INIT_AUTOMAKE AC_CONFIG_HEADER([src/config/config.h]) m4_ifdef([ACX_PTHREAD],,[m4_include([m4/acx_pthread.m4])]) -m4_ifdef([AX_CHECK_CL],,[m4_include([m4/ax_opencl.m4])]) -m4_ifdef([AX_BOOST_BASE],,[m4_include([m4/ax_boost_base.m4])]) -m4_ifdef([AX_BOOST_PROGRAM_OPTIONS],,[m4_include([m4/ax_boost_program_options.m4])]) -m4_ifdef([AX_BOOST_THREAD],,[m4_include([m4/ax_boost_thread.m4])]) +m4_ifdef([AX_BOOST],,[m4_include([m4/ax_boost.m4])]) m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])]) m4_ifdef([AX_OPENMP],,[m4_include([m4/ax_openmp.m4])]) m4_ifdef([AX_CUDA],,[m4_include([m4/ax_cuda.m4])]) @@ -53,7 +50,7 @@ if test x"$debuger" = x"yes"; then CXXFLAGS="$CXXFLAGS -g3 -Wall -O0 " NVCCFLAGS+="$NVCCFLAGS -g -O0 " else - CXXFLAGS="$CXXFLAGS -Wall -O3 -g3 -flto -funroll-loops " + CXXFLAGS="$CXXFLAGS -Wall -O3 -g3 -funroll-loops " NVCCFLAGS+="$NVCCFLAGS -O3 " fi @@ -80,6 +77,10 @@ fi INCLUDES_PATH+="-I. -Isrc/config/ -I../../metis_install/include -I../../OpenFPM_IO/src -I../../OpenFPM_data/src -I../../OpenFPM_devices/src -I../../OpenFPM_vcluster/src/" +##### CHECK FOR BOOST ############## + +AX_BOOST([1.52],[],[]) + ####### Checking for GPU support AX_CUDA @@ -116,10 +117,6 @@ AM_CONDITIONAL(BUILDCUDA, test ! x$NVCC = x"no") AC_DEFINE([HAVE_MPI],[],[MPI Enabled]) -##### - -AX_CHECK_COMPILER_FLAGS([-Wno-unused-but-set-variable],[CXXFLAGS="$CXXFLAGS -Wno-unused-but-set-variable"],[]) - AC_SUBST(NVCCFLAGS) AC_SUBST(INCLUDES_PATH) diff --git a/src/.deps/pdata-VCluster.Po b/src/.deps/pdata-VCluster.Po index 2a569d2695f35f2ee8ae5cde33efa626a9374bf9..259e45d854988f9b650d6b28625fb755665a0947 100644 --- a/src/.deps/pdata-VCluster.Po +++ b/src/.deps/pdata-VCluster.Po @@ -956,6 +956,7 @@ pdata-VCluster.o: ../../OpenFPM_vcluster/src/VCluster.cpp \ ../../OpenFPM_data/src/Grid/comb.hpp \ ../../OpenFPM_data/src/Grid/grid_key_expression.hpp \ ../../OpenFPM_data/src/Space/Shape/Point.hpp \ + ../../OpenFPM_data/src/Grid/Encap.hpp \ ../../OpenFPM_data/src/Grid/grid_key.hpp \ ../../OpenFPM_data/src/Grid/Encap.hpp \ ../../OpenFPM_data/src/memory_array.hpp \ @@ -3059,6 +3060,8 @@ pdata-VCluster.o: ../../OpenFPM_vcluster/src/VCluster.cpp \ ../../OpenFPM_data/src/Space/Shape/Point.hpp: +../../OpenFPM_data/src/Grid/Encap.hpp: + ../../OpenFPM_data/src/Grid/grid_key.hpp: ../../OpenFPM_data/src/Grid/Encap.hpp: diff --git a/src/.deps/pdata-main.Po b/src/.deps/pdata-main.Po index 59a49a224d42709fbf8963718c63e9dea987dbc4..2f0458c8c6aef30c75317261ccd794d027839cd8 100644 --- a/src/.deps/pdata-main.Po +++ b/src/.deps/pdata-main.Po @@ -921,6 +921,7 @@ pdata-main.o: main.cpp /usr/include/stdc-predef.h \ ../../OpenFPM_data/src/Grid/comb.hpp \ ../../OpenFPM_data/src/Grid/grid_key_expression.hpp \ ../../OpenFPM_data/src/Space/Shape/Point.hpp \ + ../../OpenFPM_data/src/Grid/Encap.hpp \ ../../OpenFPM_data/src/Grid/grid_key.hpp \ ../../OpenFPM_data/src/Grid/Encap.hpp \ ../../OpenFPM_data/src/memory_array.hpp \ @@ -1288,13 +1289,78 @@ pdata-main.o: main.cpp /usr/include/stdc-predef.h \ ../../OpenFPM_data/src/Space/SpaceBox.hpp \ ../../OpenFPM_data/src/Space/Shape/Point.hpp \ ../../OpenFPM_data/src/Space/Shape/Box.hpp \ - ../../OpenFPM_data/src/Grid/Encap.hpp Grid/grid_dist_id_iterator.hpp \ + ../../OpenFPM_data/src/Space/Ghost.hpp Grid/grid_dist_id_iterator.hpp \ Grid/grid_dist_key.hpp ../../OpenFPM_data/src/Point_test.hpp \ ../../OpenFPM_data/src/base_type.hpp Decomposition/CartDecomposition.hpp \ Decomposition/Decomposition.hpp ../../OpenFPM_data/src/global_const.hpp \ SubdomainGraphNodes.hpp metis_util.hpp \ ../../metis_install/include/metis.h /usr/include/inttypes.h \ - ../../OpenFPM_IO/src/VTKWriter.hpp dec_optimizer.hpp \ + ../../OpenFPM_IO/src/VTKWriter.hpp \ + ../../OpenFPM_IO/src/VTKWriter_graph.hpp \ + ../../OpenFPM_IO/src/VTKWriter_vector_box.hpp \ + /usr/include/boost/math/special_functions/pow.hpp \ + /usr/include/boost/math/policies/error_handling.hpp \ + /usr/include/boost/math/tools/precision.hpp \ + /usr/include/boost/format.hpp \ + /usr/include/boost/format/detail/compat_workarounds.hpp \ + /usr/include/boost/format/detail/config_macros.hpp \ + /usr/include/boost/format/detail/workarounds_gcc-2_95.hpp \ + /usr/include/boost/format/detail/workarounds_stlport.hpp \ + /usr/include/boost/format/format_fwd.hpp \ + /usr/include/boost/format/internals_fwd.hpp \ + /usr/include/boost/format/internals.hpp \ + /usr/include/boost/format/alt_sstream.hpp \ + /usr/include/boost/format/alt_sstream_impl.hpp \ + /usr/include/boost/format/format_class.hpp \ + /usr/include/boost/format/exceptions.hpp \ + /usr/include/boost/format/format_implementation.hpp \ + /usr/include/boost/format/group.hpp \ + /usr/include/boost/format/feed_args.hpp \ + /usr/include/boost/format/detail/msvc_disambiguater.hpp \ + /usr/include/boost/format/parsing.hpp \ + /usr/include/boost/format/free_funcs.hpp \ + /usr/include/boost/format/detail/unset_macros.hpp \ + ../../OpenFPM_IO/src/util.hpp \ + /usr/include/boost/iostreams/device/mapped_file.hpp \ + /usr/include/boost/iostreams/close.hpp \ + /usr/include/boost/iostreams/categories.hpp \ + /usr/include/boost/iostreams/flush.hpp \ + /usr/include/boost/iostreams/detail/dispatch.hpp \ + /usr/include/boost/iostreams/detail/select.hpp \ + /usr/include/boost/iostreams/traits.hpp \ + /usr/include/boost/iostreams/detail/bool_trait_def.hpp \ + /usr/include/boost/iostreams/detail/template_params.hpp \ + /usr/include/boost/preprocessor/control/expr_if.hpp \ + /usr/include/boost/iostreams/detail/config/wide_streams.hpp \ + /usr/include/boost/iostreams/detail/is_iterator_range.hpp \ + /usr/include/boost/iostreams/detail/config/disable_warnings.hpp \ + /usr/include/boost/iostreams/detail/config/enable_warnings.hpp \ + /usr/include/boost/iostreams/detail/select_by_size.hpp \ + /usr/include/boost/preprocessor/iteration/local.hpp \ + /usr/include/boost/preprocessor/iteration/detail/local.hpp \ + /usr/include/boost/iostreams/detail/wrap_unwrap.hpp \ + /usr/include/boost/iostreams/detail/enable_if_stream.hpp \ + /usr/include/boost/iostreams/traits_fwd.hpp \ + /usr/include/boost/iostreams/detail/streambuf.hpp \ + /usr/include/boost/iostreams/operations_fwd.hpp \ + /usr/include/boost/iostreams/detail/adapter/non_blocking_adapter.hpp \ + /usr/include/boost/iostreams/detail/ios.hpp \ + /usr/include/boost/iostreams/read.hpp \ + /usr/include/boost/iostreams/char_traits.hpp \ + /usr/include/boost/iostreams/detail/char_traits.hpp \ + /usr/include/boost/iostreams/seek.hpp \ + /usr/include/boost/iostreams/positioning.hpp \ + /usr/include/boost/iostreams/detail/config/codecvt.hpp \ + /usr/include/boost/iostreams/detail/config/fpos.hpp \ + /usr/include/boost/iostreams/write.hpp \ + /usr/include/boost/iostreams/concepts.hpp \ + /usr/include/boost/iostreams/detail/default_arg.hpp \ + /usr/include/boost/iostreams/detail/config/auto_link.hpp \ + /usr/include/boost/config/auto_link.hpp \ + /usr/include/boost/iostreams/detail/config/dyn_link.hpp \ + /usr/include/boost/iostreams/detail/path.hpp \ + /usr/include/boost/config/abi_prefix.hpp \ + /usr/include/boost/config/abi_suffix.hpp dec_optimizer.hpp \ ../../OpenFPM_data/src/NN/CellList/CellDecomposer.hpp \ /usr/include/c++/4.8.3/unordered_map \ /usr/include/c++/4.8.3/bits/hashtable.h \ @@ -1306,9 +1372,9 @@ pdata-main.o: main.cpp /usr/include/stdc-predef.h \ ../../OpenFPM_data/src/NN/CellList/CellNNIterator.hpp \ ../../OpenFPM_data/src/NN/CellList/CellListBal.hpp \ ../../OpenFPM_data/src/NN/CellList/CellListMem.hpp \ - ../../OpenFPM_data/src/util.hpp unit_test_init_cleanup.hpp \ - Decomposition/ORB_unit_test.hpp Decomposition/ORB.hpp \ - ../../OpenFPM_data/src/data_type/scalar.hpp \ + ../../OpenFPM_data/src/Space/Ghost.hpp ../../OpenFPM_IO/src/util.hpp \ + unit_test_init_cleanup.hpp Decomposition/ORB_unit_test.hpp \ + Decomposition/ORB.hpp ../../OpenFPM_data/src/data_type/scalar.hpp \ Graph/CartesianGraphFactory_unit_test.hpp \ Graph/CartesianGraphFactory.hpp metis_util_unit_test.hpp metis_util.hpp \ dec_optimizer_unit_test.hpp dec_optimizer.hpp \ @@ -3344,6 +3410,8 @@ Graph/CartesianGraphFactory.hpp: ../../OpenFPM_data/src/Space/Shape/Point.hpp: +../../OpenFPM_data/src/Grid/Encap.hpp: + ../../OpenFPM_data/src/Grid/grid_key.hpp: ../../OpenFPM_data/src/Grid/Encap.hpp: @@ -4164,7 +4232,7 @@ Grid/grid_dist_id.hpp: ../../OpenFPM_data/src/Space/Shape/Box.hpp: -../../OpenFPM_data/src/Grid/Encap.hpp: +../../OpenFPM_data/src/Space/Ghost.hpp: Grid/grid_dist_id_iterator.hpp: @@ -4190,6 +4258,136 @@ metis_util.hpp: ../../OpenFPM_IO/src/VTKWriter.hpp: +../../OpenFPM_IO/src/VTKWriter_graph.hpp: + +../../OpenFPM_IO/src/VTKWriter_vector_box.hpp: + +/usr/include/boost/math/special_functions/pow.hpp: + +/usr/include/boost/math/policies/error_handling.hpp: + +/usr/include/boost/math/tools/precision.hpp: + +/usr/include/boost/format.hpp: + +/usr/include/boost/format/detail/compat_workarounds.hpp: + +/usr/include/boost/format/detail/config_macros.hpp: + +/usr/include/boost/format/detail/workarounds_gcc-2_95.hpp: + +/usr/include/boost/format/detail/workarounds_stlport.hpp: + +/usr/include/boost/format/format_fwd.hpp: + +/usr/include/boost/format/internals_fwd.hpp: + +/usr/include/boost/format/internals.hpp: + +/usr/include/boost/format/alt_sstream.hpp: + +/usr/include/boost/format/alt_sstream_impl.hpp: + +/usr/include/boost/format/format_class.hpp: + +/usr/include/boost/format/exceptions.hpp: + +/usr/include/boost/format/format_implementation.hpp: + +/usr/include/boost/format/group.hpp: + +/usr/include/boost/format/feed_args.hpp: + +/usr/include/boost/format/detail/msvc_disambiguater.hpp: + +/usr/include/boost/format/parsing.hpp: + +/usr/include/boost/format/free_funcs.hpp: + +/usr/include/boost/format/detail/unset_macros.hpp: + +../../OpenFPM_IO/src/util.hpp: + +/usr/include/boost/iostreams/device/mapped_file.hpp: + +/usr/include/boost/iostreams/close.hpp: + +/usr/include/boost/iostreams/categories.hpp: + +/usr/include/boost/iostreams/flush.hpp: + +/usr/include/boost/iostreams/detail/dispatch.hpp: + +/usr/include/boost/iostreams/detail/select.hpp: + +/usr/include/boost/iostreams/traits.hpp: + +/usr/include/boost/iostreams/detail/bool_trait_def.hpp: + +/usr/include/boost/iostreams/detail/template_params.hpp: + +/usr/include/boost/preprocessor/control/expr_if.hpp: + +/usr/include/boost/iostreams/detail/config/wide_streams.hpp: + +/usr/include/boost/iostreams/detail/is_iterator_range.hpp: + +/usr/include/boost/iostreams/detail/config/disable_warnings.hpp: + +/usr/include/boost/iostreams/detail/config/enable_warnings.hpp: + +/usr/include/boost/iostreams/detail/select_by_size.hpp: + +/usr/include/boost/preprocessor/iteration/local.hpp: + +/usr/include/boost/preprocessor/iteration/detail/local.hpp: + +/usr/include/boost/iostreams/detail/wrap_unwrap.hpp: + +/usr/include/boost/iostreams/detail/enable_if_stream.hpp: + +/usr/include/boost/iostreams/traits_fwd.hpp: + +/usr/include/boost/iostreams/detail/streambuf.hpp: + +/usr/include/boost/iostreams/operations_fwd.hpp: + +/usr/include/boost/iostreams/detail/adapter/non_blocking_adapter.hpp: + +/usr/include/boost/iostreams/detail/ios.hpp: + +/usr/include/boost/iostreams/read.hpp: + +/usr/include/boost/iostreams/char_traits.hpp: + +/usr/include/boost/iostreams/detail/char_traits.hpp: + +/usr/include/boost/iostreams/seek.hpp: + +/usr/include/boost/iostreams/positioning.hpp: + +/usr/include/boost/iostreams/detail/config/codecvt.hpp: + +/usr/include/boost/iostreams/detail/config/fpos.hpp: + +/usr/include/boost/iostreams/write.hpp: + +/usr/include/boost/iostreams/concepts.hpp: + +/usr/include/boost/iostreams/detail/default_arg.hpp: + +/usr/include/boost/iostreams/detail/config/auto_link.hpp: + +/usr/include/boost/config/auto_link.hpp: + +/usr/include/boost/iostreams/detail/config/dyn_link.hpp: + +/usr/include/boost/iostreams/detail/path.hpp: + +/usr/include/boost/config/abi_prefix.hpp: + +/usr/include/boost/config/abi_suffix.hpp: + dec_optimizer.hpp: ../../OpenFPM_data/src/NN/CellList/CellDecomposer.hpp: @@ -4214,7 +4412,9 @@ dec_optimizer.hpp: ../../OpenFPM_data/src/NN/CellList/CellListMem.hpp: -../../OpenFPM_data/src/util.hpp: +../../OpenFPM_data/src/Space/Ghost.hpp: + +../../OpenFPM_IO/src/util.hpp: unit_test_init_cleanup.hpp: diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index ec34eb735c782d9442c6537a7269e3f22cf9f63e..8a1c01ed54c08eeea1dd635bc5fe7ae4700e809e 100644 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -22,8 +22,7 @@ #include "NN/CellList/CellDecomposer.hpp" #include <unordered_map> #include "NN/CellList/CellList.hpp" - -template<unsigned int dim, typename T> using Ghost = Box<dim,T>; +#include "Space/Ghost.hpp" /** * \brief This class decompose a space into subspaces @@ -52,6 +51,15 @@ template<unsigned int dim, typename T> using Ghost = Box<dim,T>; template<unsigned int dim, typename T, template<typename> class device_l=openfpm::device_cpu, typename Memory=HeapMemory, template<unsigned int, typename> class Domain=Box, template<typename, typename, typename, typename, unsigned int> class data_s = openfpm::vector> class CartDecomposition { + struct N_box + { + // id of the processor in the nn_processor list + size_t id; + + // Near processor sub-domains + typename openfpm::vector<::Box<dim,T>> bx; + }; + struct Box_proc { // Intersection between the local sub-domain enlarged by the ghost and the contiguous processor @@ -89,21 +97,21 @@ private: SpaceBox<dim,T> sub_domain; //! the set of all local sub-domain as vector - data_s<SpaceBox<dim,T>,device_l<SpaceBox<dim,T>>,Memory,openfpm::vector_grow_policy_default, openfpm::vect_isel<SpaceBox<dim,T>>::value > sub_domains; + openfpm::vector<SpaceBox<dim,T>> sub_domains; //! List of near processors openfpm::vector<size_t> nn_processors; //! for each sub-domain, contain the list of the neighborhood processors //! and for each processor contain the boxes calculated from the intersection - //! of the sub-domain ghost with the near-by processor sub-domain () + //! of the sub-domain + ghost with the near-by processor sub-domain () openfpm::vector< openfpm::vector< Box_proc > > box_nn_processor_int; - //! for each box, contain the list of the neighborhood processors + //! for each sub-domain, contain the list of the neighborhood processors openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor; // for each near-processor store the sub-domain of the near processor - std::unordered_map<size_t,typename openfpm::vector<::Box<dim,T>> > nn_processor_subdomains; + std::unordered_map<size_t, N_box> nn_processor_subdomains; //! Structure that contain for each sub-domain box the processor id //! exist for efficient global communication @@ -144,22 +152,6 @@ private: //! and the near processor sub-domains CellList<dim,T,FAST> geo_cell; - /*! \brief Enlarge the ghost domain - * - * \param the box - * \param gh spacing of the margin to enlarge - * - */ - void enlarge(::Box<dim,T> & box, Ghost<dim,T> & gh) - { - typedef ::Box<dim,T> g; - - for (size_t j = 0 ; j < dim ; j++) - { - box.template getBase<g::p1>(j) = box.template getBase<g::p1>(j) - gh.template getBase<g::p1>(j); - box.template getBase<g::p2>(j) = box.template getBase<g::p2>(j) + gh.template getBase<g::p2>(j); - } - } /*! \brief Create internally the decomposition * @@ -170,24 +162,20 @@ private: { // Calculate the total number of box and and the spacing // on each direction - // Get the box containing the domain SpaceBox<dim,T> bs = domain.getBox(); for (unsigned int i = 0; i < dim ; i++) { // Calculate the spacing - spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i); } // Here we use METIS - // Create a cartesian grid graph CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part; // Processor graph - Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(gr.getSize(),domain); // Get the number of processing units @@ -200,16 +188,13 @@ private: Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np); // decompose - met.decompose<nm_part_v::id>(); // fill the structure that store the processor id for each sub-domain - fine_s.resize(gr.size()); // Optimize the decomposition creating bigger spaces // And reducing Ghost over-stress - dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize()); // set of Boxes produced by the decomposition optimizer @@ -218,7 +203,8 @@ private: // optimize the decomposition d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor); - // produce the list of the contiguous processor + // produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the + // processor list for (size_t i = 0 ; i < box_nn_processor.size() ; i++) { for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++) @@ -232,18 +218,65 @@ private: auto last = std::unique(nn_processors.begin(), nn_processors.end()); nn_processors.erase(last, nn_processors.end()); + // produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the + // processor list + for (size_t i = 0 ; i < box_nn_processor.size() ; i++) + { + for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++) + { + // processor id near to this sub-domain + size_t proc_id = box_nn_processor.get(i).get(j); + + size_t k = 0; + // search inside near processor list + for (k = 0 ; k < nn_processors.size() ; k++) + if (nn_processors.get(k) == proc_id) break; + + nn_processor_subdomains[proc_id].id = k; + } + } + + // Initialize ss_box and bbox + if (loc_box.size() >= 0) + { + SpaceBox<dim,T> sub_d(loc_box.get(0)); + sub_d.mul(spacing); + sub_d.expand(spacing); + + // add the sub-domain + sub_domains.add(sub_d); + + ss_box = sub_d; + bbox = sub_d; + } + // convert into sub-domain - for (size_t s = 0 ; s < loc_box.size() ; s++) + for (size_t s = 1 ; s < loc_box.size() ; s++) { SpaceBox<dim,T> sub_d(loc_box.get(s)); - // re-scale with spacing - sub_d.spacing(spacing); + // re-scale and add spacing (the end is the starting point of the next domain + spacing) + sub_d.mul(spacing); + sub_d.expand(spacing); // add the sub-domain sub_domains.add(sub_d); + + // Calculate the bound box + bbox.enclose(sub_d); + + // Create the smallest box contained in all sub-domain + ss_box.contained(sub_d); } + //++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes + { + VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1; + vtk_box1.add(sub_domains); + vtk_box1.write(std::string("loc_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk")); + } + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // fill fine_s structure // fine_s structure contain the processor id for each sub-sub-domain // with sub-sub-domain we mean the sub-domain decomposition before @@ -314,21 +347,28 @@ private: * \param total message size to receive from all the processors * \param the total number of processor want to communicate with you * \param i processor id + * \param ri request id (it is an id that goes from 0 to total_p, and is unique + * every time message_alloc is called) * \param ptr a pointer to the vector_dist structure * * \return the pointer where to store the message * */ - static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, void * ptr) + static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) { // cast the pointer CartDecomposition<dim,T,device_l,Memory,Domain,data_s> * cd = static_cast< CartDecomposition<dim,T,device_l,Memory,Domain,data_s> *>(ptr); + if (cd->v_cl.getProcessUnitID() == 0) + { + std::cout << "Receiving from " << i << " msg size: " << msg_i << "\n"; + } + // Resize the memory - cd->nn_processor_subdomains[i].resize(msg_i); + cd->nn_processor_subdomains[i].bx.resize(msg_i); // Return the receive pointer - return cd->nn_processor_subdomains[i].getPointer(); + return cd->nn_processor_subdomains[i].bx.getPointer(); } public: @@ -341,6 +381,9 @@ public: CartDecomposition(CartDecomposition<dim,T,device_l,Memory,Domain,data_s> && cd) :sub_domain(cd.sub_domain),gr(cd.gr),cd(cd.cd),domain(cd.domain),v_cl(cd.v_cl) { + // Reset the box to zero + bbox.zero(); + //! Subspace selected //! access_key in case of grid is just the set of the index to access the grid id_sub.swap(cd.id_sub); @@ -362,7 +405,10 @@ public: */ CartDecomposition(Vcluster & v_cl) :id_sub(0),v_cl(v_cl) - {} + { + // Reset the box to zero + bbox.zero(); + } /*! \brief Cartesian decomposition constructor, it divide the space in boxes * @@ -372,10 +418,12 @@ public: * */ CartDecomposition(std::vector<size_t> dec, Domain<dim,T> domain, Vcluster & v_cl) - :id_sub(0),gr(dec),cd(domain,dec),domain(domain),v_cl(v_cl) + :id_sub(0),gr(dec),cd(domain,dec,0),domain(domain),v_cl(v_cl) { - // Create the decomposition + // Reset the box to zero + bbox.zero(); + // Create the decomposition CreateDecomposition(v_cl); } @@ -383,6 +431,72 @@ public: ~CartDecomposition() {} + openfpm::vector<size_t> ids; + + /*! \brief Given a position it return if the position belong to any neighborhood processor ghost + * + * \param p Particle position + * + * \param return the processor ids + * + */ + const openfpm::vector<size_t> ghost_processorID(Point<dim,T> & p) + { + ids.clear(); + + // Check with geo-cell if a particle is inside one Cell caotaining boxes + + auto cell_it = geo_cell.getCellIterator(p); + + // For each element in the cell, check if the point is inside the box + // if it is store the processor id + while (cell_it.isNext()) + { + size_t bid = cell_it.get(); + + if (vb_int.get(bid).box.isInside(p) == true) + { + ids.add(vb_int.get(bid).proc); + } + } + + return ids; + } + + /*! \brief Given a position it return if the position belong to any neighborhood processor ghost + * + * \param p Particle position + * + * \param return the processor ids + * + */ + template<typename Mem> inline const openfpm::vector<size_t> ghost_processorID(const encapc<1,Point<dim,T>,Mem> & p) + { + ids.clear(); + + // Check with geo-cell if a particle is inside one Cell containing boxes + + auto cell_it = geo_cell.getCellIterator(p); + + // For each element in the cell, check if the point is inside the box + // if it is, store the processor id + while (cell_it.isNext()) + { + size_t bid = cell_it.get(); + + if (vb_int.get(bid).box.isInside(p) == true) + { + ids.add(vb_int.get(bid).proc); + } + } + + return ids; + } + + // Internal boxes for this processor domain, indicated with B8_0 B9_0 ..... in the figure + // below as a linear vector + openfpm::vector<::Box<dim,T>> vb_int; + /*! It calculate the ghost boxes and internal boxes * * Example: Processor 10 calculate @@ -466,29 +580,24 @@ p1[0]<-----+ +----> p2[0] */ void calculateGhostBoxes(Ghost<dim,T> & ghost) { - typedef Ghost<dim,T> g; - #ifdef DEBUG // the ghost margins are assumed to be smaller // than one sub-domain for (size_t i = 0 ; i < dim ; i++) { - if (ghost.template getBase<g::p1>() >= domain.template getBase<g::p1>() / gr.size(i) ) + if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i)) { std::cerr << "Error: Ghost are bigger that one domain" << "\n"; } } #endif - // create a buffer with the sub-domain of this processors, the informations ( the box ) - // of sub-domain contiguous to the processor A are sent to the processor A and + // create a buffer with the sub-domains of this processor, the informations ( the boxes ) + // of the sub-domains contiguous to the processor A are sent to the processor A and // the information of the contiguous sub-domains in the near processors are received // - openfpm::vector< openfpm::vector< ::Box<dim,T>> > boxes(nn_processors.size()); - - // create the sub-domain buffer information to send - openfpm::vector< size_t > prc; + openfpm::vector< openfpm::vector< ::SpaceBox<dim,T>> > boxes(nn_processors.size()); for (size_t b = 0 ; b < box_nn_processor.size() ; b++) { @@ -496,24 +605,61 @@ p1[0]<-----+ +----> p2[0] { size_t prc = box_nn_processor.get(b).get(p); - boxes.add(sub_domains.get(b)); + // id of the processor in the processor list + // [value between 0 and the number of the near processors] + size_t id = nn_processor_subdomains[prc].id; + + boxes.get(id).add(sub_domains.get(b)); } } + //++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes + { + for (size_t b = 0 ; b < boxes.size() ; b++) + { + VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1; + vtk_box1.add(boxes.get(b)); + vtk_box1.write(std::string("Processor_") + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(nn_processors.get(b)) + std::string(".vtk")); + } + } + + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // Intersect all the local sub-domains with the sub-domains of the contiguous processors // Get the sub-domains of the near processors - v_cl.sendrecvMultipleMessages(boxes,prc,CartDecomposition<dim,T,device_l,Memory,Domain,data_s>::message_alloc, this ,NEED_ALL_SIZE); + v_cl.sendrecvMultipleMessages(nn_processors,boxes,CartDecomposition<dim,T,device_l,Memory,Domain,data_s>::message_alloc, this ,NEED_ALL_SIZE); + + // ++++++++++++++++++++++++++++++++++++++++++ Check received boxes + + { + VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1; + for (size_t p = 0 ; p < nn_processors.size() ; p++) + { + size_t prc = nn_processors.get(p); + + if (v_cl.getProcessUnitID() == 0) + std::cout << "Received from " << prc << " n_boxes: " << nn_processor_subdomains[prc].bx.size() << "\n"; - box_nn_processor_int.resize(box_nn_processor.size()); + vtk_box1.add(nn_processor_subdomains[prc].bx); + } + vtk_box1.write(std::string("rb_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk")); + } + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + box_nn_processor_int.resize(sub_domains.size()); // For each sub-domain for (size_t i = 0 ; i < sub_domains.size() ; i++) { - ::Box<dim,size_t> sub_with_ghost = sub_domains.get(i); + SpaceBox<dim,T> sub_with_ghost = sub_domains.get(i); // enlarge the sub-domain with the ghost - enlarge(sub_with_ghost,ghost); + sub_with_ghost.enlarge(ghost); + + // resize based on the number of contiguous processors + box_nn_processor_int.get(i).resize(box_nn_processor.get(i).size()); // For each processor contiguous to this sub-domain for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++) @@ -522,7 +668,7 @@ p1[0]<-----+ +----> p2[0] size_t p_id = box_nn_processor.get(i).get(j); // get the set of sub-domains of the contiguous processor p_id - openfpm::vector< ::Box<dim,T> > & p_box = nn_processor_subdomains[p_id]; + openfpm::vector< ::Box<dim,T> > & p_box = nn_processor_subdomains[p_id].bx; // near processor sub-domain intersections openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).bx; @@ -530,9 +676,9 @@ p1[0]<-----+ +----> p2[0] // for each near processor sub-domain intersect with the enlarged local sub-domain and store it for (size_t b = 0 ; b < p_box.size() ; b++) { - bool intersect; + ::Box<dim,T> bi; - ::Box<dim,T> bi = sub_with_ghost.Intersect(p_box.get(b),intersect); + bool intersect = sub_with_ghost.Intersect(::Box<dim,T>(p_box.get(b)),bi); if (intersect == true) p_box_int.add(bi); @@ -546,37 +692,39 @@ p1[0]<-----+ +----> p2[0] size_t p_id = box_nn_processor.get(i).get(j); // get the set of sub-domains of the contiguous processor p_id - openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_processor_subdomains[p_id]; + openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_processor_subdomains[p_id].bx; // near processor sub-domain intersections - openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).bbx; + openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).nbx; // For each near processor sub-domains enlarge and intersect with the local sub-domain and store the result for (size_t k = 0 ; k < nn_p_box.size() ; k++) { - // enlarge the local sub-domain + // enlarge the near-processor sub-domain ::Box<dim,T> n_sub = nn_p_box.get(k); + // local sub-domain + ::SpaceBox<dim,T> l_sub = sub_domains.get(i); + // Create a margin of ghost size around the near processor sub-domain - elarge(n_sub,ghost); + n_sub.enlarge(ghost); // Intersect with the local sub-domain - bool intersect; - ::Box<dim,T> b_int = n_sub.Intersect(n_sub,intersect); + ::Box<dim,T> b_int; + bool intersect = n_sub.Intersect(l_sub,b_int); // store if it intersect if (intersect == true) { - typedef ::Box<dim,T> b; - p_box_int.add(b_int); + vb_int.add(b_int); // update the geo_cell list // get the boxes this box span - grid_key<dim> p1 = geo_cell.getCell(b_int.template get<b::p1>() ); - grid_key<dim> p2 = geo_cell.getCell(b_int.template get<b::p2>() ); + const grid_key_dx<dim> p1 = geo_cell.getCellGrid(b_int.getP1()); + const grid_key_dx<dim> p2 = geo_cell.getCellGrid(b_int.getP2()); // Get the grid and the sub-iterator auto & gi = geo_cell.getGrid(); @@ -585,13 +733,31 @@ p1[0]<-----+ +----> p2[0] // add the box-id to the cell list while (g_sub.isNext()) { - auto & key = g_sub.get(); - geo_cell.add(gi.LinId(key),p_box_int.size()-1); + auto key = g_sub.get(); + geo_cell.addCell(gi.LinId(key),vb_int.size()-1); ++g_sub; } } } } + + + // ++++++++++++++++++++++++++++++++++++++++ Debug +++++++++++++++++++++++++++++ + + { + VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1; + for (size_t p = 0 ; p < box_nn_processor_int.size() ; p++) + { + for (size_t s = 0 ; s < box_nn_processor_int.get(p).size() ; s++) + { + vtk_box1.add(box_nn_processor_int.get(p).get(s).nbx); + } + } + vtk_box1.write(std::string("inte_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk")); + } + + // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + } } @@ -606,6 +772,19 @@ p1[0]<-----+ +----> p2[0] return fine_s.get(cd.getCell(p)); } + // Smallest subdivision on each direction + ::Box<dim,T> ss_box; + + /*! \brief Get the smallest subdivision of the domain on each direction + * + * \return a box p1 is set to zero + * + */ + const ::Box<dim,T> & getSmallestSubdivision() + { + return ss_box; + } + /*! \brief processorID return in which processor the particle should go * * \return processorID @@ -647,7 +826,7 @@ p1[0]<-----+ +----> p2[0] gr.setDimensions(div_); domain = domain_; - cd.setDimensions(domain,div_); + cd.setDimensions(domain,div_,0); //! Create the decomposition @@ -888,6 +1067,68 @@ p1[0]<-----+ +----> p2[0] { return bbox; } + + /*! \brief if the point fall into the ghost of some near processor it return the processors id's in which + * it fall + * + * \param p Point + * \return iterator of the processors id's + * + */ + inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p))) + { + return geo_cell.getIterator(geo_cell.getCell(p)); + } + + /*! \brief if the point fall into the ghost of some near processor it return the processor number in which + * it fall + * + * \param p Point + * \return number of processors + * + */ + inline size_t labelPointNp(Point<dim,T> & p) + { + return geo_cell.getNelements(geo_cell.getCell(p)); + } + + /*! \brief It return the label point cell + * + * The labeling of a point p is regulated by a Cell list, give a point it give a cell-id + * + * \param p Point + * \return cell-id + * + */ + inline size_t labelPointCell(Point<dim,T> & p) + { + return geo_cell.getCell(p); + } + + /*! \brief Fill the ghost buffer + * + * \tparam one or more properties to get + * + */ +/* template<unsigned int ...i> void ghost_get() + { + // first check if a local particle must be sent to another processor + for (size_t i = 0 ; i < ; i++) + { + + } + }*/ + + /*! \brief Fill the ghost buffer + * + * \tparam one or more properties to get + * + */ +/* template<unsigned int ...i> void ghost_put() + { + + }*/ + }; diff --git a/src/Decomposition/CartDecomposition_unit_test.hpp b/src/Decomposition/CartDecomposition_unit_test.hpp index 668adc11eb02970ad576a72d473e5614a1601805..0fd1c9f388f22f7691d044d675e09939313ce1f5 100644 --- a/src/Decomposition/CartDecomposition_unit_test.hpp +++ b/src/Decomposition/CartDecomposition_unit_test.hpp @@ -33,6 +33,13 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_use) // Decompose dec.setParameters(div,box); + + Ghost<3,float> g(0.01); + + // create a ghost border + dec.calculateGhostBoxes(g); + + // } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Graph/CartesianGraphFactory.hpp b/src/Graph/CartesianGraphFactory.hpp index f667d06aad14a68d5c3c276c073c7cabd82a5def..5d2a72b631348d1fbd9c05d987be332276b54ef2 100644 --- a/src/Graph/CartesianGraphFactory.hpp +++ b/src/Graph/CartesianGraphFactory.hpp @@ -353,14 +353,15 @@ public: * * Each vertex is a subspace (Hyper-cube) of dimension dim, each vertex is * connected with an edge if two vertex (Hyper-cube) share a element of dimension grater than - * dim_c + * dim_c. One property can be used to store the contact size or the d-dimensional + * surface in common between two connected hyper-cube. * * \param sz Vector that store the size of the grid on each dimension * \param dom Box enclosing the physical domain * - * \tparam se Indicate which properties fill with the element weight. The - * element weight is the point, line , surface, d dimensional object - * in contact (in common between two hyper-cube). NO_EDGE indicate + * \tparam se Indicate which properties fill with the contact size. The + * contact size is the point, line , surface, d-dimensional object size + * in contact (in common) between two hyper-cube. NO_EDGE indicate * no property will store this information * \tparam T type of the domain like (int real complex ... ) * \tparam dim_c Connectivity dimension diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 899a268e4c1c795ec866106263f3ea1f40e8f9b0..5a786ab0e2d07e635c4239683385ebbb9621d235 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -54,7 +54,7 @@ class grid_dist_id /*! \brief Get the grid size * - * Get the grid size, given a domain, the resolution on it and another spaceBox + * Given a domain, the resolution of the grid on it and another spaceBox contained in the domain * it give the size on all directions of the local grid * * \param sp SpaceBox enclosing the local grid @@ -92,7 +92,7 @@ class grid_dist_id for (size_t d = 0 ; d < dim ; d++) { // push the size of the local grid - v_size[d] = sp.getHigh(d) - sp.getLow(d) + 1; + v_size[d] = sp.getHigh(d) - sp.getLow(d); } } @@ -175,11 +175,9 @@ public: dec.hyperCube(); // Get the number of local grid needed - size_t n_grid = dec.getNLocalHyperCube(); // create local grids for each hyper-cube - loc_grid = v_cl.allocate<device_grid>(n_grid); // Size of the grid on each dimension @@ -322,7 +320,7 @@ class grid_dist_id<1,T,Decomposition,Memory,device_grid> for (size_t d = 0 ; d < 1 ; d++) { // push the size of the local grid - v_size[d] = sp.getHigh(d) - sp.getLow(d) + 1; + v_size[d] = sp.getHigh(d) - sp.getLow(d); } } diff --git a/src/Grid/grid_dist_id_unit_test.hpp b/src/Grid/grid_dist_id_unit_test.hpp index 43c7293c0b0dcb12b750331cb00ed2d721013388..7718e1d0b149dee76da8ad76b610be13f2fc6263 100644 --- a/src/Grid/grid_dist_id_unit_test.hpp +++ b/src/Grid/grid_dist_id_unit_test.hpp @@ -58,6 +58,16 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use) ++dom; } + // Get the virtual cluster machine + Vcluster & vcl = g_dist.getVC(); + + // reduce + vcl.reduce(count); + vcl.execute(); + + // Check + BOOST_REQUIRE_EQUAL(count,1024*1024); + size_t count_check = 0; dom = g_dist.getDomainIterator(); @@ -71,16 +81,6 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use) ++dom; } - // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); - - // reduce - vcl.reduce(count); - vcl.execute(); - - // Check - BOOST_REQUIRE_EQUAL(count,1024*1024); - /* auto g_it = g_dist.getIteratorBulk(); auto g_it_halo = g_dist.getHalo(); diff --git a/src/Makefile b/src/Makefile index 95e81349f24d6be0cf4a60fbca5c9bba46748a56..68aa50d3e4d61da92d2f3f020abc0f6e30059c11 100644 --- a/src/Makefile +++ b/src/Makefile @@ -84,13 +84,10 @@ subdir = src DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/depcomp ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ - $(top_srcdir)/m4/acx_pthread.m4 $(top_srcdir)/m4/ax_opencl.m4 \ - $(top_srcdir)/m4/ax_boost_base.m4 \ - $(top_srcdir)/m4/ax_boost_program_options.m4 \ - $(top_srcdir)/m4/ax_boost_thread.m4 \ - $(top_srcdir)/m4/acx_mpi.m4 $(top_srcdir)/m4/ax_openmp.m4 \ - $(top_srcdir)/m4/ax_cuda.m4 $(top_srcdir)/configure.ac +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_boost.m4 $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/ax_cuda.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d @@ -104,7 +101,8 @@ am_pdata_OBJECTS = pdata-main.$(OBJEXT) pdata-HeapMemory.$(OBJEXT) \ pdata-Memleak_check.$(OBJEXT) pdata_OBJECTS = $(am_pdata_OBJECTS) am__DEPENDENCIES_1 = -am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) pdata_DEPENDENCIES = $(am__DEPENDENCIES_2) pdata_LINK = $(CXXLD) $(pdata_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -175,6 +173,18 @@ AUTOCONF = ${SHELL} /home/i-bird/Desktop/MOSAIC/OpenFPM_project/OpenFPM_pdata/mi AUTOHEADER = ${SHELL} /home/i-bird/Desktop/MOSAIC/OpenFPM_project/OpenFPM_pdata/missing autoheader AUTOMAKE = ${SHELL} /home/i-bird/Desktop/MOSAIC/OpenFPM_project/OpenFPM_pdata/missing automake-1.13 AWK = gawk +BOOST_CPPFLAGS = -pthread -I/usr/include +BOOST_DATE_TIME_LIB = -lboost_date_time +BOOST_FILESYSTEM_LIB = -lboost_filesystem +BOOST_IOSTREAMS_LIB = -lboost_iostreams +BOOST_LDFLAGS = -L/usr/lib +BOOST_PROGRAM_OPTIONS_LIB = -lboost_program_options +BOOST_REGEX_LIB = -lboost_regex +BOOST_SERIALIZATION_LIB = -lboost_serialization +BOOST_SIGNALS_LIB = -lboost_signals +BOOST_THREAD_LIB = -lboost_thread +BOOST_UNIT_TEST_FRAMEWORK_LIB = -lboost_unit_test_framework +BOOST_WSERIALIZATION_LIB = -lboost_wserialization CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -g -O2 @@ -183,7 +193,7 @@ CUDA_CFLAGS = -I -I -I/usr/local/cuda-5.5/include CUDA_LIBS = -L -L -L/usr/local/cuda-5.5/lib64 -lcuda -lcudart CXX = mpic++ CXXDEPMODE = depmode=gcc3 -CXXFLAGS = --std=c++11 -march=native -mtune=native -Wall -O3 -g3 -flto -funroll-loops -Wno-unused-but-set-variable +CXXFLAGS = --std=c++11 -march=native -mtune=native -g3 -Wall -O0 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps @@ -204,7 +214,7 @@ LTLIBOBJS = MAKEINFO = ${SHELL} /home/i-bird/Desktop/MOSAIC/OpenFPM_project/OpenFPM_pdata/missing makeinfo MKDIR_P = /usr/bin/mkdir -p NVCC = /usr/local/cuda-5.5/bin/nvcc -NVCCFLAGS = -O3 +NVCCFLAGS = -g -O0 NVCC_EXIST = yes OBJEXT = o PACKAGE = full-package-name @@ -274,7 +284,7 @@ target_vendor = unknown top_build_prefix = ../ top_builddir = .. top_srcdir = .. -LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(CUDA_LIBS) $(BOOST_THREAD_LIB) +LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) pdata_SOURCES = main.cpp ../../OpenFPM_devices/src/memory/HeapMemory.cpp ../../OpenFPM_devices/src/memory/PtrMemory.cpp ../../OpenFPM_vcluster/src/VCluster.cpp ../../OpenFPM_data/src/Memleak_check.cpp pdata_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) pdata_CFLAGS = $(CUDA_CFLAGS) diff --git a/src/Makefile.am b/src/Makefile.am old mode 100755 new mode 100644 index 0b3679871bb8bbab62d7561681df3420880fbd41..a0fa9da119c9658bed3d914fb66f08a95042c449 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,5 @@ -LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(CUDA_LIBS) $(BOOST_THREAD_LIB) +LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) bin_PROGRAMS = pdata pdata_SOURCES = main.cpp ../../OpenFPM_devices/src/memory/HeapMemory.cpp ../../OpenFPM_devices/src/memory/PtrMemory.cpp ../../OpenFPM_vcluster/src/VCluster.cpp ../../OpenFPM_data/src/Memleak_check.cpp diff --git a/src/Makefile.in b/src/Makefile.in index a8cfb9ca289a1ac33e6705c0093608b1307c7f27..0d7175e1ee0d7415f255c6d8e5500e05f5acf868 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -84,13 +84,10 @@ subdir = src DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/depcomp ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ - $(top_srcdir)/m4/acx_pthread.m4 $(top_srcdir)/m4/ax_opencl.m4 \ - $(top_srcdir)/m4/ax_boost_base.m4 \ - $(top_srcdir)/m4/ax_boost_program_options.m4 \ - $(top_srcdir)/m4/ax_boost_thread.m4 \ - $(top_srcdir)/m4/acx_mpi.m4 $(top_srcdir)/m4/ax_openmp.m4 \ - $(top_srcdir)/m4/ax_cuda.m4 $(top_srcdir)/configure.ac +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_boost.m4 $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/ax_cuda.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d @@ -104,7 +101,8 @@ am_pdata_OBJECTS = pdata-main.$(OBJEXT) pdata-HeapMemory.$(OBJEXT) \ pdata-Memleak_check.$(OBJEXT) pdata_OBJECTS = $(am_pdata_OBJECTS) am__DEPENDENCIES_1 = -am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) pdata_DEPENDENCIES = $(am__DEPENDENCIES_2) pdata_LINK = $(CXXLD) $(pdata_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -175,6 +173,18 @@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ +BOOST_CPPFLAGS = @BOOST_CPPFLAGS@ +BOOST_DATE_TIME_LIB = @BOOST_DATE_TIME_LIB@ +BOOST_FILESYSTEM_LIB = @BOOST_FILESYSTEM_LIB@ +BOOST_IOSTREAMS_LIB = @BOOST_IOSTREAMS_LIB@ +BOOST_LDFLAGS = @BOOST_LDFLAGS@ +BOOST_PROGRAM_OPTIONS_LIB = @BOOST_PROGRAM_OPTIONS_LIB@ +BOOST_REGEX_LIB = @BOOST_REGEX_LIB@ +BOOST_SERIALIZATION_LIB = @BOOST_SERIALIZATION_LIB@ +BOOST_SIGNALS_LIB = @BOOST_SIGNALS_LIB@ +BOOST_THREAD_LIB = @BOOST_THREAD_LIB@ +BOOST_UNIT_TEST_FRAMEWORK_LIB = @BOOST_UNIT_TEST_FRAMEWORK_LIB@ +BOOST_WSERIALIZATION_LIB = @BOOST_WSERIALIZATION_LIB@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ @@ -274,7 +284,7 @@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(CUDA_LIBS) $(BOOST_THREAD_LIB) +LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) pdata_SOURCES = main.cpp ../../OpenFPM_devices/src/memory/HeapMemory.cpp ../../OpenFPM_devices/src/memory/PtrMemory.cpp ../../OpenFPM_vcluster/src/VCluster.cpp ../../OpenFPM_data/src/Memleak_check.cpp pdata_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) pdata_CFLAGS = $(CUDA_CFLAGS) diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 4fa9f1bdb7f3209129bf423f4623c79c90abeaf2..d990938a08da8239ddeff4b9bb17f78452c32743 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -17,6 +17,8 @@ #include "memory/PtrMemory.hpp" #include "NN/CellList/CellList.hpp" #include "common.hpp" +#include "util/vector_creator.hpp" +#include "memory/ExtPreAlloc.hpp" #define NO_ID false #define ID true @@ -24,6 +26,25 @@ #define GET 1 #define PUT 2 +#define INTERNAL 0 +#define NO_POSITION 1 + +/*! \brief This is a container for the sending buffers + * + * It is used in ghost_get to create a particular object with the properties selected + * + * \tparam Is a boost::fusion::vector with the properties selected + * + * + */ +template<typename v> +class buff_com +{ + typedef v type; + + const int max_prop = boost::fusion::result_of::size<v>::value; +}; + /*! \brief Distributed vector * */ @@ -91,19 +112,16 @@ public: // Create the sub-domains dec.setParameters(div,box); - // Get the bounding box containing the processor domain +- one sub-domain spacing - ::Box<point::dims,typename point::coord_type> & bbound = dec.getProcessorBounds(); + // Get the bounding box containing the processor domain + const ::Box<point::dims,typename point::coord_type> & bbound = dec.getProcessorBounds(); - // the smallest sub-division of the domain on each dimension - typename point::coord_type smallest_doms[point::dims]; + const ::Box<point::dims,typename point::coord_type> & smallest_unit = dec.getSmallestSubdivision(); // convert spacing divisions size_t n_g[point::dims]; for (size_t i = 0 ; i < point::dims ; i++) - { - n_g[i] = box.template getBase<b::p2>(i) / smallest_doms[i]; - } + n_g[i] = (bbound.getHigh(i) - bbound.getLow(i)) / smallest_unit.getHigh(i); point p; p.zero(); @@ -112,6 +130,16 @@ public: geo_cell.Initialize(box,n_g,p,8); } + /*! \brief return the local size of the vector + * + * \return local size + * + */ + size_t size_local() + { + v_pos.get(0).size(); + } + /*! \brief Get position of an object * * \param vec_key vector element @@ -144,6 +172,16 @@ public: openfpm::vector<prop,openfpm::device_cpu<prop>,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> prp; }; + /*! \brief set the ghost + * + * \param g ghost + * + */ + void setGhost(Ghost<point::dims,typename point::coord_type> & g) + { + dec.calculateGhostBoxes(g); + } + /*! \brief It communicate the particle to the respective processor * */ @@ -256,12 +294,10 @@ public: prc_cnt.get(lbl)++; // Add processors and add size - ++it; } // Create the set of pointers - openfpm::vector<void *> ptr(prc_r.size()); for (size_t i = 0 ; i < prc_r.size() ; i++) { @@ -269,8 +305,7 @@ public: } // convert the particle number to buffer size - - for (size_t i = 0 ; i < v_cl.getProcessingUnits() ; i++) + for (size_t i = 0 ; i < prc_sz_r.size() ; i++) { prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(point)); } @@ -328,72 +363,133 @@ public: v_prp.get(0).remove(opart,o_p_id); } - // ghost particles sending buffer - openfpm::vector<HeapMemory> ghost_send_hp; + // outgoing particles-id + openfpm::vector<openfpm::vector<size_t>> opart; // Each entry contain the size of the ghost sending buffer - std::unordered_map<size_t,size_t> ghost_prc_sz; + openfpm::vector<size_t> ghost_prc_sz; // ghost particle labels openfpm::vector<size_t> ghost_lbl_p; + // Memory for the ghost sending buffer + Memory g_prp_mem; + + // Memory for the ghost + Memory g_pos_mem; + /*! \brief It synchronize getting the ghost particles * * \prp Properties to get * \opt options - * NO_RELABEL: If the particles does not move avoid to relabel + * NO_RELABEL: If the particles does not move avoid to relabel and send particle position * */ - template<unsigned int N> void ghost_get(const size_t prp[N], size_t opt) + template<int... prp> void ghost_get(size_t opt = NONE) { - // outgoing particles-id - openfpm::vector<size_t> opart; - + // Create the ghost buffer ghost_prc_sz.clear(); + ghost_lbl_p.clear(); + ghost_lbl_p.resize(v_pos.get(INTERNAL).size()); // Label the internal (assigned) particles - auto it = v_pos.get(0).getIterator(); + auto it = v_pos.get(INTERNAL).getIterator(); // Label all the particles with the processor id, where they should go while (it.isNext()) { auto key = it.get(); - size_t p_id = dec.ghost_processorID(v_pos.get(0).get(key)); + size_t p_id = dec.ghost_processorID(v_pos.get(INTERNAL).get(key)); ghost_lbl_p.get(key) = p_id; // It has to communicate if (p_id != v_cl.getProcessUnitID()) { + size_t id = dec.ProcToID(p_id); + // add particle to communicate - ghost_prc_sz[p_id]++; + ghost_prc_sz.get(id)++; - opart.add(key); + + opart.get(id).add(key); } ++it; } - // Create memory allocator for the send buffers - size_t i = 0; - ghost_send_hp.resize(ghost_prc_sz.size()); + // Total number of elements + size_t n_ele = 0; + + // sequence of pre-allocation pattern + openfpm::vector<size_t> pap; + + // Calculate the total size required for the sending buffer + for ( size_t i = 0 ; i < ghost_prc_sz.size() ; i++ ) + { + pap.add(ghost_prc_sz.get(i)*sizeof(vector_creator<Point_test<float>::type,prp...>::type)); + n_ele += ghost_prc_sz.get(i); + } + + // resize the property buffer memory + g_prp_mem.resize(n_ele * sizeof(typename vector_creator<Point_test<float>::type,prp...>::type)); + // resize the position buffer memory + if (opt != NO_POSITION) g_pos_mem.resize(n_ele * sizeof(point)); + + // Create an object of preallocated memory + ExtPreAlloc<Memory> prAlloc(pap,g_prp_mem); - for ( auto it = ghost_prc_sz.begin(); it != ghost_prc_sz.end(); ++it ) + // definition of a property object based on the property selected + typedef typename vector_creator<Point_test<float>::type,prp...>::type property_object; + + // definition of the send vector for each processor + typedef openfpm::vector<property_object,openfpm::device_cpu<prop>,ExtPreAlloc<Memory>> send_vector; + + // create a vector of send vector (ExtPreAlloc warrant that all the created vector are contiguous) + openfpm::vector<send_vector> g_send; + + // create a number of send buffer equal to the near processors + g_send.resize(ghost_prc_sz.size()); + for (size_t i = 0 ; i < g_send.size() ; i++) + { + // set the preallocated memory to ensure contiguity + g_send.get(i).setMemory(prAlloc); + + // resize the sending vector (No allocation is produced) + g_send.get(i).resize(ghost_prc_sz.get(i)); + } + + // Fill the send buffer + for ( size_t i = 0 ; i < opart.size() ; i++ ) { - // we are sending only some properties, so calculate the size of the sending buffer - size_t element_size = ele_size<N,typename prop::type>(prp); + for (size_t j = 0 ; j < opart.get(i).size() ; j++) + { + g_send.get(i).get(j) = v_prp.get(INTERNAL).get(opart.get(i).get(j)); + } + } + + // Create the buffer for particle position - ghost_send_hp.get(i).resize(it->second * element_size); + // definition of the send vector for each processor + typedef openfpm::vector<property_object,openfpm::device_cpu<point>,ExtPreAlloc<Memory>> send_pos_vector; - i++; + openfpm::vector<point> g_pos_send; + if (opt != NO_POSITION) + { + // Fill the send buffer + for ( size_t i = 0 ; i < opart.size() ; i++ ) + { + for (size_t j = 0 ; j < opart.get(i).size() ; j++) + { + g_send.get(i).get(j) = v_pos.get(INTERNAL).get(opart.get(i).get(j)); + } + } } - // + // Send receive the particles information - // ca - // send and receive the properties of the particles // add the received particles to the vector } @@ -413,17 +509,19 @@ public: * \param total message size to receive from all the processors * \param the total number of processor want to communicate with you * \param i processor id + * \param ri request id (it is an id that goes from 0 to total_p, and is unique + * every time message_alloc is called) * \param ptr a pointer to the vector_dist structure * * \return the pointer where to store the message * */ - static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, void * ptr) + static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) { // cast the pointer vector_dist<point,prop,Box,Decomposition,Memory,with_id> * vd = static_cast<vector_dist<point,prop,Box,Decomposition,Memory,with_id> *>(ptr); - // Resize the memory and + // Resize the receive buffer, and the size of each message buffer vd->hp_recv.resize(total_msg); vd->v_proc.resize(total_p); @@ -434,7 +532,7 @@ public: vd->recv_cnt += msg_i; // Save the processor message size - vd->v_proc.get(i) = msg_i; + vd->v_proc.get(ri) = msg_i; return recv_ptr; } diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp index 748940c02f2e2f0ec7649131c89e9266e4fdc621..1c3eb3fbb6483afbb9bab107db29be79af2ba917 100644 --- a/src/Vector/vector_dist_unit_test.hpp +++ b/src/Vector/vector_dist_unit_test.hpp @@ -55,6 +55,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use ) ++it; } + + // set the ghost based on the radius cut off + Ghost<2,float> g(0.01); + + vd.setGhost(g); + + // do a ghost get + + typedef Point_test<float> p; + vd.template ghost_get<p::s,p::v>(); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/dec_optimizer.hpp b/src/dec_optimizer.hpp index b6c21c96cded626a0d98ad98ae0d9593c2794607..c1998384b0f92da26b4841d6a7dcd8172f5e28c9 100644 --- a/src/dec_optimizer.hpp +++ b/src/dec_optimizer.hpp @@ -176,9 +176,6 @@ private: */ template<unsigned int p_sub, unsigned int p_id> void add_to_queue(openfpm::vector<size_t> & domains, openfpm::vector<wavefront<dim>> & v_w, Graph & graph, std::vector<comb<dim>> & w_comb, long int pr_id, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor) { - // it contain a list of the near processor to the box - box_nn_processor.add(); - // create a new queue openfpm::vector<size_t> domains_new; @@ -289,7 +286,7 @@ private: // direction of expansion - size_t domain_id = graph.vertex(gh.LinId(start_p)).template get<p_id>(); + size_t domain_id = graph.vertex(start_p).template get<p_id>(); bool can_expand = true; // while is possible to expand @@ -600,7 +597,7 @@ public: box_nn_processor.add(); // Create the biggest box containing the domain - expand_from_point<p_sub,p_id>(gh.LinId(v_q.get(0)),graph,box,v_w,w_comb); + expand_from_point<p_sub,p_id>(v_q.get(0),graph,box,v_w,w_comb); // Add the created box to the list of boxes lb.add(box); diff --git a/src/dec_optimizer_unit_test.hpp b/src/dec_optimizer_unit_test.hpp index e17f6f7a523e97a27c8fddf70a570e3c30f00e02..bcfbc4a8e428724643a7acd9c239fa6443707193 100644 --- a/src/dec_optimizer_unit_test.hpp +++ b/src/dec_optimizer_unit_test.hpp @@ -2,7 +2,7 @@ * dec_optimize.hpp * * Created on: Jan 16, 2015 - * Author: i-bird + * Author: Pietro Incardona */ #ifndef DEC_OPTIMIZE_HPP_ @@ -32,34 +32,28 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use) Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); // Graph to decompose - Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,float,2,0,1,2>(sz,box); // Processor graph - Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,float,2>(sz,box); // Convert the graph to metis - Metis<Graph_CSR<nm_v,nm_e>> met(g,16); // decompose - met.decompose<nm_part_v::id>(gp); met.decompose<nm_v::id>(); // optimize - dec_optimizer<3,Graph_CSR<nm_v,nm_e>> d_o(g,sz); grid_key_dx<3> keyZero(0,0,0); d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g); // Write the VTK file - - VTKWriter<Graph_CSR<nm_part_v,nm_part_e>> vtk(gp); + VTKWriter<Graph_CSR<nm_part_v,nm_part_e>,GRAPH> vtk(gp); vtk.write("vtk_partition.vtk"); - VTKWriter<Graph_CSR<nm_v,nm_e>> vtk2(g); + VTKWriter<Graph_CSR<nm_v,nm_e>,GRAPH> vtk2(g); vtk2.write("vtk_partition2.vtk"); } diff --git a/src/main.cpp b/src/main.cpp old mode 100755 new mode 100644 index fe80dc22a50f03e316e7f7467be967f758e0cd74..cb799f429a7c6701b823e7c86b009ab904f4a773 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,6 +8,13 @@ #define BOOST_TEST_MODULE "C++ test module for OpenFPM_pdata project" #include <boost/test/included/unit_test.hpp> +/*struct MPIFixture { + MPIFixture() { MPI_Init(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);} + ~MPIFixture() { MPI_Finalize(); } +}; + +BOOST_GLOBAL_FIXTURE(MPIFixture);*/ + #include "Grid/grid_dist_id.hpp" #include "Point_test.hpp" #include "Decomposition/CartDecomposition.hpp" diff --git a/src/metis_util_unit_test.hpp b/src/metis_util_unit_test.hpp index 7638a2063405e0a4e31d50303dec03a5a261281e..f63d2097fd29c693660d53eda56c2dd287839de1 100644 --- a/src/metis_util_unit_test.hpp +++ b/src/metis_util_unit_test.hpp @@ -50,13 +50,6 @@ BOOST_AUTO_TEST_CASE( Metis_test_use) met.decompose<nm_part_v::id>(gp); met.decompose<nm_v::id>(); - - // Write the VTK file - - VTKWriter<Graph_CSR<nm_part_v,nm_part_e>> vtk(gp); - vtk.write("vtk_partition.vtk"); - VTKWriter<Graph_CSR<nm_v,nm_e>> vtk2(g); - vtk2.write("vtk_partition2.vtk"); } BOOST_AUTO_TEST_SUITE_END()