From 1a1686d9414d652c95222d0de0822f19af90da67 Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Fri, 24 Aug 2018 03:14:31 +0200 Subject: [PATCH] New MPI + MAP on device finally working --- configure.ac | 1 + images/CartDecomposition_gen_vtk.cpp | 2 +- images/Makefile.am | 8 +- images/vector.cpp | 4 +- install | 2 +- openfpm_data | 2 +- openfpm_devices | 2 +- openfpm_vcluster | 2 +- script/detect_gcc | 28 +++-- script/install_MPI.sh | 3 +- script/remove_old | 2 +- src/DLB/DLB.hpp | 4 +- src/Decomposition/CartDecomposition.hpp | 33 +++++- src/Decomposition/CartDecomposition_ext.hpp | 2 +- .../Distribution/DistParMetisDistribution.hpp | 4 +- .../Distribution/Distribution_unit_tests.hpp | 8 +- .../Distribution/MetisDistribution.hpp | 4 +- .../Distribution/ParMetisDistribution.hpp | 4 +- .../Distribution/SpaceDistribution.hpp | 4 +- .../Distribution/metis_util_unit_test.hpp | 2 +- .../Distribution/parmetis_dist_util.hpp | 4 +- .../Distribution/parmetis_util.hpp | 4 +- src/Decomposition/ORB.hpp | 2 +- .../cuda/CartDecomposition_gpu.cuh | 3 + .../cuda/decomposition_cuda_tests.cu | 2 +- src/Decomposition/dec_optimizer_unit_test.hpp | 2 +- src/Decomposition/ie_ghost.hpp | 6 +- src/Decomposition/nn_processor_unit_test.hpp | 6 +- .../tests/CartDecomposition_unit_test.cpp | 12 +-- src/Graph/DistGraphFactory.hpp | 4 +- src/Graph/dist_map_graph.hpp | 4 +- src/Graph/dist_map_graph_unit_test.hpp | 8 +- .../grid_dist_id_iterators_unit_tests.hpp | 12 +-- src/Grid/grid_dist_id.hpp | 4 +- src/Grid/grid_dist_id_comm.hpp | 2 +- ...grid_dist_id_HDF5_chckpnt_restart_test.cpp | 4 +- src/Grid/tests/grid_dist_id_unit_test.cpp | 34 +++--- .../tests/grid_dist_id_unit_test_ext_dom.hpp | 2 +- .../grid_dist_id_unit_test_unb_ghost.hpp | 6 +- src/Makefile.am | 13 ++- src/Vector/cuda/vector_dist_cuda_func_test.cu | 7 +- src/Vector/cuda/vector_dist_cuda_funcs.cuh | 4 +- .../{ => cuda}/vector_dist_gpu_unit_tests.cu | 86 ++++++++++++++- .../vector_dist_performance_common.hpp | 
4 +- .../vector_dist_HDF5_chckpnt_restart_test.cpp | 2 +- src/Vector/tests/vector_dist_NN_tests.cpp | 8 +- .../tests/vector_dist_cell_list_tests.cpp | 26 ++--- .../vector_dist_complex_prp_unit_test.cpp | 2 +- src/Vector/tests/vector_dist_unit_test.cpp | 34 +++--- .../tests/vector_dist_util_unit_tests.hpp | 6 +- src/Vector/vector_dist.hpp | 12 +-- src/Vector/vector_dist_comm.hpp | 101 +++++++++++++----- src/Vector/vector_dist_dlb_test.hpp | 8 +- src/initialize/initialize_wrapper.hpp | 20 ++++ src/initialize/initialize_wrapper_cpu.cpp | 13 +++ src/initialize/initialize_wrapper_cuda.cu | 12 +++ src/unit_test_init_cleanup.hpp | 6 +- 57 files changed, 410 insertions(+), 196 deletions(-) rename src/Vector/{ => cuda}/vector_dist_gpu_unit_tests.cu (82%) create mode 100644 src/initialize/initialize_wrapper.hpp create mode 100644 src/initialize/initialize_wrapper_cpu.cpp create mode 100644 src/initialize/initialize_wrapper_cuda.cu diff --git a/configure.ac b/configure.ac index fe3d677d..694eb372 100644 --- a/configure.ac +++ b/configure.ac @@ -412,6 +412,7 @@ if test x"$NVCC_EXIST" = x"yes"; then fi else gpu_support=no + INITIALIZATION_SOURCE="initialize_openfpm.cpp" fi if test x$gpu_support = x"no"; then diff --git a/images/CartDecomposition_gen_vtk.cpp b/images/CartDecomposition_gen_vtk.cpp index 6e16b942..aa52e9d3 100644 --- a/images/CartDecomposition_gen_vtk.cpp +++ b/images/CartDecomposition_gen_vtk.cpp @@ -13,7 +13,7 @@ int main(int argc, char ** argv) openfpm_init(&argc,&argv); // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); //! 
[Create CartDecomposition vtk gen] CartDecomposition<2,float> dec(vcl); diff --git a/images/Makefile.am b/images/Makefile.am index 5ec901fa..d25a0bcf 100644 --- a/images/Makefile.am +++ b/images/Makefile.am @@ -1,22 +1,22 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(PETSC_LIB) $(SUITESPARSE_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS) $(METIS_LIB) $(PARMETIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) noinst_PROGRAMS = cart_dec metis_dec dom_box vector_dist -cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp cart_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs cart_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) cart_dec_LDADD = $(LINKLIBS) -lparmetis -lmetis -metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp metis_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) 
$(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs metis_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) metis_dec_LDADD = $(LINKLIBS) -lmetis -dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp dom_box_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs dom_box_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) dom_box_LDADD = $(LINKLIBS) -vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_devices/src/Memleak_check.cpp +vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_devices/src/Memleak_check.cpp vector_dist_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs vector_dist_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) vector_dist_LDADD = $(LINKLIBS) -lparmetis -lmetis diff --git a/images/vector.cpp b/images/vector.cpp index 9414d2e6..52b7d900 100644 --- a/images/vector.cpp +++ 
b/images/vector.cpp @@ -50,9 +50,7 @@ int main(int argc, char* argv[]) // randomly in the domain, we create a Box that define our domain, boundary conditions, and ghost // openfpm_init(&argc,&argv); - Vcluster & v_cl = create_vcluster(); - - typedef Point<2,float> s; + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine diff --git a/install b/install index 49063cb9..f5ae54e5 100755 --- a/install +++ b/install @@ -176,7 +176,6 @@ if [ $? -ne 0 ]; then exit 1 fi - ## check for options echo -e "\033[1;34;5mDEPENCENCIES INSTALLATION DIR \033[0m" @@ -317,6 +316,7 @@ HDF5_System_prv=1 LIBHILBERT_installed=0 conf_err=1 + if [ $install_req -eq 0 ]; then ./configure $options $configure_options "$configure_blas_option" else diff --git a/openfpm_data b/openfpm_data index 66fdd0bd..0261ed4b 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 66fdd0bdd308bd78efe3c14a7d2bf9290e35f550 +Subproject commit 0261ed4b804309864d899bedec46a67e5e6cfe95 diff --git a/openfpm_devices b/openfpm_devices index c751d466..e5d5d31a 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit c751d466195fd0271acf1933a6b0e9d2233d3529 +Subproject commit e5d5d31a0af09312118f72c0818a824443ba80fb diff --git a/openfpm_vcluster b/openfpm_vcluster index ec95a342..11a0f824 160000 --- a/openfpm_vcluster +++ b/openfpm_vcluster @@ -1 +1 @@ -Subproject commit ec95a342bc6fc783099f3cb044ff4524348bfedf +Subproject commit 11a0f824c111e686ad2388cec485ffcaafab5560 diff --git a/script/detect_gcc b/script/detect_gcc index 0905b02b..46c89cba 100755 --- a/script/detect_gcc +++ b/script/detect_gcc @@ -4,20 +4,6 @@ function haveProg() { [ -x "$(command -v $1)" ] } -function gpp_clang() -{ - if [ x"$possible_solutions_command" == x"g++" ]; then - CXX=g++ - CC=gcc - F77=gfortran - FC=gfortran - else - CXX=clang++ - CC=clang - F77=gfortran - FC=gfortran - fi -} function detect_compiler() { @@ -25,6 +11,7 @@ function detect_compiler() 
icpc_found=0 dgc_ret=0 + if [ x"$CXX" != x"" -o x"$CC" != x"" -o x"$F77" != x"" -o x"$FC" != x"" ]; then if [ x"$CXX" == x"" ]; then @@ -65,6 +52,7 @@ function detect_compiler() return fi + # First we try to understand if g++ command line is linked to clang if haveProg g++; then @@ -111,6 +99,7 @@ function detect_compiler() fi fi + g++ --version | grep "g++" > /dev/null 2>&1 if [ $? == 0 -a $gpp_found -ne 1 ]; then dgc_major=$(g++ --version | grep g++ | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1/g') @@ -147,6 +136,7 @@ function detect_compiler() fi fi + g++ --version | grep "clang" > /dev/null 2>&1 if [ $? == 0 -a $gpp_found -ne 1 ]; then dgc_major=$(g++ --version | grep g++ | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1/g') @@ -224,11 +214,17 @@ function detect_compiler() FC=ifort dgc_compiler=icpc else - gpp_clang + CXX=g++ + CC=gcc + F77=gfortran + FC=gfortran fi dgc_ret=1 elif [ $gpp_found -eq 1 ]; then - gpp_clang + CXX=g++ + CC=gcc + F77=gfortran + FC=gfortran elif [ $icpc_found -eq 1 ]; then CXX=icpc CC=icc diff --git a/script/install_MPI.sh b/script/install_MPI.sh index aeaa2060..583a881d 100755 --- a/script/install_MPI.sh +++ b/script/install_MPI.sh @@ -25,7 +25,8 @@ cd openmpi-3.1.1 # # -./configure --with-cuda --prefix=$1/MPI --enable-mpi-fortran=yes CC=$3 CXX=$4 F77=$4 FC=$5 + +./configure --with-cuda --prefix=$1/MPI --enable-mpi-fortran=yes CC=$3 CXX=$4 F77=$5 FC=$5 make -j $2 make install diff --git a/script/remove_old b/script/remove_old index d69d1ab7..459682d9 100755 --- a/script/remove_old +++ b/script/remove_old @@ -191,7 +191,7 @@ function remove_old() version=$(cat $1/MPI/version) if [ x"$version" != x"3" ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" - echo -e "\033[1;34;5m MPI has been updated to version 2.1.1, the component will be updated automatically \033[0m" + echo -e "\033[1;34;5m MPI has been updated to version 3.1.1, 
the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" sleep 5 rm -rf $1/MPI/include diff --git a/src/DLB/DLB.hpp b/src/DLB/DLB.hpp index 81a788ad..fa04bc53 100644 --- a/src/DLB/DLB.hpp +++ b/src/DLB/DLB.hpp @@ -69,7 +69,7 @@ public: private: //! Runtime virtual cluster machine - Vcluster & v_cl; + Vcluster<> & v_cl; //! Structure that will contain all the timings Times timeInfo; @@ -165,7 +165,7 @@ public: * * \param v_cl virtual cluster object */ - DLB(Vcluster & v_cl) : + DLB(Vcluster<> & v_cl) : v_cl(v_cl) { } diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index 1c212cbb..c651d93a 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -197,7 +197,7 @@ protected: size_t magn[dim]; //! Runtime virtual cluster machine - Vcluster & v_cl; + Vcluster<> & v_cl; //! Create distribution Distribution dist; @@ -353,7 +353,7 @@ public: * \param opt option (one option is to construct) * */ - void createSubdomains(Vcluster & v_cl, const size_t (& bc)[dim], size_t opt = 0) + void createSubdomains(Vcluster<> & v_cl, const size_t (& bc)[dim], size_t opt = 0) { int p_id = v_cl.getProcessUnitID(); @@ -708,7 +708,7 @@ public: * \param v_cl Virtual cluster, used internally to handle or pipeline communication * */ - CartDecomposition(Vcluster & v_cl) + CartDecomposition(Vcluster<> & v_cl) :nn_prcs<dim, T>(v_cl), v_cl(v_cl), dist(v_cl),ref_cnt(0) { // Reset the box to zero @@ -1622,6 +1622,31 @@ public: return processorID<Mem>(pt) == v_cl.getProcessUnitID(); } + /*! 
\brief Check if the particle is local considering boundary conditions + * + * \warning if the particle is outside the domain and non periodic boundary the result + * is unreliable + * + * + * \param p object position + * \param bc boundary conditions + * + * \return true if it is local + * + */ + bool isLocalBC(const Point<dim,T> & p, const size_t (& bc)[dim]) const + { + Point<dim,T> pt = p; + + for (size_t i = 0 ; i < dim ; i++) + { + if (bc[i] == PERIODIC) + pt.get(i) = openfpm::math::periodic_l(p[i],domain.getHigh(i),domain.getLow(i)); + } + + return processorID(pt) == v_cl.getProcessUnitID(); + } + /*! \brief Get the domain Cells * * It return all the cells-id that are inside the processor-domain @@ -1789,7 +1814,7 @@ public: * \return the Virtual cluster machine * */ - Vcluster & getVC() const + Vcluster<> & getVC() const { #ifdef SE_CLASS2 check_valid(this,8); diff --git a/src/Decomposition/CartDecomposition_ext.hpp b/src/Decomposition/CartDecomposition_ext.hpp index ce5a8156..23c26f7e 100644 --- a/src/Decomposition/CartDecomposition_ext.hpp +++ b/src/Decomposition/CartDecomposition_ext.hpp @@ -167,7 +167,7 @@ public: * \param v_cl VCluster * */ - CartDecomposition_ext(Vcluster & v_cl) + CartDecomposition_ext(Vcluster<> & v_cl) :CartDecomposition<dim,T,Memory,layout_base,Distribution>(v_cl) { } diff --git a/src/Decomposition/Distribution/DistParMetisDistribution.hpp b/src/Decomposition/Distribution/DistParMetisDistribution.hpp index 528e163a..1f403bfa 100644 --- a/src/Decomposition/Distribution/DistParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/DistParMetisDistribution.hpp @@ -17,7 +17,7 @@ template<unsigned int dim, typename T> class DistParMetisDistribution { //! Vcluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! 
Structure that store the cartesian grid information grid_sm<dim, void> gr; @@ -73,7 +73,7 @@ public: * * @param v_cl Vcluster to use as communication object in this class */ - DistParMetisDistribution(Vcluster & v_cl) : + DistParMetisDistribution(Vcluster<> & v_cl) : v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits()) { diff --git a/src/Decomposition/Distribution/Distribution_unit_tests.hpp b/src/Decomposition/Distribution/Distribution_unit_tests.hpp index 1c527d5b..ac8d35ea 100644 --- a/src/Decomposition/Distribution/Distribution_unit_tests.hpp +++ b/src/Decomposition/Distribution/Distribution_unit_tests.hpp @@ -59,7 +59,7 @@ BOOST_AUTO_TEST_SUITE (Distribution_test) BOOST_AUTO_TEST_CASE( Metis_distribution_test) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() != 3) return; @@ -180,7 +180,7 @@ BOOST_AUTO_TEST_CASE( Metis_distribution_test) BOOST_AUTO_TEST_CASE( Parmetis_distribution_test) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() != 3) return; @@ -286,7 +286,7 @@ BOOST_AUTO_TEST_CASE( Parmetis_distribution_test) BOOST_AUTO_TEST_CASE( DistParmetis_distribution_test) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() != 3) return; @@ -387,7 +387,7 @@ BOOST_AUTO_TEST_CASE( DistParmetis_distribution_test) BOOST_AUTO_TEST_CASE( Space_distribution_test) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() != 3) return; diff --git a/src/Decomposition/Distribution/MetisDistribution.hpp b/src/Decomposition/Distribution/MetisDistribution.hpp index 2b27a9e3..caa5efd0 100644 --- a/src/Decomposition/Distribution/MetisDistribution.hpp +++ b/src/Decomposition/Distribution/MetisDistribution.hpp @@ -30,7 
+30,7 @@ template<unsigned int dim, typename T> class MetisDistribution { //! Vcluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! Structure that store the cartesian grid information grid_sm<dim, void> gr; @@ -111,7 +111,7 @@ public: * \param v_cl vcluster * */ - MetisDistribution(Vcluster & v_cl) + MetisDistribution(Vcluster<> & v_cl) :v_cl(v_cl),metis_graph(gp) { #ifdef SE_CLASS2 diff --git a/src/Decomposition/Distribution/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp index 98412620..3718ce0c 100644 --- a/src/Decomposition/Distribution/ParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp @@ -39,7 +39,7 @@ class ParMetisDistribution bool is_distributed = false; //! Vcluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! Structure that store the cartesian grid information grid_sm<dim, void> gr; @@ -283,7 +283,7 @@ public: * * \param v_cl Vcluster to use as communication object in this class */ - ParMetisDistribution(Vcluster & v_cl) + ParMetisDistribution(Vcluster<> & v_cl) :is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits()) { } diff --git a/src/Decomposition/Distribution/SpaceDistribution.hpp b/src/Decomposition/Distribution/SpaceDistribution.hpp index 26343e14..714a67b0 100644 --- a/src/Decomposition/Distribution/SpaceDistribution.hpp +++ b/src/Decomposition/Distribution/SpaceDistribution.hpp @@ -24,7 +24,7 @@ template<unsigned int dim, typename T> class SpaceDistribution { //! Vcluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! 
Structure that store the cartesian grid information grid_sm<dim, void> gr; @@ -42,7 +42,7 @@ public: * * \param v_cl Vcluster to use as communication object in this class */ - SpaceDistribution(Vcluster & v_cl) + SpaceDistribution(Vcluster<> & v_cl) :v_cl(v_cl) { } diff --git a/src/Decomposition/Distribution/metis_util_unit_test.hpp b/src/Decomposition/Distribution/metis_util_unit_test.hpp index 3dd1dc69..0b2f4d1c 100644 --- a/src/Decomposition/Distribution/metis_util_unit_test.hpp +++ b/src/Decomposition/Distribution/metis_util_unit_test.hpp @@ -25,7 +25,7 @@ BOOST_AUTO_TEST_SUITE( Metis_test ) BOOST_AUTO_TEST_CASE( Metis_test_use) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() != 3) return; diff --git a/src/Decomposition/Distribution/parmetis_dist_util.hpp b/src/Decomposition/Distribution/parmetis_dist_util.hpp index d9e482b4..1952ee5d 100755 --- a/src/Decomposition/Distribution/parmetis_dist_util.hpp +++ b/src/Decomposition/Distribution/parmetis_dist_util.hpp @@ -103,7 +103,7 @@ class DistParmetis MPI_Comm comm = (MPI_Comm)NULL; //! VCluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! Process rank information int p_id = 0; @@ -178,7 +178,7 @@ public: * \param nc number of partitions * */ - DistParmetis(Vcluster & v_cl, size_t nc) : + DistParmetis(Vcluster<> & v_cl, size_t nc) : v_cl(v_cl), nc(nc) { // TODO Move into VCluster diff --git a/src/Decomposition/Distribution/parmetis_util.hpp b/src/Decomposition/Distribution/parmetis_util.hpp index 68fbf891..abe33bc1 100755 --- a/src/Decomposition/Distribution/parmetis_util.hpp +++ b/src/Decomposition/Distribution/parmetis_util.hpp @@ -108,7 +108,7 @@ class Parmetis MPI_Comm comm = (MPI_Comm)NULL; //! VCluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! 
Process rank information int p_id = 0; @@ -210,7 +210,7 @@ public: * \param nc number of partitions * */ - Parmetis(Vcluster & v_cl, size_t nc) + Parmetis(Vcluster<> & v_cl, size_t nc) :v_cl(v_cl), nc(nc),n_dec(0) { #ifdef SE_CLASS1 diff --git a/src/Decomposition/ORB.hpp b/src/Decomposition/ORB.hpp index e9422298..cf9582d5 100755 --- a/src/Decomposition/ORB.hpp +++ b/src/Decomposition/ORB.hpp @@ -98,7 +98,7 @@ template<unsigned int dim, typename T, typename loc_wg=openfpm::vector<float>, t class ORB { // Virtual cluster - Vcluster & v_cl; + Vcluster<> & v_cl; // particle coordinate accumulator openfpm::vector<T> cm; diff --git a/src/Decomposition/cuda/CartDecomposition_gpu.cuh b/src/Decomposition/cuda/CartDecomposition_gpu.cuh index ed9e5a52..a6592116 100644 --- a/src/Decomposition/cuda/CartDecomposition_gpu.cuh +++ b/src/Decomposition/cuda/CartDecomposition_gpu.cuh @@ -20,6 +20,7 @@ __global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, Point<3,float> xp = parts.template get<0>(p); int pr = cdg.processorIDBC(xp); + output.template get<1>(p) = (pr == rank)?-1:pr; output.template get<0>(p) = p; } @@ -55,6 +56,8 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs #endif + + return sub_domains_global.template get<1>(e); } diff --git a/src/Decomposition/cuda/decomposition_cuda_tests.cu b/src/Decomposition/cuda/decomposition_cuda_tests.cu index acc21795..b8322092 100644 --- a/src/Decomposition/cuda/decomposition_cuda_tests.cu +++ b/src/Decomposition/cuda/decomposition_cuda_tests.cu @@ -14,7 +14,7 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use ) auto & v_cl = create_vcluster(); // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl); diff --git a/src/Decomposition/dec_optimizer_unit_test.hpp b/src/Decomposition/dec_optimizer_unit_test.hpp index eeaf821c..c23b33c6 100644 --- 
a/src/Decomposition/dec_optimizer_unit_test.hpp +++ b/src/Decomposition/dec_optimizer_unit_test.hpp @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_p) BOOST_AUTO_TEST_CASE( dec_optimizer_disconnected_subdomains_np) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); // Test for only 3 processors if (vcl.getProcessingUnits() != 3) diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp index 853dd580..ed5a3341 100755 --- a/src/Decomposition/ie_ghost.hpp +++ b/src/Decomposition/ie_ghost.hpp @@ -140,7 +140,7 @@ class ie_ghost * \note To an explanation about the sectors see getShiftVectors * */ - inline size_t ebx_ibx_form(size_t k, size_t b, size_t p_id, const comb<dim> & c ,size_t N_b, Vcluster & v_cl, const bool ei) + inline size_t ebx_ibx_form(size_t k, size_t b, size_t p_id, const comb<dim> & c ,size_t N_b, Vcluster<> & v_cl, const bool ei) { comb<dim> cext = c; @@ -192,7 +192,7 @@ protected: * \see calculateGhostBoxes * */ - void create_box_nn_processor_ext(Vcluster & v_cl, + void create_box_nn_processor_ext(Vcluster<> & v_cl, Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains, const openfpm::vector<openfpm::vector<long unsigned int> > & box_nn_processor, @@ -293,7 +293,7 @@ protected: * \see calculateGhostBoxes * */ - void create_box_nn_processor_int(Vcluster & v_cl, + void create_box_nn_processor_int(Vcluster<> & v_cl, Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains, const openfpm::vector<openfpm::vector<long unsigned int> > & box_nn_processor, diff --git a/src/Decomposition/nn_processor_unit_test.hpp b/src/Decomposition/nn_processor_unit_test.hpp index 6f20ccb9..e7e6290f 100644 --- a/src/Decomposition/nn_processor_unit_test.hpp +++ b/src/Decomposition/nn_processor_unit_test.hpp @@ -12,7 +12,7 @@ void create_decomposition2x2(openfpm::vector<openfpm::vector<long unsigned int>> & box_nn_processor, openfpm::vector<SpaceBox<2,float>> & sub_domains) { 
- Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); box_nn_processor.add(); @@ -54,7 +54,7 @@ BOOST_AUTO_TEST_SUITE( nn_processor_test ) BOOST_AUTO_TEST_CASE( nn_processor_np_test) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); /*! * @@ -202,7 +202,7 @@ BOOST_AUTO_TEST_CASE( nn_processor_np_test) BOOST_AUTO_TEST_CASE( nn_processor_box_periodic_test) { // Vcluster - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); /*! * diff --git a/src/Decomposition/tests/CartDecomposition_unit_test.cpp b/src/Decomposition/tests/CartDecomposition_unit_test.cpp index 0b03f249..cd53d612 100755 --- a/src/Decomposition/tests/CartDecomposition_unit_test.cpp +++ b/src/Decomposition/tests/CartDecomposition_unit_test.cpp @@ -55,7 +55,7 @@ void setComputationCosts3D(CartDecomposition<3, float> &dec, size_t n_v, Point<3 BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); CartDecomposition<3, float> dec(vcl); @@ -144,7 +144,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test) BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); //! 
[Create CartDecomposition] CartDecomposition<3, float> dec(vcl); @@ -239,7 +239,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test) BOOST_AUTO_TEST_CASE( CartDecomposition_ext_non_periodic_test) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); CartDecomposition<3,float> dec(vcl); @@ -344,7 +344,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_ext_non_periodic_test) BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idbc_and_ghost ) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); if (vcl.size() != 3) {return;} @@ -392,7 +392,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idbc_and_ghost2 ) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); CartDecomposition<3, double> dec(vcl); @@ -488,7 +488,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test_dist_grid) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); CartDecomposition<3, float> dec(vcl); diff --git a/src/Graph/DistGraphFactory.hpp b/src/Graph/DistGraphFactory.hpp index f724dac4..cdd8c508 100755 --- a/src/Graph/DistGraphFactory.hpp +++ b/src/Graph/DistGraphFactory.hpp @@ -240,7 +240,7 @@ public: //! Construct Cartesian graph static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom) { - Vcluster &v_cl = create_vcluster(); + Vcluster<> &v_cl = create_vcluster(); // Calculate the size of the hyper-cubes on each dimension T szd[dim]; @@ -394,7 +394,7 @@ public: //! 
Construct Cartesian graph static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom) { - Vcluster &v_cl = create_vcluster(); + Vcluster<> &v_cl = create_vcluster(); // Calculate the size of the hyper-cubes on each dimension diff --git a/src/Graph/dist_map_graph.hpp b/src/Graph/dist_map_graph.hpp index f400aed3..d103b4f7 100644 --- a/src/Graph/dist_map_graph.hpp +++ b/src/Graph/dist_map_graph.hpp @@ -209,7 +209,7 @@ template<typename V, typename E = no_edge, class DistGraph_CSR { //! Vcluster communication object - Vcluster & vcl; + Vcluster<> & vcl; //! Distribution vector openfpm::vector<idx_t> vtxdist; @@ -1147,7 +1147,7 @@ public: * \param gg distributed graph to copy * */ - DistGraph_CSR(Vcluster & vcl, DistGraph_CSR<V, E, Memory> && g) : + DistGraph_CSR(Vcluster<> & vcl, DistGraph_CSR<V, E, Memory> && g) : vcl(vcl) { swap(g); diff --git a/src/Graph/dist_map_graph_unit_test.hpp b/src/Graph/dist_map_graph_unit_test.hpp index f761d7a1..3ad6bc7f 100644 --- a/src/Graph/dist_map_graph_unit_test.hpp +++ b/src/Graph/dist_map_graph_unit_test.hpp @@ -127,7 +127,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use) { //! Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); if(vcl.getProcessingUnits() != 4) return; @@ -241,7 +241,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use) BOOST_AUTO_TEST_CASE( dist_map_graph_use_redistribution) { //! 
Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); if(vcl.getProcessingUnits() != 4) return; @@ -324,7 +324,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_redistribution) BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); if(vcl.getProcessingUnits() != 4) return; @@ -480,7 +480,7 @@ BOOST_AUTO_TEST_CASE( dist_map_graph_use_free_add) BOOST_AUTO_TEST_CASE( dist_map_graph_use_multi_free_add) { // Vcluster - Vcluster & vcl = create_vcluster(); + Vcluster<> & vcl = create_vcluster(); if(vcl.getProcessingUnits() != 4) return; diff --git a/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp b/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp index 0c62f40e..41ff9899 100644 --- a/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp @@ -93,7 +93,7 @@ void Test2D_sub(const Box<2,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -149,7 +149,7 @@ void Test3D_decit(const Box<3,float> & domain, long int k) { size_t k_bck = k; { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -216,7 +216,7 @@ void Test3D_decit(const Box<3,float> & domain, long int k) k = k_bck; { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -292,7 +292,7 @@ void Test3D_stencil(const Box<3,float> & domain, long int k) {1,0,0}}; { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -393,7 +393,7 @@ void Test3D_fast_vect(const Box<3,float> & domain, long int k) {1,0,0}}; { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( 
v_cl.getProcessingUnits() > 32 ) return; @@ -478,7 +478,7 @@ void Test3D_fast_vect(const Box<3,float> & domain, long int k) void Test3D_decskinit(const Box<3,float> & domain, long int k) { { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index fab6c7e6..7aaacaed 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -102,7 +102,7 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm; //! Communicator class - Vcluster & v_cl; + Vcluster<> & v_cl; //! properties names openfpm::vector<std::string> prp_names; @@ -1268,7 +1268,7 @@ public: * \return the Virtual cluster machine * */ - Vcluster & getVC() + Vcluster<> & getVC() { #ifdef SE_CLASS2 check_valid(this,8); diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index 5533a3ca..dae22b2f 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -177,7 +177,7 @@ template<unsigned int dim, typename St, typename T, typename Decomposition = Car class grid_dist_id_comm { //! VCluster - Vcluster & v_cl; + Vcluster<> & v_cl; //! 
Maps the processor id with the communication request into map procedure openfpm::vector<size_t> p_map_req; diff --git a/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp b/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp index 929b2b7f..6275c92f 100644 --- a/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp +++ b/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp @@ -23,7 +23,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_save_test ) // Domain Box<2,float> domain({0.0,0.0},{1.0,1.0}); - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // Skip this test on big scale if (v_cl.getProcessingUnits() >= 32) @@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) // Domain Box<2,float> domain({0.0,0.0},{1.0,1.0}); - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // Skip this test on big scale if (v_cl.getProcessingUnits() >= 32) diff --git a/src/Grid/tests/grid_dist_id_unit_test.cpp b/src/Grid/tests/grid_dist_id_unit_test.cpp index 50a34576..6c077f7b 100644 --- a/src/Grid/tests/grid_dist_id_unit_test.cpp +++ b/src/Grid/tests/grid_dist_id_unit_test.cpp @@ -20,7 +20,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter3D_test) // Domain Box<3,float> domain({-0.3,-0.3,-0.3},{1.0,1.0,1.0}); - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // Skip this test on big scale if (v_cl.getProcessingUnits() >= 32) @@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter_test) // Domain Box<2,float> domain({0.0,0.0},{1.0,1.0}); - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // Skip this test on big scale if (v_cl.getProcessingUnits() >= 32) @@ -212,7 +212,7 @@ void Test2D(const Box<2,float> & domain, long int k) //! 
[Create and access a distributed grid] // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -275,7 +275,7 @@ void Test2D(const Box<2,float> & domain, long int k) void Test1D(const Box<1,float> & domain, long int k) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); long int big_step = k / 30; big_step = (big_step == 0)?1:big_step; long int small_step = 21; @@ -332,7 +332,7 @@ void Test1D(const Box<1,float> & domain, long int k) //! [Create and access a distributed grid] // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -452,7 +452,7 @@ void Test3D_sub(const Box<3,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -557,7 +557,7 @@ void Test3D(const Box<3,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -845,7 +845,7 @@ void Test2D_complex(const Box<2,float> & domain, long int k) //! 
[Create and access a distributed grid complex] // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -1007,7 +1007,7 @@ void Test3D_complex(const Box<3,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -1103,7 +1103,7 @@ void Test3D_dup(const Box<3,float> & domain, long int k) long int small_step = 21; long int k_old = k; - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -1202,7 +1202,7 @@ void Test3D_dup(const Box<3,float> & domain, long int k) void Test3D_periodic(const Box<3,float> & domain, long int k) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -1274,7 +1274,7 @@ void Test3D_periodic(const Box<3,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -1359,7 +1359,7 @@ void Test3D_periodic(const Box<3,float> & domain, long int k) void Test3D_periodic_put(const Box<3,float> & domain, long int k) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -1479,7 +1479,7 @@ void Test_grid_copy(const Box<3,float> & domain, long int k) { typedef Point_test<float> p; - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; @@ -1588,7 +1588,7 @@ void Test_grid_copy(const Box<3,float> & domain, long int k) void Test_ghost_correction(Box<3,double> & domain, long int k, long int g_) { size_t sz[3] = {(size_t)k,(size_t)k,(size_t)k}; - periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + periodicity<3> bc = {{PERIODIC,PERIODIC,PERIODIC}}; Ghost<3,long int> g(g_); @@ 
-1872,7 +1872,7 @@ BOOST_AUTO_TEST_CASE ( grid_basic_functions ) {return;} size_t sz[2] = {(size_t)8,(size_t)8}; - periodicity<2> bc = {PERIODIC,PERIODIC}; + periodicity<2> bc = {{PERIODIC,PERIODIC}}; Ghost<2,long int> g(1); Box<2,double> domain({-1.0,-1.0},{1.0,1.0}); @@ -1893,7 +1893,7 @@ BOOST_AUTO_TEST_CASE ( grid_overflow_round_off_error ) size_t sz[2] = {numGridPoint,numGridPoint}; - periodicity<2> bc = {PERIODIC,PERIODIC}; + periodicity<2> bc = {{PERIODIC,PERIODIC}}; Ghost<2,double> g(3.0*(domain.getHigh(0) - domain.getLow(0))/numGridPoint + 0.001); diff --git a/src/Grid/tests/grid_dist_id_unit_test_ext_dom.hpp b/src/Grid/tests/grid_dist_id_unit_test_ext_dom.hpp index dcd7dbbd..add6c61d 100644 --- a/src/Grid/tests/grid_dist_id_unit_test_ext_dom.hpp +++ b/src/Grid/tests/grid_dist_id_unit_test_ext_dom.hpp @@ -17,7 +17,7 @@ void Test3D_extended_grid(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 32 ) return; diff --git a/src/Grid/tests/grid_dist_id_unit_test_unb_ghost.hpp b/src/Grid/tests/grid_dist_id_unit_test_unb_ghost.hpp index 36551eda..f4b45aa0 100644 --- a/src/Grid/tests/grid_dist_id_unit_test_unb_ghost.hpp +++ b/src/Grid/tests/grid_dist_id_unit_test_unb_ghost.hpp @@ -59,7 +59,7 @@ void Test3D_unb_ghost(const Box<3,float> & domain, long int k) } // Get the virtual cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); @@ -118,7 +118,7 @@ void Test3D_unb_ghost(const Box<3,float> & domain, long int k) // Test grid periodic void Test3D_unb_ghost_periodic(const Box<3,float> & domain, long int k) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if ( v_cl.getProcessingUnits() > 24 ) return; @@ -187,7 +187,7 @@ void Test3D_unb_ghost_periodic(const Box<3,float> & domain, long int k) } // Get the virtual 
cluster machine - Vcluster & vcl = g_dist.getVC(); + Vcluster<> & vcl = g_dist.getVC(); // reduce vcl.sum(count); diff --git a/src/Makefile.am b/src/Makefile.am index 18cd8fee..70e6b590 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -3,14 +3,21 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $( FLAGS_NVCC = -Xcudafe "--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" $(NVCCFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) $(MPI_INC_PATH) $(PETSC_INCLUDE) $(LIBHILBERT_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) -g --expt-extended-lambda noinst_PROGRAMS = pdata actual_test -pdata_SOURCES = main.cpp Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/vector_dist_gpu_unit_tests.cu Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +if BUILDCUDA +pdata_SOURCES = initialize/initialize_wrapper_cuda.cu +actual_test_SOURCES = initialize/initialize_wrapper_cuda.cu +else +pdata_SOURCES = initialize/initialize_wrapper_cpu.cpp +actual_test_SOURCES = initialize/initialize_wrapper_cpu.cpp +endif +pdata_SOURCES += main.cpp 
Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp pdata_CXXFLAGS = -Wno-unknown-pragmas $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs pdata_CFLAGS = $(CUDA_CFLAGS) pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis -actual_test_SOURCES = Vector/cuda/vector_dist_cuda_func_test.cu Vector/vector_dist_gpu_unit_tests.cu vector_ main_single.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +actual_test_SOURCES += Vector/cuda/vector_dist_cuda_func_test.cu Vector/cuda/vector_dist_gpu_unit_tests.cu vector_ main_single.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp
../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp actual_test_CXXFLAGS = -Wno-unknown-pragmas $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs -actual_test_CFLAGS = $(CUDA_CFLAGS) +actual_test_CFLAGS = $(CUDA_CFLAGS) actual_test_LDADD = $(LINKLIBS) -lparmetis -lmetis diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index 3692e45a..6b3454e7 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -9,13 +9,16 @@ BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test ) BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test ) { - openfpm::vector_gpu<aggregate<int>> vgp; + openfpm::vector_gpu<aggregate<int,int>> vgp; openfpm::vector_gpu<aggregate<int,int>> offs; vgp.resize(200000); for (size_t k = 0 ; k < vgp.size() ; k++) - {vgp.template get<0>(k) = k / 1000;} + { + vgp.template get<0>(k) = k / 1000; + vgp.template get<1>(k) = k / 1000; + } offs.resize(220); diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index bc7ff039..7c3b3c6d 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -17,11 +17,11 @@ __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs if (p >= vd.size() - 1) return; - if (vd.template get<0>(p) != vd.template get<0>(p+1)) + if (vd.template get<1>(p) != vd.template get<1>(p+1)) { int i = atomicAdd(cnt, 1); offs.template get<0>(i) = p+1; - offs.template get<1>(i) = vd.template get<0>(p); + offs.template get<1>(i) = vd.template get<1>(p); } } diff --git a/src/Vector/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu similarity index 82% rename from
src/Vector/vector_dist_gpu_unit_tests.cu rename to src/Vector/cuda/vector_dist_gpu_unit_tests.cu index fd107599..26dbe011 100644 --- a/src/Vector/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -3,7 +3,7 @@ #include <boost/test/unit_test.hpp> #include "VCluster/VCluster.hpp" #include <Vector/vector_dist.hpp> - +#include "Vector/tests/vector_dist_util_unit_tests.hpp" BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test ) @@ -403,13 +403,97 @@ BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test) vd.getPos(p)[1] = (float)rand() / RAND_MAX; vd.getPos(p)[2] = (float)rand() / RAND_MAX; + vd.template getProp<0>(p) = vd.getPos(p)[0] + vd.getPos(p)[1] + vd.getPos(p)[2]; + + vd.template getProp<1>(p)[0] = vd.getPos(p)[0]; + vd.template getProp<1>(p)[1] = vd.getPos(p)[1]; + vd.template getProp<1>(p)[2] = vd.getPos(p)[2]; + + vd.template getProp<2>(p)[0] = vd.getPos(p)[0] + vd.getPos(p)[1]; + vd.template getProp<2>(p)[1] = vd.getPos(p)[0] + vd.getPos(p)[2]; + vd.template getProp<2>(p)[2] = vd.getPos(p)[1] + vd.getPos(p)[2]; + ++it; } + // move on device + vd.hostToDevicePos(); + vd.hostToDeviceProp<0,1,2>(); + // Ok we redistribute the particles (GPU based) vd.map(MAP_ON_DEVICE); + // Reset the host part + + auto it3 = vd.getDomainIterator(); + + while (it3.isNext()) + { + auto p = it3.get(); + + vd.getPos(p)[0] = 1.0; + vd.getPos(p)[1] = 1.0; + vd.getPos(p)[2] = 1.0; + + vd.template getProp<0>(p) = 0.0; + + vd.template getProp<1>(p)[0] = 0.0; + vd.template getProp<1>(p)[1] = 0.0; + vd.template getProp<1>(p)[2] = 0.0; + + vd.template getProp<2>(p)[0] = 0.0; + vd.template getProp<2>(p)[1] = 0.0; + vd.template getProp<2>(p)[2] = 0.0; + + ++it3; + } + + // we move from Device to CPU + + vd.deviceToHostPos(); + vd.deviceToHostProp<0,1,2>(); + + // Check + + auto it2 = vd.getDomainIterator(); + + bool match = true; + while (it2.isNext()) + { + auto p = it2.get(); + + match &= vd.template getProp<0>(p) == vd.getPos(p)[0] + vd.getPos(p)[1] + vd.getPos(p)[2]; + + match &=
vd.template getProp<1>(p)[0] == vd.getPos(p)[0]; + match &= vd.template getProp<1>(p)[1] == vd.getPos(p)[1]; + match &= vd.template getProp<1>(p)[2] == vd.getPos(p)[2]; + + match &= vd.template getProp<2>(p)[0] == vd.getPos(p)[0] + vd.getPos(p)[1]; + match &= vd.template getProp<2>(p)[1] == vd.getPos(p)[0] + vd.getPos(p)[2]; + match &= vd.template getProp<2>(p)[2] == vd.getPos(p)[1] + vd.getPos(p)[2]; + + ++it2; + } + + BOOST_REQUIRE_EQUAL(match,true); + + // count local particles + + size_t l_cnt = 0; + size_t nl_cnt = 0; + size_t n_out = 0; + + // Domain + ghost box + Box<3,float> dom_ext = domain; + dom_ext.enlarge(g); + + auto it5 = vd.getDomainIterator(); + count_local_n_local<3>(vd,it5,bc,domain,dom_ext,l_cnt,nl_cnt,n_out); + + BOOST_REQUIRE_EQUAL(n_out,0); + BOOST_REQUIRE_EQUAL(l_cnt,vd.size_local()); + vd.write("gpu_write_test"); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/performance/vector_dist_performance_common.hpp b/src/Vector/performance/vector_dist_performance_common.hpp index e6e0d261..1b7240c0 100644 --- a/src/Vector/performance/vector_dist_performance_common.hpp +++ b/src/Vector/performance/vector_dist_performance_common.hpp @@ -136,7 +136,7 @@ template<unsigned int dim, unsigned int prp, typename T, typename V> void cross_ * \param v_cl Global vcluster * \param k_int Number of particles */ -template<unsigned int dim, typename v_dist> void vd_initialize(v_dist & vd, Vcluster & v_cl, size_t k_int) +template<unsigned int dim, typename v_dist> void vd_initialize(v_dist & vd, Vcluster<> & v_cl, size_t k_int) { // The random generator engine std::default_random_engine eg(v_cl.getProcessUnitID()*4313); @@ -166,7 +166,7 @@ template<unsigned int dim, typename v_dist> void vd_initialize(v_dist & vd, Vclu * \param v_cl Global vcluster * \param k_int Number of particles */ -template<unsigned int dim, typename v_dist> void vd_initialize_double(v_dist & vd,v_dist & vd2, Vcluster & v_cl, size_t k_int) +template<unsigned int dim, typename v_dist> void 
vd_initialize_double(v_dist & vd,v_dist & vd2, Vcluster<> & v_cl, size_t k_int) { // The random generator engine std::default_random_engine eg(v_cl.getProcessUnitID()*4313); diff --git a/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp b/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp index a843c42e..0edef0f5 100644 --- a/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp +++ b/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp @@ -120,7 +120,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_hdf5_load_test ) { #ifndef SE_CLASS3 - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); Box<dim,float> box; diff --git a/src/Vector/tests/vector_dist_NN_tests.cpp b/src/Vector/tests/vector_dist_NN_tests.cpp index d233589a..f947c164 100644 --- a/src/Vector/tests/vector_dist_NN_tests.cpp +++ b/src/Vector/tests/vector_dist_NN_tests.cpp @@ -16,7 +16,7 @@ extern void print_test_v(std::string test, size_t sz); template<typename VerletList> void test_full_nn(long int k) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 12) return; @@ -240,7 +240,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 12) return; @@ -321,7 +321,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration ) BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_update_with_limit ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 12) return; @@ -392,7 +392,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_update_with_limit ) BOOST_AUTO_TEST_CASE( vector_dist_particle_getCellListSym_with_div ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 12) return; diff --git 
a/src/Vector/tests/vector_dist_cell_list_tests.cpp b/src/Vector/tests/vector_dist_cell_list_tests.cpp index 454d96a7..7ccdd523 100644 --- a/src/Vector/tests/vector_dist_cell_list_tests.cpp +++ b/src/Vector/tests/vector_dist_cell_list_tests.cpp @@ -19,7 +19,7 @@ extern long int decrement(long int k, long int step); void test_reorder_sfc(reorder_opt opt) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; @@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; @@ -225,7 +225,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; @@ -352,7 +352,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test ) BOOST_AUTO_TEST_CASE( vector_dist_symmetric_cell_list ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -562,7 +562,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_cell_list ) BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_cell_list ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -789,7 +789,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_cell_list ) template<typename VerletList> void test_vd_symmetric_verlet_list() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -998,7 +998,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) template<typename VerletList> void vector_sym_verlet_list_nb() { - Vcluster & v_cl = 
create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1410,7 +1410,7 @@ template<typename VerletList, typename part_prop> void test_crs_full(vector_dist template<typename VerletList> void test_csr_verlet_list() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1470,7 +1470,7 @@ void test_csr_verlet_list() template<typename VerletList> void test_csr_verlet_list_override() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1558,7 +1558,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_dec_override ) template <typename VerletList> void test_vd_symmetric_crs_verlet() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1657,7 +1657,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit ) BOOST_AUTO_TEST_CASE( vector_dist_checking_unloaded_processors ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1746,7 +1746,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_checking_unloaded_processors ) BOOST_AUTO_TEST_CASE( vector_dist_cell_list_multi_type ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) return; @@ -1858,7 +1858,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_list_multi_type ) BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 24) {return;} diff --git a/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp b/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp index 8d2956ba..56360121 100644 --- a/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp +++ 
b/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp @@ -16,7 +16,7 @@ extern long int decrement(long int k, long int step); BOOST_AUTO_TEST_CASE( vector_dist_periodic_complex_prp_test_use_3d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; diff --git a/src/Vector/tests/vector_dist_unit_test.cpp b/src/Vector/tests/vector_dist_unit_test.cpp index 5a4e2fc3..c6508999 100644 --- a/src/Vector/tests/vector_dist_unit_test.cpp +++ b/src/Vector/tests/vector_dist_unit_test.cpp @@ -60,7 +60,7 @@ long int decrement(long int k, long int step) template<unsigned int dim, template <typename> class layout> size_t total_n_part_lc(vector_dist<dim,float, Point_test<float>, CartDecomposition<dim,float>, HeapMemory, layout > & vd, size_t (& bc)[dim]) { - Vcluster & v_cl = vd.getVC(); + Vcluster<> & v_cl = vd.getVC(); auto it2 = vd.getDomainIterator(); const CartDecomposition<3,float> & ct = vd.getDecomposition(); @@ -100,7 +100,7 @@ template<typename vector> void Test2D_ghost(Box<2,float> & box) { // Communication object - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); typedef Point_test<float> p; @@ -291,7 +291,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_inte ) BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -366,7 +366,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d ) BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -443,7 +443,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d ) BOOST_AUTO_TEST_CASE( vector_dist_iterator_fixed_dec_3d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random 
generator engine @@ -520,7 +520,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_fixed_dec_3d ) BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_2d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -628,7 +628,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_2d ) BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_3d ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -734,7 +734,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_3d ) void test_random_walk(size_t opt) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // set the seed // create the random generator engine @@ -925,7 +925,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_not_periodic_map ) BOOST_AUTO_TEST_CASE( vector_dist_out_of_bound_policy ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 8) return; @@ -985,7 +985,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_out_of_bound_policy ) void Test_interacting(Box<3,float> & box) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 8) return; @@ -1150,7 +1150,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_grid_iterator ) // 3D test for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); const size_t Ng = k; @@ -1219,7 +1219,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test ) // 3D test for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); const size_t Ng = k; @@ -1328,7 +1328,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test ) BOOST_AUTO_TEST_CASE( vector_dist_periodic_map_list ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & 
v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 3) return; @@ -1452,7 +1452,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_map_list ) BOOST_AUTO_TEST_CASE( vector_dist_ghost_with_ghost_buffering ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 3) return; @@ -1634,7 +1634,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_with_ghost_buffering ) BOOST_AUTO_TEST_CASE( vector_dist_ghost_put ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); long int k = 25*25*25*create_vcluster().getProcessingUnits(); k = std::pow(k, 1/3.); @@ -1824,7 +1824,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put ) BOOST_AUTO_TEST_CASE( vector_fixing_noposition_and_keep_prop ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; @@ -1888,7 +1888,7 @@ BOOST_AUTO_TEST_CASE( vector_fixing_noposition_and_keep_prop ) BOOST_AUTO_TEST_CASE( vector_of_vector_dist ) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 48) return; diff --git a/src/Vector/tests/vector_dist_util_unit_tests.hpp b/src/Vector/tests/vector_dist_util_unit_tests.hpp index f830e13f..3995a7cf 100644 --- a/src/Vector/tests/vector_dist_util_unit_tests.hpp +++ b/src/Vector/tests/vector_dist_util_unit_tests.hpp @@ -23,7 +23,7 @@ */ template<unsigned int dim,typename vector_dist> inline void count_local_n_local(vector_dist & vd, vector_dist_iterator & it, size_t (& bc)[dim] , Box<dim,float> & box, Box<dim,float> & dom_ext, size_t & l_cnt, size_t & nl_cnt, size_t & n_out) { - const CartDecomposition<dim,float> & ct = vd.getDecomposition(); + auto & ct = vd.getDecomposition(); while (it.isNext()) { @@ -31,8 +31,10 @@ template<unsigned int dim,typename vector_dist> inline void count_local_n_local( // Check if it is in the domain if (box.isInsideNP(vd.getPos(key)) == true) { + Point<dim,typename 
vector_dist::stype> xp = vd.getPos(key); + // Check if local - if (ct.isLocalBC(vd.getPos(key),bc) == true) + if (ct.isLocalBC(xp,bc) == true) l_cnt++; else nl_cnt++; diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 8112a98e..05f15a77 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -234,7 +234,7 @@ private: #endif //! Virtual cluster - Vcluster & v_cl; + Vcluster<Memory> & v_cl; //! option used to create this vector size_t opt = 0; @@ -453,7 +453,7 @@ public: * */ vector_dist(const Decomposition & dec, size_t np) : - vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>(dec), v_cl(create_vcluster()) SE_CLASS3_VDIST_CONSTRUCTOR + vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>(dec), v_cl(create_vcluster<Memory>()) SE_CLASS3_VDIST_CONSTRUCTOR { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -480,7 +480,7 @@ public: * */ vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g, size_t opt = 0, const grid_sm<dim,void> & gdist = grid_sm<dim,void>()) - :v_cl(create_vcluster()),opt(opt) SE_CLASS3_VDIST_CONSTRUCTOR + :v_cl(create_vcluster<Memory>()),opt(opt) SE_CLASS3_VDIST_CONSTRUCTOR { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -2265,7 +2265,7 @@ public: * */ - Vcluster & getVC() + Vcluster<> & getVC() { #ifdef SE_CLASS2 check_valid(this,8); @@ -2496,7 +2496,7 @@ public: */ template<unsigned int ... prp> void hostToDeviceProp() { - v_prp.template deviceToHost<prp ...>(); + v_prp.template hostToDevice<prp ...>(); } /*! 
\brief Move the memory from the device to host memory @@ -2523,6 +2523,6 @@ public: }; -template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>> using vector_dist_gpu = vector_dist<dim,St,prop,Decomposition,CudaMemory,memory_traits_inte>; +template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte>> using vector_dist_gpu = vector_dist<dim,St,prop,Decomposition,CudaMemory,memory_traits_inte>; #endif /* VECTOR_HPP_ */ diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index c468c915..4419fa2c 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -67,7 +67,7 @@ class vector_dist_comm typedef openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> send_pos_vector; //! VCluster - Vcluster & v_cl; + Vcluster<Memory> & v_cl; //! Domain decomposition Decomposition dec; @@ -90,6 +90,12 @@ class vector_dist_comm //! particles that must be communicated to the other processors openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart; + //! Helper buffer for computation (on GPU) of local particles (position) + openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> v_pos_tmp; + + //! Helper buffer for computation (on GPU) of local particles (properties) + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> v_prp_tmp; + //! 
Per processor number of particle g_opart_sz.get(i) = g_opart.get(i).size() openfpm::vector<size_t> g_opart_sz; @@ -160,10 +166,15 @@ class vector_dist_comm { if (opt & MAP_ON_DEVICE) { - for (size_t i = 0; i < prc_sz.size()-1 ; i++) + size_t prev_off = 0; + for (size_t i = 0; i < prc_sz.size() ; i++) { - prc_r.add(prc_sz.template get<1>(i)); - prc_sz_r.add(prc_sz.template get<0>(i+1) - prc_sz.template get<0>(i)); + if (prc_sz.template get<1>(i) != (unsigned int)-1) + { + prc_r.add(prc_sz.template get<1>(i)); + prc_sz_r.add(prc_sz.template get<0>(i) - prev_off); + } + prev_off = prc_sz.template get<0>(i); } } else @@ -554,8 +565,6 @@ class vector_dist_comm template<typename T> inline void operator()(T& t) { - typedef typename boost::mpl::at<v_mpl,T>::type prp_ms; - g_send_prp.get(i).template setMemory<T::value>(hsmem.get(j)); j++; @@ -672,7 +681,7 @@ class vector_dist_comm openfpm::vector<size_t> & prc_sz_r, openfpm::vector<openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity>> & m_pos, openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>> & m_prp, - size_t offset, + openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> & prc_sz, size_t opt) { m_prp.resize(prc_sz_r.size()); @@ -691,6 +700,23 @@ class vector_dist_comm { #if defined(CUDA_GPU) && defined(__NVCC__) + // The first part of m_opart and prc_sz contain the local particles + + v_pos_tmp.resize(prc_sz.template get<0>(0)); + v_prp_tmp.resize(prc_sz.template get<0>(0)); + + auto ite = v_pos_tmp.getGPUIterator(); + + // fill v_pos_tmp and v_prp_tmp with local particles + process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()), + decltype(v_pos.toKernel()),decltype(v_prp.toKernel())> + <<<ite.wthr,ite.thr>>> + 
(m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(), + v_pos.toKernel(),v_prp.toKernel(),0); + + size_t offset = prc_sz.template get<0>(0); + + // Fill the sending buffers for (size_t i = 0 ; i < m_pos.size() ; i++) { auto ite = m_pos.get(i).getGPUIterator(); @@ -704,6 +730,10 @@ class vector_dist_comm offset += prc_sz_r.size(); } + // swap the old local particles with the actual local particles + v_pos_tmp.swap(v_pos); + v_prp_tmp.swap(v_prp); + #else std::cout << __FILE__ << ":" << __LINE__ << " error MAP_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl; @@ -723,10 +753,10 @@ class vector_dist_comm { process_map_particle<proc_without_prp>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt); } - } - v_pos.resize(v_pos.size() - m_opart.size()); - v_prp.resize(v_prp.size() - m_opart.size()); + v_pos.resize(v_pos.size() - m_opart.size()); + v_prp.resize(v_prp.size() - m_opart.size()); + } } @@ -813,6 +843,7 @@ class vector_dist_comm CudaMemory mem; mem.allocate(sizeof(int)); + mem.fill(0); // Find the buffer bases find_buffer_offsets<decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())><<<ite.wthr,ite.thr>>> @@ -820,8 +851,14 @@ class vector_dist_comm // Trasfer the number of offsets on CPU mem.deviceToHost(); + prc_sz.template deviceToHost<0,1>(); + // get also the last element from lbl_p; + lbl_p.template deviceToHost<1>(lbl_p.size()-1,lbl_p.size()-1); int noff = *(int *)mem.getPointer(); + prc_sz.resize(noff+1); + prc_sz.template get<0>(prc_sz.size()-1) = lbl_p.size(); + prc_sz.template get<1>(prc_sz.size()-1) = lbl_p.template get<1>(lbl_p.size()-1); #else @@ -978,7 +1015,7 @@ public: * */ vector_dist_comm(const vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & v) - :v_cl(create_vcluster()),dec(create_vcluster()),lg_m(0) + :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0) { this->operator=(v); } @@ -990,7 +1027,7 @@ public: * */ vector_dist_comm(const Decomposition & 
dec) - :v_cl(create_vcluster()),dec(dec),lg_m(0) + :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0) { } @@ -1001,7 +1038,7 @@ public: * */ vector_dist_comm(Decomposition && dec) - :v_cl(create_vcluster()),dec(dec),lg_m(0) + :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0) { } @@ -1010,7 +1047,7 @@ public: * */ vector_dist_comm() - :v_cl(create_vcluster()),dec(create_vcluster()),lg_m(0) + :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0) { } @@ -1138,10 +1175,10 @@ public: { size_t opt_ = compute_options(opt); op_ssend_gg_recv_merge opm(g_m); - v_cl.SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); } else - {v_cl.SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte);} + {v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte);} // fill g_opart_sz g_opart_sz.resize(prc_g_opart.size()); @@ -1161,13 +1198,13 @@ public: if (opt & SKIP_LABELLING) { size_t opt_ = compute_options(opt); - v_cl.SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_); + v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_); } else { prc_recv_get.clear(); recv_sz_get.clear(); - v_cl.SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get); + v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get); } // fill g_opart_sz @@ -1257,7 +1294,7 @@ public: 
fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_sz_r, m_pos, m_prp); v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt); - v_cl.SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt); + v_cl.template SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt); // mark the ghost part @@ -1304,17 +1341,29 @@ public: //! properties vector openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>> m_prp; - fill_send_map_buf(v_pos,v_prp, prc_sz_r, m_pos, m_prp,prc_sz_r.get(0),opt); + fill_send_map_buf(v_pos,v_prp, prc_sz_r, m_pos, m_prp,prc_sz,opt); + + size_t opt_ = 0; + if (opt & MAP_ON_DEVICE) + { +#if defined(CUDA_GPU) && defined(__NVCC__) + // Before doing the communication on MAP_ON_DEVICE we have to be sure that the previous kernels complete + cudaDeviceSynchronize(); + opt_ |= MPI_GPU_DIRECT; +#else + std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option MAP_ON_DEVICE you must compile with NVCC" << std::endl; +#endif + } - v_cl.SSendRecv<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity>, + v_cl.template SSendRecv<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity>, openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>, layout_base> - (m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt); + (m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt_); - v_cl.SSendRecv<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>, + v_cl.template SSendRecv<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>, openfpm::vector<prop,Memory,typename 
layout_base<prop>::type,layout_base>, layout_base> - (m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt); + (m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt_); // mark the ghost part @@ -1401,12 +1450,12 @@ public: size_t opt_ = compute_options(opt); op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); + v_cl.template SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); } else { op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); + v_cl.template SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); } // process also the local replicated particles diff --git a/src/Vector/vector_dist_dlb_test.hpp b/src/Vector/vector_dist_dlb_test.hpp index f753ca3a..428e30ce 100644 --- a/src/Vector/vector_dist_dlb_test.hpp +++ b/src/Vector/vector_dist_dlb_test.hpp @@ -16,7 +16,7 @@ BOOST_AUTO_TEST_SUITE( vector_dist_dlb_test ) template<typename vector_type> void mp_test_template(vector_type & vd0, vector_type & vd1, vector_type & vd2, vector_type & vd3) { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); // Only processor 0 initialy add particles on a corner of a domain @@ -213,7 +213,7 @@ void mp_test_template(vector_type & vd0, vector_type & vd1, vector_type & vd2, v template<typename vector_type> void test_dlb_vector() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 8) return; @@ -314,7 +314,7 @@ template<typename vector_type> void test_dlb_vector() template<typename vector_type> void test_dlb_multi_phase_vector() { - Vcluster 
& v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 8) return; @@ -335,7 +335,7 @@ template<typename vector_type> void test_dlb_multi_phase_vector() template<typename vector_type> void test_dlb_multi_phase_v_vector() { - Vcluster & v_cl = create_vcluster(); + Vcluster<> & v_cl = create_vcluster(); if (v_cl.getProcessingUnits() > 8) return; diff --git a/src/initialize/initialize_wrapper.hpp b/src/initialize/initialize_wrapper.hpp new file mode 100644 index 00000000..79d8577c --- /dev/null +++ b/src/initialize/initialize_wrapper.hpp @@ -0,0 +1,20 @@ +/* + * initialize_wrapper.hpp + * + * Created on: Aug 21, 2018 + * Author: i-bird + */ + +#ifndef INITIALIZE_VCL_HPP_ +#define INITIALIZE_VCL_HPP_ + +/*! \brief If openfpm has to work on GPU we have to be sure openfpm_init is called on a file compiled with NVCC + * + * There are two implementations, initialize_wrapper_cpu.cpp and initialize_wrapper_cuda.cu. At the configuration stage the second implementation is chosen + * if the test has to run on GPU + * + */ +void openfpm_init_wrapper(int * argc, char *** argv); +void openfpm_finalize_wrapper(); + +#endif /* INITIALIZE_VCL_HPP_ */ diff --git a/src/initialize/initialize_wrapper_cpu.cpp b/src/initialize/initialize_wrapper_cpu.cpp new file mode 100644 index 00000000..4d75490c --- /dev/null +++ b/src/initialize/initialize_wrapper_cpu.cpp @@ -0,0 +1,13 @@ +#include "initialize_wrapper.hpp" +#include "VCluster/VCluster.hpp" + + +void openfpm_init_wrapper(int * argc, char *** argv) +{ + openfpm_init(argc,argv); +} + +void openfpm_finalize_wrapper() +{ + openfpm_finalize(); +} diff --git a/src/initialize/initialize_wrapper_cuda.cu b/src/initialize/initialize_wrapper_cuda.cu new file mode 100644 index 00000000..74a8473d --- /dev/null +++ b/src/initialize/initialize_wrapper_cuda.cu @@ -0,0 +1,12 @@ +#include "initialize_wrapper.hpp" +#include "VCluster/VCluster.hpp" + +void openfpm_init_wrapper(int * argc, char *** argv) +{ + openfpm_init(argc,argv); +} + +void 
openfpm_finalize_wrapper() +{ + openfpm_finalize(); +} diff --git a/src/unit_test_init_cleanup.hpp b/src/unit_test_init_cleanup.hpp index 96e7743f..1c953dad 100644 --- a/src/unit_test_init_cleanup.hpp +++ b/src/unit_test_init_cleanup.hpp @@ -8,7 +8,7 @@ #ifndef UNIT_TEST_INIT_CLEANUP_HPP_ #define UNIT_TEST_INIT_CLEANUP_HPP_ -#include "VCluster/VCluster.hpp" +#include "initialize/initialize_wrapper.hpp" const char * test_dir; @@ -19,7 +19,7 @@ struct ut_start { BOOST_TEST_MESSAGE("Initialize global VCluster"); - openfpm_init(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); + openfpm_init_wrapper(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); #ifdef PERFORMANCE_TEST test_dir = getenv("OPENFPM_PERFORMANCE_TEST_DIR"); @@ -35,7 +35,7 @@ struct ut_start ~ut_start() { BOOST_TEST_MESSAGE("Delete global VClster"); - openfpm_finalize(); + openfpm_finalize_wrapper(); } }; -- GitLab