diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87a205e64b631f60178a47fbc45df1ec7a19467b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,70 @@
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+# NOTE(review): listing CUDA here makes a CUDA compiler mandatory at configure
+# time, even though find_package(CUDA) below is optional - confirm the intent.
+project(openfpm_pdata LANGUAGES C CXX CUDA)
+
+
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/CMakeFiles/)
+
+# User-configurable cache entries. The SE_CLASS* switches default to OFF and
+# decide which "#define SE_CLASSn" lines are written to the generated config.h.
+set(BOOST_INCLUDE ${Boost_INCLUDE_DIR} CACHE PATH "Include directory for BOOST")
+option(SE_CLASS1 "Activate compilation with SE_CLASS1" OFF)
+option(SE_CLASS2 "Activate compilation with SE_CLASS2" OFF)
+option(SE_CLASS3 "Activate compilation with SE_CLASS3" OFF)
+
+
+# Boost and MPI are requested as REQUIRED; CUDA is probed without REQUIRED.
+find_package(Boost 1.52.0 REQUIRED unit_test_framework iostreams program_options)
+find_package(CUDA)
+find_package(MPI REQUIRED)
+
+# OPENFPM_INIT_FILE holds the initialize wrapper path: .cu if CUDA was found, .cpp otherwise.
+if(CUDA_FOUND)
+    set(OPENFPM_INIT_FILE "initialize/initialize_wrapper_cuda.cu")
+else()
+    set(OPENFPM_INIT_FILE "initialize/initialize_wrapper_cuda.cpp")
+endif()
+
+###### CONFIG.h FILE ######
+
+# Every DEFINE_* variable set below is substituted into
+# src/config/config_cmake.h.in by configure_file(); unset ones expand to nothing.
+if(SE_CLASS1)
+    set(DEFINE_SE_CLASS1 "#define SE_CLASS1")
+endif()
+
+if(SE_CLASS2)
+    set(DEFINE_SE_CLASS2 "#define SE_CLASS2")
+endif()
+
+if(SE_CLASS3)
+    set(DEFINE_SE_CLASS3 "#define SE_CLASS3")
+endif()
+
+if(CUDA_FOUND)
+    set(DEFINE_CUDA_GPU "#define CUDA_GPU")
+endif()
+
+if(MPI_FOUND)
+    set(DEFINE_HAVE_MPI "#define HAVE_MPI")
+else()
+    message( FATAL_ERROR "MPI is required in order to install OpenFPM" )
+endif()
+
+if (Boost_FOUND)
+    set(DEFINE_HAVE_BOOST "#define HAVE_BOOST")
+    set(DEFINE_HAVE_BOOST_IOSTREAMS "#define HAVE_BOOST_IOSTREAMS")
+    set(DEFINE_HAVE_BOOST_PROGRAM_OPTIONS "#define HAVE_BOOST_PROGRAM_OPTIONS")
+    set(DEFINE_HAVE_BOOST_UNIT_TEST_FRAMEWORK "#define HAVE_BOOST_UNIT_TEST_FRAMEWORK")
+else()
+    message( FATAL_ERROR "BOOST is required in order to install OpenFPM" )
+endif()
+
+
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config/config_cmake.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/config/config.h)
+
+include_directories(SYSTEM ${MPI_INCLUDE_PATH})
+
+add_subdirectory (src)
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9cb41bb2b10dfce34eae65c346ceff64057c3334
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,70 @@
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+
+########################### Executables
+
+# Test executable: local VCluster sources plus sources taken from the sibling openfpm_devices checkout.
+add_executable(vcluster_test main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp ../../openfpm_devices/src/memory/CudaMemory.cu VCluster/cuda/VCluster_semantic_unit_cuda_tests.cu )
+
+# Static library built from VCluster.cpp only; this is the target installed below.
+add_library(vcluster STATIC VCluster/VCluster.cpp)
+
+###########################
+
+# Flags applied only to CUDA sources: diagnostic settings forwarded through -Xcudafe, plus --expt-extended-lambda.
+target_compile_options(vcluster_test PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe "--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda>)
+
+target_include_directories (vcluster_test PUBLIC ${CUDA_INCLUDE_DIRS})
+target_include_directories (vcluster_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories (vcluster_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../openfpm_devices/src/)
+target_include_directories (vcluster_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../openfpm_vcluster/src/)
+target_include_directories (vcluster_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../openfpm_data/src/)
+target_include_directories (vcluster_test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/config)
+
+target_include_directories (vcluster PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories (vcluster PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/config)
+target_include_directories (vcluster PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../openfpm_data/src/)
+target_include_directories (vcluster PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../openfpm_devices/src/)
+
+target_link_libraries(vcluster_test PRIVATE ${Boost_LIBRARIES})
+
+# Request that vcluster_test be built with at least C++11 (cxx_std_11).
+# The feature is declared PUBLIC, but vcluster_test is an executable, so in
+# practice the requirement only affects this target.
+target_compile_features(vcluster_test PUBLIC cxx_std_11)
+target_link_libraries(vcluster_test PRIVATE ${MPI_C_LIBRARIES})
+
+# Install the static library and the public headers under openfpm_vcluster/.
+install(TARGETS vcluster DESTINATION openfpm_vcluster/lib)
+install(FILES MPI_wrapper/MPI_IallreduceW.hpp
+    MPI_wrapper/MPI_IrecvW.hpp
+    MPI_wrapper/MPI_IBcastW.hpp
+    MPI_wrapper/MPI_IsendW.hpp
+    MPI_wrapper/MPI_util.hpp
+    MPI_wrapper/MPI_IAllGather.hpp
+    DESTINATION openfpm_vcluster/include/MPI_wrapper)
+
+install(FILES VCluster/VCluster_base.hpp
+    VCluster/VCluster.hpp
+    VCluster/VCluster_meta_function.hpp
+    DESTINATION openfpm_vcluster/include/VCluster )
+
+install (FILES util/Vcluster_log.hpp
+    DESTINATION openfpm_vcluster/include/util)
+
+#if(BUILD_TESTING)
+
+# add_executable(particle_test test.cu)
+
+# set_target_properties(particle_test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+# target_link_libraries(particle_test PRIVATE particles)
+
+# add_test(NAME particles_10k COMMAND particle_test 10000 )
+# add_test(NAME particles_256k COMMAND particle_test 256000 )
+
+# if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath,
+  # so that the static cuda runtime can find it at runtime.
+ # set_property(TARGET particle_test PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + # endif() + #endif() + diff --git a/src/VCluster/VCluster.hpp b/src/VCluster/VCluster.hpp index b5a74b435d6de7c67e161d69227d8d54377ce12a..aeb9aea2a13b7af9411e0803ae35677190270bb2 100644 --- a/src/VCluster/VCluster.hpp +++ b/src/VCluster/VCluster.hpp @@ -136,8 +136,6 @@ class Vcluster: public Vcluster_base<InternalMemory> pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf,opt); } - self_base::tags.clear(); - // receive information base_info<InternalMemory> bi(&this->recv_buf,prc_recv,sz_recv_byte,this->tags,opt); @@ -168,6 +166,7 @@ class Vcluster: public Vcluster_base<InternalMemory> } else { + self_base::tags.clear(); prc_recv.clear(); self_base::sendrecvMultipleMessagesNBX(prc_send_.size(),(size_t *)send_sz_byte.getPointer(),(size_t *)prc_send_.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi); } @@ -203,7 +202,7 @@ class Vcluster: public Vcluster_base<InternalMemory> struct base_info { //! Receive buffer - openfpm::vector<BMemory<Memory>> * recv_buf; + openfpm::vector_fr<BMemory<Memory>> * recv_buf; //! receiving processor list openfpm::vector<size_t> & prc; //! size of each message @@ -215,7 +214,7 @@ class Vcluster: public Vcluster_base<InternalMemory> size_t opt; //! 
constructor - base_info(openfpm::vector<BMemory<Memory>> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags,size_t opt) + base_info(openfpm::vector_fr<BMemory<Memory>> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags,size_t opt) :recv_buf(recv_buf),prc(prc),sz(sz),tags(tags),opt(opt) {} }; @@ -258,7 +257,7 @@ class Vcluster: public Vcluster_base<InternalMemory> if (rinfo.opt & MPI_GPU_DIRECT) { #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT - return rinfo.recv_buf->last().getDevicePointerNoCopy(); + return rinfo.recv_buf->last().getDevicePointer(); #else return rinfo.recv_buf->last().getPointer(); #endif @@ -639,7 +638,7 @@ class Vcluster: public Vcluster_base<InternalMemory> // we sort based on processor rcv.sort(); - openfpm::vector<BMemory<InternalMemory>> recv_ord; + openfpm::vector_fr<BMemory<InternalMemory>> recv_ord; recv_ord.resize(rcv.size()); openfpm::vector<size_t> prc_ord; @@ -657,7 +656,12 @@ class Vcluster: public Vcluster_base<InternalMemory> } // move rcv into recv - self_base::recv_buf.swap(recv_ord); + // Now we swap back to recv_buf in an ordered way + for (size_t i = 0 ; i < rcv.size() ; i++) + { + self_base::recv_buf.get(i).swap(recv_ord.get(i)); + } + prc.swap(prc_ord); sz_recv.swap(sz_recv_ord); diff --git a/src/VCluster/VCluster_base.hpp b/src/VCluster/VCluster_base.hpp index b9f54cc5f9d1abe473a9b1a8e39df646a23d8231..48a3b70ba99d61e8be6f66849a64ffbcc5d5fc71 100644 --- a/src/VCluster/VCluster_base.hpp +++ b/src/VCluster/VCluster_base.hpp @@ -142,16 +142,9 @@ class Vcluster_base //! vector of functions to execute after all the request has been performed std::vector<int> post_exe; -#if defined(CUDA_GPU) && defined(__NVCC__) - - //! standard context for mgpu + //! 
standard context for mgpu (if cuda is detected otherwise is unused) mgpu::standard_context_t * context; -#else - - void * context = NULL; - -#endif // Object array @@ -198,7 +191,7 @@ class Vcluster_base protected: //! Receive buffers - openfpm::vector<BMemory<InternalMemory>> recv_buf; + openfpm::vector_fr<BMemory<InternalMemory>> recv_buf; //! tags receiving openfpm::vector<size_t> tags; @@ -287,7 +280,7 @@ public: #if defined(CUDA_GPU) && defined(__NVCC__) - context = new mgpu::standard_context_t(); + context = new mgpu::standard_context_t(false); #endif } @@ -345,25 +338,22 @@ public: #endif -#if defined(CUDA_GPU) && defined(__NVCC__) - /*! \brief If nvidia cuda is activated return a mgpu context * + * \param iw ignore warning * */ - mgpu::standard_context_t & getmgpuContext() + mgpu::standard_context_t & getmgpuContext(bool iw = true) { - if (context == NULL) + if (context == NULL && iw == true) { - std::cout << __FILE__ << ":" << __LINE__ << " error: it seem that modern gpu context is not initialized." + std::cout << __FILE__ << ":" << __LINE__ << " Warning: it seem that modern gpu context is not initialized." "Either a compatible working cuda device has not been found, either openfpm_init has been called in a file that not compiled with NVCC" << std::endl; } return *context; } -#endif - /*! \brief Get the MPI_Communicator (or processor group) this VCluster is using * * \return MPI comunicator diff --git a/src/VCluster/VCluster_meta_function.hpp b/src/VCluster/VCluster_meta_function.hpp index 80baa15cdd1f60a47eb5d988bc19d49bbca990c5..55037ea4ef15b0ca428c04f2fc26257bc40e8d9c 100644 --- a/src/VCluster/VCluster_meta_function.hpp +++ b/src/VCluster/VCluster_meta_function.hpp @@ -17,7 +17,7 @@ struct unpack_selector_with_prp template<typename op, int ... 
prp> static void call_unpack(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -76,7 +76,7 @@ struct unpack_each_prop_buffer * */ inline unpack_each_prop_buffer(S & recv, - openfpm::vector<BMemory<HeapMemory>> & recv_buf, + openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf, op & op_param, size_t i, openfpm::vector<size_t> * sz, @@ -141,7 +141,7 @@ struct process_receive_mem_traits_inte size_t i; //! Receive buffer - openfpm::vector<BMemory<Memory>> & recv_buf; + openfpm::vector_fr<BMemory<Memory>> & recv_buf; //! Fake vector that map over received memory openfpm::vector<typename sT::value_type,PtrMemory,typename layout_base<typename sT::value_type>::type,layout_base,openfpm::grow_policy_identity> & v2; @@ -157,7 +157,7 @@ struct process_receive_mem_traits_inte * */ inline process_receive_mem_traits_inte(openfpm::vector<typename sT::value_type,PtrMemory,typename layout_base<typename sT::value_type>::type,layout_base,openfpm::grow_policy_identity> & v2, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, size_t i, size_t opt) :i(i),recv_buf(recv_buf),v2(v2),opt(opt) @@ -174,11 +174,11 @@ struct process_receive_mem_traits_inte PtrMemory * ptr1; - if (opt == MPI_GPU_DIRECT) + if (opt & MPI_GPU_DIRECT) { #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT // add the received particles to the vector - ptr1 = new PtrMemory(recv_buf.get(i).getDevicePointerNoCopy(),recv_buf.get(i).size()); + ptr1 = new PtrMemory(recv_buf.get(i).getDevicePointer(),recv_buf.get(i).size()); #else // add the received particles to the vector ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); @@ -200,7 +200,7 @@ template<bool inte_or_lin,typename T, typename S, template<typename> class layou struct unpack_selector_with_prp_lin { template<typename op, unsigned int ... 
prp> static int call_unpack_impl(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -237,7 +237,7 @@ template<typename T, typename S, template<typename> class layout_base, typename struct unpack_selector_with_prp_lin<true,T,S,layout_base,Memory> { template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -282,7 +282,7 @@ template<typename T, typename S, template<typename> class layout_base, typename struct unpack_selector_with_prp<true,T,S,layout_base,Memory> { template<typename op, unsigned int ... prp> static void call_unpack(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -317,7 +317,7 @@ struct call_serialize_variadic<index_tuple<prp...>> template<typename op, typename T, typename S, template<typename> class layout_base, typename Memory> inline static void call_unpack(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -507,7 +507,7 @@ struct pack_unpack_cond_with_prp template<typename Memory> static void unpacking(S & recv, - openfpm::vector<BMemory<Memory>> & recv_buf, + openfpm::vector_fr<BMemory<Memory>> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param, @@ -553,7 +553,7 @@ struct op_ssend_recv_add_sr layout_base, prp...>(v2); - recv.template hostToDevice<prp...>(); + recv.template hostToDevice<prp...>(recv.size(),recv.size()+v2.size()); #endif @@ -719,7 +719,7 @@ struct op_ssend_gg_recv_merge_impl<true> 
recv.template merge_prp_v<replace_, typename T::value_type, HeapMemory, - openfpm::grow_policy_double, + typename S::grow_policy, layout_base, prp...>(v2,start); diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in new file mode 100644 index 0000000000000000000000000000000000000000..a0d62c9edc325fad8f80ce5ca11c06c068f57baf --- /dev/null +++ b/src/config/config_cmake.h.in @@ -0,0 +1,159 @@ +/* Coverity scan */ +${DEFINE_COVERTY_SCAN} + +/* GPU support */ +${DEFINE_CUDA_GPU} + +/* Debug */ +${DEFINE_DEBUG} /**/ + +/* Debug */ +${DEFINE_DEBUG_MODE} /**/ + +/* Define to dummy `main' function (if any) required to link to the Fortran + libraries. */ +${DEFINE_F77_DUMMY_MAIN} + +/* Define if F77 and FC dummy `main' functions are identical. */ +${DEFINE_FC_DUMMY_MAIN_EQ_F77} + +/* Define if you have a BLAS library. */ +${DEFINE_HAVE_BLAS} + +/* define if the Boost library is available */ +${DEFINE_HAVE_BOOST} + +/* define if the Boost::IOStreams library is available */ +${DEFINE_HAVE_BOOST_IOSTREAMS} /**/ + +/* define if the Boost::PROGRAM_OPTIONS library is available */ +${DEFINE_HAVE_BOOST_PROGRAM_OPTIONS} /**/ + +/* define if the Boost::Unit_Test_Framework library is available */ +${DEFINE_HAVE_BOOST_UNIT_TEST_FRAMEWORK} /**/ + +/* Have clock time */ +${DEFINE_HAVE_CLOCK_GETTIME} /**/ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +${DEFINE_HAVE_DLFCN_H} + +/* Define if you have EIGEN library. */ +${DEFINE_HAVE_EIGEN} + +/* Define to 1 if you have the <Eigen/Dense> header file. */ +${DEFINE_HAVE_EIGEN_DENSE} + +/* Define to 1 if you have the <Eigen/LU> header file. */ +${DEFINE_HAVE_EIGEN_LU} + +/* Defined if you have HDF5 support */ +${DEFINE_HAVE_HDF5} + +/* Define to 1 if you have the <inttypes.h> header file. 
*/ +${DEFINE_HAVE_INTTYPES_H} + +/* Define if you have LAPACK library */ +${DEFINE_HAVE_LAPACK} + +/* Define if you have LIBHILBERT library */ +${DEFINE_HAVE_LIBHILBERT} + +/* Have quad math lib */ +${DEFINE_HAVE_LIBQUADMATH} + +/* Define to 1 if you have the <memory.h> header file. */ +${DEFINE_HAVE_MEMORY_H} + +/* Define if you have METIS library */ +${DEFINE_HAVE_METIS} + +/* MPI Enabled */ +${DEFINE_HAVE_MPI} + +/* We have OSX */ +${DEFINE_HAVE_OSX} + +/* Define if you have PARMETIS library */ +${DEFINE_HAVE_PARMETIS} + +/* Define if you have PETSC library */ +${DEFINE_HAVE_PETSC} + +/* Define to 1 if you have the <stdint.h> header file. */ +${DEFINE_HAVE_STDINT_H} + +/* Define to 1 if you have the <stdlib.h> header file. */ +${DEFINE_HAVE_STDLIB_H} + +/* Define to 1 if you have the <strings.h> header file. */ +${DEFINE_HAVE_STRINGS_H} + +/* Define to 1 if you have the <string.h> header file. */ +${DEFINE_HAVE_STRING_H} + +/* Define if you have SUITESPARSE library. */ +${DEFINE_HAVE_SUITESPARSE} + +/* Define to 1 if you have the <sys/stat.h> header file. */ +${DEFINE_HAVE_SYS_STAT_H} + +/* Define to 1 if you have the <sys/types.h> header file. */ +${DEFINE_HAVE_SYS_TYPES_H} + +/* Define to 1 if you have the <unistd.h> header file. */ +${DEFINE_HAVE_UNISTD_H} + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* NVCC compiling */ +${DEFINE_NVCC} /**/ + +/* Name of package */ +#define PACKAGE "openfpm_pdata" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "BUG-REPORT-ADDRESS" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "OpenFPM_pdata" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "OpenFPM_pdata 1.0.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "openfpm_pdata" + +/* Define to the home page for this package. 
*/ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0.0" + +/* Test performance mode */ +${DEFINE_PERFORMANCE_TEST} + +/* Security enhancement class 1 */ +${DEFINE_SE_CLASS1} + +/* Security enhancement class 2 */ +${DEFINE_SE_CLASS2} + +/* Security enhancement class 3 */ +${DEFINE_SE_CLASS3} + +/* Define to 1 if you have the ANSI C header files. */ +${DEFINE_STDC_HEADERS} + +/* If an error occurs, stop the program */ +${DEFINE_STOP_ON_ERROR} + +/* Test coverage mode */ +${DEFINE_TEST_COVERAGE_MODE} + +/* When an error occurs, continue but avoid unsafe operations */ +/* #undef THROW_ON_ERROR */ + +/* Version number of package */ +#define VERSION "1.0.0"