From af68dafb1a6fcf3e2a93e4187ed48dea06e959ed Mon Sep 17 00:00:00 2001 From: Pietro Incardona Date: Sat, 8 Jun 2019 22:07:22 +0200 Subject: [PATCH] Fixing for tracking Operators + async ghost --- example/Vector/7_SPH_dlb_opt/main_dbg.cpp | 2 - openfpm_numerics | 2 +- src/CMakeLists.txt | 2 +- src/Decomposition/Domain_icells_cart.hpp | 27 +--- src/Vector/cuda/vector_dist_cuda_func_test.cu | 1 + src/Vector/vector_dist.hpp | 76 ++++++++- src/Vector/vector_dist_kernel.hpp | 151 +++++++++++++++++- src/Vector/vector_dist_key.hpp | 8 +- src/lib/pdata.cpp | 2 +- src/lib/pdata.hpp | 8 + 10 files changed, 238 insertions(+), 41 deletions(-) diff --git a/example/Vector/7_SPH_dlb_opt/main_dbg.cpp b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp index be6ec8ec..74bf16d4 100644 --- a/example/Vector/7_SPH_dlb_opt/main_dbg.cpp +++ b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp @@ -701,8 +701,6 @@ template inline double calc_forces(particles & vd, VerletLi /* if (sum1 != sum2) { - std::cout << "PORCA TROIA: " << std::endl; - break; }*/ diff --git a/openfpm_numerics b/openfpm_numerics index ed016fb3..1897a889 160000 --- a/openfpm_numerics +++ b/openfpm_numerics @@ -1 +1 @@ -Subproject commit ed016fb3b90c7176afbc6ce998c1d19dd177128f +Subproject commit 1897a889d91379f7023a7c771da59533c0dfa207 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 885745ba..1e048fec 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -77,7 +77,7 @@ target_include_directories (pdata PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) target_include_directories (pdata PUBLIC ${Boost_INCLUDE_DIRS}) target_link_libraries(pdata ${Boost_LIBRARIES}) -target_link_libraries(pdata -L${PARMETIS_ROOT}/lib parmetis) +target_link_libraries(pdata ${PARMETIS_LIBRARIES}) target_link_libraries(pdata -L${METIS_ROOT}/lib metis) target_link_libraries(pdata -L${HDF5_ROOT}/lib hdf5 hdf5_hl) target_link_libraries(pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES}) diff --git a/src/Decomposition/Domain_icells_cart.hpp b/src/Decomposition/Domain_icells_cart.hpp index c7b30d9b..9055ff39 100644 --- a/src/Decomposition/Domain_icells_cart.hpp +++ b/src/Decomposition/Domain_icells_cart.hpp @@ -175,26 +175,14 @@ class domain_icell_calculator auto ite = g.getGPUIterator(p1,p2,256); + if (ite.wthr.x == 0) + {continue;} + vsi.setGPUInsertBuffer(ite.nblocks(),256); - insert_icell<<>>(vsi.toKernel(),cld,ite.start,p2); + CUDA_LAUNCH((insert_icell),ite,vsi.toKernel(),cld,ite.start,p2); vsi.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); - - ////////////////////////// DEBUG //////////////////////// - - vsi.private_get_vct_index().template deviceToHost<0>(); - auto & test = vsi.private_get_vct_index(); - - for (int k = 0 ; k < test.size() - 1 ; k++) - { - if (test.template get<0>(k) > test.template get<0>(k+1)) - { - std::cout << "BBBBBBUUUUUUUUUUUUUUUGGGGGGG" << std::endl; - } - } - - ///////////////////////////////////////////////////////// } // calculate the number of kernel launch @@ -211,17 +199,18 @@ class domain_icell_calculator auto p1 = cld.getCell(bx.getP1()); auto p2 = cld.getCell(pp2); - auto ite = g.getGPUIterator(p1,p2,256); + if (ite.wthr.x == 0) + {continue;} + vs.setGPUInsertBuffer(ite.nblocks(),256); vsi.setGPURemoveBuffer(ite.nblocks(),256); - insert_remove_icell<<>>(vs.toKernel(),vsi.toKernel(),cld,ite.start,p2); + CUDA_LAUNCH(insert_remove_icell,ite,vs.toKernel(),vsi.toKernel(),cld,ite.start,p2); vs.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); vsi.flush_remove(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); - } vs.swapIndexVector(icells); diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index 8d49cd59..4088971a 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -1262,6 +1262,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu ) // Distributed vector vector_dist_gpu<3,float,part_prop> vd(k,box,bc,ghost,BIND_DEC_TO_GHOST); + size_t start = vd.init_size_accum(k); auto it = vd.getIterator(); diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 26f542e4..14e50cfd 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -37,6 +37,8 @@ #include "NN/CellList/ProcKeys.hpp" #include "Vector/vector_dist_kernel.hpp" #include "NN/CellList/cuda/CellList_gpu.hpp" +#include "lib/pdata.hpp" +#include "cuda/vector_dist_operators_list_ker.hpp" #define DEC_GRAN(gr) ((size_t)gr << 32) @@ -193,6 +195,30 @@ enum reorder_opt LINEAR = 2 }; +template +struct cell_list_selector +{ + typedef decltype(std::declval().getCellListGPU(0.0).toKernel()) ctype; + + static ctype get(vector & v, + typename vector::stype & r_cut) + { + return v.getCellListGPU(r_cut).toKernel(); + } +}; + +template +struct cell_list_selector +{ + typedef decltype(std::declval().getCellList(0.0)) ctype; + + static ctype get(vector & v, + typename vector::stype & r_cut) + { + return v.getCellList(r_cut); + } +}; + /*! \brief Distributed vector * * This class represent a distributed vector, the distribution of the structure @@ -227,14 +253,14 @@ enum reorder_opt * \tparam Memory layout * */ - template, typename Memory = HeapMemory, template class layout_base = memory_traits_lin> -class vector_dist : public vector_dist_comm +class vector_dist : public vector_dist_comm, + private vector_dist_ker_list> { public: @@ -541,6 +567,7 @@ public: this->init_decomposition(box,bc,g,opt,gdist); + #ifdef SE_CLASS3 se3.Initialize(); #endif @@ -1125,6 +1152,22 @@ public: return cell_list; } + /*! \brief Construct a cell list starting from the stored particles + * + * \tparam CellL CellList type to construct + * + * \param r_cut interation radius, or size of each cell + * \param no_se3 avoid SE_CLASS3 checking + * + * \return the Cell list + * + */ + template + typename cell_list_selector::ctype getCellListDev(St r_cut) + { + return cell_list_selector::get(*this,r_cut); + } + /*! \brief Construct a cell list starting from the stored particles * * \tparam CellL CellList type to construct @@ -2157,6 +2200,8 @@ public: this->template map_list_(v_pos,v_prp,g_m,opt); + this->update(this->toKernel()); + #ifdef SE_CLASS3 se3.map_post(); #endif @@ -2182,6 +2227,8 @@ public: this->template map_(v_pos,v_prp,g_m,opt); + this->update(this->toKernel()); + #ifdef SE_CLASS3 se3.map_post(); #endif @@ -2210,6 +2257,8 @@ public: this->template ghost_get_(v_pos,v_prp,g_m,opt); + this->update(this->toKernel()); + #ifdef SE_CLASS3 this->template ghost_get_(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES); @@ -2262,6 +2311,8 @@ public: this->template ghost_wait_(v_pos,v_prp,g_m,opt); + this->update(this->toKernel()); + #ifdef SE_CLASS3 this->template ghost_get_(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES); @@ -2505,6 +2556,8 @@ public: v_prp.resize(rs); g_m = rs; + + this->update(this->toKernel()); } /*! \brief Output particle position and properties @@ -2795,13 +2848,24 @@ public: * \return an usable vector in the kernel * */ - template vector_dist_ker toKernel() + template vector_dist_ker toKernel() { - vector_dist_ker v(g_m,v_pos.toKernel(), v_prp.toKernel()); + vector_dist_ker v(g_m,v_pos.toKernel(), v_prp.toKernel()); return v; } + /*! \brief Return the internal vector_dist_ker_list structure + * + * + * + * \return + */ + vector_dist_ker_list> & private_get_vector_dist_ker_list() + { + return *this; + } + /*! \brief Convert the grid into a data-structure compatible for computing into GPU * * In comparison with toGPU return a version sorted better for coalesced memory @@ -2809,9 +2873,9 @@ public: * \return an usable vector in the kernel * */ - template vector_dist_ker toKernel_sorted() + template vector_dist_ker toKernel_sorted() { - vector_dist_ker v(g_m,v_pos_out.toKernel(), v_prp_out.toKernel()); + vector_dist_ker v(g_m,v_pos_out.toKernel(), v_prp_out.toKernel()); return v; } diff --git a/src/Vector/vector_dist_kernel.hpp b/src/Vector/vector_dist_kernel.hpp index 812233ae..e05e55df 100644 --- a/src/Vector/vector_dist_kernel.hpp +++ b/src/Vector/vector_dist_kernel.hpp @@ -16,9 +16,44 @@ #define GET_PARTICLE_SORT(p,NN) if (blockDim.x*blockIdx.x + threadIdx.x >= NN.get_g_m()) {return;}\ else{p = NN.getDomainSortIds().template get<0>(blockDim.x*blockIdx.x + threadIdx.x);} +/*! \brief this class is a functor for "for_each" algorithm + * + * This class is a functor for "for_each" algorithm. For each + * element of the boost::vector the operator() is called. + * + */ +template +struct check_vector_dist_kernels +{ + //! op1 + const vector_dist_ker & o1; + //! op2 + const vector_dist_ker & o2; + + bool check; + + /*! \brief constructor + * + * \param src source encapsulated object + * \param dst source encapsulated object + * + */ + inline check_vector_dist_kernels(const vector_dist_ker & o1, const vector_dist_ker & o2) + :o1(o1),o2(o2),check(false) + {}; + + //! It call the copy function for each property + template + __device__ __host__ inline void operator()(T& t) + { + check &= o1.template getPointer() == o2.template getPointer(); + } +}; + template + typename prop, + template class layout_base = memory_traits_inte> class vector_dist_ker { //! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle @@ -26,11 +61,11 @@ class vector_dist_ker //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - mutable openfpm::vector_gpu_ker,memory_traits_inte> v_pos; + mutable openfpm::vector_gpu_ker,layout_base> v_pos; //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - mutable openfpm::vector_gpu_ker v_prp; + mutable openfpm::vector_gpu_ker::type,layout_base> v_prp; public: @@ -40,7 +75,11 @@ public: //! dimensions of space static const unsigned int dims = dim; - vector_dist_ker(int g_m, const openfpm::vector_gpu_ker,memory_traits_inte> & v_pos, const openfpm::vector_gpu_ker & v_prp) + //! tag the type as a vector that run on kernel + typedef int vector_kernel; + + vector_dist_ker(int g_m, const openfpm::vector_gpu_ker,layout_base> & v_pos, + const openfpm::vector_gpu_ker::type,layout_base> & v_prp) :g_m(g_m),v_pos(v_pos),v_prp(v_prp) {} @@ -49,14 +88,14 @@ public: * \return the number of particles * */ - __device__ int size_local() {return g_m;} + __device__ __host__ int size_local() const {return g_m;} /*! \brief return the number of particles * * \return the number of particles * */ - __device__ int size() {return v_pos.size();} + __device__ __host__ int size() const {return v_pos.size();} /*! \brief Get the position of an element * @@ -72,6 +111,20 @@ public: return v_pos.template get<0>(vec_key); } + /*! \brief Get the position of an element + * + * see the vector_dist iterator usage to get an element key + * + * \param vec_key element + * + * \return the position of the element in space + * + */ + __device__ inline auto getPos(const vect_dist_key_dx & vec_key) -> decltype(v_pos.template get<0>(vec_key.getKey())) + { + return v_pos.template get<0>(vec_key.getKey()); + } + /*! \brief Get the position of an element * * see the vector_dist iterator usage to get an element key @@ -86,6 +139,20 @@ public: return v_pos.template get<0>(vec_key); } + /*! \brief Get the position of an element + * + * see the vector_dist iterator usage to get an element key + * + * \param vec_key element + * + * \return the position of the element in space + * + */ + __device__ inline auto getPos(const vect_dist_key_dx & vec_key) const -> decltype(v_pos.template get<0>(vec_key.getKey())) + { + return v_pos.template get<0>(vec_key.getKey()); + } + /*! \brief Get the property of an element * * see the vector_dist iterator usage to get an element key @@ -101,6 +168,21 @@ public: return v_prp.template get(vec_key); } + /*! \brief Get the property of an element + * + * see the vector_dist iterator usage to get an element key + * + * \tparam id property id + * \param vec_key vector element + * + * \return return the selected property of the vector element + * + */ + template __device__ inline auto getProp(const vect_dist_key_dx & vec_key) -> decltype(v_prp.template get(vec_key.getKey())) + { + return v_prp.template get(vec_key.getKey()); + } + /*! \brief Get the property of an element * * see the vector_dist iterator usage to get an element key @@ -116,6 +198,21 @@ public: return v_prp.template get(vec_key); } + /*! \brief Get the property of an element + * + * see the vector_dist iterator usage to get an element key + * + * \tparam id property id + * \param vec_key vector element + * + * \return return the selected property of the vector element + * + */ + template __device__ inline auto getProp(const vect_dist_key_dx & vec_key) const -> decltype(v_prp.template get(vec_key.getKey())) + { + return v_prp.template get(vec_key.getKey()); + } + /*! \brief Return the internal position vector * * @@ -137,6 +234,46 @@ public: return v_prp; } + __host__ vector_dist_iterator getDomainIterator() const + { + std::cout << __FILE__ << ":" << __LINE__ << " error getDomainIterator used on a vector_dist_ker object is not allowed" << std::endl; + + return vector_dist_iterator(0,0); + } + + /*! \brief Get an iterator that traverse the particles in the domain + * + * \return an iterator + * + */ + __host__ ite_gpu<1> getDomainIteratorGPU(size_t n_thr = 1024) const + { +#ifdef SE_CLASS3 + se3.getIterator(); +#endif + + return v_pos.getGPUIteratorTo(g_m,n_thr); + } + + /*! \brief Check that the two structures are the same (at level of pointers) + * + * + * \return + */ + __host__ bool operator==(const vector_dist_ker & v) + { + if (v.v_pos.template getPointer<0>() != v_pos.template getPointer<0>()) + {return false;} + + check_vector_dist_kernels> cv(this->v_prp,v.v_prp); + + cv.check = true; + + // Do the same for the properties + boost::mpl::for_each_ref< boost::mpl::range_c >(cv); + + return cv.check; + } }; // This is a tranformation node for vector_distributed for the algorithm toKernel_tranform @@ -145,7 +282,7 @@ struct toKernel_transform { typedef typename apply_transform::type aggr; - typedef vector_dist_ker type; + typedef vector_dist_ker type; }; #endif diff --git a/src/Vector/vector_dist_key.hpp b/src/Vector/vector_dist_key.hpp index 821a2920..1717ae61 100644 --- a/src/Vector/vector_dist_key.hpp +++ b/src/Vector/vector_dist_key.hpp @@ -29,7 +29,7 @@ public: * \param key the local key * */ - inline void setKey(size_t key) + __device__ __host__ inline void setKey(size_t key) { this->key = key; } @@ -39,7 +39,7 @@ public: * \return the local key * */ - inline size_t getKey() const + __device__ __host__ inline size_t getKey() const { return key; } @@ -65,13 +65,13 @@ public: }*/ //! Default constructor - inline vect_dist_key_dx() + __device__ __host__ inline vect_dist_key_dx() { /* coverity[uninit_member] */ } //! Default constructor - inline vect_dist_key_dx(size_t key) + __device__ __host__ inline vect_dist_key_dx(size_t key) :key(key) { } diff --git a/src/lib/pdata.cpp b/src/lib/pdata.cpp index 6dc8609e..7819fedd 100644 --- a/src/lib/pdata.cpp +++ b/src/lib/pdata.cpp @@ -6,10 +6,10 @@ */ #include "pdata.hpp" #include "SubdomainGraphNodes.hpp" +#include "memory/CudaMemory.cuh" const std::string nm_v::attributes::name[] = {"x","migration","computation","global_id","id","sub_id","proc_id","id","fake_v"}; const std::string nm_e::attributes::name[] = {"communication","srcgid","dstgid"}; const std::string nm_part_v::attributes::name[] = {"id","sub_id"}; const std::string nm_part_e::attributes::name[] = {"id"}; - diff --git a/src/lib/pdata.hpp b/src/lib/pdata.hpp index 968f9d4f..919f9d64 100644 --- a/src/lib/pdata.hpp +++ b/src/lib/pdata.hpp @@ -1,2 +1,10 @@ #include "config.h" +#ifndef PDATA_HPP_ +#define PDATA_HPP_ + +constexpr int comp_host = 1; +constexpr int comp_dev = 2; + + +#endif -- GitLab