From c1c3e0c4e8586bf5061dbabbfa48dd0c2dc86960 Mon Sep 17 00:00:00 2001 From: Serhii Yaskovets <yaskovet@mpi-cbg.de> Date: Tue, 11 Apr 2023 14:54:52 +0200 Subject: [PATCH] Revert "Merge remote-tracking branch 'origin/FD_solver'" This reverts commit 94125e792a7edc2d3927453d6563a66342ed5ebc, reversing changes made to f61deb2e21ac22abfb96d75e6874a8842bbb38e8. --- src/Grid/grid_base_implementation.hpp | 2 +- src/Grid/grid_key.hpp | 2 +- src/Grid/grid_sm.hpp | 27 ++---- src/NN/CellList/CellList_gpu_test.cu | 30 +++--- .../CellList/cuda/CellDecomposer_gpu_ker.cuh | 95 ++----------------- src/NN/CellList/cuda/CellList_gpu.hpp | 89 ++++++++--------- src/NN/CellList/cuda/CellList_gpu_ker.cuh | 29 +++--- .../cuda/Cuda_cell_list_util_func.hpp | 60 ++++++------ src/Space/Shape/Point.hpp | 2 +- .../performance/performancePlots.cpp | 6 +- src/Vector/cuda/map_vector_cuda_ker.cuh | 80 +--------------- src/Vector/cuda/map_vector_std_cuda.hpp | 15 +-- src/Vector/cuda/map_vector_std_cuda_ker.cuh | 21 ---- src/Vector/map_vector.hpp | 43 ++------- src/Vector/map_vector_std.hpp | 10 -- src/Vector/vector_subset.hpp | 67 ------------- src/util/mathutil.hpp | 35 +++---- 17 files changed, 140 insertions(+), 473 deletions(-) delete mode 100644 src/Vector/vector_subset.hpp diff --git a/src/Grid/grid_base_implementation.hpp b/src/Grid/grid_base_implementation.hpp index 7a1a6693..60392952 100644 --- a/src/Grid/grid_base_implementation.hpp +++ b/src/Grid/grid_base_implementation.hpp @@ -752,7 +752,7 @@ public: { auto key = it.get(); - if (this->get_o(key) != g.get_o(key)) + if (this->get_o(key) != this->get_o(key)) return false; ++it; diff --git a/src/Grid/grid_key.hpp b/src/Grid/grid_key.hpp index 90fab130..106f7ee5 100644 --- a/src/Grid/grid_key.hpp +++ b/src/Grid/grid_key.hpp @@ -397,7 +397,7 @@ public: * */ template<typename a, typename ...T> - __device__ __host__ inline void set(a v, T...t) + inline void set(a v, T...t) { #ifdef SE_CLASS1 if (sizeof...(t) != dim -1) diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp index 651608b2..e4511d29 100644 --- a/src/Grid/grid_sm.hpp +++ b/src/Grid/grid_sm.hpp @@ -186,7 +186,7 @@ class grid_sm * */ - __device__ __host__ inline void Initialize(const size_t sz) + inline void Initialize(const size_t sz) { //! Initialize the basic structure for each dimension sz_s[0] = sz; @@ -217,7 +217,7 @@ class grid_sm * */ - __device__ __host__ inline void Initialize(const size_t (& sz)[N]) + inline void Initialize(const size_t (& sz)[N]) { //! Initialize the basic structure for each dimension sz_s[0] = sz[0]; @@ -353,8 +353,7 @@ public: * */ - template<typename S> - __device__ __host__ inline grid_sm(const grid_sm<N,S> & g) + template<typename S> inline grid_sm(const grid_sm<N,S> & g) { size_tot = g.size_tot; @@ -381,7 +380,7 @@ public: // Static element to calculate total size - __device__ __host__ inline size_t totalSize(const size_t (& sz)[N]) + inline size_t totalSize(const size_t (& sz)[N]) { size_t tSz = 1; @@ -416,7 +415,7 @@ public: * */ - __device__ __host__ inline grid_sm(const size_t (& sz)[N]) + inline grid_sm(const size_t (& sz)[N]) : size_tot(totalSize(sz)) { Initialize(sz); @@ -626,7 +625,7 @@ public: } //! Destructor - __device__ __host__ ~grid_sm() {}; + ~grid_sm() {}; /*! \brief Return the size of the grid * @@ -714,7 +713,7 @@ public: * */ - __device__ __host__ inline size_t size_s(unsigned int i) const + inline size_t size_s(unsigned int i) const { return sz_s[i]; } @@ -738,21 +737,11 @@ public: * \return get the size of the grid as an array * */ - __device__ __host__ inline const size_t (& getSize() const)[N] + inline const size_t (& getSize() const)[N] { return sz; } - /*! \brief Returns the size of the grid in the passed array - * - * \return the size of the grid in the passed array - * - */ - __device__ __host__ inline void getSize(size_t (&size) [N]) const - { - for (size_t i = 0; i < N; ++i) size[i] = sz[i]; - } - /*! \brief Return a sub-grid iterator * * Return a sub-grid iterator, to iterate through the grid diff --git a/src/NN/CellList/CellList_gpu_test.cu b/src/NN/CellList/CellList_gpu_test.cu index 2d19dd2a..55ad9a05 100644 --- a/src/NN/CellList/CellList_gpu_test.cu +++ b/src/NN/CellList/CellList_gpu_test.cu @@ -83,16 +83,17 @@ void test_sub_index() no_transform_only<dim,T> t(mt,pt); - CUDA_LAUNCH_DIM3((subindex<false,dim,T,cnt_type,ids_type,no_transform_only<dim,T>>),ite.wthr,ite.thr,div, spacing, off, t, + pl.capacity(), pl.size(), + part_ids.capacity(), 0, - pl.toKernel(), - cl_n.toKernel(), - part_ids.toKernel()); + static_cast<T *>(pl.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); cl_n.template deviceToHost<0>(); part_ids.template deviceToHost<0>(); @@ -202,16 +203,17 @@ void test_sub_index2() shift_only<dim,T> t(mt,pt); - CUDA_LAUNCH_DIM3((subindex<false,dim,T,cnt_type,ids_type,shift_only<dim,T>>),ite.wthr,ite.thr,div, spacing, off, t, + pl.capacity(), pl.size(), + part_ids.capacity(), 0, - pl.toKernel(), - cl_n.toKernel(), - part_ids.toKernel()); + static_cast<T *>(pl.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); cl_n.template deviceToHost<0>(); part_ids.template deviceToHost<0>(); @@ -385,10 +387,11 @@ void test_fill_cell() div_c, off, part_ids.size(), + part_ids.capacity(), 0, - starts.toKernel(), - part_ids.toKernel(), - cells.toKernel() ); + static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) ); cells.template deviceToHost<0>(); @@ -663,8 +666,7 @@ void test_reorder_parts(size_t n_part) // Here we test fill cell CUDA_LAUNCH_DIM3((reorder_parts<decltype(parts_prp.toKernel()), decltype(pl.toKernel()), - decltype(sort_to_not_sort.toKernel()), - decltype(cells_out.toKernel()), + decltype(sort_to_not_sort.toKernel()), cnt_type, shift_ph<0,cnt_type>>),ite.wthr,ite.thr,pl.size(), parts_prp.toKernel(), @@ -673,7 +675,7 @@ void test_reorder_parts(size_t n_part) pl_out.toKernel(), sort_to_not_sort.toKernel(), non_sort_to_sort.toKernel(), - cells_out.toKernel()); + static_cast<cnt_type *>(cells_out.template getDeviceBuffer<0>())); bool check = true; parts_prp_out.template deviceToHost<0>(); diff --git a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh index 3981ce07..fc108d88 100644 --- a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh @@ -29,69 +29,28 @@ class CellDecomposer_gpu_ker //! transformation transform t; - //! Unit box of the Cell list - SpaceBox<dim,T> box_unit; - - //! Grid structure of the Cell list - grid_sm<dim,void> gr_cell; - - //! cell_shift - Point<dim,long int> cell_shift; public: __device__ __host__ CellDecomposer_gpu_ker() {} - __device__ __host__ CellDecomposer_gpu_ker( - openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t) - : spacing_c(spacing_c),div_c(div_c),off(off),t(t) + __device__ __host__ CellDecomposer_gpu_ker(openfpm::array<T,dim,cnt_type> & spacing_c, + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t) + :spacing_c(spacing_c),div_c(div_c),off(off),t(t) {} - __device__ __host__ CellDecomposer_gpu_ker( - openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - SpaceBox<dim,T> box_unit, - grid_sm<dim,void> gr_cell, - Point<dim,long int> cell_shift) - : spacing_c(spacing_c),div_c(div_c),off(off),t(t), - box_unit(box_unit), gr_cell(gr_cell), cell_shift(cell_shift) - {} - - __device__ __host__ grid_sm<dim,void> getGrid() + __host__ grid_sm<dim,void> getGrid() { size_t sz[dim]; - for (size_t i = 0 ; i < dim ; i++) - { - sz[i] = div_c[i]; - } - - return grid_sm<dim,void> (sz); - } - - __device__ __host__ void getGridSize(size_t (& sz)[dim]) const - { for (size_t i = 0 ; i < dim ; i++) { sz[i] = div_c[i] + 2*off[i]; } - } - template<typename ids_type2> - __device__ __host__ mem_id getGridLinId(const grid_key_dx<dim,ids_type2> & gk) const - { - mem_id lid = gk.get(0); - for (mem_id i = 1 ; i < dim ; i++) - { - lid += gk.get(i) * (div_c[i-1] + 2*off[i-1]); - } - - return lid; + return grid_sm<dim,void> (sz); } __device__ __host__ inline grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const @@ -123,46 +82,6 @@ public: { return t; } - - __device__ __host__ inline grid_key_dx<dim> getCellGrid(const T (& pos)[dim]) const - { - grid_key_dx<dim> key; - key.set_d(0,ConvertToID(pos,0)); - - for (size_t s = 1 ; s < dim ; s++) - { - key.set_d(s,ConvertToID(pos,s)); - } - - return key; - } - - __device__ __host__ inline grid_key_dx<dim> getCellGrid(const Point<dim,T> & pos) const - { - grid_key_dx<dim> key; - key.set_d(0,ConvertToID(pos,0)); - - for (size_t s = 1 ; s < dim ; s++) - { - key.set_d(s,ConvertToID(pos,s)); - } - - return key; - } - - __device__ __host__ inline size_t ConvertToID(const T (&x)[dim] ,size_t s) const - { - size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s]; - id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s); - return id; - } - - __device__ __host__ inline size_t ConvertToID(const Point<dim,T> & x ,size_t s, size_t sc = 0) const - { - size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s]; - id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s); - return id; - } }; #endif /* CELLDECOMPOSER_GPU_KER_HPP_ */ diff --git a/src/NN/CellList/cuda/CellList_gpu.hpp b/src/NN/CellList/cuda/CellList_gpu.hpp index 5ee0e2b0..c7623583 100644 --- a/src/NN/CellList/cuda/CellList_gpu.hpp +++ b/src/NN/CellList/cuda/CellList_gpu.hpp @@ -44,10 +44,7 @@ struct CellList_gpu_ker_selector openfpm::array<ids_type,dim,cnt_type> & div_c, openfpm::array<ids_type,dim,cnt_type> & off, const transform & t, - unsigned int g_m, - const SpaceBox<dim,T>& box_unit, - const grid_sm<dim,void>& gr_cell, - const Point<dim,long int>& cell_shift) + unsigned int g_m) { return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(), sorted_to_not_sorted.toKernel(), @@ -57,10 +54,7 @@ struct CellList_gpu_ker_selector div_c, off, t, - g_m, - box_unit, - gr_cell, - cell_shift); + g_m); } }; @@ -79,14 +73,10 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector vector_cnt_type & dprt, openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m, - const SpaceBox<dim,T>& box_unit, - const grid_sm<dim,void>& gr_cell, - const Point<dim,long int>& cell_shift) - + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m) { return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(), cell_nn_list.toKernel(), @@ -96,11 +86,7 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector spacing_c, div_c, off, - t, - g_m, - box_unit, - gr_cell, - cell_shift); + t,g_m); } }; @@ -261,11 +247,13 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> spacing_c, off, this->getTransform(), + pl.capacity(), pl.size(), + part_ids.capacity(), start, - pl.toKernel(), - starts.toKernel(), - part_ids.toKernel()); + static_cast<T *>(pl.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); // now we construct the cells @@ -304,7 +292,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()), decltype(pl.toKernel()), decltype(sorted_to_not_sorted.toKernel()), - decltype(cells.toKernel()), cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(), pl_prp.toKernel(), pl_prp_out.toKernel(), @@ -312,7 +299,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> pl_out.toKernel(), sorted_to_not_sorted.toKernel(), non_sorted_to_sorted.toKernel(), - cells.toKernel()); + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); if (opt == cl_construct_opt::Full) { @@ -390,11 +377,13 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> spacing_c, off, this->getTransform(), + pl.capacity(), stop, + part_ids.capacity(), start, - pl.toKernel(), - cl_n.toKernel(), - part_ids.toKernel()); + static_cast<T *>(pl.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); // now we scan starts.resize(cl_n.size()); @@ -410,7 +399,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0, part_ids.size(), - cells.toKernel()) ); + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) ); // sort @@ -419,13 +408,14 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> #else CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0, - div_c, - off, - part_ids.size(), - start, - starts.toKernel(), - part_ids.toKernel(), - cells.toKernel() ); + div_c, + off, + part_ids.size(), + part_ids.capacity(), + start, + static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()), + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) ); #endif @@ -441,7 +431,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()), decltype(pl.toKernel()), decltype(sorted_to_not_sorted.toKernel()), - decltype(cells.toKernel()), cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(), pl_prp.toKernel(), pl_prp_out.toKernel(), @@ -449,7 +438,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> pl_out.toKernel(), sorted_to_not_sorted.toKernel(), non_sorted_to_sorted.toKernel(), - cells.toKernel()); + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); } else { @@ -457,7 +446,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()), decltype(pl.toKernel()), decltype(sorted_to_not_sorted.toKernel()), - decltype(cells.toKernel()), cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(), pl_prp.toKernel(), pl_prp_out.toKernel(), @@ -465,7 +453,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform> pl_out.toKernel(), sorted_to_not_sorted.toKernel(), non_sorted_to_sorted.toKernel(), - cells.toKernel()); + static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); } if (opt == cl_construct_opt::Full) @@ -660,17 +648,14 @@ public: cells_nn, cells_nn_list, cl_sparse, - sorted_to_not_sorted, - sorted_domain_particles_ids, - nnc_rad, - spacing_c, - div_c, - off, - this->getTransform(), - g_m, - this->box_unit, - this->gr_cell, - this->cell_shift); + sorted_to_not_sorted, + sorted_domain_particles_ids, + nnc_rad, + spacing_c, + div_c, + off, + this->getTransform(), + g_m); } /*! \brief Clear the structure diff --git a/src/NN/CellList/cuda/CellList_gpu_ker.cuh b/src/NN/CellList/cuda/CellList_gpu_ker.cuh index becb50c5..004716a7 100644 --- a/src/NN/CellList/cuda/CellList_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellList_gpu_ker.cuh @@ -449,14 +449,11 @@ public: openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt, openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m, - SpaceBox<dim,T> box_unit, - grid_sm<dim,void> gr_cell, - Point<dim,long int> cell_shift) - :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift), + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m) + :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t), starts(starts),srt(srt),dprt(dprt),rad_cells(rad_cells),g_m(g_m) { } @@ -619,16 +616,12 @@ public: openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt, openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m, - SpaceBox<dim,T> box_unit, - grid_sm<dim,void> gr_cell, - Point<dim,long int> cell_shift) - - :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift), - cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),cl_sparse(cl_sparse),g_m(g_m) + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m) + :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt), + cl_sparse(cl_sparse),g_m(g_m) { } diff --git a/src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp b/src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp index e50335e7..0d7c86a9 100644 --- a/src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp +++ b/src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp @@ -258,17 +258,18 @@ __device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid) #ifdef __NVCC__ template<bool is_sparse,unsigned int dim, typename pos_type, - typename cnt_type, typename ids_type, typename transform, - typename vector_pos_type, typename vector_cnt_type, typename vector_pids_type> + typename cnt_type, typename ids_type, typename transform> __global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div, openfpm::array<pos_type,dim,cnt_type> spacing, openfpm::array<ids_type,dim,cnt_type> off, transform t, + int n_cap, int n_part, + int n_cap2, cnt_type start, - vector_pos_type p_pos, - vector_cnt_type counts, - vector_pids_type p_ids) + pos_type * p_pos, + cnt_type *counts, + cnt_type * p_ids) { cnt_type i, cid, ins; ids_type e[dim+1]; @@ -280,23 +281,23 @@ __global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div, pos_type p[dim]; for (size_t k = 0 ; k < dim ; k++) - {p[k] = p_pos.template get<0>(i)[k];} + {p[k] = p_pos[i+k*n_cap];} cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e); if (is_sparse == false) { - e[dim] = atomicAdd(&counts.template get<0>(cid), 1); + e[dim] = atomicAdd(counts + cid, 1); - p_ids.template get<0>(ins)[0] = cid; - {p_ids.template get<0>(ins)[1] = e[dim];} + p_ids[ins] = cid; + {p_ids[ins+1*(n_cap2)] = e[dim];} } else { - p_ids.template get<0>(ins)[0] = cid; - {p_ids.template get<0>(ins)[1] = e[dim];} + p_ids[ins] = cid; + {p_ids[ins+1*(n_cap2)] = e[dim];} - counts.template get<0>(ins) = cid; + counts[ins] = cid; } } @@ -352,42 +353,43 @@ __global__ void subindex_without_count(openfpm::array<ids_type,dim,cnt_type> div #ifdef MAKE_CELLLIST_DETERMINISTIC -template<unsigned int dim, typename cnt_type, typename ids_type, typename ph, typename vector_cells_type> +template<unsigned int dim, typename cnt_type, typename ids_type, typename ph> __global__ void fill_cells(cnt_type phase_id , cnt_type n, - vector_cells_type cells) + cnt_type *cells) { cnt_type i; i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; - cells.template get<0>(i) = encode_phase_id<cnt_type,ph>(phase_id,i); + cells[i] = encode_phase_id<cnt_type,ph>(phase_id,i); } #else -template<unsigned int dim, typename cnt_type, typename ids_type, typename ph,typename vector_starts_type, typename vector_pids_type, typename vector_cells_type> +template<unsigned int dim, typename cnt_type, typename ids_type, typename ph> __global__ void fill_cells(cnt_type phase_id , openfpm::array<ids_type,dim,cnt_type> div_c, openfpm::array<ids_type,dim,cnt_type> off, cnt_type n, + cnt_type n_cap, cnt_type start_p, - vector_starts_type starts, - vector_pids_type p_ids, - vector_cells_type cells) + const cnt_type *starts, + const cnt_type * p_ids, + cnt_type *cells) { cnt_type i, cid, id, start; i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; - cid = p_ids.template get<0>(i)[0]; + cid = p_ids[i]; - start = starts.template get<0>(cid); - id = start + p_ids.template get<0>(i)[1]; + start = starts[cid]; + id = start + p_ids[1*n_cap+i]; - cells.template get<0>(id) = encode_phase_id<cnt_type,ph>(phase_id,i + start_p); + cells[id] = encode_phase_id<cnt_type,ph>(phase_id,i + start_p); } #endif @@ -422,7 +424,7 @@ __device__ inline void reorder_wprp(const vector_prp & input, output.template set<prp ...>(dst_id,input,src_id); } -template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh> +template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh> __global__ void reorder_parts(int n, const vector_prp input, vector_prp output, @@ -430,12 +432,12 @@ __global__ void reorder_parts(int n, vector_pos output_pos, vector_ns sorted_non_sorted, vector_ns non_sorted_to_sorted, - const vector_cells_type cells) + const cnt_type * cells) { cnt_type i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; - cnt_type code = cells.template get<0>(i); + cnt_type code = cells[i]; reorder(input, output, code,i); reorder(input_pos,output_pos,code,i); @@ -444,7 +446,7 @@ __global__ void reorder_parts(int n, non_sorted_to_sorted.template get<0>(code) = i; } -template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh, unsigned int ... prp> +template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh, unsigned int ... prp> __global__ void reorder_parts_wprp(int n, const vector_prp input, vector_prp output, @@ -452,12 +454,12 @@ __global__ void reorder_parts_wprp(int n, vector_pos output_pos, vector_ns sorted_non_sorted, vector_ns non_sorted_to_sorted, - const vector_cells_type cells) + const cnt_type * cells) { cnt_type i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; - cnt_type code = cells.template get<0>(i); + cnt_type code = cells[i]; reorder_wprp<vector_prp,cnt_type,prp...>(input, output, code,i); reorder(input_pos,output_pos,code,i); diff --git a/src/Space/Shape/Point.hpp b/src/Space/Shape/Point.hpp index 6e49d0e9..91f62252 100644 --- a/src/Space/Shape/Point.hpp +++ b/src/Space/Shape/Point.hpp @@ -416,7 +416,7 @@ template<unsigned int dim ,typename T> class Point * \return the reference * */ - __device__ __host__ T & value(size_t i) + T & value(size_t i) { return get(i); } diff --git a/src/SparseGridGpu/performance/performancePlots.cpp b/src/SparseGridGpu/performance/performancePlots.cpp index 3cd3b0e4..01a70924 100644 --- a/src/SparseGridGpu/performance/performancePlots.cpp +++ b/src/SparseGridGpu/performance/performancePlots.cpp @@ -45,8 +45,6 @@ void write_test_report(report_sparse_grid_tests &report_sparsegrid_funcs, std::s plotGetSingle2D(report_sparsegrid_funcs, testSet, plotCounter); plotGetNeighbourhood2D(report_sparsegrid_funcs, testSet, plotCounter); - - std::string file_xml_results(test_dir); file_xml_results += std::string("/") + std::string(perfResultsXmlFile); @@ -312,7 +310,7 @@ void plotDense3D(report_sparse_grid_tests &report_sparsegrid_funcs, std::set<std base + ".gridScaling(#).GFlops.mean"); report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").x.data(0).source", base + ".gridScaling(#).gridSize.x"); - report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x", true); + report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x", true); int bes = static_cast<int>( report_sparsegrid_funcs.graphs.template get<double>( base + ".gridScaling(0).blockSize")); report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").y.data(0).title", @@ -332,7 +330,7 @@ void plotDense3D(report_sparse_grid_tests &report_sparsegrid_funcs, std::set<std base + ".blockScaling(#).GFlops.mean"); report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").x.data(0).source", base + ".blockScaling(#).blockSize"); - report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x",true); + report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x",true); int ges = static_cast<int>( report_sparsegrid_funcs.graphs.template get<double>( base + ".blockScaling(0).gridSize.x")); report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").y.data(0).title", diff --git a/src/Vector/cuda/map_vector_cuda_ker.cuh b/src/Vector/cuda/map_vector_cuda_ker.cuh index fef74515..ea202a7f 100644 --- a/src/Vector/cuda/map_vector_cuda_ker.cuh +++ b/src/Vector/cuda/map_vector_cuda_ker.cuh @@ -172,11 +172,6 @@ namespace openfpm return v_size; } - __host__ __device__ size_t size_local() const - { - return size(); - } - /*! \brief return the maximum capacity of the vector before reallocation * * \return the capacity of the vector @@ -209,38 +204,6 @@ namespace openfpm return base.template get<p>(key); } - /*! \brief Get an element of the vector - * - * Get an element of the vector - * - * \tparam p Property to get - * \param id Element to get - * - * \return the element value requested - * - */ - template <unsigned int p> - __device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) - { - return this->get<p>(id); - } - - - /*! \brief Get an element of the vector - * - * Get an element of the vector - * - * \tparam p Property to get - * \param id Element to get - * - * \return the element value requested - * - */ - template <unsigned int p, typename key_type> - __device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) - { - return this->get<p>(id.getKey()); - } /*! \brief Get an element of the vector * @@ -525,22 +488,7 @@ namespace openfpm return base.getGPUIterator(start,stop,n_thr); } - /*! \brief Get a domain iterator for the GPU - * - * - */ - ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const - { - return getGPUIterator(n_thr); - } - - //Stub for some expression - void init() const {} - - __host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0))) - { - return get<0>(p); - } + /*! \brief Get an iterator for the GPU * * @@ -553,7 +501,6 @@ namespace openfpm return base.getGPUIterator(start,stop_,n_thr); } - /*! \brief operator= this operator absorb the pointers, consider that this object wrap device pointers * * \param object to copy @@ -567,16 +514,6 @@ namespace openfpm return *this; } - __device__ __host__ vector_gpu_ker<T,layout_base> & getVector() - { - return *this; - } - - __device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const - { - return *this; - } - /*! \brief Return the base * * \return the base @@ -659,11 +596,6 @@ namespace openfpm return vref.size(); } - __host__ __device__ size_t size_local() const - { - return size(); - } - __device__ __host__ unsigned int capacity() const { return vref.capacity; @@ -760,16 +692,6 @@ namespace openfpm return vref.getGPUItertatorTo(stop,n_thr); } - vector_gpu_ker<T,layout_base> & getVector() - { - return *this; - } - - const vector_gpu_ker<T,layout_base> & getVector() const - { - return *this; - } - __host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v) { vref.operator=(v); diff --git a/src/Vector/cuda/map_vector_std_cuda.hpp b/src/Vector/cuda/map_vector_std_cuda.hpp index d384a51f..319add7b 100644 --- a/src/Vector/cuda/map_vector_std_cuda.hpp +++ b/src/Vector/cuda/map_vector_std_cuda.hpp @@ -277,11 +277,8 @@ public: * */ vector(const std::initializer_list<T> & v) - // :base(v) + :base(v) { - // causes error if simply passed to base - for (T t: v) - add(t); } //! Constructor from another vector @@ -291,16 +288,6 @@ public: base.swap(v.base); } - //! Constructor from iterators - template< class InputIt > - vector(InputIt first, InputIt last) - { - while(first != last) { - add(*first); - ++first; - } - } - //! destructor ~vector() noexcept { diff --git a/src/Vector/cuda/map_vector_std_cuda_ker.cuh b/src/Vector/cuda/map_vector_std_cuda_ker.cuh index ea583826..cb1b054f 100644 --- a/src/Vector/cuda/map_vector_std_cuda_ker.cuh +++ b/src/Vector/cuda/map_vector_std_cuda_ker.cuh @@ -126,27 +126,6 @@ public: return base.template get<0>(id); } - /*! \brief Return the pointer to the chunk of memory - * - * \return the pointer to the chunk of memory - * - */ - __device__ __host__ void * getPointer() - { - return &base.template get<0>(0); - } - - /*! \brief Return the pointer to the chunk of memory - * - * \return the pointer to the chunk of memory - * - */ - __device__ __host__ const void * getPointer() const - { - return &base.template get<0>(0); - } - - vector_custd_ker() {} diff --git a/src/Vector/map_vector.hpp b/src/Vector/map_vector.hpp index 078ffd17..953fdeab 100644 --- a/src/Vector/map_vector.hpp +++ b/src/Vector/map_vector.hpp @@ -336,7 +336,7 @@ namespace openfpm * * \return the size * - */ //remove host device + */ size_t size_local() const { return v_size; @@ -1359,21 +1359,7 @@ namespace openfpm return base.get_o(key); } - /*! \brief Get an element of the vector - * - * Get an element of the vector - * - * \tparam p Property to get - * \param id Element to get - * - * \return the element value requested - * - */ //remove device host - template <unsigned int p> - inline auto getProp(const unsigned int & id) -> decltype(base.template get<p>(grid_key_dx<1>(0))) - { //uncomment this - return this->template get<p>(id); - } + /*! \brief Get an element of the vector * * Get an element of the vector @@ -1383,12 +1369,12 @@ namespace openfpm * * \return the element value requested * - *///remove host device - template <unsigned int p,typename KeyType> - inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0))) + */ + + template <unsigned int p,typename KeyType> + inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0))) { - //uncomment this - return this->template get<p>(id.getKey()); + return this->template get<p>(id.getKey()); } /*! \brief Get an element of the vector @@ -1400,10 +1386,10 @@ namespace openfpm * * \return the element value requested * - */ //remove device host + */ template <unsigned int p, typename keyType> - inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) - { //uncomment this + inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) + { return this->template get<p>(id.getKey()); } @@ -1944,7 +1930,6 @@ namespace openfpm return base.getGPUIterator(start,stop_,n_thr); } - #endif /*! \brief Get the vector elements iterator @@ -2002,14 +1987,6 @@ namespace openfpm return base.getGPUIterator(start,stop,n_thr); } - /*! \brief Get a domain iterator for the GPU - * - * - */ - ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const - { - return getGPUIterator(n_thr); - } #endif /*! \brief Return the size of the message needed to pack this object diff --git a/src/Vector/map_vector_std.hpp b/src/Vector/map_vector_std.hpp index ae0c08f1..2cf717e2 100644 --- a/src/Vector/map_vector_std.hpp +++ b/src/Vector/map_vector_std.hpp @@ -644,16 +644,6 @@ public: base.swap(v.base); } - //! Constructor from iterators - template< class InputIt > - vector(InputIt first, InputIt last) - { - while(first != last) { - add(*first); - ++first; - } - } - //! destructor ~vector() noexcept { diff --git a/src/Vector/vector_subset.hpp b/src/Vector/vector_subset.hpp deleted file mode 100644 index d6d78b69..00000000 --- a/src/Vector/vector_subset.hpp +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef VECTOR_SUBSET_HPP -#define VECTOR_SUBSET_HPP - -namespace openfpm -{ - -template<unsigned int dim, - typename prop, - template<typename> class layout_base = memory_traits_inte> - class vector_subset_ker - { - mutable openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> v_all; - - mutable openfpm::vector_gpu_ker<aggregate<int>,layout_base> indexes; - - public: - - vector_subset_ker(openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> & v_all, - openfpm::vector_gpu_ker<aggregate<int>,layout_base> & indexes) - :v_all(v_all),indexes(indexes) - {} - - // get the - - template <unsigned int p> - __device__ __host__ inline auto get(size_t id) const -> decltype(v_all.template get<p>(0)) - { - return v_all.template get<p>(indexes.template get<0>(id)); - } - } - -public: - - template<typename T, typename Memory, template<typename> class layout_base, typename grow_p, unsigned int impl> - class vector_sub - { - vector<T,Memory,layout_base,grow_p,impl> & v_all; - - vector<aggregate<int>,Memory,layout_base,grow_p,impl> & indexes; - - public: - - vector(vector<T,Memory,layout_base,grow_p,impl> & v_all, - vector<aggregate<int>,Memory,layout_base,grow_p,impl> & indexes) - :v_all(v_all),indexes(indexes) - {} - - - // To kernel - template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel() - { - vector_subset_ker<dim,St,prop,layout_base> v(v_all.toKernel(), indexes.toKernel()); - - return v; - } - - template <unsigned int p> - inline auto get(size_t id) const -> decltype(v_all.template get<p>(0)) - { - return v_all.template get<p>(indexes.template get<0>(id)); - } - - } - -}; - -#endif \ No newline at end of file diff --git a/src/util/mathutil.hpp b/src/util/mathutil.hpp index e813f30b..1b05785a 100644 --- a/src/util/mathutil.hpp +++ b/src/util/mathutil.hpp @@ -19,7 +19,7 @@ namespace openfpm * */ template <typename T> - __host__ __device__ int sgn(T val) + int sgn(T val) { return (T(0) < val) - (val < T(0)); } @@ -35,8 +35,7 @@ namespace openfpm * \return the factorial * */ - __host__ __device__ static inline - size_t factorial(size_t f) + static inline size_t factorial(size_t f) { size_t fc = 1; @@ -61,8 +60,7 @@ namespace openfpm * \param k * */ - __host__ __device__ static inline - size_t C(size_t n, size_t k) + static inline size_t C(size_t n, size_t k) { return factorial(n)/(factorial(k)*factorial(n-k)); } @@ -77,8 +75,7 @@ namespace openfpm * * */ - __host__ __device__ static inline - size_t round_big_2(size_t n) + static inline size_t round_big_2(size_t n) { n--; n |= n >> 1; // Divide by 2^k for consecutive doublings of k up to 32, @@ -105,15 +102,14 @@ namespace openfpm */ template<class T> - __host__ __device__ inline constexpr - size_t pow(const T base, unsigned const exponent) + inline constexpr size_t pow(const T base, unsigned const exponent) { // (parentheses not required in next line) return (exponent == 0) ? 1 : (base * pow(base, exponent-1)); } template<class T> - __host__ __device__ double intpowlog(const T x, unsigned const e) + double intpowlog(const T x, unsigned const e) { if (e == 0) return 1.0; if (e % 2 == 0) @@ -139,8 +135,7 @@ namespace openfpm * \param n modulo * */ - __host__ __device__ static inline - long int positive_modulo(long int i, long int n) + static inline long int positive_modulo(long int i, long int n) { return (i % n + n) % n; } @@ -160,9 +155,7 @@ namespace openfpm * \return the bound number * */ - template<typename T> - __host__ __device__ static inline - T periodic(const T & pos, const T & p2, const T & p1) + template<typename T> static inline T periodic(const T & pos, const T & p2, const T & p1) { T pos_tmp; @@ -189,9 +182,7 @@ namespace openfpm * \return the bound number * */ - template<typename T> - __device__ __host__ static inline - T periodic_l(const T & pos, const T & p2, const T & p1) + template<typename T> __device__ __host__ static inline T periodic_l(const T & pos, const T & p2, const T & p1) { T pos_tmp = pos; @@ -216,7 +207,7 @@ namespace openfpm * * */ - __host__ __device__ inline long int size_t_floor(double x) + inline long int size_t_floor(double x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */ @@ -226,7 +217,7 @@ namespace openfpm * * */ - __host__ __device__ inline long int size_t_floor(float x) + inline long int size_t_floor(float x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */ @@ -267,7 +258,7 @@ namespace openfpm * \param value * */ - __host__ __device__ inline int log2_64 (uint64_t value) + inline int log2_64 (uint64_t value) { value |= value >> 1; value |= value >> 2; @@ -285,7 +276,7 @@ namespace openfpm * * */ - __host__ __device__ inline long int size_t_floor(boost::multiprecision::float128 x) + inline long int size_t_floor(boost::multiprecision::float128 x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */ -- GitLab