diff --git a/src/Grid/grid_base_implementation.hpp b/src/Grid/grid_base_implementation.hpp index 8409c6bd688e7466f1c5fcaa6976f20b2d113557..799e81fa0b578be96f528248660d2071a7d38bbb 100644 --- a/src/Grid/grid_base_implementation.hpp +++ b/src/Grid/grid_base_implementation.hpp @@ -752,7 +752,7 @@ public: { auto key = it.get(); - if (this->get_o(key) != this->get_o(key)) + if (this->get_o(key) != g.get_o(key)) return false; ++it; diff --git a/src/Grid/grid_key.hpp b/src/Grid/grid_key.hpp index 106f7ee5f4835ebc97d91e9c4697810cbace22fe..90fab130df3b04587ad44f6bbd62a96ef1d5960d 100644 --- a/src/Grid/grid_key.hpp +++ b/src/Grid/grid_key.hpp @@ -397,7 +397,7 @@ public: * */ template<typename a, typename ...T> - inline void set(a v, T...t) + __device__ __host__ inline void set(a v, T...t) { #ifdef SE_CLASS1 if (sizeof...(t) != dim -1) diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp index ed96216e14832599a3f2a1a1cddc1871c9679ced..fa611b06a6c5fe61e001d56bfffbc71137f1a2ef 100644 --- a/src/Grid/grid_sm.hpp +++ b/src/Grid/grid_sm.hpp @@ -186,7 +186,7 @@ class grid_sm * */ - inline void Initialize(const size_t sz) + __device__ __host__ inline void Initialize(const size_t sz) { //! Initialize the basic structure for each dimension sz_s[0] = sz; @@ -217,7 +217,7 @@ class grid_sm * */ - inline void Initialize(const size_t (& sz)[N]) + __device__ __host__ inline void Initialize(const size_t (& sz)[N]) { //! Initialize the basic structure for each dimension sz_s[0] = sz[0]; @@ -353,7 +353,8 @@ public: * */ - template<typename S> inline grid_sm(const grid_sm<N,S> & g) + template<typename S> + __device__ __host__ inline grid_sm(const grid_sm<N,S> & g) { size_tot = g.size_tot; @@ -380,7 +381,7 @@ public: // Static element to calculate total size - inline size_t totalSize(const size_t (& sz)[N]) + __device__ __host__ inline size_t totalSize(const size_t (& sz)[N]) { size_t tSz = 1; @@ -415,7 +416,7 @@ public: * */ - inline grid_sm(const size_t (& sz)[N]) + __device__ __host__ inline grid_sm(const size_t (& sz)[N]) : size_tot(totalSize(sz)) { Initialize(sz); @@ -625,7 +626,7 @@ public: } //! Destructor - ~grid_sm() {}; + __device__ __host__ ~grid_sm() {}; /*! \brief Return the size of the grid * @@ -713,7 +714,7 @@ public: * */ - inline size_t size_s(unsigned int i) const + __device__ __host__ inline size_t size_s(unsigned int i) const { return sz_s[i]; } @@ -737,11 +738,21 @@ public: * \return get the size of the grid as an array * */ - inline const size_t (& getSize() const)[N] + __device__ __host__ inline const size_t (& getSize() const)[N] { return sz; } + /*! \brief Returns the size of the grid in the passed array + * + * \return the size of the grid in the passed array + * + */ + __device__ __host__ inline void getSize(size_t (&size) [N]) const + { + for (size_t i = 0; i < N; ++i) size[i] = sz[i]; + } + /*! \brief Return a sub-grid iterator * * Return a sub-grid iterator, to iterate through the grid diff --git a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh index fc108d88969ee087844aa85a389dd66527a1847e..3981ce07658ac506f4c771afccb78736a1da76ff 100644 --- a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh @@ -29,30 +29,71 @@ class CellDecomposer_gpu_ker //! transformation transform t; + //! Unit box of the Cell list + SpaceBox<dim,T> box_unit; + + //! Grid structure of the Cell list + grid_sm<dim,void> gr_cell; + + //! cell_shift + Point<dim,long int> cell_shift; public: __device__ __host__ CellDecomposer_gpu_ker() {} - __device__ __host__ CellDecomposer_gpu_ker(openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t) - :spacing_c(spacing_c),div_c(div_c),off(off),t(t) + __device__ __host__ CellDecomposer_gpu_ker( + openfpm::array<T,dim,cnt_type> & spacing_c, + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t) + : spacing_c(spacing_c),div_c(div_c),off(off),t(t) {} - __host__ grid_sm<dim,void> getGrid() + __device__ __host__ CellDecomposer_gpu_ker( + openfpm::array<T,dim,cnt_type> & spacing_c, + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + SpaceBox<dim,T> box_unit, + grid_sm<dim,void> gr_cell, + Point<dim,long int> cell_shift) + : spacing_c(spacing_c),div_c(div_c),off(off),t(t), + box_unit(box_unit), gr_cell(gr_cell), cell_shift(cell_shift) + {} + + __device__ __host__ grid_sm<dim,void> getGrid() { size_t sz[dim]; for (size_t i = 0 ; i < dim ; i++) { - sz[i] = div_c[i] + 2*off[i]; + sz[i] = div_c[i]; } return grid_sm<dim,void> (sz); } + __device__ __host__ void getGridSize(size_t (& sz)[dim]) const + { + for (size_t i = 0 ; i < dim ; i++) + { + sz[i] = div_c[i] + 2*off[i]; + } + } + + template<typename ids_type2> + __device__ __host__ mem_id getGridLinId(const grid_key_dx<dim,ids_type2> & gk) const + { + mem_id lid = gk.get(0); + for (mem_id i = 1 ; i < dim ; i++) + { + lid += gk.get(i) * (div_c[i-1] + 2*off[i-1]); + } + + return lid; + } + __device__ __host__ inline grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const { return cid_<dim,cnt_type,ids_type,transform>::get_cid_key(spacing_c,off,t,xp); @@ -82,6 +123,46 @@ public: { return t; } + + __device__ __host__ inline grid_key_dx<dim> getCellGrid(const T (& pos)[dim]) const + { + grid_key_dx<dim> key; + key.set_d(0,ConvertToID(pos,0)); + + for (size_t s = 1 ; s < dim ; s++) + { + key.set_d(s,ConvertToID(pos,s)); + } + + return key; + } + + __device__ __host__ inline grid_key_dx<dim> getCellGrid(const Point<dim,T> & pos) const + { + grid_key_dx<dim> key; + key.set_d(0,ConvertToID(pos,0)); + + for (size_t s = 1 ; s < dim ; s++) + { + key.set_d(s,ConvertToID(pos,s)); + } + + return key; + } + + __device__ __host__ inline size_t ConvertToID(const T (&x)[dim] ,size_t s) const + { + size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s]; + id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s); + return id; + } + + __device__ __host__ inline size_t ConvertToID(const Point<dim,T> & x ,size_t s, size_t sc = 0) const + { + size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s]; + id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s); + return id; + } }; #endif /* CELLDECOMPOSER_GPU_KER_HPP_ */ diff --git a/src/NN/CellList/cuda/CellList_gpu.hpp b/src/NN/CellList/cuda/CellList_gpu.hpp index c76235835f37b501dd4ddebabd8363cdb7926c64..defee388cdefe38e20acf4e8a7397863c6ff2b70 100644 --- a/src/NN/CellList/cuda/CellList_gpu.hpp +++ b/src/NN/CellList/cuda/CellList_gpu.hpp @@ -44,7 +44,10 @@ struct CellList_gpu_ker_selector openfpm::array<ids_type,dim,cnt_type> & div_c, openfpm::array<ids_type,dim,cnt_type> & off, const transform & t, - unsigned int g_m) + unsigned int g_m, + const SpaceBox<dim,T>& box_unit, + const grid_sm<dim,void>& gr_cell, + const Point<dim,long int>& cell_shift) { return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(), sorted_to_not_sorted.toKernel(), @@ -54,7 +57,10 @@ struct CellList_gpu_ker_selector div_c, off, t, - g_m); + g_m, + box_unit, + gr_cell, + cell_shift); } }; @@ -73,10 +79,14 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector vector_cnt_type & dprt, openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m) + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m, + const SpaceBox<dim,T>& box_unit, + const grid_sm<dim,void>& gr_cell, + const Point<dim,long int>& cell_shift) + { return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(), cell_nn_list.toKernel(), @@ -86,7 +96,11 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector spacing_c, div_c, off, - t,g_m); + t, + g_m, + box_unit, + gr_cell, + cell_shift); } }; @@ -648,14 +662,17 @@ public: cells_nn, cells_nn_list, cl_sparse, - sorted_to_not_sorted, - sorted_domain_particles_ids, - nnc_rad, - spacing_c, - div_c, - off, - this->getTransform(), - g_m); + sorted_to_not_sorted, + sorted_domain_particles_ids, + nnc_rad, + spacing_c, + div_c, + off, + this->getTransform(), + g_m, + this->box_unit, + this->gr_cell, + this->cell_shift); } /*! \brief Clear the structure diff --git a/src/NN/CellList/cuda/CellList_gpu_ker.cuh b/src/NN/CellList/cuda/CellList_gpu_ker.cuh index 004716a7dd1872a5fef8c8adb0876ed5ae1f1808..becb50c5e3f7c12a5e04921fd3971218e0df11fc 100644 --- a/src/NN/CellList/cuda/CellList_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellList_gpu_ker.cuh @@ -449,11 +449,14 @@ public: openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt, openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m) - :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t), + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m, + SpaceBox<dim,T> box_unit, + grid_sm<dim,void> gr_cell, + Point<dim,long int> cell_shift) + :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift), starts(starts),srt(srt),dprt(dprt),rad_cells(rad_cells),g_m(g_m) { } @@ -616,12 +619,16 @@ public: openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt, openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt, openfpm::array<T,dim,cnt_type> & spacing_c, - openfpm::array<ids_type,dim,cnt_type> & div_c, - openfpm::array<ids_type,dim,cnt_type> & off, - const transform & t, - unsigned int g_m) - :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt), - cl_sparse(cl_sparse),g_m(g_m) + openfpm::array<ids_type,dim,cnt_type> & div_c, + openfpm::array<ids_type,dim,cnt_type> & off, + const transform & t, + unsigned int g_m, + SpaceBox<dim,T> box_unit, + grid_sm<dim,void> gr_cell, + Point<dim,long int> cell_shift) + + :CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift), + cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),cl_sparse(cl_sparse),g_m(g_m) { } diff --git a/src/Space/Shape/Point.hpp b/src/Space/Shape/Point.hpp index 6391bb54123ddd76ecf97cce676f9c0d3a7c492a..1fb4a801bed16802869e22a9815259f86a21300c 100644 --- a/src/Space/Shape/Point.hpp +++ b/src/Space/Shape/Point.hpp @@ -416,7 +416,7 @@ template<unsigned int dim ,typename T> class Point * \return the reference * */ - T & value(size_t i) + __device__ __host__ T & value(size_t i) { return get(i); } diff --git a/src/Vector/cuda/map_vector_std_cuda.hpp b/src/Vector/cuda/map_vector_std_cuda.hpp index 319add7ba4037341e89664ff30ab578c0dbe1bec..d384a51f598adfab38d575a7d0884de46ab0bdfa 100644 --- a/src/Vector/cuda/map_vector_std_cuda.hpp +++ b/src/Vector/cuda/map_vector_std_cuda.hpp @@ -277,8 +277,11 @@ public: * */ vector(const std::initializer_list<T> & v) - :base(v) + // :base(v) { + // causes error if simply passed to base + for (T t: v) + add(t); } //! Constructor from another vector @@ -288,6 +291,16 @@ public: base.swap(v.base); } + //! Constructor from iterators + template< class InputIt > + vector(InputIt first, InputIt last) + { + while(first != last) { + add(*first); + ++first; + } + } + //! destructor ~vector() noexcept { diff --git a/src/Vector/cuda/map_vector_std_cuda_ker.cuh b/src/Vector/cuda/map_vector_std_cuda_ker.cuh index cb1b054f1b15e1271ef8cd78ad1e05acc1970b79..ea58382657d70965a7de5f2a18e0362be019fd03 100644 --- a/src/Vector/cuda/map_vector_std_cuda_ker.cuh +++ b/src/Vector/cuda/map_vector_std_cuda_ker.cuh @@ -126,6 +126,27 @@ public: return base.template get<0>(id); } + /*! \brief Return the pointer to the chunk of memory + * + * \return the pointer to the chunk of memory + * + */ + __device__ __host__ void * getPointer() + { + return &base.template get<0>(0); + } + + /*! \brief Return the pointer to the chunk of memory + * + * \return the pointer to the chunk of memory + * + */ + __device__ __host__ const void * getPointer() const + { + return &base.template get<0>(0); + } + + vector_custd_ker() {} diff --git a/src/Vector/map_vector_std.hpp b/src/Vector/map_vector_std.hpp index 2d4a875fcec92d3d7870efc5a6cea211d0d9be7f..c600e49d425fa74584990b6a7152a9486f4ee03c 100644 --- a/src/Vector/map_vector_std.hpp +++ b/src/Vector/map_vector_std.hpp @@ -644,6 +644,16 @@ public: base.swap(v.base); } + //! Constructor from iterators + template< class InputIt > + vector(InputIt first, InputIt last) + { + while(first != last) { + add(*first); + ++first; + } + } + //! destructor ~vector() noexcept { diff --git a/src/util/mathutil.hpp b/src/util/mathutil.hpp index 1b05785a90aeac2155b2bae58745656c1a23daf4..e813f30b29097a444903dc365d19acbef57f811b 100644 --- a/src/util/mathutil.hpp +++ b/src/util/mathutil.hpp @@ -19,7 +19,7 @@ namespace openfpm * */ template <typename T> - int sgn(T val) + __host__ __device__ int sgn(T val) { return (T(0) < val) - (val < T(0)); } @@ -35,7 +35,8 @@ namespace openfpm * \return the factorial * */ - static inline size_t factorial(size_t f) + __host__ __device__ static inline + size_t factorial(size_t f) { size_t fc = 1; @@ -60,7 +61,8 @@ namespace openfpm * \param k * */ - static inline size_t C(size_t n, size_t k) + __host__ __device__ static inline + size_t C(size_t n, size_t k) { return factorial(n)/(factorial(k)*factorial(n-k)); } @@ -75,7 +77,8 @@ namespace openfpm * * */ - static inline size_t round_big_2(size_t n) + __host__ __device__ static inline + size_t round_big_2(size_t n) { n--; n |= n >> 1; // Divide by 2^k for consecutive doublings of k up to 32, @@ -102,14 +105,15 @@ namespace openfpm */ template<class T> - inline constexpr size_t pow(const T base, unsigned const exponent) + __host__ __device__ inline constexpr + size_t pow(const T base, unsigned const exponent) { // (parentheses not required in next line) return (exponent == 0) ? 1 : (base * pow(base, exponent-1)); } template<class T> - double intpowlog(const T x, unsigned const e) + __host__ __device__ double intpowlog(const T x, unsigned const e) { if (e == 0) return 1.0; if (e % 2 == 0) @@ -135,7 +139,8 @@ namespace openfpm * \param n modulo * */ - static inline long int positive_modulo(long int i, long int n) + __host__ __device__ static inline + long int positive_modulo(long int i, long int n) { return (i % n + n) % n; } @@ -155,7 +160,9 @@ namespace openfpm * \return the bound number * */ - template<typename T> static inline T periodic(const T & pos, const T & p2, const T & p1) + template<typename T> + __host__ __device__ static inline + T periodic(const T & pos, const T & p2, const T & p1) { T pos_tmp; @@ -182,7 +189,9 @@ namespace openfpm * \return the bound number * */ - template<typename T> __device__ __host__ static inline T periodic_l(const T & pos, const T & p2, const T & p1) + template<typename T> + __device__ __host__ static inline + T periodic_l(const T & pos, const T & p2, const T & p1) { T pos_tmp = pos; @@ -207,7 +216,7 @@ namespace openfpm * * */ - inline long int size_t_floor(double x) + __host__ __device__ inline long int size_t_floor(double x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */ @@ -217,7 +226,7 @@ namespace openfpm * * */ - inline long int size_t_floor(float x) + __host__ __device__ inline long int size_t_floor(float x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */ @@ -258,7 +267,7 @@ namespace openfpm * \param value * */ - inline int log2_64 (uint64_t value) + __host__ __device__ inline int log2_64 (uint64_t value) { value |= value >> 1; value |= value >> 2; @@ -276,7 +285,7 @@ namespace openfpm * * */ - inline long int size_t_floor(boost::multiprecision::float128 x) + __host__ __device__ inline long int size_t_floor(boost::multiprecision::float128 x) { size_t i = (long int)x; /* truncate */ return i - ( i > x ); /* convert trunc to floor */