diff --git a/src/Grid/grid_base_implementation.hpp b/src/Grid/grid_base_implementation.hpp
index f9ca655e2b156cf9254856847ae0a471a614a94c..2b4e85d69bfeb294d72676b6125c13e8dab9baa8 100644
--- a/src/Grid/grid_base_implementation.hpp
+++ b/src/Grid/grid_base_implementation.hpp
@@ -1091,28 +1091,6 @@ public:
 					  const Box<dim,long int> & box_src,
 					  const Box<dim,long int> & box_dst)
 	{
-
-
-		// sub-grid where to unpack
-/*		grid_key_dx_iterator_sub<dim> src(grid_src.getGrid(),box_src.getKP1(),box_src.getKP2());
-		grid_key_dx_iterator_sub<dim> dst(getGrid(),box_dst.getKP1(),box_dst.getKP2());
-
-		while (src.isNext())
-		{
-			auto key_src = src.get();
-			auto key_dst = dst.get();
-
-			get_o(key_dst) = grid_src.get_o(key_src);
-
-			++src;
-			++dst;
-		}*/
-
-		///////////////////////////////////////
-
-//		typedef typename std::remove_reference<decltype(grid_src)>::type grid_cp;
-//		typedef typename std::remove_reference<decltype(grid_src.getGrid())>::type grid_info_cp;
-
 		// fix box_dst

 		Box<dim,size_t> box_src_;
@@ -1226,21 +1204,6 @@ public:
 			++sub_src;
 			++sub_dst;
 		}
-
-/*		grid_key_dx_iterator_sub<dim> sub_src(grid_src.getGrid(),box_src.getKP1(),box_src.getKP2());
-		grid_key_dx_iterator_sub<dim> sub_dst(this->getGrid(),box_dst.getKP1(),box_dst.getKP2());
-
-//		const auto & gs = loc_grid.get(i);
-//		auto & gd = loc_grid.get(sub_id_dst);
-
-		while (sub_src.isNext())
-		{
-			// write the object in the last element
-			object_s_di_op<op,decltype(grid_src.get_o(sub_src.get())),decltype(this->get_o(sub_dst.get())),OBJ_ENCAP,prp...>(grid_src.get_o(sub_src.get()),this->get_o(sub_dst.get()));
-
-			++sub_src;
-			++sub_dst;
-		}*/
 	}

 	/*! \brief Resize the grid
diff --git a/src/Grid/grid_common.hpp b/src/Grid/grid_common.hpp
index 8bfc8a56dd7de59ad08d3b76d982e8638b59b351..d3a390247b434d2d1253a483cefb9925bbe61da6 100644
--- a/src/Grid/grid_common.hpp
+++ b/src/Grid/grid_common.hpp
@@ -11,6 +11,33 @@
 #include <type_traits>
 #include "util/tokernel_transformation.hpp"

+/*! \brief this class is a functor for "for_each" algorithm
+ *
+ * This class is a functor for the "for_each" algorithm. For each
+ * element of the boost::vector the operator() is called.
+ * It is mainly used to call setBackgroundValue for each property of the local grids
+ *
+ */
+template<typename aggrT_src, typename local_grids_type>
+struct setBackground_impl
+{
+	aggrT_src & bck;
+
+	local_grids_type loc_grid;
+
+	inline setBackground_impl(aggrT_src & bck, local_grids_type & loc_grid)
+	:bck(bck),loc_grid(loc_grid)
+	{};
+
+	//! It calls setBackgroundValue for each property on every local grid
+	template<typename T>
+	inline void operator()(T& t)
+	{
+		for (size_t i = 0 ; i < loc_grid.size() ; i++)
+		{loc_grid.get(i).template setBackgroundValue<T::value>(bck.template get<T::value>());}
+	}
+};
+
 /*! \brief this class is a functor for "for_each" algorithm
  *
  * This class is a functor for "for_each" algorithm. For each
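The new setBackground_impl functor is meant to be driven by a compile-time loop over the property indices. A minimal sketch of the intended call site, assuming an aggregate type aggrT_src holding the background values and a vector loc_grids of local grids (names are illustrative; for_each_ref is the reference-preserving for_each variant used throughout this codebase):

    // invoke operator() once per property index in [0, max_prop)
    setBackground_impl<aggrT_src, decltype(loc_grids)> func(bck, loc_grids);
    boost::mpl::for_each_ref<boost::mpl::range_c<int, 0, aggrT_src::max_prop>>(func);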
diff --git a/src/Grid/grid_key.hpp b/src/Grid/grid_key.hpp
index 789f8c5938ee4545a06b90b4914545d14f639fad..c24376add69c810acff3a948e9f290de18e766e5 100644
--- a/src/Grid/grid_key.hpp
+++ b/src/Grid/grid_key.hpp
@@ -445,9 +445,10 @@ public:
 	 * \return a point unsigned long int
 	 *
 	 */
-	Point<dim,size_t> toPoint() const
+	template<typename typeT = size_t>
+	inline Point<dim,typeT> toPoint() const
 	{
-		Point<dim,size_t> p;
+		Point<dim,typeT> p;

 		for (size_t i = 0; i < dim ; i++)
 		{
@@ -478,7 +479,7 @@ public:
 	 * \return the index value
 	 *
 	 */
-	__device__ __host__ mem_id get(size_t i) const
+	__device__ __host__ index_type get(index_type i) const
 	{
 		return k[i];
 	}
@@ -491,7 +492,7 @@ public:
 	 * \param id value to set
 	 *
 	 */
-	__device__ __host__ void set_d(size_t i, mem_id id)
+	__device__ __host__ void set_d(index_type i, index_type id)
 	{
 #if defined(SE_CLASS1) && !defined(__NVCC__)
diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp
index c888410d128df097749e0c1d37ae4fbe5c3fd5c8..881c6f7ece957a6c8181f7589833fcc0b91d244a 100755
--- a/src/Grid/grid_sm.hpp
+++ b/src/Grid/grid_sm.hpp
@@ -859,10 +859,10 @@ ite_gpu<dim> getGPUIterator_impl(const grid_sm<dim,T2> & g1, const grid_key_dx<d
 	{ig.thr.x = (key2.get(0) - key1.get(0) + 1);}

 	if (dim >= 2 && ig.wthr.y == 1)
-	{ig.wthr.y = key2.get(1) - key1.get(1) + 1;}
+	{ig.thr.y = key2.get(1) - key1.get(1) + 1;}

 	if (dim == 3 && ig.wthr.z == 1)
-	{ig.wthr.z = key2.get(2) - key1.get(2) + 1;}
+	{ig.thr.z = key2.get(2) - key1.get(2) + 1;}

 	for (size_t i = 0 ; i < dim ; i++)
 	{
diff --git a/src/Grid/iterators/grid_key_dx_iterator_sub.hpp b/src/Grid/iterators/grid_key_dx_iterator_sub.hpp
index 4d59803a23da3b5725a8a430ae62976cd44008a5..aeabf55097c7bc8486b78857d377422b820ffaf9 100644
--- a/src/Grid/iterators/grid_key_dx_iterator_sub.hpp
+++ b/src/Grid/iterators/grid_key_dx_iterator_sub.hpp
@@ -484,7 +484,7 @@ public:
 	{
 #ifdef SE_CLASS1
 		if (initialized == false)
-		{std::cerr << "Error: " << __FILE__ << __LINE__ << " using unitialized iterator" << "\n";}
+		{std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " using uninitialized iterator" << "\n";}
 #endif

 		for (size_t i = 0 ; i < nsteps ; i++)
@@ -506,7 +506,7 @@ public:
 	{
 #ifdef SE_CLASS1
 		if (initialized == false)
-		{std::cerr << "Error: " << __FILE__ << __LINE__ << " using unitialized iterator" << "\n";}
+		{std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " using uninitialized iterator" << "\n";}
 #endif

 		if (this->gk.get(dim-1) <= gk_stop.get(dim-1))
@@ -643,6 +643,17 @@ public:
 	{
 		this->stl_code.private_adjust(tot_add);
 	}
+
+	/*! \brief Set the iterator in a way that isNext() returns false
+	 *
+	 */
+	void invalidate()
+	{
+		this->gk.set_d(dim-1,1);
+		this->gk_stop.set_d(dim-1,0);
+
+		initialized = true;
+	}
 };
diff --git a/src/Grid/map_grid.hpp b/src/Grid/map_grid.hpp
index a3b0e4e535f6c9e662fb36d3d95e7776a1552ce8..e756767914387a6d456fe281fa55b74aeb650e38 100755
--- a/src/Grid/map_grid.hpp
+++ b/src/Grid/map_grid.hpp
@@ -337,6 +337,18 @@ public:
 		return background;
 	}

+	/*! \brief Set the background value
+	 *
+	 * \tparam p property to set
+	 *
+	 */
+	template<unsigned int p>
+	void setBackgroundValue(const typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type & val)
+	{
+		background.template get<p>() = val;
+	}
+
+
 	/*! \brief assign operator
 	 *
 	 * \return itself
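The setBackgroundValue member added above gives the dense grid the same background interface the sparse containers already expose, which is what setBackground_impl relies on. A hypothetical usage sketch (grid type and sizes are illustrative):

    grid_cpu<3, aggregate<float, double>> g(sz);
    g.template setBackgroundValue<0>(0.0f); // background of property 0
    g.template setBackgroundValue<1>(1.0);  // background of property 1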
diff --git a/src/SparseGrid/SparseGrid.hpp b/src/SparseGrid/SparseGrid.hpp
index 8780186473f6ee6b0ecc8d5050f3678d74a017f5..cc3572c52c4322ec39be91cfb7b1c4eed8bba2a4 100644
--- a/src/SparseGrid/SparseGrid.hpp
+++ b/src/SparseGrid/SparseGrid.hpp
@@ -586,7 +586,7 @@ public:
 	template<unsigned int p>
 	void setBackgroundValue(const typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type & val)
 	{
-		return background.template get<p>() = val;
+		background.template get<p>() = val;
 	}

 	/*! \brief Get the background value
diff --git a/src/SparseGridGpu/BlockMapGpu.hpp b/src/SparseGridGpu/BlockMapGpu.hpp
index 706f378a89a606a5362b742beb45050601e97c3f..e8507ae0c199012c52db6b71e4b5f3acfdf78dbe 100644
--- a/src/SparseGridGpu/BlockMapGpu.hpp
+++ b/src/SparseGridGpu/BlockMapGpu.hpp
@@ -30,6 +30,16 @@ class BlockMapGpu
 private:
 	typedef BlockTypeOf<AggregateBlockT, 0> BlockT0;

+#ifdef SE_CLASS1
+
+	//! Indicate whether setGPUInsertBuffer has been called
+	bool is_setGPUInsertBuffer = false;
+
+	//! Indicate whether initializeGPUInsertBuffer has been called
+	bool is_initializeGPUInsertBuffer = false;
+
+#endif
+
 protected:
 	const static unsigned char EXIST_BIT = 0;
 	typedef typename AggregateAppend<DataBlock<unsigned char, BlockT0::size>, AggregateBlockT>::type AggregateInternalT;
@@ -42,7 +52,6 @@ protected:

 public:
 	typedef AggregateBlockT AggregateType;
-public:

 	BlockMapGpu() = default;

 	/*! \brief Get the background value
@@ -133,12 +142,49 @@ public:

 	void hostToDevice();

-	void setGPUInsertBuffer(int nBlock, int nSlot);
+	/*! \brief Before inserting any element you must call this function to initialize the insert buffer
+	 *
+	 * \param nBlock number of blocks the insert buffer has
+	 * \param nSlot maximum number of insertions each thread block can do
+	 *
+	 */
+	void setGPUInsertBuffer(int nBlock, int nSlot)
+	{
+		// Prealloc the insert buffer on the underlying sparse vector
+		blockMap.setGPUInsertBuffer(nBlock, nSlot);
+		initializeGPUInsertBuffer();
-	void initializeGPUInsertBuffer();
+#ifdef SE_CLASS1
+		is_setGPUInsertBuffer = true;
+#endif
+	}
+
+	void initializeGPUInsertBuffer()
+	{
+		//todo: Test if it's enough to just initialize masks to 0, without any background value
+		// Initialize the blocks to background
+		auto & insertBuffer = blockMap.getGPUInsertBuffer();
+		typedef BlockTypeOf<AggregateInternalT, pMask> BlockType; // Here assuming that all block types in the aggregate have the same size!
+		constexpr unsigned int chunksPerBlock = threadBlockSize / BlockType::size; // Floor is good here...
+		BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock> <<< insertBuffer.size()/chunksPerBlock, chunksPerBlock*BlockType::size >>>(
+				insertBuffer.toKernel());
+
+	#ifdef SE_CLASS1
+		is_initializeGPUInsertBuffer = true;
+	#endif
+	}

 	template<typename ... v_reduce>
-	void flush(mgpu::ofp_context_t &context, flush_type opt = FLUSH_ON_HOST);
+	void flush(mgpu::ofp_context_t &context, flush_type opt = FLUSH_ON_HOST)
+	{
+#ifdef SE_CLASS1
+
+		if (is_setGPUInsertBuffer == false || is_initializeGPUInsertBuffer == false)
+		{std::cout << __FILE__ << ":" << __LINE__ << " error: setGPUInsertBuffer must be called before doing any insertion" << std::endl;}
+#endif
+
+		blockMap.template flush<v_reduce ...>(context, opt);
+	}

 	template<unsigned int p>
 	void setBackgroundValue(ScalarTypeOf<AggregateBlockT, p> backgroundValue);
@@ -234,33 +280,6 @@ void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::hostToD
 	blockMap.template hostToDevice<pMask>();
 }

-template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
-void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setGPUInsertBuffer(int nBlock, int nSlot)
-{
-	// Prealloc the insert buffer on the underlying sparse vector
-	blockMap.setGPUInsertBuffer(nBlock, nSlot);
-	initializeGPUInsertBuffer();
-}
-
-template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
-void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::initializeGPUInsertBuffer()
-{
-	//todo: Test if it's enough to just initialize masks to 0, without any background value
-	// Initialize the blocks to background
-	auto & insertBuffer = blockMap.getGPUInsertBuffer();
-	typedef BlockTypeOf<AggregateInternalT, pMask> BlockType; // Here assuming that all block types in the aggregate have the same size!
-	constexpr unsigned int chunksPerBlock = threadBlockSize / BlockType::size; // Floor is good here...
-	BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock> <<< insertBuffer.size()/chunksPerBlock, chunksPerBlock*BlockType::size >>>(
-			insertBuffer.toKernel());
-}
-
-template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
-template<typename ... v_reduce>
-void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::flush(mgpu::ofp_context_t &context, flush_type opt)
-{
-	blockMap.template flush<v_reduce .../*, sBitwiseOr_<pMask>*/>(context, opt);
-}
-
 template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
 template<unsigned int p>
 void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setBackgroundValue(
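Inlining these three functions also makes the required call order explicit, now checked under SE_CLASS1. A condensed sketch (template arguments and counts are illustrative):

    BlockMapGpu<aggregate<float>, 128, long int, memory_traits_inte> bmg;
    bmg.setGPUInsertBuffer(nBlock, nSlot); // also runs initializeGPUInsertBuffer()
    // ... launch kernels that fill the insert buffer ...
    mgpu::ofp_context_t ctx;
    bmg.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE); // SE_CLASS1 warns if the buffer was never set up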
diff --git a/src/SparseGridGpu/Iterators/SparseGridGpu_iterator.hpp b/src/SparseGridGpu/Iterators/SparseGridGpu_iterator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..329ac5830baafffcf213410e2699bc4b775a0445
--- /dev/null
+++ b/src/SparseGridGpu/Iterators/SparseGridGpu_iterator.hpp
@@ -0,0 +1,201 @@
+/*
+ * SparseGridGpu_iterator.hpp
+ *
+ *  Created on: Sep 4, 2019
+ *      Author: i-bird
+ */
+
+#ifndef SPARSEGRIDGPU_ITERATOR_HPP_
+#define SPARSEGRIDGPU_ITERATOR_HPP_
+
+/*! \brief Element index containing a data chunk index and a point index
+ *
+ * \tparam SparseGridGpu_type SparseGridGpu type
+ *
+ */
+template<typename SparseGridGpu_type>
+class sparse_grid_gpu_index
+{
+	//! chunk position id
+	int cnk_pos_id;
+
+	//! data id
+	int data_id;
+
+	//! SparseGridGpu used to add functionalities
+	const SparseGridGpu_type & sparseGrid;
+
+public:
+
+	/*! \brief Constructor from SparseGridGpu
+	 *
+	 *
+	 */
+	inline sparse_grid_gpu_index(const SparseGridGpu_type & sparseGrid, int cnk_pos_id, int data_id)
+	:cnk_pos_id(cnk_pos_id),data_id(data_id),sparseGrid(sparseGrid)
+	{}
+
+	/*! \brief Convert this index into a point
+	 *
+	 * \see toPointS
+	 *
+	 * \return the position as a Point<dims,size_t>
+	 *
+	 */
+	inline Point<SparseGridGpu_type::dims,size_t> toPoint() const
+	{
+		auto indexCnk = sparseGrid.private_get_index_array().template get<0>(cnk_pos_id);
+
+		auto coord = sparseGrid.getCoord(indexCnk*sparseGrid.getBlockSize() + data_id);
+
+		Point<SparseGridGpu_type::dims,size_t> p;
+
+		for (size_t i = 0; i < SparseGridGpu_type::dims ; i++)
+		{
+			p.get(i) = coord.get(i);
+		}
+
+		return p;
+	}
+
+	/*! \brief Get chunk position id
+	 *
+	 * Return the position of the chunk in the chunks array \see SparseGridGpu \see private_get_data_array
+	 *
+	 * \return the chunk position id
+	 *
+	 */
+	int get_cnk_pos_id() const
+	{
+		return cnk_pos_id;
+	}
+
+	/*! \brief Get chunk local index (the returned index < getblockSize())
+	 *
+	 * \return the local index within the chunk
+	 *
+	 */
+	int get_data_id() const
+	{
+		return data_id;
+	}
+};
+
+template<unsigned int dim, typename SparseGridType>
+class SparseGridGpu_iterator
+{
+	//! actual chunk
+	unsigned int chunk;
+
+	//! actual point inside the chunk
+	unsigned int pnt;
+
+	grid_key_dx<dim> a_cnk;
+
+	//! original SparseGrid
+	const SparseGridType & sparseGrid;
+
+	//! array type for the indexes
+	typedef typename std::remove_reference<decltype(sparseGrid.private_get_index_array())>::type index_array_type;
+
+	//! array type for the data
+	typedef typename std::remove_reference<decltype(sparseGrid.private_get_data_array())>::type data_array_type;
+
+	//! vector of the chunk indexes
+	const decltype(sparseGrid.private_get_index_array()) & ids;
+
+	//! vector containing the data of each chunk
+	const decltype(sparseGrid.private_get_data_array()) & data;
+
+	//Get the chunk type
+	typedef typename boost::mpl::at<typename data_array_type::value_type::type,boost::mpl::int_<0>>::type chunk_type;
+
+	//Get the position of the mask property
+	typedef boost::mpl::int_<boost::mpl::size<typename data_array_type::value_type::type>::type::value-1> pMask;
+
+	//! Select the first valid point chunk
+	void SelectValid()
+	{
+		// the chunk < ids.size() guards avoid reading one chunk past the end
+		while (chunk < ids.size() && pnt < chunk_type::size && data.template get<pMask::value>(chunk)[pnt] == 0)
+		{
+			pnt++;
+		}
+
+		while (pnt == chunk_type::size && chunk < ids.size())
+		{
+			chunk++;
+			pnt = 0;
+			while (chunk < ids.size() && pnt < chunk_type::size && data.template get<pMask::value>(chunk)[pnt] == 0)
+			{
+				pnt++;
+			}
+		}
+	}
+
+public:
+
+	/*! \brief Constructor
+	 *
+	 * \param sparseGrid the sparse grid to iterate
+	 *
+	 */
+	inline SparseGridGpu_iterator(const SparseGridType & sparseGrid)
+	:chunk(0),
+	 pnt(0),
+	 sparseGrid(sparseGrid),
+	 ids(sparseGrid.private_get_index_array()),
+	 data(sparseGrid.private_get_data_array())
+	{
+		SelectValid();
+	}
+
+	/*! \brief Check if there is the next element
+	 *
+	 * Check if there is the next element
+	 *
+	 * \return true if there is the next, false otherwise
+	 *
+	 */
+	bool isNext() const
+	{
+		return chunk < ids.size();
+	}
+
+	/*! \brief Get the next element
+	 *
+	 * Get the next element
+	 *
+	 * \return itself
+	 *
+	 */
+	inline SparseGridGpu_iterator<dim,SparseGridType> & operator++()
+	{
+		++pnt;
+
+		if (pnt >= chunk_type::size)
+		{
+			++chunk;
+			pnt = 0;
+		}
+
+		SelectValid();
+
+		return *this;
+	}
+
+	/*! \brief return the actual point
+	 *
+	 * \return the index of the actual point
+	 *
+	 */
+	inline sparse_grid_gpu_index<SparseGridType> get()
+	{
+		sparse_grid_gpu_index<SparseGridType> spgi(sparseGrid,chunk,pnt);
+
+		return spgi;
+	}
+};
+
+
+#endif /* SPARSEGRIDGPU_ITERATOR_HPP_ */
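On the host side the new iterator is obtained from SparseGridGpu::getIterator() (added later in this patch) and visits only existing points, skipping positions whose mask byte is zero. A sketch taken from the tests below (the data must be synchronized to the host first):

    sparseGrid.template deviceToHost<0>();

    auto it = sparseGrid.getIterator();
    while (it.isNext())
    {
        auto key = it.get();                       // sparse_grid_gpu_index
        float v = sparseGrid.template get<0>(key); // direct chunk access, no search
        ++it;
    }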
diff --git a/src/SparseGridGpu/Iterators/SparseGridGpu_iterator_sub.hpp b/src/SparseGridGpu/Iterators/SparseGridGpu_iterator_sub.hpp
index a78491b4760e8f37221bc07d62ba638be67a8df7..6c17a3b06a97ba9e2eae6d42bea0b979cae542af 100644
--- a/src/SparseGridGpu/Iterators/SparseGridGpu_iterator_sub.hpp
+++ b/src/SparseGridGpu/Iterators/SparseGridGpu_iterator_sub.hpp
@@ -8,39 +8,220 @@
 #ifndef SPARSEGRIDGPU_ITERATOR_SUB_HPP_
 #define SPARSEGRIDGPU_ITERATOR_SUB_HPP_

-template<unsigned int dim/*, typename blockMap_type*/>
+#include "SparseGridGpu_iterator.hpp"
+
+template<unsigned int dim, typename SparseGridType>
 class SparseGridGpu_iterator_sub
 {
+	//! actual chunk
 	unsigned int chunk;

-	unsigned int pnt;
-	Box<dim,size_t> sub;
+	//! original SparseGrid
+	const SparseGridType * sparseGrid;
+
+	//! chunk coordinates
+	grid_key_dx<dim,int> chunk_coord;
+
+	//! array type for the indexes
+	typedef typename std::remove_reference<decltype(sparseGrid->private_get_index_array())>::type index_array_type;
+
+	//! array type for the data
+	typedef typename std::remove_reference<decltype(sparseGrid->private_get_data_array())>::type data_array_type;
+
+	//! vector of the chunk indexes
+	const typename std::remove_reference<decltype(sparseGrid->private_get_index_array())>::type * ids;
+
+	//! vector containing the data of each chunk
+	const typename std::remove_reference<decltype(sparseGrid->private_get_data_array())>::type * data;
+
+	//Get the chunk type
+	typedef typename boost::mpl::at<typename data_array_type::value_type::type,boost::mpl::int_<0>>::type chunk_type;
+
+	//Get the position of the mask property
+	typedef boost::mpl::int_<boost::mpl::size<typename data_array_type::value_type::type>::type::value-1> pMask;
+
+	// subset in grid coordinates
+	Box<dim,int> sub_set;
+
+	// subset in local chunk coordinates
+	Box<dim,int> res;
+
+	// in chunk iterator
+	grid_key_dx_iterator_sub<dim> in_chunk_it;
+
+	// chunk size
+	grid_sm<dim,void> chunk_sz;
+
+	/*! \brief initialize the chunk iterator
+	 *
+	 *
+	 */
+	void initialize_chunk_it()
+	{
+		// compute if the chunk intersect the start - stop box
+		chunk_coord = sparseGrid->getCoord(ids->template get<0>(chunk)*sparseGrid->getBlockSize());
+
+		Box<dim,int> box;
+
+		for (int i = 0 ; i < dim ; i++)
+		{
+			box.setLow(i,chunk_coord.get(i));
+			box.setHigh(i,chunk_coord.get(i) + sparseGrid->getBlockEdgeSize() - 1);
+		}
-//	blockMap_type & blockMap;
+
+		if (sub_set.Intersect(box,res) == true)
+		{
+			// remove the offset
+			for (int i = 0 ; i < dim ; i++)
+			{
+				res.setLow(i,res.getLow(i) - chunk_coord.get(i));
+				res.setHigh(i,res.getHigh(i) - chunk_coord.get(i));
+			}
+
+			in_chunk_it.reinitialize(grid_key_dx_iterator_sub<dim>(chunk_sz,res.getKP1(),res.getKP2()));
+
+			while (in_chunk_it.isNext() == true && data->template get<pMask::value>(chunk)[chunk_sz.LinId(in_chunk_it.get())] == 0)
+			{
+				++in_chunk_it;
+			}
+		}
+	}
+
+	//! Select the first valid point chunk
+	void SelectValid()
+	{
+		while (in_chunk_it.isNext() == true && data->template get<pMask::value>(chunk)[chunk_sz.LinId(in_chunk_it.get())] == 0)
+		{
+			++in_chunk_it;
+		}
+
+		while (in_chunk_it.isNext() == false && chunk < ids->size())
+		{
+			chunk++;
+
+			if (chunk < ids->size()) {initialize_chunk_it();} // chunk++ can step one past the last chunk
+		}
+	}
+
+	/*! \brief Initialize chunk_sz member
+	 *
+	 *
+	 */
+	void initialize_chunk_sz()
+	{
+		size_t sz[dim];
+
+		for (int i = 0 ; i < dim ; i++)
+		{sz[i] = sparseGrid->getBlockEdgeSize();}
+
+		chunk_sz.setDimensions(sz);
+	}

 public:

+	/*! \brief Default constructor
+	 *
+	 */
+	inline SparseGridGpu_iterator_sub()
+	:chunk(0),
+	 sparseGrid(NULL),
+	 ids(NULL),
+	 data(NULL)
+	{
+		// chunk_sz cannot be filled without a grid: call reinitialize() before using the iterator
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param sparseGrid original sparse grid
+	 * \param start starting point
+	 * \param stop stop point
+	 *
+	 */
+	inline SparseGridGpu_iterator_sub(const SparseGridType & sparseGrid,const grid_key_dx<dim> & start,const grid_key_dx<dim> & stop)
+	:chunk(0),
+	 sparseGrid(&sparseGrid),
+	 ids(&sparseGrid.private_get_index_array()),
+	 data(&sparseGrid.private_get_data_array())
+	{
+		for (int i = 0; i < dim ; i++)
+		{
+			sub_set.setLow(i,start.get(i));
+			sub_set.setHigh(i,stop.get(i));
+		}
+
+		initialize_chunk_sz();
+		in_chunk_it.invalidate();
+		if (ids->size() != 0) {initialize_chunk_it();} // guard against an empty grid
+
+		SelectValid();
+	}
+
 	/*! \brief Reinitialize the iterator
 	 *
-	 * it re-initialize the iterator with the passed grid_key_dx_iterator_sub
-	 * the actual position of the grid_key_dx_iterator_sub is ignored
+	 * \param it_sub subiterator
+	 *
+	 */
+	inline void reinitialize(const SparseGridGpu_iterator_sub<dim,SparseGridType> & it_sub)
+	{
+		this->operator=(it_sub);
+	}
+
+	/*! \brief Check if there is the next element
+	 *
+	 * Check if there is the next element
 	 *
-	 * \param g_s_it grid_key_dx_iterator_sub
+	 * \return true if there is the next, false otherwise
 	 *
 	 */
-	inline void reinitialize(const SparseGridGpu_iterator_sub & g_s_it)
+	bool isNext() const
 	{
-		// Reinitialize the iterator
-		chunk = g_s_it.chunk;
-		pnt = g_s_it.pnt;
+		return chunk < ids->size();
 	}

+	/*! \brief Get the next element
+	 *
+	 * Get the next element
+	 *
+	 * \return itself
+	 *
+	 */
+	inline SparseGridGpu_iterator_sub<dim,SparseGridType> & operator++()
+	{
+		++in_chunk_it;
-};

+		SelectValid();

-class SparseGridGpu_iterator
-{
+		return *this;
+	}
+
+	/*!
\brief return the actual point + * + * \return the index of the actual point + * + */ + inline sparse_grid_gpu_index<SparseGridType> get() const + { + sparse_grid_gpu_index<SparseGridType> spgi(*sparseGrid,chunk,chunk_sz.LinId(in_chunk_it.get())); + return spgi; + } + + SparseGridGpu_iterator_sub<dim,SparseGridType> & operator=(const SparseGridGpu_iterator_sub<dim,SparseGridType> & it_sub) + { + chunk = it_sub.chunk; + sparseGrid = it_sub.sparseGrid; + chunk_coord = it_sub.chunk_coord; + ids = it_sub.ids; + data = it_sub.data; + sub_set = it_sub.sub_set; + res = it_sub.res; + in_chunk_it.reinitialize(it_sub.in_chunk_it); + chunk_sz = it_sub.chunk_sz; + + return *this; + } }; diff --git a/src/SparseGridGpu/SparseGridGpu.hpp b/src/SparseGridGpu/SparseGridGpu.hpp index c5da1021909a8ec8270fa86b711e5122cfffe363..20775c1263c5e158c98bb930e81cb24e4349648d 100644 --- a/src/SparseGridGpu/SparseGridGpu.hpp +++ b/src/SparseGridGpu/SparseGridGpu.hpp @@ -14,6 +14,7 @@ #include "Iterators/SparseGridGpu_iterator_sub.hpp" #include "Geometry/grid_zmb.hpp" #include "util/stat/common_statistics.hpp" +#include "Iterators/SparseGridGpu_iterator.hpp" #if defined(OPENFPM_DATA_ENABLE_IO_MODULE) || defined(PERFORMANCE_TEST) #include "VTKWriter/VTKWriter.hpp" #endif @@ -364,11 +365,14 @@ private: openfpm::vector_gpu<aggregate<indexT>> nn_blocks; + typedef SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer> self; + protected: static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size; typedef AggregateBlockT AggregateInternalT; public: + static constexpr unsigned int dims = dim; static constexpr unsigned int blockEdgeSize_ = blockEdgeSize; @@ -376,7 +380,7 @@ public: template<typename Tfunc> using layout_mfunc = memory_traits_inte<Tfunc>; - typedef grid_key_dx<dim, indexT> base_key; + typedef sparse_grid_gpu_index<self> base_key; /*! \brief return the size of the grid * @@ -385,7 +389,7 @@ public: */ inline size_t size() const { - return gridGeometry.size(); + return this->countExistingElements(); } /*! \brief This is a meta-function return which type of sub iterator a grid produce @@ -394,9 +398,9 @@ public: * */ template <typename stencil = no_stencil> - static SparseGridGpu_iterator_sub<dim> type_of_subiterator() + static SparseGridGpu_iterator_sub<dim,self> type_of_subiterator() { - return SparseGridGpu_iterator_sub<dim>(); + return SparseGridGpu_iterator_sub<dim,self>(); } /*! \brief This is a meta-function return which type of iterator a grid produce @@ -404,9 +408,9 @@ public: * \return the type of the sub-grid iterator * */ - static SparseGridGpu_iterator type_of_iterator() + static SparseGridGpu_iterator<dim,self> type_of_iterator() { - return SparseGridGpu_iterator(); + return SparseGridGpu_iterator<dim,self>(std::declval<self>()); } template<typename dim3T> @@ -647,6 +651,8 @@ private: public: + typedef AggregateT value_type; + SparseGridGpu() :stencilSupportRadius(1) {}; @@ -788,7 +794,7 @@ public: return gridGeometry.LinId(coord); } - inline grid_key_dx<dim, int> getCoord(size_t linId) + inline grid_key_dx<dim, int> getCoord(size_t linId) const { return gridGeometry.InvLinId(linId); } @@ -798,20 +804,64 @@ public: return gridSize.getGPUIterator(start,stop,n_thr); } - // Data management methods + /*! 
\brief Get an element using the point coordinates
+	 *
+	 * \tparam p property index
+	 *
+	 * \param coord point coordinates
+	 *
+	 * \return the element
+	 *
+	 */
 	template<unsigned int p, typename CoordT>
-	auto get(const CoordT &coord) const -> const ScalarTypeOf<AggregateBlockT, p> &
+	auto get(const grid_key_dx<dim,CoordT> & coord) const -> const ScalarTypeOf<AggregateBlockT, p> &
 	{
 		return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::template get<p>(gridGeometry.LinId(coord));
 	}

+	/*! \brief Get an element using sparse_grid_gpu_index (this index guarantees that the point exists)
+	 *
+	 * \tparam p property index
+	 *
+	 * \param coord element index
+	 *
+	 * \return the element
+	 *
+	 */
+	template<unsigned int p>
+	auto get(const sparse_grid_gpu_index<self> & coord) const -> const ScalarTypeOf<AggregateBlockT, p> &
+	{
+		return private_get_data_array().template get<p>(coord.get_cnk_pos_id())[coord.get_data_id()];
+	}
+
+	/*! \brief Get an element using sparse_grid_gpu_index (this index guarantees that the point exists)
+	 *
+	 * \tparam p property index
+	 *
+	 * \param coord element index
+	 *
+	 * \return the element
+	 *
+	 */
+	template<unsigned int p>
+	auto get(const sparse_grid_gpu_index<self> & coord) -> ScalarTypeOf<AggregateBlockT, p> &
+	{
+		return private_get_data_array().template get<p>(coord.get_cnk_pos_id())[coord.get_data_id()];
+	}
+
 	template<unsigned int p, typename CoordT>
 	auto insert(const CoordT &coord) -> ScalarTypeOf<AggregateBlockT, p> &
 	{
 		return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::template insert<p>(gridGeometry.LinId(coord));
 	}

+	/*! \brief Before inserting any element you must call this function to initialize the insert buffer
+	 *
+	 * \param nBlock number of blocks the insert buffer has
+	 * \param nSlot maximum number of insertions each thread block can do
+	 *
+	 */
 	template<typename dim3T>
 	void setGPUInsertBuffer(dim3T nBlock, dim3T nSlot)
 	{
@@ -904,7 +954,7 @@ public:
 				threadGridSize, localThreadBlockSize,indexBuffer.toKernel(), this->toKernel(),nn_blocks.toKernel());
 	}

-	size_t countExistingElements()
+	size_t countExistingElements() const
 	{
 		// Here it is crucial to use "auto &" as the type, as we need to be sure to pass the reference to the actual buffers!
 		auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
@@ -1143,6 +1193,8 @@ public:
 		::unsetBit(bitMask, PADDING_BIT);
 	}

+
+
 	/*! \brief Insert the point on host side and flush directly
 	 *
 	 * First you have to move everything on host with deviceToHost, insertFlush and than move to GPU again
@@ -1195,6 +1247,46 @@ public:
 		}
 	}

+	/*! \brief Return a SparseGrid iterator
+	 *
+	 * \return a SparseGrid iterator
+	 *
+	 */
+	decltype(self::type_of_iterator()) getIterator() const
+	{
+		return decltype(self::type_of_iterator())(*this);
+	}
+
+	/*! \brief Return a SparseGrid iterator only on a sub-set of elements
+	 *
+	 * \return a SparseGrid iterator on a subset of elements
+	 *
+	 */
+	decltype(self::type_of_subiterator()) getIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop) const
+	{
+		return decltype(self::type_of_subiterator())(*this,start,stop);
+	}
+
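The two getIterator overloads above return the new host iterators. The boxed variant visits only the existing points inside [start, stop]; a sketch mirroring the sub-iterator test added below:

    grid_key_dx<3> start({303,303,303});
    grid_key_dx<3> stop({337,337,337});

    auto it = sparseGrid.getIterator(start,stop);
    while (it.isNext())
    {
        auto key = it.get();                   // sparse_grid_gpu_index
        sparseGrid.template get<0>(key) = 5.0; // write through the index, no search
        ++it;
    }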
+	/*! \brief Return the index array of the blocks
+	 *
+	 * \return the index array of the blocks
+	 *
+	 */
+	auto private_get_index_array() const -> decltype(BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer()) &
+	{
+		return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
+	}
+
+	/*! \brief Return the data array of the blocks
+	 *
+	 * \return the data array of the blocks
+	 *
+	 */
+	auto private_get_data_array() -> decltype(BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer()) &
+	{
+		return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
+	}
+
 	/*! \brief Return the index array of the blocks
 	 *
 	 * \return the index arrays of the blocks
 	 *
 	 */
@@ -1210,7 +1302,7 @@ public:
 	 * \return the index arrays of the blocks
 	 *
 	 */
-	auto private_get_data_array() -> decltype(BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer())
+	auto private_get_data_array() const -> decltype(BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer()) &
 	{
 		return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
 	}
diff --git a/src/SparseGridGpu/SparseGridGpu_ker.cuh b/src/SparseGridGpu/SparseGridGpu_ker.cuh
index a9d168f069315674c04e76988a6cb3b42dfcb225..929fbee1bf0439bd0429842a21420d17b5f0b11c 100644
--- a/src/SparseGridGpu/SparseGridGpu_ker.cuh
+++ b/src/SparseGridGpu/SparseGridGpu_ker.cuh
@@ -82,6 +82,34 @@ public:
 	}

+	/*! \brief Given a point to insert, return the block-id and offset of that point
+	 * \param itd insertion iterator domain (start/stop box of the insertion)
+	 * \param p Point the thread is processing
+	 * \param blk Block id the point is falling into
+	 * \param offset linearized index within the block
+	 *
+	 * \return true if the point is active (inside the start/stop bounds)
+	 *
+	 */
+	template<typename ite_type>
+	inline __device__ bool getInsertBlockOffset(const ite_type & itd, const grid_key_dx<dim, int> & p, grid_key_dx<dim, int> & blk, int & offset)
+	{
+		int accu = 1;
+		offset = 0;
+
+		bool active = true;
+
+		for (int i = 0 ; i < dim ; i++)
+		{
+			blk.set_d(i,p.get(i) / getBlockEdgeSize());
+			offset += (p.get(i) % getBlockEdgeSize()) * accu;
+			accu *= getBlockEdgeSize();
+			active = active && (p.get(i) >= (itd.start.get(i) + itd.start_base.get(i))) && (p.get(i) <= itd.stop.get(i));
+		}
+
+		return active;
+	}
+
 	template<typename CoordT>
 	inline __device__ size_t getBlockLinId(CoordT blockCoord) const
 	{
diff --git a/src/SparseGridGpu/performance/Stencil_performance_tests.cu b/src/SparseGridGpu/performance/Stencil_performance_tests.cu
index f037187abecaff81cb35ba89134141630bc325c6..595fd4dcc233d12c99ed49b3716b7fd08c32b8a5 100644
--- a/src/SparseGridGpu/performance/Stencil_performance_tests.cu
+++ b/src/SparseGridGpu/performance/Stencil_performance_tests.cu
@@ -27,18 +27,6 @@ report_sparse_grid_tests report_sparsegrid_funcs;
 std::string suiteURI = "performance.SparseGridGpu";
 std::set<std::string> testSet;

-//// 2D params
-//const unsigned int minSpatialEdgeSize2D = 512;
-//const unsigned int maxSpatialEdgeSize2D = 8192;
-//const unsigned int minBlockEdgeSize2D = 2;
-//const unsigned int maxBlockEdgeSize2D = 32;
-//// 3D params
-//const unsigned int minSpatialEdgeSize3D = 64;
-//const unsigned int maxSpatialEdgeSize3D = 512;
-//const unsigned int minBlockEdgeSize3D = 2;
-//const unsigned int maxBlockEdgeSize3D = 8;
-////
-
 struct Fixture
 {
 	Fixture()
@@ -1146,6 +1134,7 @@
BOOST_AUTO_TEST_SUITE(SparseGridGpu_test) BOOST_AUTO_TEST_CASE(testConv3x3x3_gridScaling) { std::string testURI = suiteURI + ".device.conv3x3x3.sparse.N.3D.gridScaling"; + unsigned int counter = 0; launch_testConv3x3x3_perf(testURI, counter++); testSet.insert(testURI); diff --git a/src/SparseGridGpu/tests/SparseGridGpu_tests.cu b/src/SparseGridGpu/tests/SparseGridGpu_tests.cu index cd0f57bd9e7ed39ef21dbd4f9e842faca53db901..ec58f5d8cdcfe3223bba2139521c3bf277c5caad 100644 --- a/src/SparseGridGpu/tests/SparseGridGpu_tests.cu +++ b/src/SparseGridGpu/tests/SparseGridGpu_tests.cu @@ -160,15 +160,6 @@ __global__ void copyBlocksToOutput(SparseGridType sparseGrid, VectorOutType outp auto value = sparseGrid.template get<p>(coord); -// printf("copyBlocksToOutput: bDim=(%d,%d), bId=(%d,%d), tId=(%d,%d) : " -// "pos=%ld, coord={%d,%d}, value=%d\n", -// bDimX, bDimY, -// bIdX, bIdY, -// tIdX, tIdY, -// pos, -// x, y, -// static_cast<int>(value)); //debug - output.template get<p>(pos) = value; // Compiler avoid warning @@ -331,12 +322,9 @@ struct HeatStencil SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value; __shared__ ScalarT enlargedBlock[enlargedBlockSize]; -// sparseGrid.loadBlock<p>(dataBlockLoad, enlargedBlock); -// sparseGrid.loadGhost<p>(dataBlockId, enlargedBlock); sparseGrid.loadGhostBlock<p_src>(dataBlockLoad,dataBlockIdPos,enlargedBlock); -// sparseGrid.loadGhost<p>(dataBlockId, nullptr, enlargedBlock); __syncthreads(); if (applyStencilHere) @@ -440,18 +428,6 @@ struct HeatStencil2 ScalarT laplacian = -2.0 * dim * cur; // The central part of the stencil for (int d = 0; d < dim; ++d) { -// const auto boundary = boundaryDirection[d]; -// ScalarT neighbourPlus = enlargedBlock[nPlusId[d]]; -// ScalarT neighbourMinus = enlargedBlock[nMinusId[d]]; -// if (boundary == 1) -// { -// neighbourPlus = *(nPlus[d]); -// } -// else if (boundary == -1) -// { -// neighbourMinus = *(nMinus[d]); -// } -// laplacian += neighbourMinus + neighbourPlus; laplacian += *(nMinus[d]) + *(nPlus[d]); } enlargedBlock[linId] = cur + dt * laplacian; @@ -518,7 +494,6 @@ BOOST_AUTO_TEST_CASE(testInsert) bool match = true; for (size_t i = 0; i < output.size(); i++) { -// auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666; auto coord = sparseGrid.getCoord(i); auto expectedValue = coord.get(0); @@ -572,17 +547,9 @@ BOOST_AUTO_TEST_CASE(testInsert3D) bool match = true; for (size_t i = 0; i < output.size(); i++) { -// auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666; auto coord = sparseGrid.getCoord(i); auto expectedValue = coord.get(0); -// std::cout << "sparseGrid(" << coord.get(0) << "," << coord.get(1) << "," << coord.get(2) << ") = " -// << sparseGrid.template get<0>(coord) -// << " == " -// << expectedValue -// << " == " -// << output.template get<0>(i) << " = output(" << i << ")" -// << std::endl; match &= output.template get<0>(i) == sparseGrid.template get<0>(coord); match &= output.template get<0>(i) == expectedValue; } @@ -751,9 +718,7 @@ BOOST_AUTO_TEST_CASE(testTagBoundaries2) } /////// -// sparseGrid.hostToDevice(); //just sync masks sparseGrid.deviceToHost(); //just sync masks -// sparseGrid.deviceToHost<0>(); sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! 
// Now tag the boundaries @@ -815,10 +780,6 @@ BOOST_AUTO_TEST_CASE(testStencilHeat) insertConstantValue<0> << < gridSize, blockSizeInsert >> > (sparseGrid.toKernel(), 0); sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); -// sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert); -// insertBoundaryValuesHeat<0> << < gridSize, blockSizeInsert >> > (sparseGrid.toKernel()); -// sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! sparseGrid.tagBoundaries(); @@ -837,10 +798,6 @@ BOOST_AUTO_TEST_CASE(testStencilHeat) cudaDeviceSynchronize(); sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); -// // DEBUG // -// sparseGrid.deviceToHost<0,1>(); -// sparseGrid.write("test_heat_stencil"+std::to_string(iter)+".vtk"); -// //// } sparseGrid.deviceToHost<0,1>(); @@ -901,10 +858,6 @@ BOOST_AUTO_TEST_CASE(testStencilHeatInsert) CUDA_LAUNCH_DIM3((insertBoundaryValuesHeat<0>),gridSize, blockSizeInsert,sparseGrid.toKernel()); sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); -// sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert); -// CUDA_LAUNCH_DIM3((insertBoundaryValuesHeat<1>),gridSize, blockSizeInsert,sparseGrid.toKernel()); -// sparseGrid.flush < smax_< 1 >> (ctx, flush_type::FLUSH_ON_DEVICE); - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! // // Now tag the boundaries @@ -918,7 +871,6 @@ BOOST_AUTO_TEST_CASE(testStencilHeatInsert) for (unsigned int iter=0; iter<maxIter; ++iter) { sparseGrid.applyStencils<HeatStencil<dim, 0,0>>(STENCIL_MODE_INSERT, 0.1); -// sparseGrid.applyStencils<HeatStencil<dim, 1,0>>(STENCIL_MODE_INSERT, 0.1); } @@ -988,6 +940,18 @@ BOOST_AUTO_TEST_CASE(testFlushInsert) sparseGrid.insertFlush<0>(grid_key_dx<3>({36,86,27})) = 9.0; sparseGrid.insertFlush<0>(grid_key_dx<3>({34,36,7})) = 10.0; + ////// Add points in the same blocks + + sparseGrid.insertFlush<0>(grid_key_dx<3>({4,6,7})) = 2.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({12,16,17})) = 3.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({12,46,27})) = 4.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({35,63,11})) = 5.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({38,96,47})) = 6.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({131,56,37})) = 7.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({132,76,17})) = 8.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({37,86,27})) = 9.0; + sparseGrid.insertFlush<0>(grid_key_dx<3>({35,36,7})) = 10.0; + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({3,6,7})),2.0); BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,16,17})),3.0); BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,46,27})),4.0); @@ -998,6 +962,16 @@ BOOST_AUTO_TEST_CASE(testFlushInsert) BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,86,27})),9.0); BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({34,36,7})),10.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({4,6,7})),2.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,16,17})),3.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,46,27})),4.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,63,11})),5.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({38,96,47})),6.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,56,37})),7.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({132,76,17})),8.0); + 
BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,86,27})),9.0); + BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,36,7})),10.0); + sparseGrid.template hostToDevice<0>(); // Check on device I can get information @@ -1198,122 +1172,221 @@ BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput) sparseGrid.write("SparseGridGPU_output.vtk"); } - BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D) - { - constexpr unsigned int dim = 3; - constexpr unsigned int blockEdgeSize = 4; - typedef aggregate<float> AggregateT; +BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D) +{ + constexpr unsigned int dim = 3; + constexpr unsigned int blockEdgeSize = 4; + typedef aggregate<float> AggregateT; - size_t sz[3] = {512,512,512}; + size_t sz[3] = {512,512,512}; // dim3 gridSize(128,128,128); - dim3 gridSize(32,32,32); + dim3 gridSize(32,32,32); - grid_smb<dim, blockEdgeSize> blockGeometry(sz); - SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); - mgpu::ofp_context_t ctx; - sparseGrid.template setBackgroundValue<0>(0); + grid_smb<dim, blockEdgeSize> blockGeometry(sz); + SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); + mgpu::ofp_context_t ctx; + sparseGrid.template setBackgroundValue<0>(0); - grid_key_dx<3,int> start({256,256,256}); + grid_key_dx<3,int> start({256,256,256}); - // Insert values on the grid - sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); - CUDA_LAUNCH_DIM3((insertSphere3D<0>), - gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), - sparseGrid.toKernel(), start, 64, 56, 1); - sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + // Insert values on the grid + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start, 64, 56, 1); + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! - sparseGrid.tagBoundaries(); + sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! 
+ sparseGrid.tagBoundaries(); - sparseGrid.template applyBoundaryStencils<BoundaryStencilSetX<dim,0,0>>(); + sparseGrid.template applyBoundaryStencils<BoundaryStencilSetX<dim,0,0>>(); - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); - sparseGrid.template deviceToHost<0>(); + sparseGrid.template deviceToHost<0>(); - sparseGrid.write("SparseGridGPU_output3D.vtk"); - } + sparseGrid.write("SparseGridGPU_output3D.vtk"); - BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil) - { - constexpr unsigned int dim = 3; - constexpr unsigned int blockEdgeSize = 4; - typedef aggregate<float, float> AggregateT; + bool test = compare("SparseGridGPU_output3D.vtk","test_data/SparseGridGPU_output3D_test.vtk"); + BOOST_REQUIRE_EQUAL(true,test); +} + +BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil) +{ + constexpr unsigned int dim = 3; + constexpr unsigned int blockEdgeSize = 4; + typedef aggregate<float, float> AggregateT; - size_t sz[3] = {512,512,512}; + size_t sz[3] = {512,512,512}; // dim3 gridSize(128,128,128); - dim3 gridSize(32,32,32); - - grid_smb<dim, blockEdgeSize> blockGeometry(sz); - SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); - mgpu::ofp_context_t ctx; - sparseGrid.template setBackgroundValue<0>(0); - - ///// Insert sparse content, a set of 3 hollow spheres ///// - // Sphere 1 - grid_key_dx<3,int> start1({256,256,256}); - sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); - CUDA_LAUNCH_DIM3((insertSphere3D<0>), - gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), - sparseGrid.toKernel(), start1, 64, 32, 1); - cudaDeviceSynchronize(); - sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - cudaDeviceSynchronize(); + dim3 gridSize(32,32,32); - // Sphere 2 - grid_key_dx<3,int> start2({192,192,192}); - sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); - CUDA_LAUNCH_DIM3((insertSphere3D<0>), - gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), - sparseGrid.toKernel(), start2, 64, 44, 1); - cudaDeviceSynchronize(); - sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - cudaDeviceSynchronize(); + grid_smb<dim, blockEdgeSize> blockGeometry(sz); + SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); + mgpu::ofp_context_t ctx; + sparseGrid.template setBackgroundValue<0>(0); - // Sphere 3 - grid_key_dx<3,int> start3({340,192,192}); - sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); - CUDA_LAUNCH_DIM3((insertSphere3D<0>), - gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), - sparseGrid.toKernel(), start3, 20, 15, 1); - cudaDeviceSynchronize(); - sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - cudaDeviceSynchronize(); - ///// ///// + ///// Insert sparse content, a set of 3 hollow spheres ///// + // Sphere 1 + grid_key_dx<3,int> start1({256,256,256}); + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start1, 64, 32, 1); + cudaDeviceSynchronize(); + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + cudaDeviceSynchronize(); - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! 
- sparseGrid.tagBoundaries(); + // Sphere 2 + grid_key_dx<3,int> start2({192,192,192}); + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start2, 64, 44, 1); + cudaDeviceSynchronize(); + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + cudaDeviceSynchronize(); - // Now apply some boundary conditions - sparseGrid.template applyBoundaryStencils<BoundaryStencilSetXRescaled<dim,0,0>>( - 192, 384, - 0.0, 10.0); - cudaDeviceSynchronize(); + // Sphere 3 + grid_key_dx<3,int> start3({340,192,192}); + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start3, 20, 15, 1); + cudaDeviceSynchronize(); + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + cudaDeviceSynchronize(); + ///// ///// - // Now apply the laplacian operator + sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! + sparseGrid.tagBoundaries(); + + // Now apply some boundary conditions + sparseGrid.template applyBoundaryStencils<BoundaryStencilSetXRescaled<dim,0,0>>( + 192, 384, + 0.0, 10.0); + cudaDeviceSynchronize(); + + // Now apply the laplacian operator // const unsigned int maxIter = 1000; - const unsigned int maxIter = 100; - for (unsigned int iter=0; iter<maxIter; ++iter) - { - for (int innerIter=0; innerIter<10; ++innerIter) - { - sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - } - // DEBUG // - sparseGrid.deviceToHost<0,1>(); - sparseGrid.write("SparseGridGPU_output3DHeatStencil_"+std::to_string(iter)+".vtk"); - //// - } + const unsigned int maxIter = 100; + for (unsigned int iter=0; iter<maxIter; ++iter) + { + for (int innerIter=0; innerIter<10; ++innerIter) + { + sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(STENCIL_MODE_INPLACE, 0.1); + cudaDeviceSynchronize(); + sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(STENCIL_MODE_INPLACE, 0.1); + cudaDeviceSynchronize(); + } + } - sparseGrid.template deviceToHost<0,1>(); + sparseGrid.deviceToHost<0,1>(); + sparseGrid.write("SparseGridGPU_output3DHeatStencil.vtk"); +} - sparseGrid.write("SparseGridGPU_output3DHeatStencil.vtk"); - } +BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_host) +{ + constexpr unsigned int dim = 3; + constexpr unsigned int blockEdgeSize = 4; + typedef aggregate<float, float> AggregateT; + + size_t sz[3] = {512,512,512}; +// dim3 gridSize(128,128,128); + dim3 gridSize(32,32,32); + + grid_smb<dim, blockEdgeSize> blockGeometry(sz); + SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); + mgpu::ofp_context_t ctx; + sparseGrid.template setBackgroundValue<0>(0); + + ///// Insert sparse content, a set of 3 hollow spheres ///// + // Sphere 1 + grid_key_dx<3,int> start1({256,256,256}); + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start1, 64, 32, 1); + + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + + sparseGrid.template deviceToHost<0>(); + + bool match = true; + + int count = 0; + + auto it = sparseGrid.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + match &= 
sparseGrid.template get<0>(key) == 1.0; + //unsigned char bl = sparseGrid.template get<2>(key); + + count++; + + ++it; + } + + BOOST_REQUIRE_EQUAL(sparseGrid.countExistingElements(),count); + BOOST_REQUIRE_EQUAL(match,true); +} + +BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_sub_host) +{ + constexpr unsigned int dim = 3; + constexpr unsigned int blockEdgeSize = 4; + typedef aggregate<float, float> AggregateT; + + size_t sz[3] = {768,768,768}; + dim3 gridSize(32,32,32); + + grid_smb<dim, blockEdgeSize> blockGeometry(sz); + SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); + mgpu::ofp_context_t ctx; + sparseGrid.template setBackgroundValue<0>(0); + + ///// Insert sparse content, a set of 3 hollow spheres ///// + // Sphere 1 + grid_key_dx<3,int> start1({256,256,256}); + sparseGrid.setGPUInsertBuffer(gridSize,dim3(1)); + CUDA_LAUNCH_DIM3((insertSphere3D<0>), + gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1), + sparseGrid.toKernel(), start1, 32, 0, 1); + + sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); + + sparseGrid.template deviceToHost<0>(); + + bool match = true; + + int count = 0; + + grid_key_dx<3> start({303,303,303}); + grid_key_dx<3> stop({337,337,337}); + + auto it = sparseGrid.getIterator(start,stop); + + while (it.isNext()) + { + auto key = it.get(); + + match &= sparseGrid.template get<0>(key) == 1.0; + + sparseGrid.template get<0>(key) = 5.0; + + count++; + + ++it; + } + + BOOST_REQUIRE_EQUAL(match,true); + BOOST_REQUIRE_EQUAL(count,42875); +} #endif diff --git a/src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh b/src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh index 90446c5b687d368aa7d15f0e0d3039a3d914b7be..43fee90b244a68333853f823678bcb551687ece3 100644 --- a/src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh +++ b/src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh @@ -157,13 +157,13 @@ __global__ void insertSphere3D(SparseGridType sparseGrid, grid_key_dx<3,int> sta grid_key_dx<3,int> keyg; keyg = sparseGrid.getGlobalCoord(blk,offset); - const long long x = keyg.get(0) - (start.get(0) + gridDim.x / 2 * SparseGridType::blockEdgeSize_); - const long long y = keyg.get(1) - (start.get(1) + gridDim.y / 2 * SparseGridType::blockEdgeSize_); - const long long z = keyg.get(2) - (start.get(2) + gridDim.z / 2 * SparseGridType::blockEdgeSize_); + const long int x = (long int)keyg.get(0) - (start.get(0) + gridDim.x / 2 * SparseGridType::blockEdgeSize_); + const long int y = (long int)keyg.get(1) - (start.get(1) + gridDim.y / 2 * SparseGridType::blockEdgeSize_); + const long int z = (long int)keyg.get(2) - (start.get(2) + gridDim.z / 2 * SparseGridType::blockEdgeSize_); float radius = sqrt((float) (x*x + y*y + z*z)); - bool is_active = radius < r1 && radius > r2; + bool is_active = radius < r1 && radius >= r2; if (is_active == true) { @@ -179,7 +179,6 @@ __global__ void insertSphere3D(SparseGridType sparseGrid, grid_key_dx<3,int> sta if ( is_active == true) { ec.template get<p>()[offset] = value; -// ec.template get<p>()[offset] = x; BlockMapGpu_ker<>::setExist(ec.template get<pMask>()[offset]); } } @@ -215,9 +214,9 @@ __global__ void insertSphere3D_radius(SparseGridType sparseGrid, grid_key_dx<3,i grid_key_dx<3,int> keyg; keyg = sparseGrid.getGlobalCoord(blk,offset); - const long long x = keyg.get(0) - (start.get(0) + gridDim.x / 2 * SparseGridType::blockEdgeSize_); - const long long y = keyg.get(1) - (start.get(1) + gridDim.y / 2 * SparseGridType::blockEdgeSize_); - const long 
long z = keyg.get(2) - (start.get(2) + gridDim.z / 2 * SparseGridType::blockEdgeSize_); + const long int x = (long int)keyg.get(0) - (start.get(0) + gridDim.x / 2 * SparseGridType::blockEdgeSize_); + const long int y = (long int)keyg.get(1) - (start.get(1) + gridDim.y / 2 * SparseGridType::blockEdgeSize_); + const long int z = (long int)keyg.get(2) - (start.get(2) + gridDim.z / 2 * SparseGridType::blockEdgeSize_); float radius = sqrt((float) (x*x + y*y + z*z)); diff --git a/src/Vector/map_vector_sparse.hpp b/src/Vector/map_vector_sparse.hpp index 36a88c89d5fcb749324008370651b45f6f4054db..b610e816ef70e33f06677fe5d48342c424e41914 100644 --- a/src/Vector/map_vector_sparse.hpp +++ b/src/Vector/map_vector_sparse.hpp @@ -1447,6 +1447,13 @@ namespace openfpm } public: + + vector_sparse() + :max_ele(0) + { + vct_data.resize(1); + } + /*! \brief Get the indices buffer * * \return the reference to the indices buffer @@ -1464,13 +1471,24 @@ namespace openfpm { return vct_data; } - public: - vector_sparse() - :max_ele(0) - { - vct_data.resize(1); - } + /*! \brief Get the indices buffer + * + * \return the reference to the indices buffer + */ + auto getIndexBuffer() const -> const decltype(vct_index)& + { + return vct_index; + } + + /*! \brief Get the data buffer + * + * \return the reference to the data buffer + */ + auto getDataBuffer() const -> const decltype(vct_data)& + { + return vct_data; + } /*! \brief Get an element of the vector * @@ -1598,7 +1616,7 @@ namespace openfpm if (v == ele) { // block exist - return vct_data.template get<p>(ele); + return vct_data.template get<p>(di); } // It does not exist, we create it di contain the index where we have to create the new block @@ -1623,7 +1641,7 @@ namespace openfpm if (v == ele) { // block exist - return vct_data.get(ele); + return vct_data.get(di); } // It does not exist, we create it di contain the index where we have to create the new block diff --git a/test_data/SparseGridGPU_output3D_test.vtk b/test_data/SparseGridGPU_output3D_test.vtk new file mode 100644 index 0000000000000000000000000000000000000000..e9211f38b7e3669cf668f9f996890a758c092f2e Binary files /dev/null and b/test_data/SparseGridGPU_output3D_test.vtk differ
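Taken together, the pieces touched by this patch compose into the host-side workflow below, condensed from the tests above (grid sizes, kernel and reduction names all come from those tests):

    size_t sz[3] = {512,512,512};
    grid_smb<3, 4> blockGeometry(sz);
    SparseGridGpu<3, aggregate<float>, 4, 64, long int> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // size the insert buffer, insert on the device, then flush
    grid_key_dx<3,int> start({256,256,256});
    sparseGrid.setGPUInsertBuffer(dim3(32,32,32), dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>), dim3(32,32,32), dim3(4*4*4,1,1),
                     sparseGrid.toKernel(), start, 64, 56, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // read back and walk the existing points with the new iterator
    sparseGrid.template deviceToHost<0>();
    auto it = sparseGrid.getIterator();
    while (it.isNext())
    {
        auto key = it.get();
        float v = sparseGrid.template get<0>(key);
        ++it;
    }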