From 6e0ef634e0bf26768016c5b587517b2ea5137b1c Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Wed, 8 Jun 2022 23:35:22 +0200 Subject: [PATCH] Adding conv_3 --- src/Grid/comb.hpp | 24 ++--- src/Grid/grid_sm.hpp | 4 +- src/SparseGridGpu/SparseGridGpu.hpp | 22 ++++ src/SparseGridGpu/SparseGridGpu_kernels.cuh | 106 ++++++++++++++++++++ 4 files changed, 142 insertions(+), 14 deletions(-) diff --git a/src/Grid/comb.hpp b/src/Grid/comb.hpp index 49d4183b..405d33a3 100644 --- a/src/Grid/comb.hpp +++ b/src/Grid/comb.hpp @@ -34,7 +34,7 @@ template<unsigned int dim> struct comb { //! Array that store the combination - char c[dim]; + signed char c[dim]; /*! \brief check if it is a valid combination * @@ -105,7 +105,7 @@ struct comb * \return Result combination * */ - inline comb<dim> operator&(char c_) + inline comb<dim> operator&(signed char c_) { comb<dim> ret; @@ -246,7 +246,7 @@ struct comb * */ - inline char operator[](int i) const + inline signed char operator[](int i) const { return c[i]; } @@ -257,7 +257,7 @@ struct comb * */ - inline char * getComb() + inline signed char * getComb() { return c; } @@ -268,7 +268,7 @@ struct comb * */ - inline const char * getComb() const + inline const signed char * getComb() const { return c; } @@ -282,7 +282,7 @@ struct comb * \return value of the i index * */ - inline char value(int i) const + inline signed char value(int i) const { return c[i]; } @@ -314,10 +314,10 @@ struct comb * \param c list of numbers * */ - comb(std::initializer_list<char> c) + comb(std::initializer_list<signed char> c) { size_t i = 0; - for(char x : c) + for(signed char x : c) {this->c[c.size() - i - 1] = x;i++;} } @@ -397,7 +397,7 @@ template<> struct comb<0> { //! FIX - char c[0]; + signed char c[0]; /*! \brief check if it is a valid combination * @@ -467,7 +467,7 @@ struct comb<0> * */ - inline char operator[](int i) + inline signed char operator[](int i) { return 0; } @@ -478,7 +478,7 @@ struct comb<0> * */ - inline char * getComb() + inline signed char * getComb() { return c; } @@ -492,7 +492,7 @@ struct comb<0> * \return value of the i index * */ - inline char value(int i) const + inline signed char value(int i) const { return c[i]; } diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp index 4680783c..e4511d29 100644 --- a/src/Grid/grid_sm.hpp +++ b/src/Grid/grid_sm.hpp @@ -431,7 +431,7 @@ public: */ template<typename check=NoCheck, typename ids_type> - inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N]) const + inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N]) const { mem_id lid; @@ -467,7 +467,7 @@ public: */ template<typename check=NoCheck,typename ids_type> - inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N], const size_t (&bc)[N]) const + inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N], const size_t (&bc)[N]) const { mem_id lid; diff --git a/src/SparseGridGpu/SparseGridGpu.hpp b/src/SparseGridGpu/SparseGridGpu.hpp index 9c6a23db..00a31a27 100644 --- a/src/SparseGridGpu/SparseGridGpu.hpp +++ b/src/SparseGridGpu/SparseGridGpu.hpp @@ -2628,6 +2628,28 @@ public: applyStencils< SparseGridGpuKernels::stencil_func_conv2_b<dim,nLoop,prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...); } + /*! \brief Apply a free type convolution using blocks + * + * + */ + template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_src3, + unsigned int prop_dst1 , unsigned int prop_dst2, unsigned int prop_dst3, + unsigned int stencil_size, typename lambda_f, typename ... ArgsT > + void conv3_b(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args) + { + Box<dim,int> box; + + for (int i = 0 ; i < dim ; i++) + { + box.setLow(i,start.get(i)); + box.setHigh(i,stop.get(i)); + } + + constexpr unsigned int nLoop = UIntDivCeil<(IntPow<blockEdgeSize + 2, dim>::value), (blockSize)>::value; + + applyStencils< SparseGridGpuKernels::stencil_func_conv3_b<dim,nLoop,prop_src1,prop_src2,prop_src3,prop_dst1,prop_dst2,prop_dst3,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...); + } + /*! \brief Apply a free type convolution using blocks * * diff --git a/src/SparseGridGpu/SparseGridGpu_kernels.cuh b/src/SparseGridGpu/SparseGridGpu_kernels.cuh index 683dc776..8486f53c 100644 --- a/src/SparseGridGpu/SparseGridGpu_kernels.cuh +++ b/src/SparseGridGpu/SparseGridGpu_kernels.cuh @@ -158,6 +158,19 @@ namespace SparseGridGpuKernels { f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1],coord[2]); } + + template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT> + __device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord , + CpBlockType & cpb1, + CpBlockType & cpb2, + CpBlockType & cpb3, + DataBlockWrapperT & DataBlockLoad, + int offset, + lambda_func f, + ArgsT ... args) + { + f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1],coord[2]); + } }; template<> @@ -204,6 +217,19 @@ namespace SparseGridGpuKernels { f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1]); } + + template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT> + __device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord , + CpBlockType & cpb1, + CpBlockType & cpb2, + CpBlockType & cpb3, + DataBlockWrapperT & DataBlockLoad, + int offset, + lambda_func f, + ArgsT ... args) + { + f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1]); + } }; template<unsigned int dim, unsigned int n_loop, unsigned int p_src, unsigned int p_dst, unsigned int stencil_size> @@ -424,6 +450,86 @@ namespace SparseGridGpuKernels } }; + template<unsigned int dim, unsigned int n_loop, + unsigned int p_src1, unsigned int p_src2, unsigned int p_src3, + unsigned int p_dst1, unsigned int p_dst2, unsigned int p_dst3, + unsigned int stencil_size> + struct stencil_func_conv3_b + { + typedef NNStar<dim> stencil_type; + + static constexpr unsigned int supportRadius = stencil_size; + + template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT> + static inline __device__ void stencil( + SparseGridT & sparseGrid, + const unsigned int dataBlockId, + openfpm::sparse_index<unsigned int> dataBlockIdPos, + unsigned int offset, + grid_key_dx<dim, int> & pointCoord, + DataBlockWrapperT & dataBlockLoad, + DataBlockWrapperT & dataBlockStore, + unsigned char curMask, + lambda_func f, + ArgT ... args) + { + typedef typename SparseGridT::AggregateBlockType AggregateT; + typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1; + typedef ScalarTypeOf<AggregateT, p_src1> ScalarT2; + typedef ScalarTypeOf<AggregateT, p_src1> ScalarT3; + + constexpr unsigned int enlargedBlockSize = IntPow< + SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value; + + __shared__ ScalarT1 enlargedBlock1[enlargedBlockSize]; + __shared__ ScalarT2 enlargedBlock2[enlargedBlockSize]; + __shared__ ScalarT3 enlargedBlock3[enlargedBlockSize]; + + // fill with background + + typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes; + typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes; + + cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1); + cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2); + cp_block<ScalarT3,stencil_size,vmpl_sizes,dim> cpb3(enlargedBlock3); + + sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1); + sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2); + sparseGrid.template loadGhostBlock<p_src3>(dataBlockLoad, dataBlockIdPos, enlargedBlock3); + + __syncthreads(); + + ScalarT1 res1 = 0; + ScalarT2 res2 = 0; + ScalarT3 res3 = 0; + + if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING)) + { + int coord[dim]; + + unsigned int linIdTmp = offset; + for (unsigned int d = 0; d < dim; ++d) + { + coord[d] = linIdTmp % SparseGridT::blockEdgeSize_; + linIdTmp /= SparseGridT::blockEdgeSize_; + } + + stencil_conv_func_impl<dim>::stencil3_block(res1,res2,res3,coord,cpb1,cpb2,cpb3,dataBlockLoad,offset,f,args...); + + dataBlockStore.template get<p_dst1>()[offset] = res1; + dataBlockStore.template get<p_dst2>()[offset] = res2; + dataBlockStore.template get<p_dst3>()[offset] = res3; + } + } + + template <typename SparseGridT, typename CtxT> + static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx) + { + // No flush + } + }; + template<unsigned int dim, unsigned int n_loop, unsigned int p_src1, unsigned int p_src2, unsigned int p_dst1, unsigned int p_dst2, unsigned int stencil_size> struct stencil_func_conv2 { -- GitLab