Commit 6e0ef634 authored by incardon's avatar incardon
Browse files

Adding conv_3

parent 81076bbe
Pipeline #4725 failed with stages
in 20 minutes and 12 seconds
......@@ -34,7 +34,7 @@ template<unsigned int dim>
struct comb
{
//! Array that store the combination
char c[dim];
signed char c[dim];
/*! \brief check if it is a valid combination
*
......@@ -105,7 +105,7 @@ struct comb
* \return Result combination
*
*/
inline comb<dim> operator&(char c_)
inline comb<dim> operator&(signed char c_)
{
comb<dim> ret;
......@@ -246,7 +246,7 @@ struct comb
*
*/
inline char operator[](int i) const
inline signed char operator[](int i) const
{
return c[i];
}
......@@ -257,7 +257,7 @@ struct comb
*
*/
inline char * getComb()
inline signed char * getComb()
{
return c;
}
......@@ -268,7 +268,7 @@ struct comb
*
*/
inline const char * getComb() const
inline const signed char * getComb() const
{
return c;
}
......@@ -282,7 +282,7 @@ struct comb
* \return value of the i index
*
*/
inline char value(int i) const
inline signed char value(int i) const
{
return c[i];
}
......@@ -314,10 +314,10 @@ struct comb
* \param c list of numbers
*
*/
comb(std::initializer_list<char> c)
comb(std::initializer_list<signed char> c)
{
size_t i = 0;
for(char x : c)
for(signed char x : c)
{this->c[c.size() - i - 1] = x;i++;}
}
......@@ -397,7 +397,7 @@ template<>
struct comb<0>
{
//! FIX
char c[0];
signed char c[0];
/*! \brief check if it is a valid combination
*
......@@ -467,7 +467,7 @@ struct comb<0>
*
*/
inline char operator[](int i)
inline signed char operator[](int i)
{
return 0;
}
......@@ -478,7 +478,7 @@ struct comb<0>
*
*/
inline char * getComb()
inline signed char * getComb()
{
return c;
}
......@@ -492,7 +492,7 @@ struct comb<0>
* \return value of the i index
*
*/
inline char value(int i) const
inline signed char value(int i) const
{
return c[i];
}
......
......@@ -431,7 +431,7 @@ public:
*/
template<typename check=NoCheck, typename ids_type>
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N]) const
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N]) const
{
mem_id lid;
......@@ -467,7 +467,7 @@ public:
*/
template<typename check=NoCheck,typename ids_type>
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N], const size_t (&bc)[N]) const
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N], const size_t (&bc)[N]) const
{
mem_id lid;
......
......@@ -2628,6 +2628,28 @@ public:
applyStencils< SparseGridGpuKernels::stencil_func_conv2_b<dim,nLoop,prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...);
}
/*! \brief Apply a free type convolution using blocks
*
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_src3,
unsigned int prop_dst1 , unsigned int prop_dst2, unsigned int prop_dst3,
unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv3_b(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
Box<dim,int> box;
for (int i = 0 ; i < dim ; i++)
{
box.setLow(i,start.get(i));
box.setHigh(i,stop.get(i));
}
constexpr unsigned int nLoop = UIntDivCeil<(IntPow<blockEdgeSize + 2, dim>::value), (blockSize)>::value;
applyStencils< SparseGridGpuKernels::stencil_func_conv3_b<dim,nLoop,prop_src1,prop_src2,prop_src3,prop_dst1,prop_dst2,prop_dst3,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...);
}
/*! \brief Apply a free type convolution using blocks
*
*
......
......@@ -158,6 +158,19 @@ namespace SparseGridGpuKernels
{
f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
}
template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
__device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord ,
CpBlockType & cpb1,
CpBlockType & cpb2,
CpBlockType & cpb3,
DataBlockWrapperT & DataBlockLoad,
int offset,
lambda_func f,
ArgsT ... args)
{
f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
}
};
template<>
......@@ -204,6 +217,19 @@ namespace SparseGridGpuKernels
{
f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1]);
}
template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
__device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord ,
CpBlockType & cpb1,
CpBlockType & cpb2,
CpBlockType & cpb3,
DataBlockWrapperT & DataBlockLoad,
int offset,
lambda_func f,
ArgsT ... args)
{
f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1]);
}
};
template<unsigned int dim, unsigned int n_loop, unsigned int p_src, unsigned int p_dst, unsigned int stencil_size>
......@@ -424,6 +450,86 @@ namespace SparseGridGpuKernels
}
};
template<unsigned int dim, unsigned int n_loop,
unsigned int p_src1, unsigned int p_src2, unsigned int p_src3,
unsigned int p_dst1, unsigned int p_dst2, unsigned int p_dst3,
unsigned int stencil_size>
struct stencil_func_conv3_b
{
typedef NNStar<dim> stencil_type;
static constexpr unsigned int supportRadius = stencil_size;
template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
static inline __device__ void stencil(
SparseGridT & sparseGrid,
const unsigned int dataBlockId,
openfpm::sparse_index<unsigned int> dataBlockIdPos,
unsigned int offset,
grid_key_dx<dim, int> & pointCoord,
DataBlockWrapperT & dataBlockLoad,
DataBlockWrapperT & dataBlockStore,
unsigned char curMask,
lambda_func f,
ArgT ... args)
{
typedef typename SparseGridT::AggregateBlockType AggregateT;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT2;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT3;
constexpr unsigned int enlargedBlockSize = IntPow<
SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;
__shared__ ScalarT1 enlargedBlock1[enlargedBlockSize];
__shared__ ScalarT2 enlargedBlock2[enlargedBlockSize];
__shared__ ScalarT3 enlargedBlock3[enlargedBlockSize];
// fill with background
typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;
cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1);
cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2);
cp_block<ScalarT3,stencil_size,vmpl_sizes,dim> cpb3(enlargedBlock3);
sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1);
sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2);
sparseGrid.template loadGhostBlock<p_src3>(dataBlockLoad, dataBlockIdPos, enlargedBlock3);
__syncthreads();
ScalarT1 res1 = 0;
ScalarT2 res2 = 0;
ScalarT3 res3 = 0;
if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
{
int coord[dim];
unsigned int linIdTmp = offset;
for (unsigned int d = 0; d < dim; ++d)
{
coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
linIdTmp /= SparseGridT::blockEdgeSize_;
}
stencil_conv_func_impl<dim>::stencil3_block(res1,res2,res3,coord,cpb1,cpb2,cpb3,dataBlockLoad,offset,f,args...);
dataBlockStore.template get<p_dst1>()[offset] = res1;
dataBlockStore.template get<p_dst2>()[offset] = res2;
dataBlockStore.template get<p_dst3>()[offset] = res3;
}
}
template <typename SparseGridT, typename CtxT>
static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
{
// No flush
}
};
template<unsigned int dim, unsigned int n_loop, unsigned int p_src1, unsigned int p_src2, unsigned int p_dst1, unsigned int p_dst2, unsigned int stencil_size>
struct stencil_func_conv2
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment