Commit 9c823a00 authored by incardon's avatar incardon

Fixing bug in SparseGridGpu

parent 4f9c5838
Pipeline #2500 passed with stages
in 11 minutes
......@@ -70,7 +70,7 @@ __global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy,
constexpr int NN_num = 4;
template<typename celllist_type>
/*template<typename celllist_type>
__global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu)
......@@ -98,7 +98,7 @@ __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
}
printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]);
}
}*/
BOOST_AUTO_TEST_SUITE( grid_gpu_func_interp )
......
......@@ -22,7 +22,7 @@
* tuned for blocked data.
*/
template<unsigned int dim, unsigned int blockEdgeSize, typename indexT>
class grid_zmb : private grid_smb<dim,blockEdgeSize>
class grid_zmb : private grid_smb<dim,blockEdgeSize,indexT>
{
public:
......@@ -30,34 +30,34 @@ public:
grid_zmb() {}
__host__ __device__ grid_zmb(const size_t (& sz)[dim])
:grid_smb<dim,blockEdgeSize>(sz)
:grid_smb<dim,blockEdgeSize,indexT>(sz)
{}
__host__ __device__ grid_zmb(const size_t domainBlockEdgeSize)
:grid_smb<dim,blockEdgeSize>(domainBlockEdgeSize)
:grid_smb<dim,blockEdgeSize,indexT>(domainBlockEdgeSize)
{}
template<typename T>
__host__ __device__ grid_zmb(const grid_sm<dim, T> blockGrid)
:grid_smb<dim,blockEdgeSize>(blockGrid)
:grid_smb<dim,blockEdgeSize,indexT>(blockGrid)
{}
#ifdef __NVCC__
//Constructors from dim3 and uint3 objects
__host__ __device__ grid_zmb(const dim3 blockDimensions)
:grid_smb<dim,blockEdgeSize>(blockDimensions)
:grid_smb<dim,blockEdgeSize,indexT>(blockDimensions)
{}
#endif // __NVCC__
__host__ __device__ grid_zmb(const grid_zmb<dim, blockEdgeSize, indexT> &other)
:grid_smb<dim,blockEdgeSize>(other)
:grid_smb<dim,blockEdgeSize,indexT>(other)
{}
__host__ __device__ grid_zmb &operator=(const grid_zmb<dim, blockEdgeSize, indexT> &other)
{
((grid_smb<dim,blockEdgeSize> *)this)->operator=(other);
((grid_smb<dim,blockEdgeSize,indexT> *)this)->operator=(other);
return *this;
}
......@@ -115,7 +115,7 @@ public:
__host__ __device__ const indexT (& getSize() const)[dim]
{
return grid_smb<dim,blockEdgeSize>::getSize();
return grid_smb<dim,blockEdgeSize,indexT>::getSize();
}
......@@ -123,12 +123,12 @@ public:
template<typename indexT_>
inline __host__ __device__ grid_key_dx<dim,indexT> getGlobalCoord(const grid_key_dx<dim, indexT_> & blockCoord, unsigned int offset) const
{
return grid_smb<dim,blockEdgeSize>::getGlobalCoord(blockCoord,offset);
return grid_smb<dim,blockEdgeSize,indexT>::getGlobalCoord(blockCoord,offset);
}
inline indexT getBlockSize() const
{
return grid_smb<dim,blockEdgeSize>::getBlockSize();
return grid_smb<dim,blockEdgeSize,indexT>::getBlockSize();
}
};
......
......@@ -3566,7 +3566,7 @@ public:
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=long int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
......@@ -3575,11 +3575,19 @@ using SparseGridGpu_z = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSi
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_zi = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_smb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_i = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
#endif //OPENFPM_PDATA_SPARSEGRIDGPU_HPP
......@@ -337,6 +337,8 @@ public:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
return linId;
}
......@@ -354,6 +356,8 @@ public:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
}
template<typename Coordtype>
......@@ -752,6 +756,9 @@ private:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually load the data into the shared region
//ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
......@@ -884,6 +891,8 @@ private:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually store the data from the shared region
ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
......
......@@ -35,6 +35,54 @@ struct cross_stencil
T xp[dim];
};
/*template<unsigned int dim, unsigned int block_edge_size>
struct shift_position
{
__device__ static inline int shift(int pos, int stencilRadius)
{
int accu = 1;
int pos_s = 0;
for (int i = 0 ; i < dim ; i++)
{
pos_s += (pos % block_edge_size + stencilRadius)*accu;
accu *= (block_edge_size + 2*stencilRadius);
pos /= block_edge_size;
}
return pos_s;
}
};
template<unsigned int block_edge_size>
struct shift_position<2,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size);
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz;
}
};
template<unsigned int block_edge_size>
struct shift_position<3,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size) % block_edge_size;
unsigned int z = (pos / (block_edge_size*block_edge_size));
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz + (z+stencilRadius)*g_sz*g_sz;
}
};*/
template<unsigned int dim>
struct NNStar
{
......@@ -53,7 +101,12 @@ struct NNStar
unsigned int d = offset/2;
int dPos = blockCoord.get(d) + (offset%2)*2 - 1;
blockCoord.set_d(d, dPos);
neighbourPos = blockMap.get_sparse(sparseGrid.getBlockLinId(blockCoord)).id;
int bl = sparseGrid.getBlockLinId(blockCoord);
bl = (dPos < 0)?-1:bl;
neighbourPos = blockMap.get_sparse(bl).id;
}
return neighbourPos;
}
......@@ -479,7 +532,7 @@ struct loadGhostBlock_impl<1,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
// Convert pos into a linear id accounting for the ghost offsets
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId2 = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
const unsigned int linId2 = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -569,6 +622,9 @@ struct loadGhostBlock_impl<2,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -675,6 +731,9 @@ struct loadGhostBlock_impl<3,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -822,6 +881,9 @@ struct loadGhostBlock_impl<7,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......
......@@ -846,8 +846,8 @@ namespace SparseGridGpuKernels
{
// Read local mask to register
curMask = dataBlockLoad.template get<pMask>()[offset];
if (bx.isInsideKey(pointCoord) == false)
{curMask = 0;}
for (int i = 0 ; i < dim ; i++)
{curMask &= (pointCoord.get(i) < bx.getLow(i) || pointCoord.get(i) > bx.getHigh(i))?0:0xFF;}
}
openfpm::sparse_index<unsigned int> sdataBlockPos;
......
......@@ -471,8 +471,6 @@ BOOST_AUTO_TEST_CASE(testTagBoundaries2)
BOOST_AUTO_TEST_CASE(testStencilHeat)
{
printf("\n");
constexpr unsigned int dim = 2;
constexpr unsigned int blockEdgeSize = 8;
typedef aggregate<float,float> AggregateT;
......@@ -501,10 +499,10 @@ BOOST_AUTO_TEST_CASE(testStencilHeat)
for (unsigned int iter=0; iter<maxIter; ++iter)
{
sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.01);
}
sparseGrid.template deviceToHost<0>();
sparseGrid.template deviceToHost<0,1>();
// Compare
bool match = true;
......@@ -513,7 +511,7 @@ BOOST_AUTO_TEST_CASE(testStencilHeat)
auto coord = sparseGrid.getCoord(i);
float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);
match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
match &= fabs(sparseGrid.template get<1>(coord) - expectedValue) < 1e-2;
}
......@@ -522,8 +520,6 @@ BOOST_AUTO_TEST_CASE(testStencilHeat)
BOOST_AUTO_TEST_CASE(testStencil_lap_simplified)
{
printf("\n");
constexpr unsigned int dim = 2;
constexpr unsigned int blockEdgeSize = 8;
typedef aggregate<float,float> AggregateT;
......@@ -581,8 +577,6 @@ BOOST_AUTO_TEST_CASE(testStencil_lap_simplified)
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified)
{
printf("\n");
constexpr unsigned int dim = 2;
constexpr unsigned int blockEdgeSize = 8;
typedef aggregate<float,float> AggregateT;
......@@ -659,8 +653,6 @@ BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified)
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified2)
{
printf("\n");
constexpr unsigned int dim = 2;
constexpr unsigned int blockEdgeSize = 8;
typedef aggregate<float,float,float,float> AggregateT;
......@@ -750,8 +742,6 @@ BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified2)
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified_subset)
{
printf("\n");
constexpr unsigned int dim = 2;
constexpr unsigned int blockEdgeSize = 8;
typedef aggregate<float,float> AggregateT;
......@@ -811,8 +801,6 @@ __global__ void sparse_grid_get_test(sparsegrid_type sparseGrid, grid_key_dx<3>
BOOST_AUTO_TEST_CASE(testFlushInsert)
{
printf("\n");
constexpr unsigned int dim = 3;
constexpr unsigned int blockEdgeSize = 4;
typedef aggregate<float,float> AggregateT;
......
......@@ -27,7 +27,7 @@ struct BoundaryStencilSetX
DataBlockWrapperT & dataBlockStore,
unsigned char curMask)
{
if (curMask & mask_sparse::EXIST_AND_PADDING)
if (curMask == mask_sparse::EXIST_AND_PADDING)
{
dataBlockStore.template get<p_dst>()[offset] = pointCoord.get(0);
}
......@@ -55,9 +55,10 @@ struct BoundaryStencilSetXRescaled
unsigned char curMask,
ScalarT minX, ScalarT maxX, ScalarT minValue, ScalarT maxValue)
{
if (curMask & mask_sparse::EXIST_AND_PADDING)
if (curMask == mask_sparse::EXIST_AND_PADDING)
{
const ScalarT x = pointCoord.get(0);
auto value = maxValue * (x - minX) / (maxX - minX - 1);
if (x < minX)
{
......
......@@ -266,7 +266,17 @@ struct HeatStencil
}
// enlargedBlock[linId] = cur + dt * laplacian;
res = cur + dt * laplacian;
if (pointCoord.get(0) == 0 && pointCoord.get(1) == 8)
{
auto nPlusIdX = enlargedBlock[sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, 0, 1)];
auto nMinusIdX = enlargedBlock[sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, 0, -1)];
auto nPlusIdY = enlargedBlock[sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, 1, 1)];
auto nMinusIdY = enlargedBlock[sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, 1, -1)];
printf("POINT STENCIL: %f %f %f %f %f DIFF: %f\n",nPlusIdX,nMinusIdX,nPlusIdY,nMinusIdY,cur,nPlusIdX+nMinusIdX+nPlusIdY+nMinusIdY-4.0*cur);
}
}
__syncthreads();
if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment