Commit 5d5e36df authored by i-bird's avatar i-bird

Fixing compilation

parents 7d268068 9c823a00
Pipeline #2650 passed with stages
in 14 minutes and 26 seconds
......@@ -25,7 +25,6 @@ endif()
set (CMAKE_CXX_STANDARD 14)
set (CMAKE_CUDA_STANDARD 14)
set(Vc_DIR "${Vc_ROOT}/lib/cmake/Vc/")
message("Searching Vc in ${Vc_DIR}")
......
......@@ -70,7 +70,7 @@ __global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy,
constexpr int NN_num = 4;
template<typename celllist_type>
/*template<typename celllist_type>
__global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu)
......@@ -98,7 +98,7 @@ __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
}
printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]);
}
}*/
BOOST_AUTO_TEST_SUITE( grid_gpu_func_interp )
......@@ -107,7 +107,6 @@ BOOST_AUTO_TEST_CASE (gpu_p2m)
openfpm::vector_gpu<Point<3,float>> pos;
openfpm::vector_gpu<aggregate<float,float[3]>> prop;
pos.resize(100);
prop.resize(100);
......
......@@ -1256,6 +1256,14 @@ public:
}
}
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static bool is_unpack_header_supported()
{return false;}
/*! \brief Resize the grid
*
* Resize the grid to the old information is retained on the new grid,
......
......@@ -163,6 +163,26 @@ public:
{
}
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{}
template<unsigned int ... prp, typename S2, typename header_type, typename ite_type, typename context_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
ite_type & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
context_type &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{}
/*! \brief It copy a grid
*
* \param g grid to copy
......@@ -743,6 +763,12 @@ public:
{
}
/*! \brief Stub does not do anything
*
*/
static void unpack_headers()
{}
/*! \brief Fill the memory with a byte
*
*/
......
......@@ -266,6 +266,21 @@ public:
{
obj.template unpack<prp...>(mem, sub_it, ps, context, opt);
}
template<typename grid_sub_it_type,
typename header_type,
typename context_type,
unsigned int ... prp> static void unpack_with_header(ExtPreAlloc<Mem> & mem,
grid_sub_it_type & sub_it,
T & obj,
header_type & header,
int ih,
Unpack_stat & ps,
context_type & context,
rem_copy_opt opt)
{
obj.template unpack_with_headers<prp...>(mem, sub_it, header, ih, ps, context, opt);
}
};
......
......@@ -100,7 +100,7 @@ template<unsigned int dim ,typename T> class Point
* \param p Point
*
*/
template <typename S> inline Point(const Point<dim,S> & p)
template <typename S> __device__ __host__ inline Point(const Point<dim,S> & p)
{
for (size_t i = 0 ; i < dim ; i++)
{get(i) = static_cast<S>(p.get(i));}
......
......@@ -987,6 +987,26 @@ public:
return sparse_grid_bck_value<typename std::remove_reference<decltype(chunks.get(0))>::type>(chunks.get(0));
}
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{}
template<unsigned int ... prp, typename S2, typename header_type, typename ite_type, typename context_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
ite_type & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
context_type &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{}
/*! \brief Get the background value
*
* \return background value
......@@ -1093,6 +1113,14 @@ public:
return get_selector< typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type >::template get_const<p>(chunks,active_cnk,sub_id);
}
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static bool is_unpack_header_supported()
{return false;}
/*! \brief Get the reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
......
......@@ -22,7 +22,7 @@
* tuned for blocked data.
*/
template<unsigned int dim, unsigned int blockEdgeSize, typename indexT>
class grid_zmb : private grid_smb<dim,blockEdgeSize>
class grid_zmb : private grid_smb<dim,blockEdgeSize,indexT>
{
public:
......@@ -30,34 +30,34 @@ public:
grid_zmb() {}
__host__ __device__ grid_zmb(const size_t (& sz)[dim])
:grid_smb<dim,blockEdgeSize>(sz)
:grid_smb<dim,blockEdgeSize,indexT>(sz)
{}
__host__ __device__ grid_zmb(const size_t domainBlockEdgeSize)
:grid_smb<dim,blockEdgeSize>(domainBlockEdgeSize)
:grid_smb<dim,blockEdgeSize,indexT>(domainBlockEdgeSize)
{}
template<typename T>
__host__ __device__ grid_zmb(const grid_sm<dim, T> blockGrid)
:grid_smb<dim,blockEdgeSize>(blockGrid)
:grid_smb<dim,blockEdgeSize,indexT>(blockGrid)
{}
#ifdef __NVCC__
//Constructors from dim3 and uint3 objects
__host__ __device__ grid_zmb(const dim3 blockDimensions)
:grid_smb<dim,blockEdgeSize>(blockDimensions)
:grid_smb<dim,blockEdgeSize,indexT>(blockDimensions)
{}
#endif // __NVCC__
__host__ __device__ grid_zmb(const grid_zmb<dim, blockEdgeSize, indexT> &other)
:grid_smb<dim,blockEdgeSize>(other)
:grid_smb<dim,blockEdgeSize,indexT>(other)
{}
__host__ __device__ grid_zmb &operator=(const grid_zmb<dim, blockEdgeSize, indexT> &other)
{
((grid_smb<dim,blockEdgeSize> *)this)->operator=(other);
((grid_smb<dim,blockEdgeSize,indexT> *)this)->operator=(other);
return *this;
}
......@@ -115,7 +115,7 @@ public:
__host__ __device__ const indexT (& getSize() const)[dim]
{
return grid_smb<dim,blockEdgeSize>::getSize();
return grid_smb<dim,blockEdgeSize,indexT>::getSize();
}
......@@ -123,12 +123,12 @@ public:
template<typename indexT_>
inline __host__ __device__ grid_key_dx<dim,indexT> getGlobalCoord(const grid_key_dx<dim, indexT_> & blockCoord, unsigned int offset) const
{
return grid_smb<dim,blockEdgeSize>::getGlobalCoord(blockCoord,offset);
return grid_smb<dim,blockEdgeSize,indexT>::getGlobalCoord(blockCoord,offset);
}
inline indexT getBlockSize() const
{
return grid_smb<dim,blockEdgeSize>::getBlockSize();
return grid_smb<dim,blockEdgeSize,indexT>::getBlockSize();
}
};
......
......@@ -602,6 +602,9 @@ private:
//! Memory to remove copy finalize
ExtPreAlloc<CudaMemory> * prAlloc_prp;
//! shifts for chunk conversion
openfpm::vector_gpu<aggregate<int[dim]>> shifts;
bool findNN = false;
inline void swap_internal_remote()
......@@ -1099,7 +1102,7 @@ private:
auto & o_map = this->getSegmentToOutMap();
auto & segments_data = this->getSegmentToMergeIndexMap();
new_map.resize(a_map.size());
new_map.resize(a_map.size(),0);
// construct new to old map
......@@ -1386,7 +1389,7 @@ private:
if (n_cnk != 0)
{
openfpm::vector_gpu<aggregate<int[dim]>> shifts;
shifts.clear();
int n_shift = 1;
shifts.add();
......@@ -3152,6 +3155,99 @@ public:
grid_src.copySect.add(sgs);
}
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request<AggregateT,prp ...> spq;
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(spq);
result.allocate(sizeof(int));
CUDA_LAUNCH_DIM3((SparseGridGpuKernels::unpack_headers<decltype(std::declval<self>().toKernel())>),1,pointers.size(),
pointers.toKernel(),
headers.toKernel(),
(int *)result.getDevicePointer(),
spq.point_size,
n_slot)
}
/*! \brief unpack the sub-grid object
*
* \tparam prp properties to unpack
*
* \param mem preallocated memory from where to unpack the object
* \param sub sub-grid iterator
* \param obj object where to unpack
*
*/
template<unsigned int ... prp, typename S2, typename header_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
SparseGridGpu_iterator_sub<dim,self> & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
mgpu::ofp_context_t &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{
////////////////////////////////////////////////////////////
if ((opt & rem_copy_opt::KEEP_GEOMETRY) == false)
{
this->template addAndConvertPackedChunkToTmp<prp ...>(mem,sub_it,ps,context);
// readjust mem
}
else
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request<AggregateT,prp ...> spq;
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(spq);
// First get the number of chunks
size_t n_cnk = headers.template get<1>(ih);
ps.addOffset(sizeof(size_t));
ps.addOffset(2*dim*sizeof(unsigned int));
size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int * scan = (unsigned int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + n_cnk*sizeof(indexT));
// Unpack number of points
// calculate the number of total points
size_t n_pnt = headers.template get<2>(ih);
actual_offset += align_number(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int));
void * data_base_ptr = (void *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + actual_offset );
actual_offset += align_number(sizeof(indexT),n_pnt*(spq.point_size));
short int * offsets = (short int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + actual_offset);
actual_offset += align_number(sizeof(indexT),n_pnt*sizeof(short));
actual_offset += align_number(sizeof(indexT),n_pnt*sizeof(unsigned char));
scan_ptrs_cp.add(scan);
offset_ptrs_cp.add(offsets);
data_base_ptr_cp.add(data_base_ptr);
ps.addOffset(actual_offset);
}
}
/*! \brief Indicate that unpacking the header is supported
*
* \return true
*
*/
static bool is_unpack_header_supported()
{return true;}
/*! \brief unpack the sub-grid object
*
* \tparam prp properties to unpack
......@@ -3191,7 +3287,7 @@ public:
Unpacker<size_t,S2>::unpack(mem,n_cnk,ps);
// Unpack origin of the chunk indexing
for (int i = 0 ; i < dim ; i++)
/* for (int i = 0 ; i < dim ; i++)
{
int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps);
......@@ -3201,7 +3297,9 @@ public:
{
int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps);
}
}*/
ps.addOffset(2*dim*sizeof(unsigned int));
size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int * scan = (unsigned int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + n_cnk*sizeof(indexT));
......@@ -3465,7 +3563,7 @@ public:
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=long int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
......@@ -3474,11 +3572,19 @@ using SparseGridGpu_z = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSi
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_zi = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_smb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_i = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
#endif //OPENFPM_PDATA_SPARSEGRIDGPU_HPP
......@@ -81,6 +81,44 @@ public:
background(bck)
{}
/*! \brief
*
* \param
*
*/
template<typename headers_type>
__device__ static int unpack_headers(headers_type & headers, unsigned char * data, int ih, int sz_pack)
{
size_t n_cnk;
if (sizeof(indexT) == 8)
{n_cnk = ((size_t *)data)[0];}
else
{
unsigned int dp1 = ((unsigned int *)data)[0];
unsigned int dp2 = ((unsigned int *)&data[4])[0];
n_cnk = (size_t)dp1 + ((size_t)dp2 << 32);
}
headers.template get<1>(ih) = n_cnk;
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<3>(ih)[i] = data[2*sizeof(size_t) + i*sizeof(int)];}
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<4>(ih)[i] = data[2*sizeof(size_t) + dim*sizeof(int) + i*sizeof(int)];}
size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int n_pnt = *(unsigned int *)&(data[sizeof(size_t) + 2*dim*sizeof(int) + actual_offset + n_cnk*sizeof(unsigned int)]);
headers.template get<2>(ih) = n_pnt;
return sizeof(size_t) + // byte required to pack the number of chunk packed
2*dim*sizeof(int) + // starting point + size of the indexing packing
sizeof(indexT)*n_cnk + // byte required to pack the chunk indexes
align_number_device(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) + // byte required to pack the scan of the chunk point
align_number_device(sizeof(indexT),n_pnt*sz_pack) + // byte required to pack data
align_number_device(sizeof(indexT),n_pnt*sizeof(short int)) + // byte required to pack offsets
align_number_device(sizeof(indexT),n_pnt*sizeof(unsigned char)); // byte required to pack masks;
}
/*! \brief Get the coordinate of the block and the offset id inside the block it give the global coordinate
*
* \param blockCoord block coordinate
......@@ -299,6 +337,8 @@ public:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
return linId;
}
......@@ -316,6 +356,8 @@ public:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
}
template<typename Coordtype>
......@@ -714,6 +756,9 @@ private:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually load the data into the shared region
//ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
......@@ -846,6 +891,8 @@ private:
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually store the data from the shared region
ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
......
......@@ -35,6 +35,54 @@ struct cross_stencil
T xp[dim];
};
/*template<unsigned int dim, unsigned int block_edge_size>
struct shift_position
{
__device__ static inline int shift(int pos, int stencilRadius)
{
int accu = 1;
int pos_s = 0;
for (int i = 0 ; i < dim ; i++)
{
pos_s += (pos % block_edge_size + stencilRadius)*accu;
accu *= (block_edge_size + 2*stencilRadius);
pos /= block_edge_size;
}
return pos_s;
}
};
template<unsigned int block_edge_size>
struct shift_position<2,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size);
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz;
}
};
template<unsigned int block_edge_size>
struct shift_position<3,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size) % block_edge_size;
unsigned int z = (pos / (block_edge_size*block_edge_size));
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz + (z+stencilRadius)*g_sz*g_sz;
}
};*/
template<unsigned int dim>
struct NNStar
{
......@@ -53,7 +101,12 @@ struct NNStar
unsigned int d = offset/2;
int dPos = blockCoord.get(d) + (offset%2)*2 - 1;
blockCoord.set_d(d, dPos);
neighbourPos = blockMap.get_sparse(sparseGrid.getBlockLinId(blockCoord)).id;
int bl = sparseGrid.getBlockLinId(blockCoord);
bl = (dPos < 0)?-1:bl;
neighbourPos = blockMap.get_sparse(bl).id;
}
return neighbourPos;
}
......@@ -479,7 +532,7 @@ struct loadGhostBlock_impl<1,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
// Convert pos into a linear id accounting for the ghost offsets
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId2 = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
const unsigned int linId2 = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -569,6 +622,9 @@ struct loadGhostBlock_impl<2,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -675,6 +731,9 @@ struct loadGhostBlock_impl<3,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......@@ -822,6 +881,9 @@ struct loadGhostBlock_impl<7,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
......
......@@ -29,6 +29,42 @@ enum mask_sparse
// Kernels for SparseGridGpu
namespace SparseGridGpuKernels
{
template<typename SparseGridGpuType, typename pointers_type, typename headers_type>
__global__ void unpack_headers(pointers_type pointers, headers_type headers, int * result, unsigned int sz_pack, int n_slot)
{
int t = threadIdx.x;
if (t > pointers.size()) {return;}
unsigned char * data_pack = (unsigned char *)pointers.template get<0>(t);
while (data_pack < pointers.template get<1>(t) )
{
int ih = pointers.template get<2>(t);
if (n_slot > ih)
{
if (sizeof(typename SparseGridGpuType::indexT_) == 8)
{headers.template get<0>(t*n_slot + ih) = *(size_t *)data_pack;}
else
{
unsigned int dp1 = *(unsigned int *)data_pack;
unsigned int dp2 = *(unsigned int *)&(data_pack[4]);
headers.template get<0>(t*n_slot + ih) = (size_t)dp1 + (((size_t)dp2) << 32);
}
data_pack += sizeof(size_t);