Commit 5d5e36df authored by i-bird's avatar i-bird

Fixing compilation

parents 7d268068 9c823a00
Pipeline #2650 passed with stages
in 14 minutes and 26 seconds
...@@ -25,7 +25,6 @@ endif() ...@@ -25,7 +25,6 @@ endif()
set (CMAKE_CXX_STANDARD 14) set (CMAKE_CXX_STANDARD 14)
set (CMAKE_CUDA_STANDARD 14) set (CMAKE_CUDA_STANDARD 14)
set(Vc_DIR "${Vc_ROOT}/lib/cmake/Vc/") set(Vc_DIR "${Vc_ROOT}/lib/cmake/Vc/")
message("Searching Vc in ${Vc_DIR}") message("Searching Vc in ${Vc_DIR}")
......
...@@ -70,7 +70,7 @@ __global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy, ...@@ -70,7 +70,7 @@ __global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy,
constexpr int NN_num = 4; constexpr int NN_num = 4;
template<typename celllist_type> /*template<typename celllist_type>
__global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu) __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
{ {
GRID_ID_3(ite_gpu) GRID_ID_3(ite_gpu)
...@@ -98,7 +98,7 @@ __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu) ...@@ -98,7 +98,7 @@ __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
} }
printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]); printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]);
} }*/
BOOST_AUTO_TEST_SUITE( grid_gpu_func_interp ) BOOST_AUTO_TEST_SUITE( grid_gpu_func_interp )
...@@ -107,7 +107,6 @@ BOOST_AUTO_TEST_CASE (gpu_p2m) ...@@ -107,7 +107,6 @@ BOOST_AUTO_TEST_CASE (gpu_p2m)
openfpm::vector_gpu<Point<3,float>> pos; openfpm::vector_gpu<Point<3,float>> pos;
openfpm::vector_gpu<aggregate<float,float[3]>> prop; openfpm::vector_gpu<aggregate<float,float[3]>> prop;
pos.resize(100); pos.resize(100);
prop.resize(100); prop.resize(100);
......
...@@ -1256,6 +1256,14 @@ public: ...@@ -1256,6 +1256,14 @@ public:
} }
} }
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static bool is_unpack_header_supported()
{return false;}
/*! \brief Resize the grid /*! \brief Resize the grid
* *
* Resize the grid to the old information is retained on the new grid, * Resize the grid to the old information is retained on the new grid,
......
...@@ -163,6 +163,26 @@ public: ...@@ -163,6 +163,26 @@ public:
{ {
} }
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{}
template<unsigned int ... prp, typename S2, typename header_type, typename ite_type, typename context_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
ite_type & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
context_type &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{}
/*! \brief It copy a grid /*! \brief It copy a grid
* *
* \param g grid to copy * \param g grid to copy
...@@ -743,6 +763,12 @@ public: ...@@ -743,6 +763,12 @@ public:
{ {
} }
/*! \brief Stub does not do anything
*
*/
static void unpack_headers()
{}
/*! \brief Fill the memory with a byte /*! \brief Fill the memory with a byte
* *
*/ */
......
...@@ -266,6 +266,21 @@ public: ...@@ -266,6 +266,21 @@ public:
{ {
obj.template unpack<prp...>(mem, sub_it, ps, context, opt); obj.template unpack<prp...>(mem, sub_it, ps, context, opt);
} }
template<typename grid_sub_it_type,
typename header_type,
typename context_type,
unsigned int ... prp> static void unpack_with_header(ExtPreAlloc<Mem> & mem,
grid_sub_it_type & sub_it,
T & obj,
header_type & header,
int ih,
Unpack_stat & ps,
context_type & context,
rem_copy_opt opt)
{
obj.template unpack_with_headers<prp...>(mem, sub_it, header, ih, ps, context, opt);
}
}; };
......
...@@ -100,7 +100,7 @@ template<unsigned int dim ,typename T> class Point ...@@ -100,7 +100,7 @@ template<unsigned int dim ,typename T> class Point
* \param p Point * \param p Point
* *
*/ */
template <typename S> inline Point(const Point<dim,S> & p) template <typename S> __device__ __host__ inline Point(const Point<dim,S> & p)
{ {
for (size_t i = 0 ; i < dim ; i++) for (size_t i = 0 ; i < dim ; i++)
{get(i) = static_cast<S>(p.get(i));} {get(i) = static_cast<S>(p.get(i));}
......
...@@ -987,6 +987,26 @@ public: ...@@ -987,6 +987,26 @@ public:
return sparse_grid_bck_value<typename std::remove_reference<decltype(chunks.get(0))>::type>(chunks.get(0)); return sparse_grid_bck_value<typename std::remove_reference<decltype(chunks.get(0))>::type>(chunks.get(0));
} }
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{}
template<unsigned int ... prp, typename S2, typename header_type, typename ite_type, typename context_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
ite_type & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
context_type &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{}
/*! \brief Get the background value /*! \brief Get the background value
* *
* \return background value * \return background value
...@@ -1093,6 +1113,14 @@ public: ...@@ -1093,6 +1113,14 @@ public:
return get_selector< typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type >::template get_const<p>(chunks,active_cnk,sub_id); return get_selector< typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type >::template get_const<p>(chunks,active_cnk,sub_id);
} }
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static bool is_unpack_header_supported()
{return false;}
/*! \brief Get the reference of the selected element /*! \brief Get the reference of the selected element
* *
* \param v1 grid_key that identify the element in the grid * \param v1 grid_key that identify the element in the grid
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* tuned for blocked data. * tuned for blocked data.
*/ */
template<unsigned int dim, unsigned int blockEdgeSize, typename indexT> template<unsigned int dim, unsigned int blockEdgeSize, typename indexT>
class grid_zmb : private grid_smb<dim,blockEdgeSize> class grid_zmb : private grid_smb<dim,blockEdgeSize,indexT>
{ {
public: public:
...@@ -30,34 +30,34 @@ public: ...@@ -30,34 +30,34 @@ public:
grid_zmb() {} grid_zmb() {}
__host__ __device__ grid_zmb(const size_t (& sz)[dim]) __host__ __device__ grid_zmb(const size_t (& sz)[dim])
:grid_smb<dim,blockEdgeSize>(sz) :grid_smb<dim,blockEdgeSize,indexT>(sz)
{} {}
__host__ __device__ grid_zmb(const size_t domainBlockEdgeSize) __host__ __device__ grid_zmb(const size_t domainBlockEdgeSize)
:grid_smb<dim,blockEdgeSize>(domainBlockEdgeSize) :grid_smb<dim,blockEdgeSize,indexT>(domainBlockEdgeSize)
{} {}
template<typename T> template<typename T>
__host__ __device__ grid_zmb(const grid_sm<dim, T> blockGrid) __host__ __device__ grid_zmb(const grid_sm<dim, T> blockGrid)
:grid_smb<dim,blockEdgeSize>(blockGrid) :grid_smb<dim,blockEdgeSize,indexT>(blockGrid)
{} {}
#ifdef __NVCC__ #ifdef __NVCC__
//Constructors from dim3 and uint3 objects //Constructors from dim3 and uint3 objects
__host__ __device__ grid_zmb(const dim3 blockDimensions) __host__ __device__ grid_zmb(const dim3 blockDimensions)
:grid_smb<dim,blockEdgeSize>(blockDimensions) :grid_smb<dim,blockEdgeSize,indexT>(blockDimensions)
{} {}
#endif // __NVCC__ #endif // __NVCC__
__host__ __device__ grid_zmb(const grid_zmb<dim, blockEdgeSize, indexT> &other) __host__ __device__ grid_zmb(const grid_zmb<dim, blockEdgeSize, indexT> &other)
:grid_smb<dim,blockEdgeSize>(other) :grid_smb<dim,blockEdgeSize,indexT>(other)
{} {}
__host__ __device__ grid_zmb &operator=(const grid_zmb<dim, blockEdgeSize, indexT> &other) __host__ __device__ grid_zmb &operator=(const grid_zmb<dim, blockEdgeSize, indexT> &other)
{ {
((grid_smb<dim,blockEdgeSize> *)this)->operator=(other); ((grid_smb<dim,blockEdgeSize,indexT> *)this)->operator=(other);
return *this; return *this;
} }
...@@ -115,7 +115,7 @@ public: ...@@ -115,7 +115,7 @@ public:
__host__ __device__ const indexT (& getSize() const)[dim] __host__ __device__ const indexT (& getSize() const)[dim]
{ {
return grid_smb<dim,blockEdgeSize>::getSize(); return grid_smb<dim,blockEdgeSize,indexT>::getSize();
} }
...@@ -123,12 +123,12 @@ public: ...@@ -123,12 +123,12 @@ public:
template<typename indexT_> template<typename indexT_>
inline __host__ __device__ grid_key_dx<dim,indexT> getGlobalCoord(const grid_key_dx<dim, indexT_> & blockCoord, unsigned int offset) const inline __host__ __device__ grid_key_dx<dim,indexT> getGlobalCoord(const grid_key_dx<dim, indexT_> & blockCoord, unsigned int offset) const
{ {
return grid_smb<dim,blockEdgeSize>::getGlobalCoord(blockCoord,offset); return grid_smb<dim,blockEdgeSize,indexT>::getGlobalCoord(blockCoord,offset);
} }
inline indexT getBlockSize() const inline indexT getBlockSize() const
{ {
return grid_smb<dim,blockEdgeSize>::getBlockSize(); return grid_smb<dim,blockEdgeSize,indexT>::getBlockSize();
} }
}; };
......
...@@ -602,6 +602,9 @@ private: ...@@ -602,6 +602,9 @@ private:
//! Memory to remove copy finalize //! Memory to remove copy finalize
ExtPreAlloc<CudaMemory> * prAlloc_prp; ExtPreAlloc<CudaMemory> * prAlloc_prp;
//! shifts for chunk conversion
openfpm::vector_gpu<aggregate<int[dim]>> shifts;
bool findNN = false; bool findNN = false;
inline void swap_internal_remote() inline void swap_internal_remote()
...@@ -1099,7 +1102,7 @@ private: ...@@ -1099,7 +1102,7 @@ private:
auto & o_map = this->getSegmentToOutMap(); auto & o_map = this->getSegmentToOutMap();
auto & segments_data = this->getSegmentToMergeIndexMap(); auto & segments_data = this->getSegmentToMergeIndexMap();
new_map.resize(a_map.size()); new_map.resize(a_map.size(),0);
// construct new to old map // construct new to old map
...@@ -1386,7 +1389,7 @@ private: ...@@ -1386,7 +1389,7 @@ private:
if (n_cnk != 0) if (n_cnk != 0)
{ {
openfpm::vector_gpu<aggregate<int[dim]>> shifts; shifts.clear();
int n_shift = 1; int n_shift = 1;
shifts.add(); shifts.add();
...@@ -3152,6 +3155,99 @@ public: ...@@ -3152,6 +3155,99 @@ public:
grid_src.copySect.add(sgs); grid_src.copySect.add(sgs);
} }
/*! \brief Stub does not do anything
*
*/
template<typename pointers_type,
typename headers_type,
typename result_type,
unsigned int ... prp >
static void unpack_headers(pointers_type & pointers, headers_type & headers, result_type & result, int n_slot)
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request<AggregateT,prp ...> spq;
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(spq);
result.allocate(sizeof(int));
CUDA_LAUNCH_DIM3((SparseGridGpuKernels::unpack_headers<decltype(std::declval<self>().toKernel())>),1,pointers.size(),
pointers.toKernel(),
headers.toKernel(),
(int *)result.getDevicePointer(),
spq.point_size,
n_slot)
}
/*! \brief unpack the sub-grid object
*
* \tparam prp properties to unpack
*
* \param mem preallocated memory from where to unpack the object
* \param sub sub-grid iterator
* \param obj object where to unpack
*
*/
template<unsigned int ... prp, typename S2, typename header_type>
void unpack_with_headers(ExtPreAlloc<S2> & mem,
SparseGridGpu_iterator_sub<dim,self> & sub_it,
header_type & headers,
int ih,
Unpack_stat & ps,
mgpu::ofp_context_t &context,
rem_copy_opt opt = rem_copy_opt::NONE_OPT)
{
////////////////////////////////////////////////////////////
if ((opt & rem_copy_opt::KEEP_GEOMETRY) == false)
{
this->template addAndConvertPackedChunkToTmp<prp ...>(mem,sub_it,ps,context);
// readjust mem
}
else
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request<AggregateT,prp ...> spq;
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(spq);
// First get the number of chunks
size_t n_cnk = headers.template get<1>(ih);
ps.addOffset(sizeof(size_t));
ps.addOffset(2*dim*sizeof(unsigned int));
size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int * scan = (unsigned int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + n_cnk*sizeof(indexT));
// Unpack number of points
// calculate the number of total points
size_t n_pnt = headers.template get<2>(ih);
actual_offset += align_number(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int));
void * data_base_ptr = (void *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + actual_offset );
actual_offset += align_number(sizeof(indexT),n_pnt*(spq.point_size));
short int * offsets = (short int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + actual_offset);
actual_offset += align_number(sizeof(indexT),n_pnt*sizeof(short));
actual_offset += align_number(sizeof(indexT),n_pnt*sizeof(unsigned char));
scan_ptrs_cp.add(scan);
offset_ptrs_cp.add(offsets);
data_base_ptr_cp.add(data_base_ptr);
ps.addOffset(actual_offset);
}
}
/*! \brief Indicate that unpacking the header is supported
*
* \return true
*
*/
static bool is_unpack_header_supported()
{return true;}
/*! \brief unpack the sub-grid object /*! \brief unpack the sub-grid object
* *
* \tparam prp properties to unpack * \tparam prp properties to unpack
...@@ -3191,7 +3287,7 @@ public: ...@@ -3191,7 +3287,7 @@ public:
Unpacker<size_t,S2>::unpack(mem,n_cnk,ps); Unpacker<size_t,S2>::unpack(mem,n_cnk,ps);
// Unpack origin of the chunk indexing // Unpack origin of the chunk indexing
for (int i = 0 ; i < dim ; i++) /* for (int i = 0 ; i < dim ; i++)
{ {
int tmp; int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps); Unpacker<int,S2>::unpack(mem,tmp,ps);
...@@ -3201,7 +3297,9 @@ public: ...@@ -3201,7 +3297,9 @@ public:
{ {
int tmp; int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps); Unpacker<int,S2>::unpack(mem,tmp,ps);
} }*/
ps.addOffset(2*dim*sizeof(unsigned int));
size_t actual_offset = n_cnk*sizeof(indexT); size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int * scan = (unsigned int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + n_cnk*sizeof(indexT)); unsigned int * scan = (unsigned int *)((unsigned char *)mem.getDevicePointer() + ps.getOffset() + n_cnk*sizeof(indexT));
...@@ -3465,7 +3563,7 @@ public: ...@@ -3465,7 +3563,7 @@ public:
template<unsigned int dim, template<unsigned int dim,
typename AggregateT, typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value, unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128, unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=long int, typename indexT=long int,
template<typename> class layout_base=memory_traits_inte, template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>> typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
...@@ -3474,11 +3572,19 @@ using SparseGridGpu_z = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSi ...@@ -3474,11 +3572,19 @@ using SparseGridGpu_z = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSi
template<unsigned int dim, template<unsigned int dim,
typename AggregateT, typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value, unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = 128, unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int, typename indexT=int,
template<typename> class layout_base=memory_traits_inte, template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>> typename linearizer = grid_zmb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_zi = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>; using SparseGridGpu_zi = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
template<unsigned int dim,
typename AggregateT,
unsigned int blockEdgeSize = default_edge<dim>::type::value,
unsigned int threadBlockSize = default_edge<dim>::tb::value,
typename indexT=int,
template<typename> class layout_base=memory_traits_inte,
typename linearizer = grid_smb<dim, blockEdgeSize,indexT>>
using SparseGridGpu_i = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSize,indexT,layout_base,linearizer>;
#endif //OPENFPM_PDATA_SPARSEGRIDGPU_HPP #endif //OPENFPM_PDATA_SPARSEGRIDGPU_HPP
...@@ -81,6 +81,44 @@ public: ...@@ -81,6 +81,44 @@ public:
background(bck) background(bck)
{} {}
/*! \brief
*
* \param
*
*/
template<typename headers_type>
__device__ static int unpack_headers(headers_type & headers, unsigned char * data, int ih, int sz_pack)
{
size_t n_cnk;
if (sizeof(indexT) == 8)
{n_cnk = ((size_t *)data)[0];}
else
{
unsigned int dp1 = ((unsigned int *)data)[0];
unsigned int dp2 = ((unsigned int *)&data[4])[0];
n_cnk = (size_t)dp1 + ((size_t)dp2 << 32);
}
headers.template get<1>(ih) = n_cnk;
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<3>(ih)[i] = data[2*sizeof(size_t) + i*sizeof(int)];}
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<4>(ih)[i] = data[2*sizeof(size_t) + dim*sizeof(int) + i*sizeof(int)];}
size_t actual_offset = n_cnk*sizeof(indexT);
unsigned int n_pnt = *(unsigned int *)&(data[sizeof(size_t) + 2*dim*sizeof(int) + actual_offset + n_cnk*sizeof(unsigned int)]);
headers.template get<2>(ih) = n_pnt;
return sizeof(size_t) + // byte required to pack the number of chunk packed
2*dim*sizeof(int) + // starting point + size of the indexing packing
sizeof(indexT)*n_cnk + // byte required to pack the chunk indexes
align_number_device(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) + // byte required to pack the scan of the chunk point
align_number_device(sizeof(indexT),n_pnt*sz_pack) + // byte required to pack data
align_number_device(sizeof(indexT),n_pnt*sizeof(short int)) + // byte required to pack offsets
align_number_device(sizeof(indexT),n_pnt*sizeof(unsigned char)); // byte required to pack masks;
}
/*! \brief Get the coordinate of the block and the offset id inside the block it give the global coordinate /*! \brief Get the coordinate of the block and the offset id inside the block it give the global coordinate
* *
* \param blockCoord block coordinate * \param blockCoord block coordinate
...@@ -299,6 +337,8 @@ public: ...@@ -299,6 +337,8 @@ public:
unsigned int coord[dim]; unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord); linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius); const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
return linId; return linId;
} }
...@@ -316,6 +356,8 @@ public: ...@@ -316,6 +356,8 @@ public:
unsigned int coord[dim]; unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord); linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
return coordToLin<blockEdgeSize>(coord, stencilSupportRadius); return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
// return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
} }
template<typename Coordtype> template<typename Coordtype>
...@@ -714,6 +756,9 @@ private: ...@@ -714,6 +756,9 @@ private:
unsigned int coord[dim]; unsigned int coord[dim];
linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord); linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);