Commit 6ec119f4 authored by incardon

AMR: adding link construction

parents dd63c9d2 bad52672
......@@ -8,6 +8,12 @@
#ifndef OPENFPM_DATA_SRC_GRID_GRID_BASE_IMPL_LAYOUT_HPP_
#define OPENFPM_DATA_SRC_GRID_GRID_BASE_IMPL_LAYOUT_HPP_
#include <boost/fusion/include/mpl.hpp>
#include "memory_ly/memory_conf.hpp"
#include <boost/fusion/include/for_each.hpp>
#include "Grid/Encap.hpp"
#include "Space/Shape/Box.hpp"
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
......
......@@ -24,53 +24,53 @@
* \snippet grid_unit_tests.hpp Access to an N-dimensional grid with an iterator
*
*/
template<unsigned int dim, typename T, typename S>
class grid_cpu<dim,T,S,typename memory_traits_inte<T>::type> : public grid_base_impl<dim,T,S,typename memory_traits_inte<T>::type, memory_traits_inte>
{
//! grid layout
typedef typename memory_traits_inte<T>::type layout;
public:
//! Object container for T. It is the return type of get_o and gives access
//! to all the properties of T
typedef typename grid_base_impl<dim,T,S,typename memory_traits_inte<T>::type, memory_traits_inte>::container container;
//! Default constructor
inline grid_cpu() THROW
:grid_base_impl<dim,T,S,layout,memory_traits_inte>()
{
}
/*! \brief create a grid from another grid
*
* \param g the grid to copy
*
*/
inline grid_cpu(const grid_cpu & g) THROW
:grid_base_impl<dim,T,S,layout,memory_traits_inte>(g)
{
}
/*! \brief create a grid of size sz in each direction
*
* \param sz grid size in each direction
*
*/
inline grid_cpu(const size_t & sz) THROW
:grid_base_impl<dim,T,S,layout,memory_traits_inte>(sz)
{
}
//! Constructor: allocate memory and give it a representation
inline grid_cpu(const size_t (& sz)[dim]) THROW
:grid_base_impl<dim,T,S,layout,memory_traits_inte>(sz)
{
}
};
//! shorthand for a grid on gpu
template <unsigned int dim, typename T> using grid_gpu = grid_cpu<dim,T,CudaMemory,typename memory_traits_inte<T>::type>;
#endif /* OPENFPM_DATA_SRC_GRID_GRID_GPU_HPP_ */
......@@ -52,6 +52,7 @@
#include "Packer_Unpacker/has_pack_agg.hpp"
#include "cuda/cuda_grid_gpu_funcs.cuh"
#include "grid_base_implementation.hpp"
#include "util/for_each_ref.hpp"
#ifndef CUDA_GPU
typedef HeapMemory CudaMemory;
......
//
// Created by tommaso on 27/06/19.
//
#ifndef OPENFPM_PDATA_BLOCKCACHE_CUH
#define OPENFPM_PDATA_BLOCKCACHE_CUH
#include "util/cuda_util.hpp"
namespace BlockCacheUtils
{
// Return the value unchanged when the compile-time flag is true, zero otherwise.
// Note: function templates cannot be partially specialized, so a single primary
// template with a compile-time conditional replaces the two specializations.
template <bool CondT, typename T>
inline __device__ __host__ unsigned int SetToZeroIfFalse(T value)
{
	return CondT ? static_cast<unsigned int>(value) : 0u;
}
}
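// A minimal usage sketch (illustrative only; the flag would typically come from
// a compile-time option such as BlockCache's loadGhostInSharedMemory parameter):
//
//   unsigned int v = BlockCacheUtils::SetToZeroIfFalse<true>(mask & 0x1);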
/**
* BlockCache is an abstraction built on the concept of loading a block into shared
* memory before using it in a stencil operation.
* The idea is to provide a way to transparently address shared and global memory via coordinates,
* caching the block data into shared memory but also allowing addressing non-cached data directly in global.
*/
template <typename SparseGridT, unsigned int chunksPerBlock, bool loadGhostInSharedMemory, unsigned int ... props>
struct BlockCache
{
	// caching interface (block load/store methods) still to be implemented
};
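// A minimal sketch of the caching scheme described above (illustrative only, not
// part of the BlockCache interface): each thread block stages its chunk of the
// input in shared memory and falls back to global memory for halo points.
// Launch with blockDim.x == chunkSize.
template<unsigned int chunkSize>
__global__ void stencil_cached_sketch(const float * in, float * out, int n)
{
	__shared__ float chunk[chunkSize];

	int gid = blockIdx.x * blockDim.x + threadIdx.x;

	// cache the block: afterwards it is addressed transparently by threadIdx.x
	if (gid < n) {chunk[threadIdx.x] = in[gid];}
	__syncthreads();
	if (gid >= n) {return;}

	// interior neighbours come from shared memory, halo neighbours from global
	float left  = (threadIdx.x > 0) ? chunk[threadIdx.x - 1]
	                                : ((gid > 0) ? in[gid - 1] : in[gid]);
	float right = (threadIdx.x + 1 < blockDim.x && gid + 1 < n) ? chunk[threadIdx.x + 1]
	                                : ((gid + 1 < n) ? in[gid + 1] : in[gid]);

	out[gid] = 0.5f * (left + right) - chunk[threadIdx.x];
}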
#endif //OPENFPM_PDATA_BLOCKCACHE_CUH
......@@ -167,6 +167,23 @@ public:
return blockMap;
}
inline __device__ void get_sparse(unsigned int linId, unsigned int & dataBlockPos , unsigned int & offset) const
{
#ifdef __NVCC__
typedef BlockTypeOf<AggregateBlockT, pMask> BlockT;
unsigned int blockId = linId / BlockT::size;
offset = linId % BlockT::size;
const auto sid = blockMap.get_sparse(blockId);
dataBlockPos = sid.id;
#else // __NVCC__
std::cout << __FILE__ << ":" << __LINE__ << " error: this file must be compiled with nvcc to be used on the gpu" << std::endl;
#endif // __NVCC__
}
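// Usage sketch (illustrative only; blockMapKer is a hypothetical kernel-side
// object): linId is split into the chunk key (linId / BlockT::size) and the
// in-chunk offset (linId % BlockT::size); the key is then resolved through the
// sparse map into the chunk position dataBlockPos inside the data buffer.
//
//   unsigned int dataBlockPos, offset;
//   blockMapKer.get_sparse(linId, dataBlockPos, offset);
//   auto m = blockMapKer.getDataBuffer().template get<pMask>(dataBlockPos)[offset];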
inline static __device__ unsigned int getBlockId(unsigned int linId)
{
#ifdef __NVCC__
......@@ -241,14 +258,6 @@ public:
inline __device__ void remove(unsigned int blockId, unsigned int offset)
{
#ifdef __NVCC__
// const auto & aggregate = blockMap.get(blockId);
// const auto & block = aggregate.template get<p>();
// const auto & mask = aggregate.template get<pMask>();
// // Now check if the element actually exists
// return exist(mask[offset])
// ? block[offset]
// : blockMap.template getBackground<p>()[offset];
//// return blockMap.template get<p>(blockId)[offset];
const auto sid = blockMap.get_sparse(blockId);
blockMap.template get<pMask>(sid)[offset] = 0;
......@@ -258,6 +267,26 @@ public:
#endif // __NVCC__
}
/*! \brief Return the index buffer for the sparse vector
 *
 * \return the index buffer
 *
 */
inline __device__ auto getIndexBuffer() -> decltype(blockMap.getIndexBuffer())
{
return blockMap.getIndexBuffer();
}
/*! \brief Return the data buffer for the sparse vector
 *
 * \return the data buffer
 *
 */
inline __device__ auto getDataBuffer() -> decltype(blockMap.getDataBuffer())
{
return blockMap.getDataBuffer();
}
#ifdef SE_CLASS1
/*! \brief Check if the device pointer is owned by this structure
......@@ -299,6 +328,7 @@ inline __device__ auto BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>
#endif // __NVCC__
}
template<typename AggregateBlockT, typename indexT, template<typename> class layout_base>
template<unsigned int p>
inline __device__ auto BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>
......
......@@ -571,6 +571,17 @@ private:
//! the set of all sub-set to pack
mutable openfpm::vector_gpu<Box<dim,int>> pack_subs;
//! links from the padding points to the real points of a coarser sparse grid
openfpm::vector_gpu<aggregate<size_t>> links_up;
//! scan offsets of the links down
openfpm::vector_gpu<aggregate<unsigned int>> link_dw_scan;
//! links from the padding points to the real points of a finer sparse grid
openfpm::vector_gpu<aggregate<int,short int>> link_dw;
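//! true while the neighbour list built by findNeighbours() is up to date (reset by flush)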
bool findNN = false;
protected:
static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size;
typedef AggregateBlockT AggregateInternalT;
......@@ -640,6 +651,8 @@ public:
{
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>
::template flush<v_reduce ...>(context, opt);
findNN = false;
}
private:
......@@ -1251,6 +1264,191 @@ public:
return BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::template insert<p>(gridGeometry.LinId(coord));
}
/*! \brief construct the links between levels
 *
 * \param grid_up grid one level up
 * \param grid_dw grid one level down
*
*/
void construct_link(self & grid_up, self & grid_dw, mgpu::ofp_context_t &context)
{
/* // Here it is crucial to use "auto &" as the type, as we need to be sure to pass the reference to the actual buffers!
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
ite_gpu<1> ite;
ite.wthr.x = indexBuffer.size();
ite.wthr.y = 1;
ite.wthr.z = 1;
ite.thr.x = getBlockSize();
ite.thr.y = 1;
ite.thr.z = 1;
openfpm::vector_gpu<aggregate<unsigned int>> output;
output.resize(indexBuffer.size() + 1);
CUDA_LAUNCH((SparseGridGpuKernels::link_construct<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,grid_up.toKernel(),this->toKernel(),output.toKernel());
openfpm::scan((unsigned int *)output.template getDeviceBuffer<0>(),output.size(),(unsigned int *)output.template getDeviceBuffer<0>(),context);
output.template deviceToHost<0>(output.size()-1,output.size()-1);
unsigned int np_lup = output.template get<0>(output.size()-1);
links_up.resize(np_lup);
CUDA_LAUNCH((SparseGridGpuKernels::link_construct_insert<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,grid_up.toKernel(),this->toKernel(),output.toKernel(),links_up.toKernel());
*/
}
/*! \brief Get the offsets for each point of the links down
*
* \return the offsets of the links down
*
*/
openfpm::vector_gpu<aggregate<unsigned int>> & getDownLinksOffsets()
{
return link_dw_scan;
}
/*! \brief Get the links down for each point
*
* \return the links down for each point
*
*/
openfpm::vector_gpu<aggregate<int,short int>> & getDownLinks()
{
return link_dw;
}
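/* A host-side traversal sketch (illustrative only): link_dw_scan and link_dw
 * form a CSR-like structure, so the down-links of padding point i are the
 * entries [start, stop) of link_dw computed as below.
 *
 *   auto & scan  = grid.getDownLinksOffsets();
 *   auto & links = grid.getDownLinks();
 *   scan.template deviceToHost<0>();
 *   links.template deviceToHost<0,1>();
 *   for (size_t i = 0 ; i < scan.size() ; i++)
 *   {
 *       unsigned int start = scan.template get<0>(i);
 *       unsigned int stop  = (i+1 < scan.size()) ? scan.template get<0>(i+1) : links.size();
 *       for (unsigned int j = start ; j < stop ; j++)
 *       {
 *           int chunk     = links.template get<0>(j);  // chunk position in the finer grid
 *           short int off = links.template get<1>(j);  // offset inside the chunk
 *       }
 *   }
 */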
/*! \brief construct the links to the level down
 *
 * \param grid_dw grid one level down
 * \param p_dw offset applied when projecting the points onto the finer grid
 * \param context gpu context
 *
 */
void construct_link_dw(self & grid_dw, Point<dim,int> p_dw, mgpu::ofp_context_t &context)
{
// Here it is crucial to use "auto &" as the type, as we need to be sure to pass the reference to the actual buffers!
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
// First we count the padding points
ite_gpu<1> ite;
ite.wthr.x = indexBuffer.size();
ite.wthr.y = 1;
ite.wthr.z = 1;
ite.thr.x = getBlockSize();
ite.thr.y = 1;
ite.thr.z = 1;
openfpm::vector_gpu<aggregate<unsigned int>> output;
output.resize(indexBuffer.size()+1);
output.fill<0>(0);
CUDA_LAUNCH((SparseGridGpuKernels::count_paddings<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,this->toKernel(),output.toKernel());
openfpm::scan((unsigned int *)output.template getDeviceBuffer<0>(),output.size(),(unsigned int *)output.template getDeviceBuffer<0>(),context);
output.template deviceToHost<0>(output.size()-1,output.size()-1);
unsigned int padding_points = output.template get<0>(output.size()-1);
// get the padding points
openfpm::vector_gpu<aggregate<unsigned int,short int>> pd_points;
pd_points.resize(padding_points);
CUDA_LAUNCH((SparseGridGpuKernels::collect_paddings<BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask>),ite,this->toKernel(),output.toKernel(),pd_points.toKernel());
// Count number of link down for padding points
// Calculate ghost
link_dw_scan.resize(padding_points+1);
link_dw_scan.fill<0>(0);
ite = link_dw_scan.getGPUIterator();
CUDA_LAUNCH((SparseGridGpuKernels::link_construct_dw_count<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),
ite,pd_points.toKernel(),grid_dw.toKernel(),this->toKernel(),link_dw_scan.toKernel(),p_dw);
openfpm::scan((unsigned int *)link_dw_scan.template getDeviceBuffer<0>(),link_dw_scan.size(),(unsigned int *)link_dw_scan.template getDeviceBuffer<0>(),context);
link_dw_scan.template deviceToHost<0>(link_dw_scan.size()-1,link_dw_scan.size()-1);
size_t np_ldw = link_dw_scan.template get<0>(link_dw_scan.size()-1);
link_dw.resize(np_ldw);
CUDA_LAUNCH((SparseGridGpuKernels::link_construct_insert_dw<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,pd_points.toKernel(),grid_dw.toKernel(),this->toKernel(),link_dw_scan.toKernel(),link_dw.toKernel(),p_dw);
link_dw_scan.resize(link_dw_scan.size()-1);
}
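/* A minimal usage sketch (illustrative only; grid_c/grid_f are hypothetical
 * coarse/fine grids): the calling grid links its padding points to the real
 * points of the finer grid, then the result is read back through the getters.
 *
 *   Point<dim,int> p_dw({0,0});                    // assuming dim == 2
 *   grid_c.construct_link_dw(grid_f, p_dw, context);
 *   auto & offsets = grid_c.getDownLinksOffsets(); // one entry per padding point
 *   auto & links   = grid_c.getDownLinks();        // (chunk,offset) pairs in grid_f
 */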
/*! \brief construct the links to the level up
 *
 * \param grid_up grid one level up
*
*/
void construct_link_up(self & grid_up, mgpu::ofp_context_t &context)
{
/* // Here it is crucial to use "auto &" as the type, as we need to be sure to pass the reference to the actual buffers!
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
ite_gpu<1> ite;
ite.wthr.x = indexBuffer.size();
ite.wthr.y = 1;
ite.wthr.z = 1;
ite.thr.x = getBlockSize();
ite.thr.y = 1;
ite.thr.z = 1;
openfpm::vector_gpu<aggregate<unsigned int>> output;
output.resize(indexBuffer.size() + 1);
CUDA_LAUNCH((SparseGridGpuKernels::link_construct<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,grid_up.toKernel(),this->toKernel(),output.toKernel());
openfpm::scan((unsigned int *)output.template getDeviceBuffer<0>(),output.size(),(unsigned int *)output.template getDeviceBuffer<0>(),context);
output.template deviceToHost<0>(output.size()-1,output.size()-1);
unsigned int np_lup = output.template get<0>(output.size()-1);
links_up.resize(np_lup);
CUDA_LAUNCH((SparseGridGpuKernels::link_construct_insert<dim,
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::pMask,
blockSize>),ite,grid_up.toKernel(),this->toKernel(),output.toKernel(),links_up.toKernel());
*/
}
/*! \brief Before inserting any element you have to call this function to initialize the insert buffer
*
* \param nBlock number of blocks the insert buffer has
......@@ -1288,6 +1486,10 @@ public:
unsigned int numScalars = indexBuffer.size() * dataChunkSize;
if (numScalars == 0) return;
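// findNN caches the neighbourhood construction: flush() resets it, so the first
// stencil application after new insertions rebuilds the neighbour list once and
// later applications reuse it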
if (findNN == false)
{
findNeighbours<stencil_type>();
}
// NOTE: Here we want to work only on one data chunk per block!
......@@ -1397,6 +1599,8 @@ public:
CUDA_LAUNCH_DIM3((SparseGridGpuKernels::findNeighbours<dim,NNtype>),
threadGridSize, localThreadBlockSize,indexBuffer.toKernel(), this->toKernel(),nn_blocks.toKernel());
findNN = true;
}
size_t countExistingElements() const
......
......@@ -348,6 +348,15 @@ public:
return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template get<p>(grid.LinId(coord));
}
// Data management methods
template<typename CoordT>
inline __device__ void
get_sparse(const grid_key_dx<dim, CoordT> & coord, unsigned int & dataBlockPos, unsigned int & offset) const
{
return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::get_sparse(grid.LinId(coord),dataBlockPos,offset);
}
/*! \brief Access the grid point
*
* \param coord point
......
......@@ -91,6 +91,186 @@ namespace SparseGridGpuKernels
sparseGrid.template storeBlock<pMask>(dataBlock, enlargedBlock);
}
/*! \brief construct the links between 2 sparse grids
*
*
*/
template<unsigned int dim, unsigned int pMask, unsigned int chunk_size , typename SparseGridType, typename outputType>
__global__ void link_construct(SparseGridType grid_up, SparseGridType grid_cu, outputType out)
{
const unsigned int dataBlockPos = blockIdx.x;
const unsigned int offset = threadIdx.x;
auto & indexBuffer = grid_cu.getIndexBuffer();
auto & dataBuffer = grid_cu.getDataBuffer();
// if the point is a padding
if (dataBuffer.template get <pMask>(dataBlockPos)[offset] & 0x2)
{
auto id = indexBuffer.template get<0>(dataBlockPos);
grid_key_dx<dim,int> pos = grid_cu.getCoord(id*chunk_size + offset);
printf("HERE %d %d \n",pos.get(0),pos.get(1));
for (int i = 0 ; i < dim ; i++)
{pos.set_d(i,pos.get(i) / 2);}
if (grid_up.template get<pMask>(pos) == 0x1)
{
atomicAdd(&out.template get<0>(dataBlockPos),1);
}
}
}
/*! \brief count the padding points
*
*
*/
template<unsigned int dim, unsigned int pMask, unsigned int chunk_size , typename SparseGridType, typename outputType>
__global__ void count_paddings(SparseGridType grid_cu, outputType out)
{
const unsigned int dataBlockPos = blockIdx.x;
const unsigned int offset = threadIdx.x;
auto & indexBuffer = grid_cu.getIndexBuffer();
auto & dataBuffer = grid_cu.getDataBuffer();
// if the point is a padding
if (dataBuffer.template get <pMask>(dataBlockPos)[offset] & 0x2)
{
atomicAdd(&out.template get<0>(dataBlockPos),1);
}
}
/*! \brief collect the padding points
*
*
*/
template<unsigned int pMask, typename SparseGridType, typename ScanType, typename outputType>
__global__ void collect_paddings(SparseGridType grid_cu, ScanType stp, outputType out)
{
const unsigned int dataBlockPos = blockIdx.x;
const unsigned int offset = threadIdx.x;
__shared__ int counter;
// only one thread initializes the shared counter before the barrier
if (threadIdx.x == 0) {counter = 0;}
__syncthreads();
auto & indexBuffer = grid_cu.getIndexBuffer();
auto & dataBuffer = grid_cu.getDataBuffer();
int pad_offset = stp.template get<0>(dataBlockPos);
// if the point is a padding
if (dataBuffer.template get <pMask>(dataBlockPos)[offset] & 0x2)
{
int cnt = atomicAdd(&counter,1);
out.template get<0>(pad_offset + cnt) = dataBlockPos;
out.template get<1>(pad_offset + cnt) = offset;
}
}
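/* count_paddings + scan + collect_paddings together implement a standard stream
 * compaction: the exclusive scan of the per-chunk counts gives each chunk its
 * starting position in the output, and the shared atomic counter assigns ranks
 * within the chunk. Schematically (illustrative numbers):
 *
 *   counts per chunk : [2, 0, 3, 1]
 *   exclusive scan   : [0, 2, 2, 5]  (total 6 padding points)
 *   output           : chunk i writes its points at positions scan[i] + rank
 */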
/*! \brief count the links between a sparse grid and a finer one
*
*
*/
template<unsigned int dim, unsigned int pMask, unsigned int chunk_size,
typename padPointType , typename SparseGridType,
typename outputType>
__global__ void link_construct_dw_count(padPointType padPoints, SparseGridType grid_dw, SparseGridType grid_cu, outputType out, Point<dim,int> p_dw)
{
const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
if (p >= padPoints.size()) {return;}
const unsigned int dataBlockPos = padPoints.template get<0>(p);
const unsigned int offset = padPoints.template get<1>(p);
auto & indexBuffer = grid_cu.getIndexBuffer();
auto & dataBuffer = grid_cu.getDataBuffer();
auto id = indexBuffer.template get<0>(dataBlockPos);
grid_key_dx<dim,int> pos = grid_cu.getCoord(id*chunk_size + offset);
for (int i = 0 ; i < dim ; i++)
{pos.set_d(i,pos.get(i) * 2 + p_dw.get(i) );}
for (int j = 0 ; j < 2*dim ; j++)
{
grid_key_dx<dim,int> kc;
for (int k = 0 ; k < dim ; k++)
{
kc.set_d(k,pos.get(k) + ((j >> k) & 0x1) );
}
printf("COUNT p: %d dataBlockPos: %d offset: %d kc: %d %d pos: %d %d\n",p,dataBlockPos,offset,kc.get(0),kc.get(1),pos.get(0),pos.get(1));
if (grid_dw.template get<pMask>(kc) & 0x1)
{
printf("COUNT 2 %d %d \n",kc.get(0),kc.get(1));
int a = atomicAdd(&out.template get<0>(p),1);
}
}
}
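/* Note on the inner loops above (and in link_construct_insert_dw below): the bit
 * pattern ((j >> k) & 0x1) enumerates the corners of the 2x2x...x2 block of fine
 * cells covered by one coarse point, e.g. for dim == 2:
 *
 *   j = 0 -> (+0,+0), j = 1 -> (+1,+0), j = 2 -> (+0,+1), j = 3 -> (+1,+1)
 *
 * The bound 2*dim equals 2^dim only for dim == 2; visiting all children in other
 * dimensionalities would require (1 << dim) iterations.
 */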
/*! \brief insert the links between a sparse grid and a finer one
*
*
*/
template<unsigned int dim, unsigned int pMask, unsigned int chunk_size,
typename padPointType , typename SparseGridType, typename scanType, typename outputType>
__global__ void link_construct_insert_dw(padPointType padPoints, SparseGridType grid_dw, SparseGridType grid_cu, scanType scan, outputType out, Point<dim,int> p_dw)
{
const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
if (p >= padPoints.size()) {return;}
const unsigned int dataBlockPos = padPoints.template get<0>(p);
const unsigned int offset = padPoints.template get<1>(p);
auto & indexBuffer = grid_cu.getIndexBuffer();
auto & dataBuffer = grid_cu.getDataBuffer();
auto & dataBuffer_dw = grid_dw.getDataBuffer();
auto id = indexBuffer.template get<0>(dataBlockPos);
grid_key_dx<dim,int> pos = grid_cu.getCoord(id*chunk_size + offset);
for (int i = 0 ; i < dim ; i++)
{pos.set_d(i,pos.get(i) * 2 + p_dw.get(i) );}
unsigned int dataBlockPos_dw;
unsigned int offset_dw;
int link_offset = scan.template get<0>(p);
int c = 0;
for (int j = 0 ; j < 2*dim ; j++)
{
grid_key_dx<dim,int> kc;
for (int k = 0 ; k < dim ; k++)
{
kc.set_d(k,pos.get(k) + ((j >> k) & 0x1) );
}
grid_dw.get_sparse(kc,dataBlockPos_dw,offset_dw);
printf("HERE2 %d %d %d %d %d %d\n",pos.get(0),pos.get(1),kc.get(0),kc.get(1),dataBlockPos_dw,offset_dw);
if (dataBuffer_dw.template get<pMask>(dataBlockPos_dw)[offset_dw] & 0x1)
{
printf("ADD %d %d %d %d \n",offset_dw,p,link_offset,c);
out.template get<0>(link_offset + c) = dataBlockPos_dw;
out.template get<1>(link_offset + c) = offset_dw;
c++;
}
}
}
/*! \brief find the neighborhood of each chunk
*
* \param indexBuffer Chunk index buffer
......
......@@ -8,6 +8,8 @@
#ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
#define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
#include "util/for_each_ref.hpp"
// TODO: find a good place for the following method
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
......@@ -496,6 +498,33 @@ namespace openfpm
}
/*! \brief Get the data buffer
*
* \return the reference to the data buffer
*/
__device__ auto getDataBuffer() -> decltype(vct_data)&
{
return vct_data;
}
/*! \brief Get the indices buffer
*
* \return the reference to the indices buffer
*/
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
{
return vct_index;
}
/*! \brief Get the data buffer
*
* \return the reference to the data buffer
*/
__device__ auto getDataBuffer() const -> const decltype(vct_data)&
{
return vct_data;
}
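/* Device-side usage sketch (illustrative only; vct is a hypothetical
 * vector_sparse_gpu_ker instance inside a kernel): the two buffers expose the
 * raw sparse layout, index i pairing a key with its data slot.
 *
 *   auto & index = vct.getIndexBuffer();
 *   auto & data  = vct.getDataBuffer();
 *   auto key     = index.template get<0>(i);
 *   auto value   = data.template get<p>(i);
 */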