Commit 4ddc3641 authored by incardon's avatar incardon

Fix insertBlock

parent 3f39e8df
......@@ -131,11 +131,11 @@ public:
#endif // __NVCC__
}
template<unsigned int nChunksPerBlocks = 1>
// template<unsigned int nChunksPerBlocks = 1>
inline __device__ auto insertBlockNew(indexT blockId, unsigned int stride = 8192) -> decltype(blockMap.insert(0))
{
int offset = threadIdx.x % stride;
__shared__ int mem[nChunksPerBlocks][encap_shmem<sizeof(blockMap.insert(0))>::nthr];
// int offset = threadIdx.x % stride;
__shared__ int mem/*[nChunksPerBlocks]*/[encap_shmem<sizeof(blockMap.insert(0))>::nthr];
#ifdef __NVCC__
if (threadIdx.x % stride == 0 && threadIdx.y == 0 && threadIdx.z == 0)
......@@ -143,12 +143,12 @@ public:
auto ec = blockMap.insert(blockId);
// copy to shared memory to broadcast to all threads
new (mem[offset]) decltype(ec)(ec.private_get_data(),ec.private_get_k());
new (mem/*[offset]*/) decltype(ec)(ec.private_get_data(),ec.private_get_k());
}
__syncthreads();
return *(decltype(blockMap.insert(0)) *)mem[offset];
return *(decltype(blockMap.insert(0)) *)mem/*[offset]*/;
#else // __NVCC__
std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
......
......@@ -53,7 +53,7 @@ __global__ void insertValuesBlocked(SparseGridType sparseGrid)
if (offset == 0) // Just one thread per data block
{
auto encap = sparseGrid.insertBlock(dataBlockId,BlockT::size);
auto encap = sparseGrid.insertBlock(dataBlockId);
blocks[dataBlockNum] = &(encap.template get<p>());
masks[dataBlockNum] = &(encap.template get<pMask>());
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment