Commit b7d41928 authored by incardon

Several fixes for clang + CUDA compilation + setBackground

parent aad2641b
......@@ -23,7 +23,7 @@ struct setBackground_impl
{
aggrT_src & bck;
local_grids_type loc_grid;
local_grids_type & loc_grid;
inline setBackground_impl(aggrT_src & bck, local_grids_type & loc_grid)
:bck(bck),loc_grid(loc_grid)
......
......@@ -652,7 +652,9 @@ public:
this->gk.set_d(dim-1,1);
this->gk_stop.set_d(dim-1,0);
initialized = true;
#ifdef SE_CLASS1
this->initialized = true;
#endif
}
};
......
......@@ -440,7 +440,7 @@ public:
//! Indicate this structure has a function to check the device pointer
typedef int yes_has_check_device_pointer;
__device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts,
__host__ __device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells,
......@@ -606,7 +606,7 @@ public:
//! Indicate this structure has a function to check the device pointer
typedef int yes_has_check_device_pointer;
__device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> cell_nn,
__host__ __device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> cell_nn,
openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> cell_nn_list,
openfpm::vector_sparse_gpu_ker<aggregate<cnt_type>,int,memory_traits_inte> cl_sparse,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
......
......@@ -165,7 +165,7 @@ public:
// Initialize the blocks to background
auto & insertBuffer = blockMap.getGPUInsertBuffer();
typedef BlockTypeOf<AggregateInternalT, pMask> BlockType; // Here assuming that all block types in the aggregate have the same size!
constexpr unsigned int chunksPerBlock = threadBlockSize / BlockType::size; // Floor is good here...
constexpr unsigned int chunksPerBlock = 1; // Floor is good here...
BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock> <<< insertBuffer.size()/chunksPerBlock, chunksPerBlock*BlockType::size >>>(
insertBuffer.toKernel());
......@@ -186,8 +186,30 @@ public:
blockMap.template flush<v_reduce ... >(context, opt);
}
/*! \brief set the background for property p
*
* \tparam p property p
*
*/
template<unsigned int p>
void setBackgroundValue(ScalarTypeOf<AggregateBlockT, p> backgroundValue);
void setBackgroundValue(ScalarTypeOf<AggregateBlockT, p> backgroundValue)
{
// NOTE: Here we assume user only passes Blocks and not scalars in the templated aggregate type
typedef BlockTypeOf<AggregateInternalT, p> BlockT;
typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;
BlockT bP;
BlockM bM;
for (unsigned int i = 0; i < BlockT::size; ++i)
{
bP[i] = backgroundValue;
bM[i] = 0;
}
blockMap.template setBackground<p>(bP);
blockMap.template setBackground<pMask>(bM);
}
template<typename BitMaskT>
inline static bool getBit(const BitMaskT &bitMask, unsigned char pos)
......@@ -280,26 +302,10 @@ void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::hostToD
blockMap.template hostToDevice<pMask>();
}
template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
template<unsigned int p>
void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setBackgroundValue(
ScalarTypeOf<AggregateBlockT, p> backgroundValue)
{
// NOTE: Here we assume user only passes Blocks and not scalars in the templated aggregate type
typedef BlockTypeOf<AggregateInternalT, p> BlockT;
typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;
BlockT bP;
BlockM bM;
//template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
//template<unsigned int p>
//void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setBackgroundValue(
// ScalarTypeOf<AggregateBlockT, p> backgroundValue)
for (unsigned int i = 0; i < BlockT::size; ++i)
{
bP[i] = backgroundValue;
bM[i] = 0;
}
blockMap.template setBackground<p>(bP);
blockMap.template setBackground<pMask>(bM);
}
#endif /* BLOCK_MAP_GPU_HPP_ */
......@@ -79,6 +79,26 @@ public:
{
return data_id;
}
/*! \brief Return the grid key equal to toPoint() + p (component-wise sum)
 *
 * Converts the current position to a Point via toPoint() and adds the
 * given offset point component by component.
 * NOTE(review): assumes toPoint() yields the current position as a
 * Point<dims,size_t> — confirm against the enclosing class (outside this view).
 *
 * \param p the offset point to add to the current position
 *
 * \return a grid_key_dx whose i-th component is toPoint().get(i) + p.get(i)
 *
 */
inline grid_key_dx<SparseGridGpu_type::dims> operator+(const Point<SparseGridGpu_type::dims,size_t> & p)
{
	grid_key_dx<SparseGridGpu_type::dims> ret;
	// current position as a point; each component is summed with p below
	Point<SparseGridGpu_type::dims,size_t> key = toPoint();

	for (int i = 0 ; i < SparseGridGpu_type::dims ; i++)
	{
		ret.set_d(i,key.get(i) + p.get(i));
	}

	return ret;
}
};
template<unsigned int dim, typename SparseGridType>
......
......@@ -217,7 +217,7 @@ public:
data = it_sub.data;
sub_set = it_sub.sub_set;
res = it_sub.res;
in_chunk_it.reinitialize(it_sub.in_chunk_it);
in_chunk_it = it_sub.in_chunk_it;
chunk_sz = it_sub.chunk_sz;
return *this;
......
......@@ -10,7 +10,7 @@
//todo: Check where it's a good place to put the following method...
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(dim3Ta coord, dim3Tb dimensions)
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
{
int res = coord.z;
res *= dimensions.y;
......
......@@ -125,7 +125,7 @@ namespace openfpm
#ifdef __NVCC__
typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
typedef typename boost::mpl::at<typename vector_data_type::value_type::type,typename reduction_type::prop>::type red_type;
typedef typename reduction_type::op_red<red_type> red_op;
typedef typename reduction_type::template op_red<red_type> red_op;
typedef typename boost::mpl::at<typename vector_index_type::value_type::type,boost::mpl::int_<0>>::type seg_type;
red_type init;
init = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment