Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mosaic/software/parallel-computing/openfpm/openfpm_data
  • argupta/openfpm_data
2 results
Show changes
Commits on Source (1)
Showing
with 140 additions and 473 deletions
......@@ -752,7 +752,7 @@ public:
{
auto key = it.get();
if (this->get_o(key) != g.get_o(key))
if (this->get_o(key) != this->get_o(key))
return false;
++it;
......
......@@ -397,7 +397,7 @@ public:
*
*/
template<typename a, typename ...T>
__device__ __host__ inline void set(a v, T...t)
inline void set(a v, T...t)
{
#ifdef SE_CLASS1
if (sizeof...(t) != dim -1)
......
......@@ -186,7 +186,7 @@ class grid_sm
*
*/
__device__ __host__ inline void Initialize(const size_t sz)
inline void Initialize(const size_t sz)
{
//! Initialize the basic structure for each dimension
sz_s[0] = sz;
......@@ -217,7 +217,7 @@ class grid_sm
*
*/
__device__ __host__ inline void Initialize(const size_t (& sz)[N])
inline void Initialize(const size_t (& sz)[N])
{
//! Initialize the basic structure for each dimension
sz_s[0] = sz[0];
......@@ -353,8 +353,7 @@ public:
*
*/
template<typename S>
__device__ __host__ inline grid_sm(const grid_sm<N,S> & g)
template<typename S> inline grid_sm(const grid_sm<N,S> & g)
{
size_tot = g.size_tot;
......@@ -381,7 +380,7 @@ public:
// Static element to calculate total size
__device__ __host__ inline size_t totalSize(const size_t (& sz)[N])
inline size_t totalSize(const size_t (& sz)[N])
{
size_t tSz = 1;
......@@ -416,7 +415,7 @@ public:
*
*/
__device__ __host__ inline grid_sm(const size_t (& sz)[N])
inline grid_sm(const size_t (& sz)[N])
: size_tot(totalSize(sz))
{
Initialize(sz);
......@@ -626,7 +625,7 @@ public:
}
//! Destructor
__device__ __host__ ~grid_sm() {};
~grid_sm() {};
/*! \brief Return the size of the grid
*
......@@ -714,7 +713,7 @@ public:
*
*/
__device__ __host__ inline size_t size_s(unsigned int i) const
inline size_t size_s(unsigned int i) const
{
return sz_s[i];
}
......@@ -738,21 +737,11 @@ public:
* \return get the size of the grid as an array
*
*/
__device__ __host__ inline const size_t (& getSize() const)[N]
inline const size_t (& getSize() const)[N]
{
return sz;
}
/*! \brief Returns the size of the grid in the passed array
*
* \return the size of the grid in the passed array
*
*/
__device__ __host__ inline void getSize(size_t (&size) [N]) const
{
for (size_t i = 0; i < N; ++i) size[i] = sz[i];
}
/*! \brief Return a sub-grid iterator
*
* Return a sub-grid iterator, to iterate through the grid
......
......@@ -83,16 +83,17 @@ void test_sub_index()
no_transform_only<dim,T> t(mt,pt);
CUDA_LAUNCH_DIM3((subindex<false,dim,T,cnt_type,ids_type,no_transform_only<dim,T>>),ite.wthr,ite.thr,div,
spacing,
off,
t,
pl.capacity(),
pl.size(),
part_ids.capacity(),
0,
pl.toKernel(),
cl_n.toKernel(),
part_ids.toKernel());
static_cast<T *>(pl.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
cl_n.template deviceToHost<0>();
part_ids.template deviceToHost<0>();
......@@ -202,16 +203,17 @@ void test_sub_index2()
shift_only<dim,T> t(mt,pt);
CUDA_LAUNCH_DIM3((subindex<false,dim,T,cnt_type,ids_type,shift_only<dim,T>>),ite.wthr,ite.thr,div,
spacing,
off,
t,
pl.capacity(),
pl.size(),
part_ids.capacity(),
0,
pl.toKernel(),
cl_n.toKernel(),
part_ids.toKernel());
static_cast<T *>(pl.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
cl_n.template deviceToHost<0>();
part_ids.template deviceToHost<0>();
......@@ -385,10 +387,11 @@ void test_fill_cell()
div_c,
off,
part_ids.size(),
part_ids.capacity(),
0,
starts.toKernel(),
part_ids.toKernel(),
cells.toKernel() );
static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
cells.template deviceToHost<0>();
......@@ -663,8 +666,7 @@ void test_reorder_parts(size_t n_part)
// Here we test fill cell
CUDA_LAUNCH_DIM3((reorder_parts<decltype(parts_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sort_to_not_sort.toKernel()),
decltype(cells_out.toKernel()),
decltype(sort_to_not_sort.toKernel()),
cnt_type,
shift_ph<0,cnt_type>>),ite.wthr,ite.thr,pl.size(),
parts_prp.toKernel(),
......@@ -673,7 +675,7 @@ void test_reorder_parts(size_t n_part)
pl_out.toKernel(),
sort_to_not_sort.toKernel(),
non_sort_to_sort.toKernel(),
cells_out.toKernel());
static_cast<cnt_type *>(cells_out.template getDeviceBuffer<0>()));
bool check = true;
parts_prp_out.template deviceToHost<0>();
......
......@@ -29,69 +29,28 @@ class CellDecomposer_gpu_ker
//! transformation
transform t;
//! Unit box of the Cell list
SpaceBox<dim,T> box_unit;
//! Grid structure of the Cell list
grid_sm<dim,void> gr_cell;
//! cell_shift
Point<dim,long int> cell_shift;
public:
__device__ __host__ CellDecomposer_gpu_ker()
{}
__device__ __host__ CellDecomposer_gpu_ker(
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t)
: spacing_c(spacing_c),div_c(div_c),off(off),t(t)
__device__ __host__ CellDecomposer_gpu_ker(openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t)
:spacing_c(spacing_c),div_c(div_c),off(off),t(t)
{}
__device__ __host__ CellDecomposer_gpu_ker(
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
SpaceBox<dim,T> box_unit,
grid_sm<dim,void> gr_cell,
Point<dim,long int> cell_shift)
: spacing_c(spacing_c),div_c(div_c),off(off),t(t),
box_unit(box_unit), gr_cell(gr_cell), cell_shift(cell_shift)
{}
__device__ __host__ grid_sm<dim,void> getGrid()
__host__ grid_sm<dim,void> getGrid()
{
size_t sz[dim];
for (size_t i = 0 ; i < dim ; i++)
{
sz[i] = div_c[i];
}
return grid_sm<dim,void> (sz);
}
__device__ __host__ void getGridSize(size_t (& sz)[dim]) const
{
for (size_t i = 0 ; i < dim ; i++)
{
sz[i] = div_c[i] + 2*off[i];
}
}
template<typename ids_type2>
__device__ __host__ mem_id getGridLinId(const grid_key_dx<dim,ids_type2> & gk) const
{
mem_id lid = gk.get(0);
for (mem_id i = 1 ; i < dim ; i++)
{
lid += gk.get(i) * (div_c[i-1] + 2*off[i-1]);
}
return lid;
return grid_sm<dim,void> (sz);
}
__device__ __host__ inline grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const
......@@ -123,46 +82,6 @@ public:
{
return t;
}
__device__ __host__ inline grid_key_dx<dim> getCellGrid(const T (& pos)[dim]) const
{
grid_key_dx<dim> key;
key.set_d(0,ConvertToID(pos,0));
for (size_t s = 1 ; s < dim ; s++)
{
key.set_d(s,ConvertToID(pos,s));
}
return key;
}
__device__ __host__ inline grid_key_dx<dim> getCellGrid(const Point<dim,T> & pos) const
{
grid_key_dx<dim> key;
key.set_d(0,ConvertToID(pos,0));
for (size_t s = 1 ; s < dim ; s++)
{
key.set_d(s,ConvertToID(pos,s));
}
return key;
}
__device__ __host__ inline size_t ConvertToID(const T (&x)[dim] ,size_t s) const
{
size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s];
id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
return id;
}
__device__ __host__ inline size_t ConvertToID(const Point<dim,T> & x ,size_t s, size_t sc = 0) const
{
size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s];
id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
return id;
}
};
#endif /* CELLDECOMPOSER_GPU_KER_HPP_ */
......@@ -44,10 +44,7 @@ struct CellList_gpu_ker_selector
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m,
const SpaceBox<dim,T>& box_unit,
const grid_sm<dim,void>& gr_cell,
const Point<dim,long int>& cell_shift)
unsigned int g_m)
{
return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
sorted_to_not_sorted.toKernel(),
......@@ -57,10 +54,7 @@ struct CellList_gpu_ker_selector
div_c,
off,
t,
g_m,
box_unit,
gr_cell,
cell_shift);
g_m);
}
};
......@@ -79,14 +73,10 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector
vector_cnt_type & dprt,
openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m,
const SpaceBox<dim,T>& box_unit,
const grid_sm<dim,void>& gr_cell,
const Point<dim,long int>& cell_shift)
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m)
{
return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
cell_nn_list.toKernel(),
......@@ -96,11 +86,7 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector
spacing_c,
div_c,
off,
t,
g_m,
box_unit,
gr_cell,
cell_shift);
t,g_m);
}
};
......@@ -261,11 +247,13 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
spacing_c,
off,
this->getTransform(),
pl.capacity(),
pl.size(),
part_ids.capacity(),
start,
pl.toKernel(),
starts.toKernel(),
part_ids.toKernel());
static_cast<T *>(pl.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
// now we construct the cells
......@@ -304,7 +292,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()),
decltype(cells.toKernel()),
cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
pl_prp.toKernel(),
pl_prp_out.toKernel(),
......@@ -312,7 +299,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
pl_out.toKernel(),
sorted_to_not_sorted.toKernel(),
non_sorted_to_sorted.toKernel(),
cells.toKernel());
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
if (opt == cl_construct_opt::Full)
{
......@@ -390,11 +377,13 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
spacing_c,
off,
this->getTransform(),
pl.capacity(),
stop,
part_ids.capacity(),
start,
pl.toKernel(),
cl_n.toKernel(),
part_ids.toKernel());
static_cast<T *>(pl.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
// now we scan
starts.resize(cl_n.size());
......@@ -410,7 +399,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
part_ids.size(),
cells.toKernel()) );
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
// sort
......@@ -419,13 +408,14 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
#else
CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
div_c,
off,
part_ids.size(),
start,
starts.toKernel(),
part_ids.toKernel(),
cells.toKernel() );
div_c,
off,
part_ids.size(),
part_ids.capacity(),
start,
static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
#endif
......@@ -441,7 +431,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()),
decltype(cells.toKernel()),
cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
pl_prp.toKernel(),
pl_prp_out.toKernel(),
......@@ -449,7 +438,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
pl_out.toKernel(),
sorted_to_not_sorted.toKernel(),
non_sorted_to_sorted.toKernel(),
cells.toKernel());
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
}
else
{
......@@ -457,7 +446,6 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()),
decltype(cells.toKernel()),
cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(),
pl_prp.toKernel(),
pl_prp_out.toKernel(),
......@@ -465,7 +453,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
pl_out.toKernel(),
sorted_to_not_sorted.toKernel(),
non_sorted_to_sorted.toKernel(),
cells.toKernel());
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
}
if (opt == cl_construct_opt::Full)
......@@ -660,17 +648,14 @@ public:
cells_nn,
cells_nn_list,
cl_sparse,
sorted_to_not_sorted,
sorted_domain_particles_ids,
nnc_rad,
spacing_c,
div_c,
off,
this->getTransform(),
g_m,
this->box_unit,
this->gr_cell,
this->cell_shift);
sorted_to_not_sorted,
sorted_domain_particles_ids,
nnc_rad,
spacing_c,
div_c,
off,
this->getTransform(),
g_m);
}
/*! \brief Clear the structure
......
......@@ -449,14 +449,11 @@ public:
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells,
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m,
SpaceBox<dim,T> box_unit,
grid_sm<dim,void> gr_cell,
Point<dim,long int> cell_shift)
:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift),
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m)
:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),
starts(starts),srt(srt),dprt(dprt),rad_cells(rad_cells),g_m(g_m)
{
}
......@@ -619,16 +616,12 @@ public:
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m,
SpaceBox<dim,T> box_unit,
grid_sm<dim,void> gr_cell,
Point<dim,long int> cell_shift)
:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift),
cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),cl_sparse(cl_sparse),g_m(g_m)
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t,
unsigned int g_m)
:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),
cl_sparse(cl_sparse),g_m(g_m)
{
}
......
......@@ -258,17 +258,18 @@ __device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid)
#ifdef __NVCC__
template<bool is_sparse,unsigned int dim, typename pos_type,
typename cnt_type, typename ids_type, typename transform,
typename vector_pos_type, typename vector_cnt_type, typename vector_pids_type>
typename cnt_type, typename ids_type, typename transform>
__global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div,
openfpm::array<pos_type,dim,cnt_type> spacing,
openfpm::array<ids_type,dim,cnt_type> off,
transform t,
int n_cap,
int n_part,
int n_cap2,
cnt_type start,
vector_pos_type p_pos,
vector_cnt_type counts,
vector_pids_type p_ids)
pos_type * p_pos,
cnt_type *counts,
cnt_type * p_ids)
{
cnt_type i, cid, ins;
ids_type e[dim+1];
......@@ -280,23 +281,23 @@ __global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div,
pos_type p[dim];
for (size_t k = 0 ; k < dim ; k++)
{p[k] = p_pos.template get<0>(i)[k];}
{p[k] = p_pos[i+k*n_cap];}
cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);
if (is_sparse == false)
{
e[dim] = atomicAdd(&counts.template get<0>(cid), 1);
e[dim] = atomicAdd(counts + cid, 1);
p_ids.template get<0>(ins)[0] = cid;
{p_ids.template get<0>(ins)[1] = e[dim];}
p_ids[ins] = cid;
{p_ids[ins+1*(n_cap2)] = e[dim];}
}
else
{
p_ids.template get<0>(ins)[0] = cid;
{p_ids.template get<0>(ins)[1] = e[dim];}
p_ids[ins] = cid;
{p_ids[ins+1*(n_cap2)] = e[dim];}
counts.template get<0>(ins) = cid;
counts[ins] = cid;
}
}
......@@ -352,42 +353,43 @@ __global__ void subindex_without_count(openfpm::array<ids_type,dim,cnt_type> div
#ifdef MAKE_CELLLIST_DETERMINISTIC
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph, typename vector_cells_type>
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id ,
cnt_type n,
vector_cells_type cells)
cnt_type *cells)
{
cnt_type i;
i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n) return;
cells.template get<0>(i) = encode_phase_id<cnt_type,ph>(phase_id,i);
cells[i] = encode_phase_id<cnt_type,ph>(phase_id,i);
}
#else
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph,typename vector_starts_type, typename vector_pids_type, typename vector_cells_type>
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id ,
openfpm::array<ids_type,dim,cnt_type> div_c,
openfpm::array<ids_type,dim,cnt_type> off,
cnt_type n,
cnt_type n_cap,
cnt_type start_p,
vector_starts_type starts,
vector_pids_type p_ids,
vector_cells_type cells)
const cnt_type *starts,
const cnt_type * p_ids,
cnt_type *cells)
{
cnt_type i, cid, id, start;
i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n) return;
cid = p_ids.template get<0>(i)[0];
cid = p_ids[i];
start = starts.template get<0>(cid);
id = start + p_ids.template get<0>(i)[1];
start = starts[cid];
id = start + p_ids[1*n_cap+i];
cells.template get<0>(id) = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
cells[id] = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
}
#endif
......@@ -422,7 +424,7 @@ __device__ inline void reorder_wprp(const vector_prp & input,
output.template set<prp ...>(dst_id,input,src_id);
}
template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh>
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh>
__global__ void reorder_parts(int n,
const vector_prp input,
vector_prp output,
......@@ -430,12 +432,12 @@ __global__ void reorder_parts(int n,
vector_pos output_pos,
vector_ns sorted_non_sorted,
vector_ns non_sorted_to_sorted,
const vector_cells_type cells)
const cnt_type * cells)
{
cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n) return;
cnt_type code = cells.template get<0>(i);
cnt_type code = cells[i];
reorder(input, output, code,i);
reorder(input_pos,output_pos,code,i);
......@@ -444,7 +446,7 @@ __global__ void reorder_parts(int n,
non_sorted_to_sorted.template get<0>(code) = i;
}
template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh, unsigned int ... prp>
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh, unsigned int ... prp>
__global__ void reorder_parts_wprp(int n,
const vector_prp input,
vector_prp output,
......@@ -452,12 +454,12 @@ __global__ void reorder_parts_wprp(int n,
vector_pos output_pos,
vector_ns sorted_non_sorted,
vector_ns non_sorted_to_sorted,
const vector_cells_type cells)
const cnt_type * cells)
{
cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n) return;
cnt_type code = cells.template get<0>(i);
cnt_type code = cells[i];
reorder_wprp<vector_prp,cnt_type,prp...>(input, output, code,i);
reorder(input_pos,output_pos,code,i);
......
......@@ -416,7 +416,7 @@ template<unsigned int dim ,typename T> class Point
* \return the reference
*
*/
__device__ __host__ T & value(size_t i)
T & value(size_t i)
{
return get(i);
}
......
......@@ -45,8 +45,6 @@ void write_test_report(report_sparse_grid_tests &report_sparsegrid_funcs, std::s
plotGetSingle2D(report_sparsegrid_funcs, testSet, plotCounter);
plotGetNeighbourhood2D(report_sparsegrid_funcs, testSet, plotCounter);
std::string file_xml_results(test_dir);
file_xml_results += std::string("/") + std::string(perfResultsXmlFile);
......@@ -312,7 +310,7 @@ void plotDense3D(report_sparse_grid_tests &report_sparsegrid_funcs, std::set<std
base + ".gridScaling(#).GFlops.mean");
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").x.data(0).source",
base + ".gridScaling(#).gridSize.x");
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x", true);
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x", true);
int bes = static_cast<int>( report_sparsegrid_funcs.graphs.template get<double>(
base + ".gridScaling(0).blockSize"));
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").y.data(0).title",
......@@ -332,7 +330,7 @@ void plotDense3D(report_sparse_grid_tests &report_sparsegrid_funcs, std::set<std
base + ".blockScaling(#).GFlops.mean");
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").x.data(0).source",
base + ".blockScaling(#).blockSize");
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x",true);
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").options.log_x",true);
int ges = static_cast<int>( report_sparsegrid_funcs.graphs.template get<double>(
base + ".blockScaling(0).gridSize.x"));
report_sparsegrid_funcs.graphs.add("graphs.graph(" + std::to_string(plotCounter) + ").y.data(0).title",
......
......@@ -172,11 +172,6 @@ namespace openfpm
return v_size;
}
__host__ __device__ size_t size_local() const
{
return size();
}
/*! \brief return the maximum capacity of the vector before reallocation
*
* \return the capacity of the vector
......@@ -209,38 +204,6 @@ namespace openfpm
return base.template get<p>(key);
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/
template <unsigned int p>
__device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
return this->get<p>(id);
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/
template <unsigned int p, typename key_type>
__device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
return this->get<p>(id.getKey());
}
/*! \brief Get an element of the vector
*
......@@ -525,22 +488,7 @@ namespace openfpm
return base.getGPUIterator(start,stop,n_thr);
}
/*! \brief Get a domain iterator for the GPU
*
*
*/
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
return getGPUIterator(n_thr);
}
//Stub for some expression
void init() const {}
__host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0)))
{
return get<0>(p);
}
/*! \brief Get an iterator for the GPU
*
*
......@@ -553,7 +501,6 @@ namespace openfpm
return base.getGPUIterator(start,stop_,n_thr);
}
/*! \brief operator= this operator absorb the pointers, consider that this object wrap device pointers
*
* \param object to copy
......@@ -567,16 +514,6 @@ namespace openfpm
return *this;
}
__device__ __host__ vector_gpu_ker<T,layout_base> & getVector()
{
return *this;
}
__device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
{
return *this;
}
/*! \brief Return the base
*
* \return the base
......@@ -659,11 +596,6 @@ namespace openfpm
return vref.size();
}
__host__ __device__ size_t size_local() const
{
return size();
}
__device__ __host__ unsigned int capacity() const
{
return vref.capacity;
......@@ -760,16 +692,6 @@ namespace openfpm
return vref.getGPUItertatorTo(stop,n_thr);
}
vector_gpu_ker<T,layout_base> & getVector()
{
return *this;
}
const vector_gpu_ker<T,layout_base> & getVector() const
{
return *this;
}
__host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
{
vref.operator=(v);
......
......@@ -277,11 +277,8 @@ public:
*
*/
vector(const std::initializer_list<T> & v)
// :base(v)
:base(v)
{
// causes error if simply passed to base
for (T t: v)
add(t);
}
//! Constructor from another vector
......@@ -291,16 +288,6 @@ public:
base.swap(v.base);
}
//! Constructor from iterators
template< class InputIt >
vector(InputIt first, InputIt last)
{
while(first != last) {
add(*first);
++first;
}
}
//! destructor
~vector() noexcept
{
......
......@@ -126,27 +126,6 @@ public:
return base.template get<0>(id);
}
/*! \brief Return the pointer to the chunk of memory
*
* \return the pointer to the chunk of memory
*
*/
__device__ __host__ void * getPointer()
{
return &base.template get<0>(0);
}
/*! \brief Return the pointer to the chunk of memory
*
* \return the pointer to the chunk of memory
*
*/
__device__ __host__ const void * getPointer() const
{
return &base.template get<0>(0);
}
vector_custd_ker()
{}
......
......@@ -336,7 +336,7 @@ namespace openfpm
*
* \return the size
*
*/ //remove host device
*/
size_t size_local() const
{
return v_size;
......@@ -1359,21 +1359,7 @@ namespace openfpm
return base.get_o(key);
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/ //remove device host
template <unsigned int p>
inline auto getProp(const unsigned int & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{ //uncomment this
return this->template get<p>(id);
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
......@@ -1383,12 +1369,12 @@ namespace openfpm
*
* \return the element value requested
*
*///remove host device
template <unsigned int p,typename KeyType>
inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
*/
template <unsigned int p,typename KeyType>
inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
//uncomment this
return this->template get<p>(id.getKey());
return this->template get<p>(id.getKey());
}
/*! \brief Get an element of the vector
......@@ -1400,10 +1386,10 @@ namespace openfpm
*
* \return the element value requested
*
*/ //remove device host
*/
template <unsigned int p, typename keyType>
inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{ //uncomment this
inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
return this->template get<p>(id.getKey());
}
......@@ -1944,7 +1930,6 @@ namespace openfpm
return base.getGPUIterator(start,stop_,n_thr);
}
#endif
/*! \brief Get the vector elements iterator
......@@ -2002,14 +1987,6 @@ namespace openfpm
return base.getGPUIterator(start,stop,n_thr);
}
/*! \brief Get a domain iterator for the GPU
*
*
*/
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
return getGPUIterator(n_thr);
}
#endif
/*! \brief Return the size of the message needed to pack this object
......
......@@ -644,16 +644,6 @@ public:
base.swap(v.base);
}
//! Constructor from iterators
template< class InputIt >
vector(InputIt first, InputIt last)
{
while(first != last) {
add(*first);
++first;
}
}
//! destructor
~vector() noexcept
{
......
#ifndef VECTOR_SUBSET_HPP
#define VECTOR_SUBSET_HPP
namespace openfpm
{
template<unsigned int dim,
typename prop,
template<typename> class layout_base = memory_traits_inte>
class vector_subset_ker
{
mutable openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> v_all;
mutable openfpm::vector_gpu_ker<aggregate<int>,layout_base> indexes;
public:
vector_subset_ker(openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> & v_all,
openfpm::vector_gpu_ker<aggregate<int>,layout_base> & indexes)
:v_all(v_all),indexes(indexes)
{}
// get the
template <unsigned int p>
__device__ __host__ inline auto get(size_t id) const -> decltype(v_all.template get<p>(0))
{
return v_all.template get<p>(indexes.template get<0>(id));
}
}
public:
template<typename T, typename Memory, template<typename> class layout_base, typename grow_p, unsigned int impl>
class vector_sub
{
vector<T,Memory,layout_base,grow_p,impl> & v_all;
vector<aggregate<int>,Memory,layout_base,grow_p,impl> & indexes;
public:
vector(vector<T,Memory,layout_base,grow_p,impl> & v_all,
vector<aggregate<int>,Memory,layout_base,grow_p,impl> & indexes)
:v_all(v_all),indexes(indexes)
{}
// To kernel
template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel()
{
vector_subset_ker<dim,St,prop,layout_base> v(v_all.toKernel(), indexes.toKernel());
return v;
}
template <unsigned int p>
inline auto get(size_t id) const -> decltype(v_all.template get<p>(0))
{
return v_all.template get<p>(indexes.template get<0>(id));
}
}
};
#endif
\ No newline at end of file
......@@ -19,7 +19,7 @@ namespace openfpm
*
*/
template <typename T>
__host__ __device__ int sgn(T val)
int sgn(T val)
{
return (T(0) < val) - (val < T(0));
}
......@@ -35,8 +35,7 @@ namespace openfpm
* \return the factorial
*
*/
__host__ __device__ static inline
size_t factorial(size_t f)
static inline size_t factorial(size_t f)
{
size_t fc = 1;
......@@ -61,8 +60,7 @@ namespace openfpm
* \param k
*
*/
__host__ __device__ static inline
size_t C(size_t n, size_t k)
static inline size_t C(size_t n, size_t k)
{
return factorial(n)/(factorial(k)*factorial(n-k));
}
......@@ -77,8 +75,7 @@ namespace openfpm
*
*
*/
__host__ __device__ static inline
size_t round_big_2(size_t n)
static inline size_t round_big_2(size_t n)
{
n--;
n |= n >> 1; // Divide by 2^k for consecutive doublings of k up to 32,
......@@ -105,15 +102,14 @@ namespace openfpm
*/
template<class T>
__host__ __device__ inline constexpr
size_t pow(const T base, unsigned const exponent)
inline constexpr size_t pow(const T base, unsigned const exponent)
{
// (parentheses not required in next line)
return (exponent == 0) ? 1 : (base * pow(base, exponent-1));
}
template<class T>
__host__ __device__ double intpowlog(const T x, unsigned const e)
double intpowlog(const T x, unsigned const e)
{
if (e == 0) return 1.0;
if (e % 2 == 0)
......@@ -139,8 +135,7 @@ namespace openfpm
* \param n modulo
*
*/
__host__ __device__ static inline
long int positive_modulo(long int i, long int n)
static inline long int positive_modulo(long int i, long int n)
{
return (i % n + n) % n;
}
......@@ -160,9 +155,7 @@ namespace openfpm
* \return the bound number
*
*/
template<typename T>
__host__ __device__ static inline
T periodic(const T & pos, const T & p2, const T & p1)
template<typename T> static inline T periodic(const T & pos, const T & p2, const T & p1)
{
T pos_tmp;
......@@ -189,9 +182,7 @@ namespace openfpm
* \return the bound number
*
*/
template<typename T>
__device__ __host__ static inline
T periodic_l(const T & pos, const T & p2, const T & p1)
template<typename T> __device__ __host__ static inline T periodic_l(const T & pos, const T & p2, const T & p1)
{
T pos_tmp = pos;
......@@ -216,7 +207,7 @@ namespace openfpm
*
*
*/
__host__ __device__ inline long int size_t_floor(double x)
inline long int size_t_floor(double x)
{
size_t i = (long int)x; /* truncate */
return i - ( i > x ); /* convert trunc to floor */
......@@ -226,7 +217,7 @@ namespace openfpm
*
*
*/
__host__ __device__ inline long int size_t_floor(float x)
inline long int size_t_floor(float x)
{
size_t i = (long int)x; /* truncate */
return i - ( i > x ); /* convert trunc to floor */
......@@ -267,7 +258,7 @@ namespace openfpm
* \param value
*
*/
__host__ __device__ inline int log2_64 (uint64_t value)
inline int log2_64 (uint64_t value)
{
value |= value >> 1;
value |= value >> 2;
......@@ -285,7 +276,7 @@ namespace openfpm
*
*
*/
__host__ __device__ inline long int size_t_floor(boost::multiprecision::float128 x)
inline long int size_t_floor(boost::multiprecision::float128 x)
{
size_t i = (long int)x; /* truncate */
return i - ( i > x ); /* convert trunc to floor */
......