Commit 2542371d authored by incardon's avatar incardon
Browse files

GPU test Adding Cell-list gpu experimental

parent db133299
......@@ -14,7 +14,7 @@
* \tparam dim dimensionality of the grid
*
*/
template<unsigned int dim>
template<unsigned int dim, typename index_type>
class grid_key_dx
{
public:
......@@ -414,7 +414,7 @@ public:
}
//! structure that store all the index
mem_id k[dim];
index_type k[dim];
private:
......
......@@ -3,7 +3,7 @@
#include "util/common.hpp"
template<unsigned int dim> class grid_key_dx;
template<unsigned int dim, typename index_type = long int> class grid_key_dx;
template<int dim, typename exp1, typename exp2> class grid_key_dx_sum;
template<int dim, typename exp1, typename exp2> class grid_key_dx_sub;
......
......@@ -141,7 +141,128 @@ public:
}
};
/*! This Class apply a shift transformation before converting to Cell-ID
*
*
*/
template<unsigned int dim, typename T>
class shift_only
{
//! Shift point
Point<dim,T> sh;
public:
/*! \brief Constructor
*
* \param t Matrix transformation
* \param s shift
*
*/
shift_only(const Matrix<dim,T> & t, const Point<dim,T> & s)
:sh(s)
{
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
__device__ __host__ inline T transform(const T * s, const int i) const
{
return s[i] - sh.get(i);
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
__device__ __host__ inline T transform(const T(&s)[dim], const int i) const
{
return s[i] - sh.get(i);
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
__device__ __host__ inline T transform(const Point<dim,T> & s, const int i) const
{
return s.get(i) - sh.get(i);
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
template<typename Mem> __device__ __host__ inline T transform(const encapc<1,Point<dim,T>,Mem> & s, const int i) const
{
return s.template get<0>()[i] - sh.get(i);
}
/*! \brief Set the transformation Matrix and shift
*
* \param mat Matrix transformation
* \param orig origin point
*
*/
inline void setTransform(const Matrix<dim,T> & mat, const Point<dim,T> & orig)
{
for (size_t i = 0 ; i < dim ; i++)
sh.get(i) = orig.get(i);
}
/*! \brief Get the shift vector
*
* \return the shift vector
*
*/
inline const Point<dim,T> & getOrig() const
{
return sh;
}
/*! \brief It return true if the shift match
*
* \param s shift to compare with
*
* \return true if it match
*
*/
inline bool operator==(const shift<dim,T> & s)
{
return sh == s.sh;
}
/*! \brief It return true if the shift is different
*
* \param s shift to compare with
*
* \return true if the shift is different
*
*/
inline bool operator!=(const shift<dim,T> & s)
{
return !this->operator==(s);
}
};
//! No transformation
template<unsigned int dim, typename T>
......@@ -266,6 +387,114 @@ public:
}
};
//! No transformation
template<unsigned int dim, typename T>
class no_transform_only
{
public:
/*! \brief Constructor
*
* \param t Matrix transformation
* \param s shift
*
*/
no_transform_only(const Matrix<dim,T> & t, const Point<dim,T> & s)
{
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
__device__ __host__ inline T transform(const T * s, const int i) const
{
return s[i];
}
/*! \brief Shift the point transformation
*
* \param s source point
* \param i coordinate
*
* \return the transformed coordinate
*
*/
__device__ __host__ inline T transform(const T(&s)[dim], const int i) const
{
return s[i];
}
/*! \brief No transformation
*
* \param s source point
* \param i coordinate
*
* \return the source point coordinate
*
*/
__device__ __host__ inline T transform(const Point<dim,T> & s, const int i) const
{
return s.get(i);
}
/*! \brief No transformation
*
* \param s source point
* \param i coordinate
*
* \return the point coordinate
*
*/
template<typename Mem> __device__ __host__ inline T transform(const encapc<1,Point<dim,T>,Mem> & s, const int i) const
{
return s.template get<Point<dim,T>::x>()[i];
}
/*! \brief Set the transformation Matrix and shift
*
* \param mat Matrix transformation
* \param orig origin point
*
*/
inline void setTransform(const Matrix<dim,T> & mat, const Point<dim,T> & orig)
{
}
/*! \brief It return always true true
*
* There is nothing to compare
*
* \param nt unused
*
* \return true
*
*/
inline bool operator==(const no_transform<dim,T> & nt)
{
return true;
}
/*! \brief It return always false
*
* There is nothing to compare they cannot be differents
*
* \param nt unused
*
* \return false
*
*/
inline bool operator!=(const no_transform<dim,T> & nt)
{
return false;
}
};
/*! \brief Decompose a space into cells
*
......@@ -554,6 +783,16 @@ protected:
public:
/*! \brief Return the transformation
*
* \return the transform
*
*/
const transform & getTransform()
{
return t;
}
/*! \brief Return the underlying grid information of the cell list
*
* \return the grid infos
......
......@@ -16,44 +16,181 @@
#include "CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "cuda/Cuda_cell_list_util_func.hpp"
#include "util/cuda/scan_cuda.cuh"
constexpr int count = 0;
constexpr int start = 1;
template<unsigned int dim, typename T, typename Memory, typename cnt_type = int, typename ids_type = short int, typename transform = no_transform<dim,T>>
template<unsigned int dim, typename cnt_type, typename ids_type>
class NN_gpu_it
{
grid_key_dx<dim,cnt_type> cell_act;
grid_key_dx<dim,cnt_type> cell_start;
grid_key_dx<dim,cnt_type> cell_stop;
const openfpm::vector_gpu_ker<aggregate<cnt_type>> & starts;
const openfpm::array<ids_type,dim,cnt_type> & div_c;
const openfpm::array<ids_type,dim,cnt_type> & off;
cnt_type p_id;
cnt_type c_id;
public:
__device__ NN_gpu_it(const grid_key_dx<dim,cnt_type> & cell_pos,
const openfpm::vector_gpu_ker<aggregate<cnt_type>> & starts,
const openfpm::array<ids_type,dim,cnt_type> & div_c,
const openfpm::array<ids_type,dim,cnt_type> & off)
:starts(starts),div_c(div_c),off(off)
{
// calculate start and stop
for (size_t i = 0 ; i < dim ; i++)
{
cell_start.set_d(i,cell_pos.get(i) - 1);
cell_stop.set_d(i,cell_pos.get(i) + 1);
cell_act.set_d(i,cell_pos.get(i) - 1);
}
c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,off,cell_start);
p_id = starts.template get<0>(c_id);
printf("CID: %d\n",c_id);
}
__device__ cnt_type get()
{
return p_id;
}
__device__ NN_gpu_it<dim,cnt_type,ids_type> & operator++()
{
++p_id;
while (p_id >= starts.template get<0>(c_id+1) && isNext())
{
cnt_type id = cell_act.get(0);
cell_act.set_d(0,id+1);
//! check the overflow of all the index with exception of the last dimensionality
int i = 0;
for ( ; i < dim-1 ; i++)
{
size_t id = cell_act.get(i);
if ((int)id > cell_stop.get(i))
{
// ! overflow, increment the next index
cell_act.set_d(i,cell_start.get(i));
id = cell_act.get(i+1);
cell_act.set_d(i+1,id+1);
}
else
{
break;
}
}
c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,off,cell_act);
p_id = starts.template get<0>(c_id);
}
return *this;
}
__device__ bool isNext()
{
return cell_act.get(dim-1) < cell_stop.get(dim-1);
}
};
template<unsigned int dim, typename T, typename cnt_type, typename ids_type, typename transform>
class CellList_gpu_ker
{
openfpm::vector_gpu_ker<aggregate<cnt_type>> starts;
//! Spacing
openfpm::array<T,dim,cnt_type> spacing_c;
//! \brief number of sub-divisions in each direction
openfpm::array<ids_type,dim,cnt_type> div_c;
//! \brief cell offset
openfpm::array<ids_type,dim,cnt_type> off;
//! transformation
transform t;
public:
CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>> starts,
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t)
:starts(starts),spacing_c(spacing_c),div_c(div_c),off(off),t(t)
{}
inline __device__ grid_key_dx<dim,cnt_type> getCell(const Point<dim,T> & xp)
{
grid_key_dx<dim,cnt_type> k;
for (int i = 0 ; i < dim ; i++)
{k.set_d(i,xp.get(i)/spacing_c[i]);}
return k;
}
__device__ NN_gpu_it<dim,cnt_type,ids_type> getNNIterator(const grid_key_dx<dim,cnt_type> & cid)
{
NN_gpu_it<dim,cnt_type,ids_type> ngi(cid,starts,div_c,off);
return ngi;
}
};
template<unsigned int dim, typename T, typename Memory, typename cnt_type = unsigned int, typename ids_type = unsigned char, typename transform = no_transform_only<dim,T>>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
//! \brief Cell information
openfpm::vector<aggregate<cnt_type,cnt_type>,Memory,typename memory_traits_inte<aggregate<cnt_type,cnt_type>>::type,memory_traits_inte> cl_n;
typedef openfpm::vector<aggregate<cnt_type>,Memory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> vector_cnt_type;
//! \brief Number of particles in each cell
vector_cnt_type cl_n;
//! \brief for each cell the particles id in it
vector_cnt_type cells;
//! \brief particle information
//! \brief Cell scan with + operation of cl_n
vector_cnt_type starts;
//! \brief particle ids information the first "dim" is the cell-id in grid coordinates, the last is the local-id inside the cell
openfpm::vector<aggregate<ids_type[dim+1]>,Memory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> part_ids;
//! \brief for each sorted index it show the index in the unordered
vector_cnt_type sorted_to_not_sorted;
//! Spacing
CudaMemory spacing;
openfpm::array<T,dim,cnt_type> spacing_c;
//! \brief number of sub-divisions in each direction
CudaMemory div;
openfpm::array<ids_type,dim,cnt_type> div_c;
//! \brief cell padding
openfpm::array<ids_type,dim,cnt_type> off;
//! Initialize the structures of the data structure
void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell)
void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
{
spacing.allocate(sizeof(T)*dim);
this->div.allocate(dim*sizeof(ids_type));
T (& div_p)[dim] = *static_cast<T (*)[dim]>(this->div.getPointer());
T (& spacing_p)[dim] = *static_cast<T (*)[dim]>(this->spacing.getPointer());
for (size_t i = 0 ; i < dim ; i++)
{
div_p[i] = div[i];
spacing_p[i] = this->getCellBox().getP2().get(i);
div_c[i] = div[i];
spacing_c[i] = this->getCellBox().getP2().get(i);
off[i] = pad;
}
// Force to copy into device
this->spacing.getDevicePointer();
this->div.getDevicePointer();
cl_n.resize(tot_n_cell);
}
......@@ -95,34 +232,85 @@ public:
CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);
// create the array that store the number of particle on each cell and se it to 0
InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size());
InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
}
vector_cnt_type & getSortToNonSort()
{
return sorted_to_not_sorted;
}
/*! \brief construct from a list of particles
*
* \warning pl is assumed to be already be in device memory
*
* \param pl Particles list
*
*/
template<typename vector> void construct(vector & pl)
template<typename vector, typename vector_prp, unsigned int ... prp> void construct(vector & pl, vector_prp & pl_prp, vector_prp & pl_prp_out)
{
// First we set the count memory to zero
CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type)));
part_ids.resize(pl.size());
// Than we construct the ids
auto ite_gpu = pl.getGPUIterator();
subindex<dim,T,cnt_type,ids_type><<<ite_gpu.wthr,ite_gpu.thr>>>(*static_cast<ids_type (*)[dim]>(div.getDevicePointer()),
*static_cast<T (*)[dim]>(spacing.getDevicePointer()),
cl_n.resize(this->gr_cell.size()+1);
CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type)));
part_ids.resize(pl.size());
subindex<dim,T,cnt_type,ids_type><<<ite_gpu.wthr,ite_gpu.thr>>>(div_c,
spacing_c,
off,
this->getTransform(),
pl.capacity(),
pl.size(),
static_cast<T *>(pl.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<ids_type *>(part_ids.template getDeviceBuffer<0>()));
// now we scan
scan<cnt_type,ids_type>(cl_n,starts);
// now we construct the cells
cells.resize(pl.size());
auto itgg = part_ids.getGPUIterator();
fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>><<<itgg.wthr,itgg.thr>>>(0,
div_c,
part_ids.size(),
part_ids.capacity(),
static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
static_cast<ids_type *>(part_ids.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
sorted_to_not_sorted.resize(pl.size());
auto ite = pl.getGPUIterator();
// Here we test fill cell
reorder_parts<decltype(pl_prp.template toGPU<prp...>()),
decltype(sorted_to_not_sorted.template toGPU()),
cnt_type,shift_ph<0,cnt_type>><<<ite.wthr,ite.thr>>>(pl.size(),
pl_prp.template toGPU<prp...>(),
pl_prp_out.template toGPU<>(),
sorted_to_not_sorted.template toGPU<>(),
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
}
CellList_gpu_ker<dim,T,cnt_type,ids_type,transform> toGPU()
{
return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform>
(starts.template toGPU<>(),
spacing_c,
div_c,
off,
this->getTransform());
}
};
......
......@@ -53,39 +53,38 @@ void test_sub_index()
pl.add(p9);
pl.add(p10);
CudaMemory spacing;
CudaMemory div;
spacing.allocate(sizeof(T)*dim);
div.allocate(dim*sizeof(ids_type));
ids_type (& div_p)[dim] = *static_cast<ids_type (*)[dim]>(div.getPointer());
T (& spacing_p)[dim] = *static_cast<T (*)[dim]>(spacing.getPointer());
openfpm::array<T,dim,cnt_type> spacing;
openfpm::array<ids_type,dim,cnt_type> div;
openfpm::array<ids_type,dim,cnt_type> off;
for (size_t i = 0 ; i < dim ; i++)
{
div_p[i] = 17;
spacing_p[i] = 0.1;
div[i] = 17;