Commit 66fdd0bd authored by incardon's avatar incardon
Browse files

Adding missing files

parent bd89b6d0
/*
* grid_gpu.hpp
*
* Created on: Oct 31, 2015
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_GRID_GRID_GPU_HPP_
#define OPENFPM_DATA_SRC_GRID_GRID_GPU_HPP_
/*! \brief This is an N-dimensional grid or an N-dimensional array with memory_traits_inte layout
*
* it is basically an N-dimensional Cartesian grid
*
* \tparam dim Dimensionality of the grid
* \tparam T type of object the grid store
* \tparam Mem memory layout
*
* ### Definition and allocation of a 3D grid on GPU memory
* \snippet grid_unit_tests.hpp Definition and allocation of a 3D grid on GPU memory
* ### Access a grid c3 of size sz on each direction
* \snippet grid_unit_tests.hpp Access a grid c3 of size sz on each direction
* ### Access to an N-dimensional grid with an iterator
* \snippet grid_unit_tests.hpp Access to an N-dimensional grid with an iterator
*
*/
template<unsigned int dim, typename T, typename S>
class grid_cpu<dim,T,S,typename memory_traits_inte<T>::type> : public grid_base_impl<dim,T,S,typename memory_traits_inte<T>::type, memory_traits_inte>
{
	//! memory layout of the stored type T
	typedef typename memory_traits_inte<T>::type layout;

	//! short-hand for the base implementation every constructor forwards to
	typedef grid_base_impl<dim,T,S,layout,memory_traits_inte> base_grid;

public:

	//! Object container for T: the type returned by get_o, through which
	//! all the properties of T can be accessed
	typedef typename base_grid::container container;

	//! Default constructor: empty grid, no memory allocated
	inline grid_cpu() THROW
	:base_grid()
	{
	}

	/*! \brief create a grid from another grid
	 *
	 * \param g the grid to copy
	 *
	 */
	inline grid_cpu(const grid_cpu & g) THROW
	:base_grid(g)
	{
	}

	/*! \brief create a grid of size sz on each direction
	 *
	 * \param sz grid size in each direction
	 *
	 */
	inline grid_cpu(const size_t & sz) THROW
	:base_grid(sz)
	{
	}

	//! Constructor: allocate memory for a grid of size sz[i] on direction i
	inline grid_cpu(const size_t (& sz)[dim]) THROW
	:base_grid(sz)
	{
	}
};
//! short formula for a grid on gpu
template <unsigned int dim, typename T> using grid_gpu = grid_cpu<dim,T,CudaMemory,typename memory_traits_inte<T>::type>;
#endif /* OPENFPM_DATA_SRC_GRID_GRID_GPU_HPP_ */
/*
* CellList_gpu.hpp
*
* Created on: Jun 11, 2018
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#include "config.h"
#ifdef CUDA_GPU
#include <cuda_runtime_api.h>
#include "NN/CellList/CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "Cuda_cell_list_util_func.hpp"
#include "util/cuda/scan_cuda.cuh"
#include "NN/CellList/cuda/CellList_gpu_ker.cuh"
#include "util/cuda_util.hpp"
//! phase indices used when addressing the per-cell information:
//! slot 0 holds the particle count, slot 1 the scan (start) offset
//! NOTE(review): `count` and `start` are very common identifiers declared at
//! global namespace scope in a header — consider moving them into a namespace
constexpr int count = 0;
constexpr int start = 1;
/*! \brief GPU cell-list
 *
 * Bins particles into the cells of a Cartesian decomposition on the device
 * (counts per cell, exclusive scan, scatter, then reorder of positions and
 * properties in cell order).
 *
 * \tparam dim dimensionality of the space
 * \tparam T scalar type of the coordinates
 * \tparam Memory device memory allocator (e.g. CudaMemory)
 * \tparam transform coordinate transformation applied before binning
 * \tparam cnt_type type used for counters / particle ids
 * \tparam ids_type type used for per-axis cell coordinates
 */
template<unsigned int dim, typename T, typename Memory, typename transform = no_transform_only<dim,T>, typename cnt_type = unsigned int, typename ids_type = unsigned char>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
	//! vector of counters (one cnt_type per element, inte layout)
	typedef openfpm::vector<aggregate<cnt_type>,Memory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> vector_cnt_type;

	//! \brief Number of particles in each cell
	vector_cnt_type cl_n;

	//! \brief for each cell the particles id in it
	vector_cnt_type cells;

	//! \brief Cell scan with + operation of cl_n
	vector_cnt_type starts;

	//! \brief particle ids information the first "dim" is the cell-id in grid coordinates, the last is the local-id inside the cell
	openfpm::vector<aggregate<ids_type[dim+1]>,Memory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> part_ids;

	//! \brief for each sorted index it show the index in the unordered
	vector_cnt_type sorted_to_not_sorted;

	//! Spacing
	openfpm::array<T,dim,cnt_type> spacing_c;

	//! \brief number of sub-divisions in each direction
	openfpm::array<ids_type,dim,cnt_type> div_c;

	//! \brief cell padding
	openfpm::array<ids_type,dim,cnt_type> off;

	/*! \brief Initialize the structures of the data structure
	 *
	 * \param div number of cells in each direction
	 * \param tot_n_cell total number of cells (padding included)
	 * \param pad number of padding cells on each direction
	 */
	void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{
			div_c[i] = div[i];
			spacing_c[i] = this->getCellBox().getP2().get(i);
			off[i] = pad;
		}

		cl_n.resize(tot_n_cell);
	}

public:

	/*! \brief Copy constructor
	 *
	 * \param clg cell-list to copy
	 */
	CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & clg)
	:CellDecomposer_sm<dim,T,transform>(clg)
	{
		// BUG FIX: the previous implementation had an empty body, producing a
		// copy with no decomposer state and empty containers. Copy everything.
		// NOTE(review): assumes openfpm::vector assignment performs the
		// intended copy of the device-backed containers — confirm
		cl_n = clg.cl_n;
		cells = clg.cells;
		starts = clg.starts;
		part_ids = clg.part_ids;
		sorted_to_not_sorted = clg.sorted_to_not_sorted;

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
	}

	/*! \brief Move constructor
	 *
	 * \param clg cell-list to take the resources from
	 */
	CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
	:CellDecomposer_sm<dim,T,transform>(clg)
	{
		// BUG FIX: also take over the CellDecomposer_sm base state (the
		// previous version left the decomposer of the new object empty)
		cl_n.swap(clg.cl_n);
		cells.swap(clg.cells);
		starts.swap(clg.starts);
		part_ids.swap(clg.part_ids);
		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
	}

	/*! \brief Construct a cell-list over the given domain
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 */
	CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Initialize(box,div,pad);
	}

	/*! Initialize the cell list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 */
	void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		SpaceBox<dim,T> sbox(box);

		// Initialize point transformation
		Initialize(sbox,div,pad);
	}

	/*! Initialize the cell list constructor
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 */
	void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Matrix<dim,T> mat;
		CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);

		// create the array that store the number of particle on each cell and set it to 0
		InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
	}

	//! \brief mapping from sorted particle index to the original (unsorted) index
	vector_cnt_type & getSortToNonSort()
	{
		return sorted_to_not_sorted;
	}

	/*! \brief construct from a list of particles
	 *
	 * \warning pl is assumed to be already be in device memory
	 *
	 * \param pl Particles list (positions)
	 * \param pl_out output positions, reordered in cell order
	 * \param pl_prp particle properties
	 * \param pl_prp_out output properties, reordered in cell order
	 */
	template<typename vector, typename vector_prp> void construct(vector & pl, vector & pl_out, vector_prp & pl_prp, vector_prp & pl_prp_out)
	{
#ifdef __NVCC__

		// one entry per particle: cell coordinates + local index inside the cell
		// (a duplicated resize of part_ids was removed here)
		part_ids.resize(pl.size());

		auto ite_gpu = pl.getGPUIterator();

		// one counter per cell, +1 so the scan also yields the total
		cl_n.resize(this->gr_cell.size()+1);
		CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type)));

		// compute per-particle cell coordinates and per-cell counts
		subindex<dim,T,cnt_type,ids_type><<<ite_gpu.wthr,ite_gpu.thr>>>(div_c,
				spacing_c,
				off,
				this->getTransform(),
				pl.capacity(),
				pl.size(),
				static_cast<T *>(pl.template getDeviceBuffer<0>()),
				static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
				static_cast<ids_type *>(part_ids.template getDeviceBuffer<0>()));

		// exclusive scan of the counters -> start offset of each cell
		scan<cnt_type,ids_type>(cl_n,starts);

		// now we construct the cells
		cells.resize(pl.size());
		auto itgg = part_ids.getGPUIterator();

		fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>><<<itgg.wthr,itgg.thr>>>(0,
				div_c,
				off,
				part_ids.size(),
				part_ids.capacity(),
				static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
				static_cast<ids_type *>(part_ids.template getDeviceBuffer<0>()),
				static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );

		sorted_to_not_sorted.resize(pl.size());

		// reorder positions and properties in cell order
		// (reuses ite_gpu: pl did not change size since it was created)
		reorder_parts<decltype(pl_prp.toKernel()),
					  decltype(pl.toKernel()),
					  decltype(sorted_to_not_sorted.toKernel()),
					  cnt_type,shift_ph<0,cnt_type>><<<ite_gpu.wthr,ite_gpu.thr>>>(pl.size(),
							pl_prp.toKernel(),
							pl_prp_out.toKernel(),
							pl.toKernel(),
							pl_out.toKernel(),
							sorted_to_not_sorted.toKernel(),
							static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));

#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;

#endif
	}

	//! \brief device-usable (kernel) view of this cell-list
	CellList_gpu_ker<dim,T,cnt_type,ids_type,transform> toKernel()
	{
		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform>
				(starts.toKernel(),
				 sorted_to_not_sorted.toKernel(),
				 spacing_c,
				 div_c,
				 off,
				 this->getTransform());
	}

	//! \brief internal accessor to the sorted->unsorted mapping
	vector_cnt_type & private_get_sort_to_not_sorted()
	{
		return sorted_to_not_sorted;
	}
};
#endif
#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */
/*
* Cuda_cell_list_util_func.hpp
*
* Created on: Jun 17, 2018
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
#include <boost/integer/integer_mask.hpp>
/*! \brief Generic N-dimensional cell-coordinate linearization
 *
 * id = e[0] + div_c[0]*(e[1] + div_c[1]*(e[2] + ...))
 *
 * BUG FIXES vs the original: the array/key parameters were hardcoded to
 * size 1, the Horner loops started at i = 1 (correct only for dim == 3),
 * and the last dimension was not passed through the transform.
 */
template<unsigned int dim, typename cnt_type, typename ids_type, typename transform>
struct cid_
{
	//! linearize a cell coordinate given as a raw array of dim entries
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c , const ids_type * e)
	{
		cnt_type id = e[dim-1];

		#pragma unroll
		for (int i = (int)dim-2; i >= 0 ; i-- )
		{id = e[i] + div_c[i]*id;}

		return id;
	}

	//! linearize a cell coordinate given as a grid key
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c , const grid_key_dx<dim,cnt_type> & e)
	{
		cnt_type id = e.get(dim-1);

		#pragma unroll
		for (int i = (int)dim-2; i >= 0 ; i-- )
		{id = e.get(i) + div_c[i]*id;}

		return id;
	}

	//! compute the linearized cell id of a point (transform applied on every axis)
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c,
			const openfpm::array<T,dim,cnt_type> & spacing,
			const transform & t,
			const Point<dim,T> & p)
	{
		cnt_type id = t.transform(p.get(dim-1),dim-1) / spacing[dim-1];

		#pragma unroll
		for (int i = (int)dim-2; i >= 0 ; i-- )
		{id = t.transform(p.get(i),i) / spacing[i] + div_c[i]*id;}

		return id;
	}

	/*! \brief compute the cell coordinates (written into e) and the linearized
	 *         cell id of a point given as a raw coordinate pointer
	 *
	 * Added so that subindex() compiles for dim != 3 (only the 3D
	 * specialization provided this overload before).
	 */
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c,
			const openfpm::array<T,dim,cnt_type> & spacing,
			const openfpm::array<ids_type,dim,cnt_type> & off,
			const transform & t,
			const T * p,
			ids_type * e)
	{
		#pragma unroll
		for (int i = 0 ; i < (int)dim ; i++)
		{e[i] = openfpm::math::uint_floor(t.transform(p,i)/spacing[i]) + off[i];}

		return get_cid(div_c,e);
	}
};
/*! \brief 1D specialization: the cell id is the single cell coordinate */
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<1,cnt_type,ids_type, transform>
{
	//! linearize (trivially) a 1D cell coordinate given as a raw array
	//! (div_c made const for consistency with the other overloads)
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const ids_type * e)
	{
		return e[0];
	}

	//! linearize (trivially) a 1D cell coordinate given as a grid key
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,cnt_type> & e)
	{
		return e.get(0);
	}

	//! compute the cell id of a 1D point (truncating division by the spacing)
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
			const openfpm::array<T,1,cnt_type> & spacing,
			const transform & t,
			const Point<1,T> & p)
	{
		return t.transform(p.get(0),0) / spacing[0];
	}
};
/*! \brief 2D specialization: id = e[0] + div_c[0]*e[1] */
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<2,cnt_type,ids_type,transform>
{
	//! linearize a 2D cell coordinate given as a raw array
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const ids_type * e)
	{
		return e[0] + div_c[0] * e[1];
	}

	//! linearize a 2D cell coordinate given as a grid key
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,cnt_type> & e)
	{
		return e.get(0) + div_c[0] * e.get(1);
	}

	//! compute the cell id of a 2D point
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
			const openfpm::array<T,2,cnt_type> & spacing,
			const transform & t,
			const Point<2,T> & p)
	{
		// BUG FIX: each coordinate must be truncated to its cell index BEFORE
		// being combined. The original single expression evaluated
		// (div_c[0] * transform(y)) / spacing[1] (left-to-right) in floating
		// point and truncated only the final sum, giving wrong cell ids.
		cnt_type cx = t.transform(p.get(0),0) / spacing[0];
		cnt_type cy = t.transform(p.get(1),1) / spacing[1];

		return cx + div_c[0] * cy;
	}
};
/*! \brief 3D specialization: id = e[0] + (e[1] + e[2]*div_c[1])*div_c[0]
 *
 * The Point / raw-pointer overloads also apply the coordinate transform,
 * floor-divide by the spacing and add the padding offset on each axis.
 */
template<typename cnt_type, typename ids_type,typename transform>
struct cid_<3,cnt_type,ids_type,transform>
{
	//! linearize a 3D cell coordinate given as a raw array
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
			const ids_type * e)
	{
		return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
	}

	//! linearize a 3D cell coordinate given as a grid key (ids_type indices)
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
			const grid_key_dx<3,ids_type> & e)
	{
		return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
	}

	//! compute the linearized cell id of a 3D point (padding offset included)
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
			const openfpm::array<T,3,cnt_type> & spacing,
			const openfpm::array<ids_type,3,cnt_type> & off,
			const transform & t,
			const Point<3,T> & p)
	{
		return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
				(openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1] +
				(openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
	}

	//! as above, but also write the three cell coordinates into e
	template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
			const openfpm::array<T,3,cnt_type> & spacing,
			const openfpm::array<ids_type,3,cnt_type> & off,
			const transform & t,
			const T * p,
			ids_type * e)
	{
		e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
		e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
		e[2] = openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2];

		return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
	}

	//! return the 3D cell coordinate of a point as a grid key (not linearized)
	template<typename T> static inline __device__ grid_key_dx<3,ids_type> get_cid_key(const openfpm::array<T,3,cnt_type> & spacing,
			const openfpm::array<ids_type,3,cnt_type> & off,
			const transform & t,
			const Point<3,T> & p)
	{
		grid_key_dx<3,ids_type> e;

		e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
		e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
		e.set_d(2,openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2]);

		return e;
	}

	//! linearize a 3D cell coordinate given as a grid key (cnt_type indices)
	static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
			const grid_key_dx<3,cnt_type> & e)
	{
		return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
	}
};
/*! \brief Bit layout used to pack a phase id and a particle id in one word
 *
 * The low (bits(cnt_type) - bit_phases) bits hold the particle id, the top
 * bit_phases bits hold the phase id (see encode_phase_id / phase_and_id).
 */
template<unsigned int bit_phases, typename cnt_type>
struct shift_ph
{
	//! shift that places the phase id above the particle-id bits
	//! BUG FIX: the original used sizeof(cnt_type) (bytes) instead of bits
	typedef boost::mpl::int_<sizeof(cnt_type)*8 - bit_phases> type;

	//! mask selecting the particle-id (low) bits
	//! BUG FIX: based on cnt_type (the encoded word type), not size_t
	typedef boost::low_bits_mask_t<sizeof(cnt_type)*8-bit_phases> mask_low;
};
/*! \brief Pack a phase id and a particle id into a single word
 *
 * \param ph_id phase id, stored in the high bits (shifted by ph::type::value)
 * \param pid particle id, stored in the low bits
 *
 * \return the packed word
 */
template<typename cnt_type, typename ph>
__device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid)
{
	// place the phase bits above the particle-id bits
	return (ph_id << ph::type::value) + pid;
}
/////////////////////////// THIS ONLY WORK IF NVCC IS COMPILING THIS //////////////////////////////////////////////////////
#ifdef __NVCC__
/*! \brief Kernel: compute each particle's cell and count particles per cell
 *
 * One thread per particle. Positions are stored SoA-style: component k of
 * particle i lives at p_pos[i + k*n_cap].
 *
 * \param div number of cells in each direction
 * \param spacing cell spacing in each direction
 * \param off cell-padding offset in each direction
 * \param t coordinate transformation applied before binning
 * \param n_cap capacity (allocation stride) of the position vector
 * \param n_part number of particles
 * \param p_pos particle positions (device pointer, SoA layout)
 * \param counts per-cell particle counters, incremented atomically
 * \param p_ids output: for each particle the dim cell coordinates plus the
 *        local index inside its cell, strided by n_cap
 */
template<unsigned int dim, typename pos_type, typename cnt_type, typename ids_type, typename transform>
__global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div,
		openfpm::array<pos_type,dim,cnt_type> spacing,
		openfpm::array<ids_type,dim,cnt_type> off,
		transform t,
		int n_cap,
		int n_part,
		pos_type * p_pos,
		cnt_type *counts,
		ids_type * p_ids)
{
	cnt_type i, cid;
	ids_type e[dim+1];

	i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n_part) return;

	pos_type p[dim];

	// gather the particle position from the strided (SoA) layout
	for (size_t k = 0 ; k < dim ; k++)
	{p[k] = p_pos[i+k*n_cap];}

	// e[0..dim-1] receives the cell coordinate, cid the linearized cell id
	cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);

	// e[dim] = this particle's index inside its cell (previous counter value)
	e[dim] = atomicAdd(counts + cid, 1);

	// store cell coordinates + in-cell index with the same strided layout
	for (size_t k = 0 ; k <= dim ; k++)
	{p_ids[i+k*(n_cap)] = e[k];}
}
/*! \brief Kernel: scatter each (phase-encoded) particle id into its cell slot
 *
 * One thread per particle. Reads back the cell coordinates and the in-cell
 * local index produced by subindex() (strided by n_cap), linearizes the cell
 * coordinate and writes the encoded particle id at starts[cell] + local index.
 *
 * \param phase_id phase encoded in the high bits of the stored id
 * \param div_c number of cells in each direction
 * \param off cell padding (currently unused here — presumably kept for
 *        interface uniformity; TODO confirm)
 * \param n number of particles
 * \param n_cap allocation stride of p_ids
 * \param starts start offset of each cell (exclusive scan of the counters)
 * \param p_ids per-particle cell coordinates + local in-cell index
 * \param cells output: encoded particle ids ordered by cell
 */
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id ,
		openfpm::array<ids_type,dim,cnt_type> div_c,
		openfpm::array<ids_type,dim,cnt_type> off,
		cnt_type n,
		cnt_type n_cap,
		const cnt_type *starts,
		const ids_type * p_ids,
		cnt_type *cells)
{
	cnt_type i, cid, id, start;
	ids_type e[dim+1];

	i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n) return;

	// load the dim cell coordinates plus the local in-cell index
	#pragma unroll
	for (int j = 0 ; j < dim+1 ; j++)
	{e[j] = p_ids[j*n_cap+i];}

	// linearize the cell coordinate
	cid = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c, e);

	start = starts[cid];
	id = start + e[dim];

	cells[id] = encode_phase_id<cnt_type,ph>(phase_id,i);
}
/*! \brief Decode the phase id and the particle id from a packed word
 *
 * Inverse of encode_phase_id: the particle id lives in the low bits, the
 * phase id in the high bits.
 *
 * \param c packed word
 * \param ph_id output: decoded phase id
 * \param pid output: decoded particle id
 */
template<typename cnt_type, typename ph>
__device__ inline void phase_and_id(cnt_type c, cnt_type *ph_id, cnt_type *pid)
{
	*pid = c & ph::mask_low::value;
	// BUG FIX: must shift RIGHT to recover the phase from the high bits
	// (encode_phase_id shifts it left by ph::type::value)
	*ph_id = c >> ph::type::value;
}
/*template <unsigned int n_prp , typename cnt_type, typename T>
__device__ inline void reorderMP(const boost_array_openfpm<T*,n_prp> input,
boost_array_openfpm<T*,dim> output,
cnt_type i)
{
int src_id, ph_id;
phase_and_id(i, &ph_id, &src_id);
output[ph_id][i] = src.d[ph_id][src_id];
}*/
/*! \brief Copy all the properties of one particle to a new index
 *
 * \param input source property vector (kernel view)
 * \param output destination property vector (kernel view)
 * \param src_id index to read from
 * \param dst_id index to write to
 */
template <typename vector_prp , typename cnt_type>
__device__ inline void reorder(const vector_prp & input,
		vector_prp & output,
		cnt_type src_id,
		cnt_type dst_id)
{
	output.set(dst_id,input,src_id);
}
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh>
__global__ void reorder_parts(int n,
const vector_prp input,
vector_prp output,
const vector_pos input_pos,
vector_pos output_pos,
vector_ns sorted_non_sorted,
const cnt_type * cells)
{
cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;