Commit 6853316c authored by incardon's avatar incardon

Make cell-list gpu more generic

parent 37d1890f
......@@ -35,12 +35,6 @@ struct skip_init<true,T>
#ifdef CUDA_GPU
#ifndef __NVCC__
#undef __host__
#undef __device__
#include <vector_types.h>
#endif
#define GRID_ID_3_RAW(start,stop) int x[3] = {threadIdx.x + blockIdx.x * blockDim.x + start.get(0),\
threadIdx.y + blockIdx.y * blockDim.y + start.get(1),\
threadIdx.z + blockIdx.z * blockDim.z + start.get(2)};\
......@@ -162,6 +156,28 @@ ite_gpu<dim> getGPUIterator_impl(const grid_sm<dim,T> & g1, grid_key_dx<dim> & k
return ig;
}
template<unsigned int dim>
bool has_work_gpu(ite_gpu<dim> & ite)
{
size_t tot_work = 1;
if (dim == 1)
{tot_work *= ite.wthr.x * ite.thr.x;}
else if(dim == 2)
{
tot_work *= ite.wthr.x * ite.thr.x;
tot_work *= ite.wthr.y * ite.thr.y;
}
else
{
tot_work *= ite.wthr.x * ite.thr.x;
tot_work *= ite.wthr.y * ite.thr.y;
tot_work *= ite.wthr.z * ite.thr.z;
}
return tot_work != 0;
}
#endif
#include "copy_grid_fast.hpp"
......@@ -334,8 +350,10 @@ private:
if (dim <= 3)
{
auto ite = this->getGPUIterator(start,stop);
bool has_work = has_work_gpu(ite);
copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());
if (has_work == true)
{copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());}
}
else
{
......
......@@ -3,12 +3,6 @@
#include "config.h"
#ifndef CUDA_GPU
#include <boost/config/compiler/nvcc.hpp>
#endif
//! Warning: apparently you cannot used nested boost::mpl with boost::fusion
//! can create template circularity, this include avoid the problem
#include "util/object_util.hpp"
#include "Grid/util.hpp"
#include "Vector/vect_isel.hpp"
......
......@@ -18,6 +18,7 @@
#include "Point_test.hpp"
#include "util/cuda/moderngpu/kernel_load_balance.hxx"
#include "util/cuda/scan_cuda.cuh"
#include "util/cuda_util.hpp"
BOOST_AUTO_TEST_SUITE( CellList_gpu_test )
......
......@@ -11,6 +11,19 @@
#define CL_SYMMETRIC 1
#define CL_NON_SYMMETRIC 2
/*! \brief Check this is a gpu or cpu type cell-list
*
*/
template<typename T, typename Sfinae = void>
struct is_gpu_celllist: std::false_type {};
template<typename T>
struct is_gpu_celllist<T, typename Void<typename T::yes_is_gpu_celllist>::type> : std::true_type
{};
/*! \brief populate the Cell-list with particles non symmetric case on GPU
*
* \tparam dim dimensionality of the space
......@@ -30,6 +43,79 @@ template<unsigned int dim, typename T, typename CellList> void populate_cell_lis
#include "Vector/map_vector.hpp"
template<bool is_gpu>
struct populate_cell_list_no_sym_impl
{
template<unsigned int dim, typename T, typename prop, typename Memory, template <typename> class layout_base , typename CellList>
static void populate(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & pos,
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & v_pos_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
CellList & cli,
size_t g_m)
{
cli.clear();
for (size_t i = 0; i < pos.size() ; i++)
{
cli.add(pos.get(i), i);
}
}
};
template<>
struct populate_cell_list_no_sym_impl<true>
{
template<unsigned int dim, typename T, typename prop, typename Memory, template <typename> class layout_base , typename CellList>
static void populate(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & pos,
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & v_pos_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
CellList & cli,
size_t g_m)
{
v_prp_out.resize(pos.size());
v_pos_out.resize(pos.size());
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out);
}
};
template<bool is_gpu>
struct populate_cell_list_sym_impl
{
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base , typename CellList>
static void populate(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & pos,
CellList & cli,
size_t g_m)
{
cli.clear();
for (size_t i = 0; i < g_m ; i++)
{
cli.addDom(pos.get(i), i);
}
for (size_t i = g_m; i < pos.size() ; i++)
{
cli.addPad(pos.get(i), i);
}
}
};
template<>
struct populate_cell_list_sym_impl<true>
{
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base , typename CellList>
static void populate(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & pos,
CellList & cli,
size_t g_m)
{
std::cout << __FILE__ << ":" << __LINE__ << " symmetric cell list on GPU is not implemented. (And will never be, race conditions make them non suitable for GPU)" << std::endl;
}
};
/*! \brief populate the Cell-list with particles non symmetric case
*
* \tparam dim dimensionality of the space
......@@ -41,17 +127,15 @@ template<unsigned int dim, typename T, typename CellList> void populate_cell_lis
* \param g_m marker (particle below this marker must be inside the domain, particles outside this marker must be outside the domain)
*
*/
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base , typename CellList>
template<unsigned int dim, typename T, typename prop, typename Memory, template <typename> class layout_base , typename CellList>
void populate_cell_list_no_sym(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & pos,
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & v_pos_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
CellList & cli,
size_t g_m)
{
cli.clear();
for (size_t i = 0; i < pos.size() ; i++)
{
cli.add(pos.get(i), i);
}
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp_out,v_prp,cli,g_m);
}
/*! \brief populate the Cell-list with particles symmetric case
......@@ -70,17 +154,7 @@ void populate_cell_list_sym(openfpm::vector<Point<dim,T>,Memory,typename layout_
CellList & cli,
size_t g_m)
{
cli.clear();
for (size_t i = 0; i < g_m ; i++)
{
cli.addDom(pos.get(i), i);
}
for (size_t i = g_m; i < pos.size() ; i++)
{
cli.addPad(pos.get(i), i);
}
populate_cell_list_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,cli,g_m);
}
/*! \brief populate the Cell-list with particles generic case
......@@ -95,16 +169,49 @@ void populate_cell_list_sym(openfpm::vector<Point<dim,T>,Memory,typename layout_
* \param g_m marker (particle below this marker must be inside the domain, particles outside this marker must be outside the domain)
*
*/
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename CellList>
template<unsigned int dim, typename T, typename prop, typename Memory, template <typename> class layout_base, typename CellList>
void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & pos,
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base > & v_pos_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
CellList & cli,
size_t g_m,
size_t opt)
{
if (opt == CL_NON_SYMMETRIC)
populate_cell_list_no_sym(pos,cli,g_m);
{populate_cell_list_no_sym(pos,v_pos_out,v_prp_out,v_prp,cli,g_m);}
else
populate_cell_list_sym(pos,cli,g_m);
{populate_cell_list_sym(pos,cli,g_m);}
}
/*! \brief populate the Cell-list with particles generic case
*
* \note this function remain for backward compatibility it supposed to be remove once the verlet-list use the new populate-cell list form
*
* \tparam dim dimensionality of the space
* \tparam T type of the space
* \tparam CellList type of cell-list
*
* \param pos vector of positions
* \param cli Cell-list
* \param opt option like CL_SYMMETRIC or CL_NON_SYMMETRIC
* \param g_m marker (particle below this marker must be inside the domain, particles outside this marker must be outside the domain)
*
*/
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename CellList>
void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & pos,
CellList & cli,
size_t g_m,
size_t opt)
{
typedef openfpm::vector<aggregate<int>,Memory,typename layout_base<aggregate<int>>::type,layout_base> stub_prop_type;
stub_prop_type stub1;
stub_prop_type stub2;
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> stub3;
populate_cell_list(pos,stub3,stub1,stub2,cli,g_m,opt);
}
/*! \brief Structure that contain a reference to a vector of particles
......
......@@ -39,7 +39,7 @@ public:
:Mem_type(mt),spacing_c(spacing_c),div_c(div_c),off(off),t(t)
{}
inline __device__ unsigned int getCell(const Point<dim,T> & xp)
inline __device__ unsigned int getCell(const Point<dim,T> & xp) const
{
return cid_<dim,cnt_type,ids_type,transform>::get_cid(div_c,spacing_c,off,t,xp);
}
......
......@@ -12,7 +12,6 @@
#ifdef CUDA_GPU
#include <cuda_runtime_api.h>
#include "NN/CellList/CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "Cuda_cell_list_util_func.hpp"
......@@ -53,6 +52,10 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
//! \brief cell padding
openfpm::array<ids_type,dim,cnt_type> off;
//! Additional information in general (used to understand if the cell-list)
//! has been constructed from an old decomposition
size_t n_dec;
//! Initialize the structures of the data structure
void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
{
......@@ -68,6 +71,9 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
public:
//! Indicate that this cell list is a gpu type cell-list
typedef int yes_is_gpu_celllist;
/*! \brief Copy constructor
*
*
......@@ -228,8 +234,71 @@ public:
{
return sorted_to_not_sorted;
}
/*! \brief Clear the structure
*
*
*/
void clear()
{
cl_n.clear();
cells.clear();
starts.clear();
part_ids.clear();
sorted_to_not_sorted.clear();
}
/////////////////////////////////////
//! Ghost marker
size_t g_m = 0;
/*! \brief return the ghost marker
*
* \return ghost marker
*
*/
inline size_t get_gm()
{
return g_m;
}
/*! \brief Set the ghost marker
*
* \param g_m marker
*
*/
inline void set_gm(size_t g_m)
{
this->g_m = g_m;
}
/////////////////////////////////////
/*! \brief Set the n_dec number
*
* \param n_dec
*
*/
void set_ndec(size_t n_dec)
{
this->n_dec = n_dec;
}
/*! \brief Set the n_dec number
*
* \return n_dec
*
*/
size_t get_ndec() const
{
return n_dec;
}
/////////////////////////////////////
};
#endif
#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */
......@@ -138,7 +138,7 @@ public:
:starts(starts),srt(srt),spacing_c(spacing_c),div_c(div_c),off(off),t(t)
{}
inline __device__ grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp)
inline __device__ grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const
{
return cid_<dim,cnt_type,ids_type,transform>::get_cid_key(spacing_c,off,t,xp);
}
......
......@@ -307,9 +307,9 @@ private:
void initCl(CellListImpl & cli, openfpm::vector<Point<dim,T>> & pos, size_t g_m, size_t opt)
{
if (opt & VL_SYMMETRIC || opt & VL_CRS_SYMMETRIC)
populate_cell_list(pos,cli,g_m,CL_SYMMETRIC);
{populate_cell_list(pos,cli,g_m,CL_SYMMETRIC);}
else
populate_cell_list(pos,cli,g_m,CL_NON_SYMMETRIC);
{populate_cell_list(pos,cli,g_m,CL_NON_SYMMETRIC);}
}
/*! \brief Create the Verlet list from a given cell-list
......
......@@ -27,9 +27,6 @@
#include <functional>
#include <numeric>
#include "util/boost/boost_multi_array_base_openfpm.hpp"
#include "util/boost/boost_multi_array_subarray_openfpm.hpp"
namespace boost {
namespace detail {
namespace multi_array {
......
......@@ -8,31 +8,51 @@
#ifndef OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_
#define OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_
#ifdef CUDA_GPU
#include "config.h"
#ifndef __NVCC__
#define __host__
#define __device__
#endif
#ifdef CUDA_GPU
#define CUDA_SAFE(cuda_call) \
cuda_call; \
{\
cudaError_t e = cudaPeekAtLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}
#else
#ifndef __host__
#define __host__
#define __device__
#ifndef __NVCC__
#define __host__
#define __device__
struct uint3
{
unsigned int x, y, z;
};
struct dim3
{
unsigned int x, y, z;
#if defined(__cplusplus)
__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}
__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
__host__ __device__ operator uint3(void) { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
#endif /* __cplusplus */
};
#else
#ifndef __host__
#define __host__
#define __device__
#endif
#define CUDA_SAFE(cuda_call) \
cuda_call; \
{\
cudaError_t e = cudaPeekAtLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}
#endif
#endif
#endif
#endif /* OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment