Commit c4b479f6 authored by incardon

Optimizing Cell-list construction in case of reordering

parent ea19c74e
...@@ -11,6 +11,12 @@ ...@@ -11,6 +11,12 @@
#define CL_SYMMETRIC 1 #define CL_SYMMETRIC 1
#define CL_NON_SYMMETRIC 2 #define CL_NON_SYMMETRIC 2
/*! \brief Option selecting how much work the cell-list construction performs
 *
 * The value is forwarded by populate_cell_list() and populate_cell_list_no_sym()
 * down to the construct() method of the GPU cell-list. The symmetric population
 * path (populate_cell_list_sym) does not receive it.
 *
 */
enum cl_construct_opt
{
//! Complete construction: in the GPU construct(), after the particles are reordered,
//! the domain particles are also marked (mark_domain_particles) and sorted (mergesort)
Full,
//! Reduced construction: the GPU construct() skips the marking and sorting of the
//! domain particles, which it performs only when the option is Full
Only_reorder
};
#if defined(CUDA_GPU) && defined(__NVCC__) #if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/kernel_mergesort.hxx" #include "util/cuda/moderngpu/kernel_mergesort.hxx"
#endif #endif
...@@ -58,7 +64,8 @@ struct populate_cell_list_no_sym_impl ...@@ -58,7 +64,8 @@ struct populate_cell_list_no_sym_impl
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out, openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli, CellList & cli,
mgpu::ofp_context_t & context, mgpu::ofp_context_t & context,
size_t g_m) size_t g_m,
cl_construct_opt optc)
{ {
cli.clear(); cli.clear();
...@@ -79,13 +86,13 @@ struct populate_cell_list_no_sym_impl<true> ...@@ -79,13 +86,13 @@ struct populate_cell_list_no_sym_impl<true>
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out, openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli, CellList & cli,
mgpu::ofp_context_t & context, mgpu::ofp_context_t & context,
size_t g_m) size_t g_m,
cl_construct_opt optc)
{ {
v_prp_out.resize(pos.size()); v_prp_out.resize(pos.size());
v_pos_out.resize(pos.size()); v_pos_out.resize(pos.size());
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out,context,g_m); cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out,context,g_m,optc);
} }
}; };
...@@ -141,9 +148,10 @@ void populate_cell_list_no_sym(openfpm::vector<Point<dim,T>,Memory,typename layo ...@@ -141,9 +148,10 @@ void populate_cell_list_no_sym(openfpm::vector<Point<dim,T>,Memory,typename layo
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out, openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli, CellList & cli,
mgpu::ofp_context_t & mgpu, mgpu::ofp_context_t & mgpu,
size_t g_m) size_t g_m,
cl_construct_opt optc)
{ {
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,mgpu,g_m); populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,mgpu,g_m,optc);
} }
/*! \brief populate the Cell-list with particles symmetric case /*! \brief populate the Cell-list with particles symmetric case
...@@ -185,10 +193,11 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base ...@@ -185,10 +193,11 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
CellList & cli, CellList & cli,
mgpu::ofp_context_t & context, mgpu::ofp_context_t & context,
size_t g_m, size_t g_m,
size_t opt) size_t opt,
cl_construct_opt optc)
{ {
if (opt == CL_NON_SYMMETRIC) if (opt == CL_NON_SYMMETRIC)
{populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,context,g_m);} {populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,context,g_m,optc);}
else else
{populate_cell_list_sym(pos,cli,g_m);} {populate_cell_list_sym(pos,cli,g_m);}
} }
...@@ -212,7 +221,8 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base ...@@ -212,7 +221,8 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
CellList & cli, CellList & cli,
mgpu::ofp_context_t & context, mgpu::ofp_context_t & context,
size_t g_m, size_t g_m,
size_t opt) size_t opt,
cl_construct_opt optc)
{ {
typedef openfpm::vector<aggregate<int>,Memory,typename layout_base<aggregate<int>>::type,layout_base> stub_prop_type; typedef openfpm::vector<aggregate<int>,Memory,typename layout_base<aggregate<int>>::type,layout_base> stub_prop_type;
...@@ -221,7 +231,7 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base ...@@ -221,7 +231,7 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> stub3; openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> stub3;
populate_cell_list(pos,stub3,stub1,stub2,cli,context,g_m,opt); populate_cell_list(pos,stub3,stub1,stub2,cli,context,g_m,opt,optc);
} }
/*! \brief Structure that contain a reference to a vector of particles /*! \brief Structure that contain a reference to a vector of particles
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
constexpr int count = 0; constexpr int count = 0;
constexpr int start = 1; constexpr int start = 1;
template<unsigned int dim, typename T, typename Memory, typename transform = no_transform_only<dim,T>, typename cnt_type = unsigned int, typename ids_type = int> template<unsigned int dim, typename T, typename Memory, typename transform = no_transform_only<dim,T>, typename cnt_type = unsigned int, typename ids_type = int>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform> class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{ {
...@@ -225,7 +224,14 @@ public: ...@@ -225,7 +224,14 @@ public:
* \param pl Particles list * \param pl Particles list
* *
*/ */
template<typename vector, typename vector_prp> void construct(vector & pl, vector & pl_out, vector_prp & pl_prp, vector_prp & pl_prp_out, mgpu::ofp_context_t & mgpuContext, size_t g_m = 0) template<typename vector, typename vector_prp>
void construct(vector & pl,
vector & pl_out,
vector_prp & pl_prp,
vector_prp & pl_prp_out,
mgpu::ofp_context_t & mgpuContext,
size_t g_m = 0,
cl_construct_opt opt = cl_construct_opt::Full)
{ {
#ifdef __NVCC__ #ifdef __NVCC__
...@@ -277,7 +283,7 @@ public: ...@@ -277,7 +283,7 @@ public:
auto ite = pl.getGPUIterator(); auto ite = pl.getGPUIterator();
// Here we test fill cell // Here we reorder the particles to improve coalescing access
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()), CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()), decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()), decltype(sorted_to_not_sorted.toKernel()),
...@@ -291,20 +297,23 @@ public: ...@@ -291,20 +297,23 @@ public:
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
ite = sorted_domain_particles_ids.getGPUIterator(); if (opt == cl_construct_opt::Full)
{
ite = sorted_domain_particles_ids.getGPUIterator();
CUDA_LAUNCH((mark_domain_particles),ite.wthr,ite.thr,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m); CUDA_LAUNCH((mark_domain_particles),ite.wthr,ite.thr,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
// now we sort the particles // now we sort the particles
mergesort((int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),(int *)sorted_domain_particles_ids.template getDeviceBuffer<0>(), mergesort((int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),(int *)sorted_domain_particles_ids.template getDeviceBuffer<0>(),
sorted_domain_particles_dg.size(), mgpu::template less_t<int>(), mgpuContext); sorted_domain_particles_dg.size(), mgpu::template less_t<int>(), mgpuContext);
}
#else #else
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl; std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif #endif
} }
......
...@@ -324,9 +324,9 @@ private: ...@@ -324,9 +324,9 @@ private:
{ {
mgpu::ofp_context_t context(mgpu::gpu_context_opt::dummy); mgpu::ofp_context_t context(mgpu::gpu_context_opt::dummy);
if (opt & VL_SYMMETRIC || opt & VL_CRS_SYMMETRIC) if (opt & VL_SYMMETRIC || opt & VL_CRS_SYMMETRIC)
{populate_cell_list(pos,cli,context,g_m,CL_SYMMETRIC);} {populate_cell_list(pos,cli,context,g_m,CL_SYMMETRIC,cl_construct_opt::Full);}
else else
{populate_cell_list(pos,cli,context,g_m,CL_NON_SYMMETRIC);} {populate_cell_list(pos,cli,context,g_m,CL_NON_SYMMETRIC,cl_construct_opt::Full);}
} }
/*! \brief Create the Verlet list from a given cell-list /*! \brief Create the Verlet list from a given cell-list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment