Commit c4b479f6 authored by incardon

Optimizing Cell-list construction in case of reordering

parent ea19c74e
......@@ -11,6 +11,12 @@
// Cell-list population modes passed as the `opt` argument of populate_cell_list():
// CL_NON_SYMMETRIC dispatches to populate_cell_list_no_sym(), any other value
// (CL_SYMMETRIC in the visible callers) dispatches to populate_cell_list_sym().
#define CL_SYMMETRIC 1
#define CL_NON_SYMMETRIC 2
/*! \brief Selects how much work CellList_gpu::construct() performs
 *
 * Full          after the particles have been reordered, the
 *               mark_domain_particles kernel and the subsequent mergesort
 *               are executed as well
 * Only_reorder  the mark_domain_particles kernel and the mergesort are skipped
 *
 */
enum cl_construct_opt
{
	Full = 0,
	Only_reorder = 1
};
#if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#endif
......@@ -58,7 +64,8 @@ struct populate_cell_list_no_sym_impl
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::ofp_context_t & context,
size_t g_m)
size_t g_m,
cl_construct_opt optc)
{
cli.clear();
......@@ -79,13 +86,13 @@ struct populate_cell_list_no_sym_impl<true>
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::ofp_context_t & context,
size_t g_m)
size_t g_m,
cl_construct_opt optc)
{
v_prp_out.resize(pos.size());
v_pos_out.resize(pos.size());
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out,context,g_m);
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out,context,g_m,optc);
}
};
......@@ -141,9 +148,10 @@ void populate_cell_list_no_sym(openfpm::vector<Point<dim,T>,Memory,typename layo
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::ofp_context_t & mgpu,
size_t g_m)
size_t g_m,
cl_construct_opt optc)
{
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,mgpu,g_m);
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,mgpu,g_m,optc);
}
/*! \brief populate the Cell-list with particles symmetric case
......@@ -185,10 +193,11 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
CellList & cli,
mgpu::ofp_context_t & context,
size_t g_m,
size_t opt)
size_t opt,
cl_construct_opt optc)
{
if (opt == CL_NON_SYMMETRIC)
{populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,context,g_m);}
{populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,context,g_m,optc);}
else
{populate_cell_list_sym(pos,cli,g_m);}
}
......@@ -212,7 +221,8 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
CellList & cli,
mgpu::ofp_context_t & context,
size_t g_m,
size_t opt)
size_t opt,
cl_construct_opt optc)
{
typedef openfpm::vector<aggregate<int>,Memory,typename layout_base<aggregate<int>>::type,layout_base> stub_prop_type;
......@@ -221,7 +231,7 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> stub3;
populate_cell_list(pos,stub3,stub1,stub2,cli,context,g_m,opt);
populate_cell_list(pos,stub3,stub1,stub2,cli,context,g_m,opt,optc);
}
/*! \brief Structure that contains a reference to a vector of particles
......
......@@ -27,7 +27,6 @@
constexpr int count = 0;
constexpr int start = 1;
template<unsigned int dim, typename T, typename Memory, typename transform = no_transform_only<dim,T>, typename cnt_type = unsigned int, typename ids_type = int>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
......@@ -225,7 +224,14 @@ public:
* \param pl Particles list
*
*/
template<typename vector, typename vector_prp> void construct(vector & pl, vector & pl_out, vector_prp & pl_prp, vector_prp & pl_prp_out, mgpu::ofp_context_t & mgpuContext, size_t g_m = 0)
template<typename vector, typename vector_prp>
void construct(vector & pl,
vector & pl_out,
vector_prp & pl_prp,
vector_prp & pl_prp_out,
mgpu::ofp_context_t & mgpuContext,
size_t g_m = 0,
cl_construct_opt opt = cl_construct_opt::Full)
{
#ifdef __NVCC__
......@@ -277,7 +283,7 @@ public:
auto ite = pl.getGPUIterator();
// Here we test fill cell
// Here we reorder the particles to improve coalescing access
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()),
......@@ -291,20 +297,23 @@ public:
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
ite = sorted_domain_particles_ids.getGPUIterator();
if (opt == cl_construct_opt::Full)
{
ite = sorted_domain_particles_ids.getGPUIterator();
CUDA_LAUNCH((mark_domain_particles),ite.wthr,ite.thr,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
CUDA_LAUNCH((mark_domain_particles),ite.wthr,ite.thr,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
// now we sort the particles
mergesort((int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),(int *)sorted_domain_particles_ids.template getDeviceBuffer<0>(),
sorted_domain_particles_dg.size(), mgpu::template less_t<int>(), mgpuContext);
// now we sort the particles
mergesort((int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),(int *)sorted_domain_particles_ids.template getDeviceBuffer<0>(),
sorted_domain_particles_dg.size(), mgpu::template less_t<int>(), mgpuContext);
}
#else
#else
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif
#endif
}
......
......@@ -324,9 +324,9 @@ private:
{
mgpu::ofp_context_t context(mgpu::gpu_context_opt::dummy);
if (opt & VL_SYMMETRIC || opt & VL_CRS_SYMMETRIC)
{populate_cell_list(pos,cli,context,g_m,CL_SYMMETRIC);}
{populate_cell_list(pos,cli,context,g_m,CL_SYMMETRIC,cl_construct_opt::Full);}
else
{populate_cell_list(pos,cli,context,g_m,CL_NON_SYMMETRIC);}
{populate_cell_list(pos,cli,context,g_m,CL_NON_SYMMETRIC,cl_construct_opt::Full);}
}
/*! \brief Create the Verlet list from a given cell-list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment