Commit 1c8373d8 authored by incardon

Testing update cell-list

parent 92b5df78
openfpm_data @ 6853316c
Subproject commit 37d1890f1c6953c2b1212ce937a86721ba6bb6c9
Subproject commit 6853316cab36d72c3fe4fabbec2d31cd5697c531
......@@ -914,7 +914,7 @@ public:
cart.bbox = bbox;
for (size_t i = 0 ; i < dim ; i++)
cart.bc[i] = this->bc[i];
{cart.bc[i] = this->bc[i];}
return cart;
}
......
......@@ -109,6 +109,37 @@ public:
return processorID_impl(pt,clk,sub_domains_global);
}
/*! \brief Apply the boundary conditions to the point
 *
 * If the particle goes out on the right, it is brought back in on the left
 * when that direction is periodic; nothing is done for non-periodic directions
 *
 * \param pt encapsulated point object (its coordinates are changed according to
 * the explanation above)
 *
 */
template<typename Mem> __device__ void applyPointBC(encapc<1,Point<dim,T>,Mem> && pt) const
{
for (size_t i = 0 ; i < dim ; i++)
{
if (bc[i] == PERIODIC)
{pt.template get<0>()[i] = openfpm::math::periodic_l(pt.template get<0>()[i],domain.getHigh(i),domain.getLow(i));}
}
}
/*! \brief Given a point, return the processor the particle should go to
 *
 * \param pt point
 *
 * \return the processor id
 *
 */
__device__ int inline processorID(const Point<dim,T> &pt)
{
return processorID_impl(pt,clk,sub_domains_global);
}
};
#endif /* CARTDECOMPOSITION_GPU_HPP_ */
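The periodic fold that applyPointBC performs per coordinate can be reproduced in a few lines of host-side C++. The sketch below is illustrative only: periodic_fold is a hypothetical scalar stand-in for openfpm::math::periodic_l, and the domain, point and boundary-condition values are made up for the example.
// Illustrative host-side sketch, not OpenFPM code: a value leaving [low,high) on a
// periodic direction is wrapped back into the domain, non-periodic directions are untouched.
#include <array>
#include <cmath>
#include <cstdio>
constexpr int PERIODIC = 1;
constexpr int NON_PERIODIC = 0;
// hypothetical scalar analogue of openfpm::math::periodic_l
template<typename T>
T periodic_fold(T pos, T high, T low)
{
    T len = high - low;
    T r = std::fmod(pos - low, len);
    if (r < 0) r += len;
    return r + low;
}
int main()
{
    std::array<double,3> p  = {1.2, -0.3, 0.5};                 // x and y lie outside the unit box
    std::array<double,3> lo = {0.0, 0.0, 0.0};
    std::array<double,3> hi = {1.0, 1.0, 1.0};
    std::array<int,3>    bc = {PERIODIC, PERIODIC, NON_PERIODIC};
    for (int i = 0; i < 3; i++)
        if (bc[i] == PERIODIC) p[i] = periodic_fold(p[i], hi[i], lo[i]);
    std::printf("%g %g %g\n", p[0], p[1], p[2]);                // 0.2 0.7 0.5
    return 0;
}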
#ifndef GRID_DIST_KEY_DX_HPP
#define GRID_DIST_KEY_DX_HPP
#include "Grid/map_grid.hpp"
/*! \brief Grid key for a distributed grid
*
 * It contains which local sub-domain grid it comes from, and the local grid_key_dx
......
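The comment above describes the role of the distributed grid key; a toy version of the idea (hypothetical names, not the actual grid_dist_key_dx definition) is sketched here.
// Toy sketch: a distributed grid key couples the index of the local sub-domain grid
// with a plain grid key inside that grid, so one object addresses a point across patches.
#include <array>
#include <cstddef>
template<unsigned int dim>
struct dist_key_sketch
{
    std::size_t sub_domain;        // which local sub-domain grid the point lives in
    std::array<long,dim> key;      // grid_key_dx-like coordinates inside that grid
};
int main()
{
    dist_key_sketch<3> k{2, {4, 0, 7}};   // point (4,0,7) of local patch 2
    (void)k;
    return 0;
}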
......@@ -62,7 +62,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru
auto ite = v_pos.getGPUIterator();
// First we have to see how many entries each particle produces
num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
num_proc_ghost_each_part<3,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());
......@@ -84,7 +84,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru
ite = v_pos.getGPUIterator();
// we compute processor id for each particle
proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>
proc_label_id_ghost<3,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());
......
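The only change in these two hunks is the scalar template argument: the kernels are now instantiated with St, the distribution's scalar type, instead of a hard-coded float. A minimal host-side illustration (toy function, not OpenFPM code) of what the hard-coded float costs when positions are stored in double precision; near a sub-domain boundary this rounding can be enough to label a particle for the wrong processor.
#include <cstdio>
// stand-in for Point<dim,St> xp = parts.template get<0>(p)
template<typename St, typename T>
St read_coord_as(T x) { return static_cast<St>(x); }
int main()
{
    double x = 0.30000000000001;                 // a double-precision position
    double as_st    = read_coord_as<double>(x);  // St follows the vector's scalar type: exact
    double as_float = read_coord_as<float>(x);   // hard-coded float: silently rounded
    std::printf("%.17g\n%.17g\n", as_st, as_float);
    return 0;
}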
......@@ -730,7 +730,7 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
openfpm::vector_gpu<aggregate<int,int>> proc_id_out;
proc_id_out.resize(vg.size());
process_id_proc_each_part<decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>
process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),v_cl.rank());
......
......@@ -36,16 +36,17 @@ __global__ void num_proc_ghost_each_part(decomposition_type dec, vector_type vd,
out.template get<0>(p) = dec.ghost_processorID_N(xp);
}
template<typename cartdec_gpu, typename particles_type, typename vector_out>
template<unsigned int dim, typename St, typename cartdec_gpu, typename particles_type, typename vector_out>
__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output , int rank)
{
int p = threadIdx.x + blockIdx.x * blockDim.x;
if (p >= parts.size()) return;
Point<3,float> xp = parts.template get<0>(p);
cdg.applyPointBC(parts.get(p));
Point<dim,St> xp = parts.template get<0>(p);
int pr = cdg.processorIDBC(xp);
int pr = cdg.processorID(xp);
output.template get<1>(p) = (pr == rank)?-1:pr;
output.template get<0>(p) = p;
......
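Besides becoming generic in dim and St, the kernel now wraps the position with applyPointBC before asking the decomposition for the owner, and then uses the plain processorID instead of processorIDBC. A CPU-side sketch of the same per-particle logic, with hypothetical 1D helpers standing in for the decomposition object:
#include <cmath>
#include <cstddef>
#include <vector>
// Hypothetical 1D stand-in for the GPU decomposition object.
struct decomposition_sketch
{
    int n_ranks;
    double applyPointBC(double x) const { return x - std::floor(x); }          // periodic wrap onto [0,1)
    int processorID(double x) const { return static_cast<int>(x * n_ranks); }  // owner of a wrapped position
};
std::vector<int> label_particles(const decomposition_sketch & dec,
                                 std::vector<double> & pos, int my_rank)
{
    std::vector<int> lbl(pos.size());
    for (std::size_t p = 0; p < pos.size(); p++)
    {
        pos[p] = dec.applyPointBC(pos[p]);   // mirror of cdg.applyPointBC(parts.get(p))
        int pr = dec.processorID(pos[p]);    // mirror of cdg.processorID(xp)
        lbl[p] = (pr == my_rank) ? -1 : pr;  // -1 marks particles that stay on this rank
    }
    return lbl;
}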
......@@ -21,7 +21,15 @@
* \param n_out out of domain + ghost particles counter
*
*/
template<unsigned int dim,typename vector_dist> inline void count_local_n_local(vector_dist & vd, vector_dist_iterator & it, size_t (& bc)[dim] , Box<dim,float> & box, Box<dim,float> & dom_ext, size_t & l_cnt, size_t & nl_cnt, size_t & n_out)
template<unsigned int dim,typename vector_dist>
inline void count_local_n_local(vector_dist & vd,
vector_dist_iterator & it,
size_t (& bc)[dim] ,
Box<dim,typename vector_dist::stype> & box,
Box<dim,typename vector_dist::stype> & dom_ext,
size_t & l_cnt,
size_t & nl_cnt,
size_t & n_out)
{
auto & ct = vd.getDecomposition();
......@@ -44,7 +52,7 @@ template<unsigned int dim,typename vector_dist> inline void count_local_n_local(
nl_cnt++;
}
Point<dim,float> xp = vd.getPos(key);
Point<dim,typename vector_dist::stype> xp = vd.getPos(key);
// Check that all particles are inside the Domain + Ghost part
if (dom_ext.isInside(xp) == false)
......
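A hedged CPU sketch of the counting done by count_local_n_local, with toy types rather than the OpenFPM ones and the decomposition ownership test simplified to a box test: particles inside the processor box count as local, the rest as non-local, and anything outside domain + ghost is flagged.
#include <array>
#include <cstddef>
#include <vector>
template<unsigned int dim, typename T>
struct box_sketch
{
    std::array<T,dim> low, high;
    bool isInside(const std::array<T,dim> & p) const
    {
        for (unsigned int i = 0; i < dim; i++)
            if (p[i] < low[i] || p[i] > high[i]) return false;
        return true;
    }
};
template<unsigned int dim, typename T>
void count_local_n_local_sketch(const std::vector<std::array<T,dim>> & pos,
                                const box_sketch<dim,T> & box,        // simplified ownership test
                                const box_sketch<dim,T> & dom_ext,    // domain + ghost
                                std::size_t & l_cnt, std::size_t & nl_cnt, std::size_t & n_out)
{
    for (const auto & xp : pos)
    {
        if (box.isInside(xp)) l_cnt++; else nl_cnt++;
        if (dom_ext.isInside(xp) == false) n_out++;
    }
}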
......@@ -68,6 +68,25 @@
#define GCL_SYMMETRIC 1
#define GCL_HILBERT 2
template<bool is_gpu_celllist>
struct gcl_standard_no_symmetric_impl
{
template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.template getCellList<CellL>(r_cut);
}
};
template<>
struct gcl_standard_no_symmetric_impl<true>
{
template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.getCellListGPU(r_cut);
}
};
//! General function to get a cell-list
template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
......@@ -84,7 +103,7 @@ struct gcl
*/
static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.template getCellList<CellL>(r_cut);
return gcl_standard_no_symmetric_impl<is_gpu_celllist<CellL>::value>::template get<dim,St,CellL,Vector,impl>(vd,r_cut,g);
}
};
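The new gcl_standard_no_symmetric_impl selects, at compile time, between the CPU getCellList and the GPU getCellListGPU path, keyed on whether CellL is a GPU cell-list type, so gcl::get stays a single call site. A standalone sketch of the same bool-parameter specialization dispatch (illustrative names, not the OpenFPM ones):
#include <iostream>
#include <string>
template<bool is_gpu>
struct build_cell_list_impl
{
    template<typename Vector>
    static std::string get(Vector & vd, double r_cut)
    { return vd.build_cpu(r_cut); }          // generic (CPU) path
};
template<>
struct build_cell_list_impl<true>
{
    template<typename Vector>
    static std::string get(Vector & vd, double r_cut)
    { return vd.build_gpu(r_cut); }          // GPU path selected by the trait
};
struct fake_vector
{
    std::string build_cpu(double) { return "cpu cell-list"; }
    std::string build_gpu(double) { return "gpu cell-list"; }
};
int main()
{
    fake_vector vd;
    std::cout << build_cell_list_impl<false>::get(vd, 0.1) << "\n";  // cpu cell-list
    std::cout << build_cell_list_impl<true>::get(vd, 0.1)  << "\n";  // gpu cell-list
    return 0;
}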
......@@ -492,6 +511,7 @@ public:
check_parameters(box);
init_structures(np);
this->init_decomposition(box,bc,g,opt,gdist);
#ifdef SE_CLASS3
......@@ -1173,6 +1193,7 @@ public:
return cell_list;
}
#endif
/*! \brief Construct a Hilbert cell list starting from the stored particles
......@@ -1228,7 +1249,7 @@ public:
if (to_reconstruct == false)
{
populate_cell_list(v_pos,cell_list,g_m,CL_NON_SYMMETRIC);
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,g_m,CL_NON_SYMMETRIC);
cell_list.set_gm(g_m);
}
......@@ -1260,7 +1281,7 @@ public:
if (to_reconstruct == false)
{
populate_cell_list(v_pos,cell_list,g_m,CL_SYMMETRIC);
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,g_m,CL_SYMMETRIC);
cell_list.set_gm(g_m);
}
......@@ -2194,7 +2215,8 @@ public:
if ((opt & 0x0FFF0000) == CSV_WRITER)
{
// CSVWriter test
CSVWriter<openfpm::vector<Point<dim, St>>, openfpm::vector<prop> > csv_writer;
CSVWriter<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> > csv_writer;
std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".csv"));
......@@ -2209,7 +2231,8 @@ public:
ft = file_type::BINARY;
// VTKWriter for a set of points
VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim,St>>, openfpm::vector<prop>>, VECTOR_POINTS> vtk_writer;
VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base>,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>>, VECTOR_POINTS> vtk_writer;
vtk_writer.add(v_pos,v_prp,g_m);
std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".vtk"));
......
......@@ -282,10 +282,14 @@ class vector_dist_comm
}
}
#ifdef CUDA_GPU
// move box_f_dev and box_f_sv to device
box_f_dev.template hostToDevice<0,1>();
box_f_sv.template hostToDevice<0>();
#endif
shift_box_ndec = dec.get_ndec();
}
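The two added hostToDevice calls mirror the shift boxes to the device once they have been (re)built on the host, so the GPU labeling kernels can read them. A hedged sketch of the same fill-on-host, copy-once pattern in plain CUDA runtime calls (not the openfpm::vector API):
#include <vector>
#include <cuda_runtime.h>
int main()
{
    std::vector<float> host_boxes = {0.f, 0.1f, 0.9f, 1.f};   // toy box extents built on the host
    float * dev_boxes = nullptr;
    cudaMalloc(&dev_boxes, host_boxes.size() * sizeof(float));
    // one hostToDevice-style transfer after the boxes are (re)built, not per use on the device
    cudaMemcpy(dev_boxes, host_boxes.data(),
               host_boxes.size() * sizeof(float), cudaMemcpyHostToDevice);
    cudaFree(dev_boxes);
    return 0;
}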
......@@ -309,23 +313,6 @@ class vector_dist_comm
{
local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,v_pos,v_prp,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
auto ite = o_part_loc.getGPUIterator();
size_t old = v_pos.size();
v_pos.resize(v_pos.size() + o_part_loc.size(),DATA_ON_DEVICE);
v_prp.resize(v_prp.size() + o_part_loc.size(),DATA_ON_DEVICE);
process_ghost_particles_local<true,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>
<<<ite.wthr,ite.thr>>>
(o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),old);
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif*/
}
else
{
......@@ -350,18 +337,6 @@ class vector_dist_comm
{
local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,v_pos,v_prp,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
auto ite = o_part_loc.getGPUIterator();
process_ghost_particles_local<false,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>
<<<ite.wthr,ite.thr>>>
(o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),v_pos.size());
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif*/
}
else
{
......@@ -395,47 +370,6 @@ class vector_dist_comm
{
local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,v_pos,v_prp,g_m,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
o_part_loc.resize(g_m+1);
o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
o_part_loc.template hostToDevice(o_part_loc.size()-1,o_part_loc.size()-1);
// Label the internal (assigned) particles
auto ite = v_pos.getGPUIteratorTo(g_m);
// label particle processor
num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),v_pos.toKernel(),o_part_loc.toKernel());
openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> starts;
starts.resize(o_part_loc.size());
mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());
o_part_loc.template deviceToHost<0>(o_part_loc.size()-1,o_part_loc.size()-1);
size_t total = o_part_loc.template get<0>(o_part_loc.size()-1);
size_t old = v_pos.size();
v_pos.resize(v_pos.size() + total);
// Label the internal (assigned) particles
ite = v_pos.getGPUIteratorTo(g_m);
shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
decltype(starts.toKernel()),decltype(shifts.toKernel()),
decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),
starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old);
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif*/
}
else
{
......@@ -1027,7 +961,7 @@ class vector_dist_comm
auto ite = v_pos.getGPUIterator();
// label particle processor
process_id_proc_each_part<decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel())><<<ite.wthr,ite.thr>>>(dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),v_cl.rank());
process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel())><<<ite.wthr,ite.thr>>>(dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),v_cl.rank());
// sort particles
mergesort((int *)lbl_p.template getDeviceBuffer<1>(),(int *)lbl_p.template getDeviceBuffer<0>(), lbl_p.size(), mgpu::template less_t<int>(), v_cl.getmgpuContext());
......@@ -1133,6 +1067,10 @@ class vector_dist_comm
size_t & g_m,
size_t opt)
{
#ifdef EXTREA_TRACE_PRE_COMM
Extrae_user_function (1);
#endif
// Buffer that contains, for each processor, the ids of the particles to send
g_opart.clear();
g_opart.resize(dec.getNNProcessors());
......@@ -1143,86 +1081,6 @@ class vector_dist_comm
labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,
Decomposition,std::is_same<Memory,CudaMemory>::value>
::run(dec,g_opart_device,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,g_m,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
openfpm::vector<aggregate<unsigned int>,
Memory,
typename layout_base<aggregate<unsigned int>>::type,
layout_base> proc_id_out;
proc_id_out.resize(v_pos.size()+1);
proc_id_out.template get<0>(proc_id_out.size()-1) = 0;
proc_id_out.template hostToDevice(proc_id_out.size()-1,proc_id_out.size()-1);
auto ite = v_pos.getGPUIterator();
// First we have to see how many entry each particle produce
num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());
openfpm::vector<aggregate<unsigned int>,
Memory,
typename layout_base<aggregate<unsigned int>>::type,
layout_base> starts;
// scan
scan<unsigned int,unsigned int>(proc_id_out,starts);
starts.resize(proc_id_out.size());
starts.template deviceToHost<0>(starts.size()-1,starts.size()-1);
size_t sz = starts.template get<0>(starts.size()-1);
// we compute processor id for each particle
g_opart_device.resize(sz);
ite = v_pos.getGPUIterator();
// we compute processor id for each particle
proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());
// sort particles
mergesort((int *)g_opart_device.template getDeviceBuffer<0>(),(long unsigned int *)g_opart_device.template getDeviceBuffer<1>(), g_opart_device.size(), mgpu::template less_t<int>(), v_cl.getmgpuContext());
CudaMemory mem;
mem.allocate(sizeof(int));
mem.fill(0);
prc_offset.resize(v_cl.size());
// Find the buffer bases
find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())><<<ite.wthr,ite.thr>>>
(g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());
// Trasfer the number of offsets on CPU
mem.deviceToHost();
prc_offset.template deviceToHost<0,1>();
g_opart_device.template deviceToHost<0>(g_opart_device.size()-1,g_opart_device.size()-1);
int noff = *(int *)mem.getPointer();
prc_offset.resize(noff+1);
prc_offset.template get<0>(prc_offset.size()-1) = g_opart_device.size();
prc_offset.template get<1>(prc_offset.size()-1) = g_opart_device.template get<0>(g_opart_device.size()-1);
prc.resize(noff+1);
prc_sz.resize(noff+1);
size_t base_offset = 0;
// Transfert to prc the list of processors
prc.resize(noff+1);
for (size_t i = 0 ; i < noff+1 ; i++)
{
prc.get(i) = prc_offset.template get<1>(i);
prc_sz.get(i) = prc_offset.template get<0>(i) - base_offset;
base_offset = prc_offset.template get<0>(i);
}
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use gpu computation you must compile vector_dist.hpp with NVCC" << std::endl;
#endif*/
}
else
{
......@@ -1267,6 +1125,9 @@ class vector_dist_comm
g_opart.swap(g_opart_f);
}
#ifdef EXTREA_TRACE_PRE_COMM
Extrae_user_function (0);
#endif
}
/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
......