Commit e0df46eb authored by incardon's avatar incardon

ie_ghost labelling working on gpu

parent 7d8b13c9
openfpm_data @ c46dc01e
Subproject commit a8b2bfa3c93ad1f3116ff11e62079697cfe1e0ba
Subproject commit c46dc01e03b1ea215239e8cb817ebff4c61a11db
......@@ -649,37 +649,6 @@ public:
ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc);
}
/* template<typename T2> inline size_t processorID_impl(T2 & p) const
{
// Get the number of elements in the cell
size_t e = -1;
size_t cl = fine_s.getCell(p);
size_t n_ele = fine_s.getNelements(cl);
for (size_t i = 0 ; i < n_ele ; i++)
{
e = fine_s.get(cl,i);
if (sub_domains_global.template get<0>(e).isInsideNP(p) == true)
{
break;
}
}
#ifdef SE_CLASS1
if (n_ele == 0)
{
std::cout << __FILE__ << ":" << __LINE__ << " I cannot detect in which processor this particle go" << std::endl;
return -1;
}
#endif
return sub_domains_global.template get<1>(e);
}*/
public:
......
......@@ -14,30 +14,99 @@ constexpr unsigned int lc_proc_ = 0;
constexpr unsigned int proc_ = 1;
constexpr unsigned int shift_id_ = 2;
template<unsigned int dim, typename T, typename cell_list_type, typename vb_int_box_type>
__device__ __host__ inline unsigned int ghost_processorID_N_impl(const Point<dim,T> & p, cell_list_type & geo_cell, vb_int_box_type & vb_int_proc)
template<typename output_type>
struct ID_operation
{
output_type & output;
__device__ __host__ ID_operation(output_type & output)
:output(output)
{}
__device__ __host__ inline void op(unsigned int base, unsigned int n, unsigned int proc_act, unsigned int shift_act, unsigned int pi)
{
output.template get<0>(base + n) = proc_act;
output.template get<1>(base + n) = pi;
output.template get<2>(base + n) = shift_act;
}
};
struct N_operation
{
__device__ __host__ inline void op(unsigned int base, unsigned int n, unsigned int proc_act, unsigned int shift_act, unsigned int pi)
{
}
};
template<unsigned int dim, typename T, typename cell_list_type, typename vb_int_box_type, typename vb_int_type, typename operation>
__device__ __host__ inline unsigned int ghost_processorID_general_impl(const Point<dim,T> & p,
unsigned int base,
unsigned int pi,
cell_list_type & geo_cell,
vb_int_box_type & vb_int_box,
vb_int_type & vb_int,
operation & op)
{
unsigned int cell = geo_cell.getCell(p);
unsigned int sz = geo_cell.getNelements(cell);
unsigned int n = 0;
for (int i = 0 ; i < sz ; i++)
bool switch_prc = false;
if (sz != 0)
{
unsigned int bid = geo_cell.get(cell,i);
int i = 0;
unsigned int bid = geo_cell.get(cell,0);
unsigned int proc_prev = vb_int.template get<proc_>(bid);
unsigned int shift_prev = vb_int.template get<shift_id_>(bid);
unsigned int proc_act;
unsigned int shift_act;
if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true)
{
op.op(base,n,proc_prev,shift_prev,pi);
unsigned int sz2 = vb_int_proc.template get<0>(bid).size();
switch_prc = true;
n++;
}
i++;
for (int j = 0 ; j < sz2 ; j++)
for ( ; i < sz ; i++)
{
if (Box<dim,T>(vb_int_proc.template get<0>(bid).get(j)).isInsideNP(p) == true)
{n++;}
unsigned int bid = geo_cell.get(cell,i);
proc_act = vb_int.template get<proc_>(bid);
shift_act = vb_int.template get<shift_id_>(bid);
switch_prc = (proc_act == proc_prev && shift_act == shift_prev) & switch_prc;
if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true && switch_prc == false)
{
op.op(base,n,proc_act,shift_act,pi);
switch_prc = true;
n++;
}
proc_prev = proc_act;
shift_prev = shift_act;
}
}
return n;
}
template<unsigned int dim, typename T, typename cell_list_type, typename vb_int_box_type, typename vb_int_type>
__device__ __host__ inline unsigned int ghost_processorID_N_impl(const Point<dim,T> & p,
cell_list_type & geo_cell,
vb_int_box_type & vb_int_box,
vb_int_type & vb_int)
{
N_operation op;
return ghost_processorID_general_impl(p,0,0,geo_cell,vb_int_box,vb_int,op);
}
/*! \brief structure that store and compute the internal and external local ghost box. Version usable in kernel
*
* \tparam dim is the dimensionality of the physical domain we are going to decompose.
......@@ -54,20 +123,24 @@ class ie_ghost_gpu
CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> geo_cell;
//! internal ghost box
openfpm::vector_gpu_ker<aggregate<openfpm::vector_gpu_ker<Box<dim, T>,layout_base>,int>,layout_base> vb_int_proc;
openfpm::vector_gpu_ker<Box<dim, T>,layout_base> vb_int_box;
//! internal ghost box processor infos
openfpm::vector_gpu_ker<aggregate<unsigned int,unsigned int,unsigned int>,layout_base> vb_int;
public:
ie_ghost_gpu(CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> geo_cell,
openfpm::vector_gpu_ker<aggregate<openfpm::vector_gpu_ker<Box<dim, T>,layout_base>,int>,layout_base> vb_int_proc)
:geo_cell(geo_cell),vb_int_proc(vb_int_proc)
openfpm::vector_gpu_ker<Box<dim, T>,layout_base> vb_int_box,
openfpm::vector_gpu_ker<aggregate<unsigned int,unsigned int,unsigned int>,layout_base> vb_int)
:geo_cell(geo_cell),vb_int_box(vb_int_box),vb_int(vb_int)
{
}
ie_ghost_gpu(const ie_ghost_gpu<dim,T,Memory,layout_base> & ieg)
:geo_cell(ieg.geo_cell),vb_int_proc(ieg.vb_int_proc)
:geo_cell(ieg.geo_cell),vb_int_box(ieg.vb_int_box),vb_int(ieg.vb_int)
{}
/*! \brief Get the cell from the particle position
......@@ -87,7 +160,7 @@ public:
*/
__device__ inline unsigned int ghost_processorID_N(const Point<dim,T> & p)
{
return ghost_processorID_N_impl(p,geo_cell,vb_int_proc);
return ghost_processorID_N_impl(p,geo_cell,vb_int_box,vb_int);
}
/*! \brief Get the number of processor a particle must sent
......@@ -97,28 +170,9 @@ public:
*/
template<typename output_type> __device__ inline void ghost_processor_ID(const Point<dim,T> & p, output_type & output, unsigned int base, unsigned int pi)
{
unsigned int cell = geo_cell.getCell(p);
unsigned int sz = geo_cell.getNelements(cell);
ID_operation<output_type> op(output);
unsigned int n = 0;
for (int i = 0 ; i < sz ; i++)
{
unsigned int bid = geo_cell.get(cell,i);
unsigned int sz2 = vb_int_proc.template get<0>(bid).size();
for (int j = 0 ; j < sz2 ; j++)
{
if (Box<dim,T>(vb_int_proc.template get<0>(bid).get(j)).isInsideNP(p) == true)
{
output.template get<0>(base+n) = vb_int_proc.template get<1>(bid);
output.template get<1>(base+n) = pi;
n++;
}
}
}
ghost_processorID_general_impl(p,base,pi,geo_cell,vb_int_box,vb_int,op);
}
};
......
......@@ -13,6 +13,26 @@
#include "Decomposition/shift_vect_converter.hpp"
#include "Decomposition/cuda/ie_ghost_gpu.cuh"
//! Processor id and box id
struct proc_box_id
{
size_t proc_id;
size_t box_id;
size_t shift_id;
//! operator to reorder
bool operator<(const proc_box_id & pbi) const
{
if (proc_id < pbi.proc_id)
{return true;}
else if (proc_id == pbi.proc_id)
{
return shift_id < pbi.shift_id;
}
return false;
}
};
/*! \brief structure that store and compute the internal and external local ghost box
*
......@@ -293,17 +313,15 @@ protected:
}
}
}
// construct_vb_int_proc();
}
/*! \brief construct the vb_int_proc box
*
*
*/
/* void construct_vb_int_proc()
void construct_vb_int_proc(const nn_prcs<dim,T> & nn_p)
{
vb_int_proc.resize(proc_int_box.size());
vb_int_proc.resize_no_device(proc_int_box.size());
for (size_t i = 0 ; i < proc_int_box.size() ; i++)
{
......@@ -317,8 +335,10 @@ protected:
vb_int_proc.template get<0>(i).template get<1>(j)[k] = proc_int_box.get(i).ibx.get(j).bx.getHigh(k);
}
}
vb_int_proc.template get<1>(i) = nn_p.IDtoProc(i);
}
}*/
}
/*! \brief Create the box_nn_processor_int (nbx part) structure, the geo_cell list and proc_int_box
*
......@@ -446,16 +466,85 @@ protected:
while (g_sub.isNext())
{
auto key = g_sub.get();
geo_cell.addCell(gi.LinId(key),vb_int.size()-1);
geo_cell_proc.addCell(gi.LinId(key),p_id);
size_t cell = gi.LinId(key);
geo_cell.addCell(cell,vb_int.size()-1);
// Check if p_id already exist at that cell
// and we add it only if does not exist
size_t nc = geo_cell_proc.getNelements(cell);
bool found = false;
for (size_t s = 0; s < nc ; s++)
{
if (geo_cell_proc.get(cell,s) == lc_proc)
{
found = true;
break;
}
}
if (found == false)
{geo_cell_proc.addCell(cell,lc_proc);}
++g_sub;
}
}
}
}
}
construct_vb_int_proc(nn_p);
reorder_geo_cell();
}
/*! \brief in this function we reorder the cells by processors
*
* In practice every processor in the list is ordered. the geo_cell give
*
* 7 boxes the first 2 boxes are related to processor 0 and the next 2 to processor 4, the other 3 must me related
* to another processor different from 0 and 4. This simplify the procedure to get a unique list of processor ids
* indicating on which processor a particle must be replicated as ghost
*
*/
void reorder_geo_cell()
{
openfpm::vector<proc_box_id> tmp_sort;
size_t div[dim];
for (size_t i = 0 ; i < dim ; i++) {div[i] = geo_cell.getDiv()[i];}
grid_sm<dim,void> gs(div);
grid_key_dx_iterator<dim> it(gs);
while (it.isNext())
{
size_t cell = gs.LinId(it.get());
size_t sz = geo_cell.getNelements(cell);
tmp_sort.resize(sz);
for (size_t i = 0 ; i < sz ; i++)
{
tmp_sort.get(i).box_id = geo_cell.get(cell,i);
tmp_sort.get(i).proc_id = vb_int.template get<proc_>(tmp_sort.get(i).box_id);
tmp_sort.get(i).shift_id = vb_int.template get<shift_id_>(tmp_sort.get(i).box_id);
}
tmp_sort.sort();
// now we set again the cell in an ordered way
for (size_t i = 0 ; i < sz ; i++)
{geo_cell.get(cell,i) = tmp_sort.get(i).box_id;}
++it;
}
}
public:
......@@ -807,6 +896,18 @@ public:
return geo_cell.getCellIterator(geo_cell.getCell(p));
}
/*! \brief Get the number of processor a particle must sent
*
* \param p position of the particle
*
*/
template<typename output_type> inline void ghost_processor_ID(const Point<dim,T> & p, output_type & output, unsigned int base, unsigned int pi)
{
ID_operation<output_type> op(output);
ghost_processorID_general_impl(p,base,pi,geo_cell,vb_int_box,vb_int,op);
}
/*! \brief Get the number of processor a particle must sent
*
* \param p position of the particle
......@@ -814,7 +915,7 @@ public:
*/
inline unsigned int ghost_processorID_N(const Point<dim,T> & p)
{
return ghost_processorID_N_impl(p,geo_cell,vb_int_proc);
return ghost_processorID_N_impl(p,geo_cell,vb_int_box,vb_int);
}
/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
......@@ -1143,14 +1244,20 @@ public:
if (host_dev_transfer == false)
{
geo_cell.hostToDevice();
geo_cell_proc.hostToDevice();
vb_int_box.template hostToDevice<0,1>();
vb_int.template hostToDevice<0,1,2>();
for (size_t i = 0 ; i < vb_int_proc.size() ; i++)
{vb_int_proc.template get<0>(i). template hostToDevice<0,1>();}
vb_int_proc.template hostToDevice<0,1>();
host_dev_transfer = true;
}
ie_ghost_gpu<dim,T,Memory,layout_base> igg(geo_cell.toKernel(),
vb_int_proc.toKernel());
vb_int_box.toKernel(),
vb_int.toKernel());
return igg;
}
......
......@@ -18,7 +18,9 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
// Vcluster
Vcluster<> & vcl = create_vcluster();
CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl);
typedef CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec_type;
dec_type dec(vcl);
// Physical domain
Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
......@@ -86,8 +88,13 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
Point<3,float> xp = vg.template get<0>(i);
match &= proc_id_out.template get<0>(i) == dec.ghost_processorID_N(xp);
const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
match &= proc_id_out.template get<0>(i) == vp_id.size();
}
BOOST_REQUIRE_EQUAL(match,true);
////////////////////////// We now create the scan //////////////////////////////////////
......@@ -105,11 +112,11 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
size_t sz = starts.template get<0>(starts.size()-1);
///////////////////////// We now test //////////////////////////
///////////////////////// we collect the processor and shift id //////////////////////////
openfpm::vector<aggregate<unsigned int,unsigned int>,
openfpm::vector<aggregate<unsigned int,unsigned int,unsigned int>,
CudaMemory,
typename memory_traits_inte<aggregate<unsigned int,unsigned int>>::type,
typename memory_traits_inte<aggregate<unsigned int,unsigned int,unsigned int>>::type,
memory_traits_inte> output;
output.resize(sz);
......@@ -121,12 +128,61 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel());
output.template deviceToHost<0,1>();
output.template deviceToHost<0,1,2>();
//////////////////// TESTING //////////////////////////
for (size_t i = 0 ; i < output.size() ; i++)
starts.deviceToHost<0>();
match = true;
for (size_t i = 0 ; i < starts.size() - 1 ; i++)
{
std::cout << output.template get<0>(i) << " " << output.template get<1>(i) << std::endl;
size_t base = starts.template get<0>(i);
size_t sz = starts.template get<0>(i+1) - base;
if (sz != 0)
{
size_t pid = output.template get<1>(base);
Point<3,float> xp = vg.template get<0>(pid);
openfpm::vector<proc_box_id> tmp_sort1;
openfpm::vector<proc_box_id> tmp_sort2;
const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
tmp_sort1.resize(vp_id.size());
for (size_t j = 0 ; j < vp_id.size() ; j++)
{
tmp_sort1.get(j).proc_id = dec.IDtoProc(vp_id.get(j).first);
tmp_sort1.get(j).box_id = 0;
tmp_sort1.get(j).shift_id = vp_id.get(j).second;
}
tmp_sort1.sort();
tmp_sort2.resize(sz);
for (size_t j = 0 ; j < sz ; j++)
{
tmp_sort2.get(j).proc_id = output.template get<0>(base+j);
tmp_sort2.get(j).box_id = 0;
tmp_sort2.get(j).shift_id = output.template get<2>(base+j);
}
tmp_sort2.sort();
match &= tmp_sort1.size() == tmp_sort2.size();
for (size_t j = 0 ; j < tmp_sort1.size() ; j++)
{
match &= tmp_sort1.get(j).proc_id == tmp_sort2.get(j).proc_id;
match &= tmp_sort1.get(j).shift_id == tmp_sort2.get(j).shift_id;
}
}
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
......@@ -338,5 +394,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
}
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment