Skip to content
Snippets Groups Projects
Commit 8a5e9361 authored by Pietro Incardona
Browse files

Fixing GPU local ghost

parent c5f9f46d
No related branches found
No related tags found
No related merge requests found
...@@ -79,6 +79,10 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca ...@@ -79,6 +79,10 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
auto ite = v_pos.getGPUIterator(); auto ite = v_pos.getGPUIterator();
// no work to do: return early
if (ite.wthr.x == 0)
{return;}
// First we have to see how many entry each particle produce // First we have to see how many entry each particle produce
num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())> num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
...@@ -263,6 +267,9 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true> ...@@ -263,6 +267,9 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
// Label the internal (assigned) particles // Label the internal (assigned) particles
ite = v_pos.getGPUIteratorTo(g_m); ite = v_pos.getGPUIteratorTo(g_m);
// resize o_part_loc
o_part_loc.resize(total);
shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()), shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel()), decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
decltype(starts.toKernel()),decltype(shifts.toKernel()), decltype(starts.toKernel()),decltype(shifts.toKernel()),
......
...@@ -249,9 +249,17 @@ __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_ ...@@ -249,9 +249,17 @@ __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_
for (unsigned int j = 0 ; j < dim ; j++) for (unsigned int j = 0 ; j < dim ; j++)
{ {
v_pos.template get<0>(base+n)[j] = xp.get(j) - shifts.template get<0>(shift_actual)[j]; v_pos.template get<0>(base+n)[j] = xp.get(j) - shifts.template get<0>(shift_actual)[j];
}
if (base_o + n < output.size())
{
output.template get<0>(base_o+n) = p; output.template get<0>(base_o+n) = p;
output.template get<1>(base_o+n) = shift_actual; output.template get<1>(base_o+n) = shift_actual;
} }
else
{
printf("OVERFLOW \n");
}
v_prp.set(base+n,v_prp.get(p)); v_prp.set(base+n,v_prp.get(p));
......
...@@ -948,12 +948,16 @@ class vector_dist_comm ...@@ -948,12 +948,16 @@ class vector_dist_comm
starts.template deviceToHost<0>(); starts.template deviceToHost<0>();
size_t offset = starts.template get<0>(rank); size_t offset = starts.template get<0>(rank);
// fill v_pos_tmp and v_prp_tmp with local particles // no work to do
process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()), if (ite.wthr.x != 0)
{
// fill v_pos_tmp and v_prp_tmp with local particles
process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel())> decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
(m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(), (m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),offset); v_pos.toKernel(),v_prp.toKernel(),offset);
}
// Fill the sending buffers // Fill the sending buffers
for (size_t i = 0 ; i < m_pos.size() ; i++) for (size_t i = 0 ; i < m_pos.size() ; i++)
...@@ -962,11 +966,17 @@ class vector_dist_comm ...@@ -962,11 +966,17 @@ class vector_dist_comm
auto ite = m_pos.get(i).getGPUIterator(); auto ite = m_pos.get(i).getGPUIterator();
process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()), // no work to do
if (ite.wthr.x != 0)
{
process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel())> decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
(m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(), (m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
v_pos.toKernel(),v_prp.toKernel(),offset); v_pos.toKernel(),v_prp.toKernel(),offset);
}
} }
// old local particles with the actual local particles // old local particles with the actual local particles
...@@ -1075,11 +1085,17 @@ class vector_dist_comm ...@@ -1075,11 +1085,17 @@ class vector_dist_comm
prc_sz.template fill<0>(0); prc_sz.template fill<0>(0);
auto ite = v_pos.getGPUIterator();
if (ite.wthr.x == 0)
{
starts.resize(v_cl.size());
starts.template fill<0>(0);
return;
}
// we have one process we can skip ... // we have one process we can skip ...
if (v_cl.size() == 1) if (v_cl.size() == 1)
{ {
auto ite = v_pos.getGPUIterator();
// ... but we have to apply the boundary conditions // ... but we have to apply the boundary conditions
periodicity_int<dim> bc; periodicity_int<dim> bc;
...@@ -1091,8 +1107,6 @@ class vector_dist_comm ...@@ -1091,8 +1107,6 @@ class vector_dist_comm
return; return;
} }
auto ite = v_pos.getGPUIterator();
// label particle processor // label particle processor
process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())> process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
...@@ -1220,10 +1234,6 @@ class vector_dist_comm ...@@ -1220,10 +1234,6 @@ class vector_dist_comm
size_t & g_m, size_t & g_m,
size_t opt) size_t opt)
{ {
#ifdef EXTREA_TRACE_PRE_COMM
Extrae_user_function (1);
#endif
// Buffer that contain for each processor the id of the particle to send // Buffer that contain for each processor the id of the particle to send
prc_sz.clear(); prc_sz.clear();
g_opart.clear(); g_opart.clear();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment