Commit 58a74a5b authored by incardon's avatar incardon

ghost_get ... still to fix

parent b8ba87a6
openfpm_data @ 37d1890f
Subproject commit 42183947ec6434fa7644185690b005c494b26676
Subproject commit 37d1890f1c6953c2b1212ce937a86721ba6bb6c9
......@@ -919,6 +919,44 @@ public:
return cart;
}
/*! \brief Create a copy of this decomposition that uses another memory type and layout
 *
 * The base sub-objects ie_loc_ghost, nn_prcs and ie_ghost are copied one by one
 * (the last one through ie_ghost::duplicate<Memory2,layout_base2>()), then the
 * data members listed in the body are assigned through the private_get_* accessors.
 *
 * \tparam Memory2 memory type used by the returned decomposition
 * \tparam layout_base2 memory layout used by the returned decomposition
 *
 * \return a duplicated CartDecomposition object instantiated with Memory2 and layout_base2
 *
 */
template<typename Memory2, template <typename> class layout_base2>
CartDecomposition<dim,T,Memory2,layout_base2,Distribution> duplicate_convert() const
{
	// The local object must have exactly the returned type: the cast of &cart to
	// ie_ghost<dim,T,Memory2,layout_base2> below is only valid when that class is
	// one of its bases, and the Distribution must match the one of dist
	CartDecomposition<dim,T,Memory2,layout_base2,Distribution> cart(v_cl);

	(static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this));
	(static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this));

	// ie_ghost::duplicate() is not const-qualified, so the constness of this is
	// removed explicitly before reaching the ie_ghost base
	ie_ghost<dim,T,Memory,layout_base> * ptr = static_cast<ie_ghost<dim,T,Memory,layout_base> *>(const_cast<CartDecomposition<dim,T,Memory,layout_base,Distribution> *>(this));
	(static_cast<ie_ghost<dim,T,Memory2,layout_base2>*>(&cart))->operator=(ptr->template duplicate<Memory2,layout_base2>());

	// NOTE(review): fine_s and sub_domains_global depend on Memory/layout_base, so
	// these assignments rely on the containers accepting a source with a different
	// memory type and layout - confirm
	cart.private_get_sub_domains() = sub_domains;
	cart.private_get_box_nn_processor() = box_nn_processor;
	cart.private_get_fine_s() = fine_s;
	cart.private_get_gr() = gr;
	cart.private_get_gr_dist() = gr_dist;
	cart.private_get_dist() = dist;
	cart.private_get_commCostSet() = commCostSet;
	cart.private_get_cd() = cd;
	cart.private_get_domain() = domain;
	cart.private_get_sub_domains_global() = sub_domains_global;
	cart.private_get_ghost() = ghost;
	cart.private_get_bbox() = bbox;

	// per-dimension members
	for (size_t i = 0 ; i < dim ; i++)
	{
		cart.private_get_spacing(i) = spacing[i];
		cart.private_get_bc(i) = this->bc[i];
	}

	return cart;
}
/*! \brief Copy the element
*
* \param cart element to copy
......@@ -1999,6 +2037,145 @@ public:
//! friend classes
friend extended_type;
/*! \brief Return the internal data structure sub_domains
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate_convert() uses this accessor to fill the object it creates.
 *
 * \return a modifiable reference to sub_domains
 *
 */
openfpm::vector<SpaceBox<dim, T>> & private_get_sub_domains()
{
return sub_domains;
}
/*! \brief Return the internal data structure box_nn_processor
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate_convert() uses this accessor to fill the object it creates.
 *
 * \return a modifiable reference to box_nn_processor
 *
 */
openfpm::vector<openfpm::vector<long unsigned int> > & private_get_box_nn_processor()
{
return box_nn_processor;
}
/*! \brief Return the internal data structure fine_s
 *
 * The returned cell-list type depends on the Memory template parameter of
 * this class. The reference is not const, so the member can be overwritten.
 *
 * \return a modifiable reference to fine_s
 *
 */
CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> & private_get_fine_s()
{
return fine_s;
}
/*! \brief Return the internal data structure gr
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the grid_sm object gr
 *
 */
grid_sm<dim, void> & private_get_gr()
{
return gr;
}
/*! \brief Return the internal data structure gr_dist
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the grid_sm object gr_dist
 *
 */
grid_sm<dim, void> & private_get_gr_dist()
{
return gr_dist;
}
/*! \brief Return the internal data structure dist
 *
 * The type of the returned object is the Distribution template parameter
 * of this class. The reference is not const, so the member can be overwritten.
 *
 * \return a modifiable reference to dist
 *
 */
Distribution & private_get_dist()
{
return dist;
}
/*! \brief Return the internal flag commCostSet
 *
 * The reference is not const: assigning to it changes the flag stored
 * in this object.
 *
 * \return a modifiable reference to the boolean commCostSet
 *
 */
bool & private_get_commCostSet()
{
return commCostSet;
}
/*! \brief Return the internal data structure cd
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the CellDecomposer_sm object cd
 *
 */
CellDecomposer_sm<dim, T, shift<dim,T>> & private_get_cd()
{
return cd;
}
/*! \brief Return the internal data structure domain
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the box domain
 *
 */
::Box<dim,T> & private_get_domain()
{
return domain;
}
/*! \brief Return the internal data structure sub_domains_global
 *
 * The returned vector type depends on the Memory and layout_base template
 * parameters of this class. The reference is not const, so the member can
 * be overwritten.
 *
 * \return a modifiable reference to sub_domains_global
 *
 */
openfpm::vector<Box_map<dim, T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> & private_get_sub_domains_global()
{
return sub_domains_global;
}
/*! \brief Return one entry of the internal data structure spacing
 *
 * \param i index of the entry (this function itself does not validate it;
 *          duplicate_convert() calls it with 0 <= i < dim)
 *
 * \return a modifiable reference to spacing[i]
 *
 */
T & private_get_spacing(int i)
{
return spacing[i];
}
/*! \brief Return the internal data structure ghost
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the Ghost object ghost
 *
 */
Ghost<dim,T> & private_get_ghost()
{
return ghost;
}
/*! \brief Return the internal data structure bbox
 *
 * The reference is not const: the caller can overwrite the member directly.
 *
 * \return a modifiable reference to the box bbox
 *
 */
::Box<dim,T> & private_get_bbox()
{
return bbox;
}
/*! \brief Return one entry of the internal data structure bc
 *
 * \param i index of the entry (this function itself does not validate it;
 *          duplicate_convert() calls it with 0 <= i < dim)
 *
 * \return a modifiable reference to bc[i]
 *
 */
size_t & private_get_bc(int i)
{
return bc[i];
}
};
......
......@@ -34,6 +34,7 @@ struct proc_box_id
}
};
/*! \brief structure that store and compute the internal and external local ghost box
*
* \tparam dim is the dimensionality of the physical domain we are going to decompose.
......@@ -517,6 +518,7 @@ public:
proc_int_box.swap(ie.proc_int_box);
vb_ext.swap(ie.vb_ext);
vb_int.swap(ie.vb_int);
vb_int_box.swap(ie.vb_int_box);
geo_cell.swap(ie.geo_cell);
shifts.swap(ie.shifts);
ids_p.swap(ie.ids_p);
......@@ -532,6 +534,7 @@ public:
proc_int_box = ie.proc_int_box;
vb_ext = ie.vb_ext;
vb_int = ie.vb_int;
vb_int_box = ie.vb_int_box;
geo_cell = ie.geo_cell;
shifts = ie.shifts;
ids_p = ie.ids_p;
......@@ -540,6 +543,31 @@ public:
return *this;
}
/*! \brief Build a copy of this structure that uses another Memory type and layout
 *
 * A default-constructed ie_ghost<dim,T,Memory2,layout_base2> is filled field by
 * field, assigning each member of this object through the matching private_*
 * accessor of the new object.
 *
 * \tparam Memory2 memory type of the returned structure
 * \tparam layout_base2 memory layout of the returned structure
 *
 * \return the converted copy
 *
 */
template<typename Memory2, template <typename> class layout_base2>
inline ie_ghost<dim,T,Memory2,layout_base2> duplicate()
{
	ie_ghost<dim,T,Memory2,layout_base2> converted;

	// one assignment for each member exposed by the private_* accessors
	converted.private_get_box_nn_processor_int() = box_nn_processor_int;
	converted.private_get_proc_int_box() = proc_int_box;
	converted.private_get_vb_ext() = vb_ext;
	converted.private_get_vb_int() = vb_int;
	converted.private_get_vb_int_box() = vb_int_box;
	converted.private_geo_cell() = geo_cell;
	converted.private_get_shifts() = shifts;
	converted.private_get_ids_p() = ids_p;
	converted.private_get_ids() = ids;

	return converted;
}
/*! It return the shift vector
*
* Consider a domain with some ghost, at the border of the domain the
......@@ -1181,6 +1209,102 @@ public:
ids.clear();
}
/*! \brief Return the internal data structure box_nn_processor_int
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate() uses this accessor on the object it is filling.
 *
 * \return a modifiable reference to box_nn_processor_int
 *
 */
inline openfpm::vector< openfpm::vector< Box_proc<dim,T> > > & private_get_box_nn_processor_int()
{
return box_nn_processor_int;
}
/*! \brief Return the internal data structure proc_int_box
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate() uses this accessor on the object it is filling.
 *
 * \return a modifiable reference to proc_int_box
 *
 */
inline openfpm::vector< Box_dom<dim,T> > & private_get_proc_int_box()
{
return proc_int_box;
}
/*! \brief Return the internal data structure vb_ext
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate() uses this accessor on the object it is filling.
 *
 * \return a modifiable reference to vb_ext
 *
 */
inline openfpm::vector<p_box<dim,T> > & private_get_vb_ext()
{
return vb_ext;
}
/*! \brief Return the internal data structure vb_int
 *
 * The returned vector type depends on the Memory and layout_base template
 * parameters of this class. The reference is not const, so the member can
 * be overwritten.
 *
 * \return a modifiable reference to vb_int
 *
 */
inline openfpm::vector<aggregate<unsigned int,unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int,unsigned int>>::type,layout_base> &
private_get_vb_int()
{
return vb_int;
}
/*! \brief Return the internal data structure vb_int_box
 *
 * The returned vector type depends on the Memory and layout_base template
 * parameters of this class. The reference is not const, so the member can
 * be overwritten.
 *
 * \return a modifiable reference to vb_int_box
 *
 */
inline openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> &
private_get_vb_int_box()
{
return vb_int_box;
}
/*! \brief Return the internal data structure geo_cell
 *
 * The returned cell-list type depends on the Memory template parameter of
 * this class. The reference is not const, so the member can be overwritten.
 *
 * \return a modifiable reference to geo_cell
 *
 */
inline CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> &
private_geo_cell()
{
return geo_cell;
}
/*! \brief Return the internal data structure shifts
 *
 * The returned vector type depends on the Memory and layout_base template
 * parameters of this class. The reference is not const, so the member can
 * be overwritten.
 *
 * \return a modifiable reference to shifts
 *
 */
inline openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> &
private_get_shifts()
{
return shifts;
}
/*! \brief Return the internal data structure ids_p
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate() uses this accessor on the object it is filling.
 *
 * \return a modifiable reference to ids_p
 *
 */
inline openfpm::vector<std::pair<size_t,size_t>> &
private_get_ids_p()
{
return ids_p;
}
/*! \brief Return the internal data structure ids
 *
 * The reference is not const: the caller can overwrite the member directly.
 * duplicate() uses this accessor on the object it is filling.
 *
 * \return a modifiable reference to ids
 *
 */
inline openfpm::vector<size_t> &
private_get_ids()
{
return ids;
}
/*! \brief toKernel() Convert this data-structure into a kernel usable data-structure
*
* \return
......
/*
* vector_dist_comm_util_funcs.hpp
*
* Created on: Sep 13, 2018
* Author: i-bird
*/
#ifndef VECTOR_DIST_COMM_UTIL_FUNCS_HPP_
#define VECTOR_DIST_COMM_UTIL_FUNCS_HPP_
/*! \brief Generic case of the ghost labelling, used when no CUDA specialization applies
 *
 * The partial specialization with is_ok_cuda = true carries the real work;
 * this primary template only reports an error on the standard output and
 * leaves every argument untouched.
 */
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition, bool is_ok_cuda>
struct labelParticlesGhost_impl
{
	/*! \brief Print an error message, nothing else
	 *
	 * None of the parameters is read or written.
	 */
	static void run(Decomposition & dec,
	                openfpm::vector<aggregate<unsigned int,unsigned long int>,
	                                CudaMemory,
	                                typename memory_traits_inte<aggregate<unsigned int,unsigned long int>>::type,
	                                memory_traits_inte> & g_opart_device,
	                Vcluster<Memory> & v_cl,
	                openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
	                openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
	                openfpm::vector<size_t> & prc,
	                openfpm::vector<size_t> & prc_sz,
	                openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_offset,
	                size_t & g_m,
	                size_t opt)
	{
		const char * const what = " error, you are trying to use using Cuda functions for a non cuda enabled data-structures";

		std::cout << __FILE__ << ":" << __LINE__ << what << std::endl;
	}
};
/*! \brief CUDA implementation of the ghost labelling (specialization for is_ok_cuda = true)
 *
 * When the file is not compiled by NVCC with CUDA_GPU defined, run() only
 * prints an error message.
 */
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition>
struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,true>
{
	/*! \brief Label on device the particles that produce ghost entries and group them by processor
	 *
	 * \param dec decomposition, passed to the kernels through dec.toKernel()
	 * \param g_opart_device output, resized to the total number of entries produced by the
	 *        scan, filled by proc_label_id_ghost and then sorted by its property 0
	 * \param v_cl cluster object; provides the mgpu context and the number of processors
	 * \param v_pos particle positions, read by the kernels
	 * \param v_prp not used by this function
	 * \param prc output, receives property 1 of each prc_offset entry
	 * \param prc_sz output, receives the difference between consecutive values of property 0
	 *        of prc_offset
	 * \param prc_offset output, filled by find_buffer_offsets plus one terminal entry
	 * \param g_m not used by this function
	 * \param opt not used by this function
	 *
	 * NOTE(review): g_opart_device.size()-1 is evaluated without checking for an empty
	 * vector, so this code assumes that at least one entry is produced - confirm.
	 */
	static void run(Decomposition & dec,
	                openfpm::vector<aggregate<unsigned int,unsigned long int>,
	                                CudaMemory,
	                                typename memory_traits_inte<aggregate<unsigned int,unsigned long int>>::type,
	                                memory_traits_inte> & g_opart_device,
	                Vcluster<Memory> & v_cl,
	                openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
	                openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
	                openfpm::vector<size_t> & prc,
	                openfpm::vector<size_t> & prc_sz,
	                openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_offset,
	                size_t & g_m,
	                size_t opt)
	{
#if defined(CUDA_GPU) && defined(__NVCC__)

		openfpm::vector<aggregate<unsigned int>,
		                Memory,
		                typename layout_base<aggregate<unsigned int>>::type,
		                layout_base> proc_id_out;

		// one counter per particle plus a trailing element set to zero
		proc_id_out.resize(v_pos.size()+1);
		proc_id_out.template get<0>(proc_id_out.size()-1) = 0;
		proc_id_out.template hostToDevice(proc_id_out.size()-1,proc_id_out.size()-1);

		auto ite = v_pos.getGPUIterator();

		// First we have to see how many entry each particle produce
		// (instantiated on dim and St, the same parameters that define v_pos)
		num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
		<<<ite.wthr,ite.thr>>>
		(dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());

		openfpm::vector<aggregate<unsigned int>,
		                Memory,
		                typename layout_base<aggregate<unsigned int>>::type,
		                layout_base> starts;

		// scan
		scan<unsigned int,unsigned int>(proc_id_out,starts);
		starts.resize(proc_id_out.size());
		starts.template deviceToHost<0>(starts.size()-1,starts.size()-1);

		// the last element of the scan is the total number of entries
		size_t sz = starts.template get<0>(starts.size()-1);

		g_opart_device.resize(sz);

		ite = v_pos.getGPUIterator();

		// we compute processor id for each particle
		proc_label_id_ghost<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>
		<<<ite.wthr,ite.thr>>>
		(dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());

		// sort particles
		mergesort((int *)g_opart_device.template getDeviceBuffer<0>(),(long unsigned int *)g_opart_device.template getDeviceBuffer<1>(), g_opart_device.size(), mgpu::template less_t<int>(), v_cl.getmgpuContext());

		// device counter, zero initialized, handed to find_buffer_offsets
		CudaMemory mem;
		mem.allocate(sizeof(int));
		mem.fill(0);
		prc_offset.resize(v_cl.size());

		// find_buffer_offsets works on g_opart_device, so the launch configuration
		// must be computed on that vector and not on v_pos
		ite = g_opart_device.getGPUIterator();

		// Find the buffer bases
		find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())><<<ite.wthr,ite.thr>>>
		(g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());

		// Transfer the number of offsets on CPU
		mem.deviceToHost();
		prc_offset.template deviceToHost<0,1>();
		g_opart_device.template deviceToHost<0>(g_opart_device.size()-1,g_opart_device.size()-1);

		int noff = *(int *)mem.getPointer();

		// terminal entry: total number of entries and property 0 of the last sorted element
		prc_offset.resize(noff+1);
		prc_offset.template get<0>(prc_offset.size()-1) = g_opart_device.size();
		prc_offset.template get<1>(prc_offset.size()-1) = g_opart_device.template get<0>(g_opart_device.size()-1);

		const size_t n_prc = prc_offset.size();

		prc.resize(n_prc);
		prc_sz.resize(n_prc);

		size_t base_offset = 0;

		// Transfer to prc the list of processors and to prc_sz the size of each group
		for (size_t i = 0 ; i < n_prc ; i++)
		{
			prc.get(i) = prc_offset.template get<1>(i);
			prc_sz.get(i) = prc_offset.template get<0>(i) - base_offset;
			base_offset = prc_offset.template get<0>(i);
		}
#else

		std::cout << __FILE__ << ":" << __LINE__ << " error: to use gpu computation you must compile vector_dist.hpp with NVCC" << std::endl;

#endif
	}
};
/*! \brief Generic case of the local ghost creation from o_part_loc, used when no CUDA specialization applies
 *
 * The partial specialization with is_ok_cuda = true carries the real work;
 * this primary template only reports an error on the standard output and
 * leaves every argument untouched.
 */
template<bool with_pos,unsigned int dim, typename St, typename prop, typename Memory, template <typename> class layout_base, bool is_ok_cuda>
struct local_ghost_from_opart_impl
{
	/*! \brief Print an error message, nothing else
	 *
	 * None of the parameters is read or written.
	 */
	static void run(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & o_part_loc,
	                const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts,
	                openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
	                openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
	                size_t opt)
	{
		const char * const what = " error, you are trying to use using Cuda functions for a non cuda enabled data-structures";

		std::cout << __FILE__ << ":" << __LINE__ << what << std::endl;
	}
};
/*! \brief CUDA implementation of the local ghost creation from o_part_loc (specialization for is_ok_cuda = true)
 *
 * When the file is not compiled by NVCC with CUDA_GPU defined, run() only
 * prints an error message.
 */
template<bool with_pos, unsigned int dim, typename St, typename prop, typename Memory, template <typename> class layout_base>
struct local_ghost_from_opart_impl<with_pos,dim,St,prop,Memory,layout_base,true>
{
	/*! \brief Enlarge v_pos and v_prp by o_part_loc.size() elements and launch process_ghost_particles_local
	 *
	 * \param o_part_loc list handed to the kernel; its size fixes the launch configuration
	 *        and the number of elements appended
	 * \param shifts handed to the kernel
	 * \param v_pos position vector, resized with DATA_ON_DEVICE and handed to the kernel
	 * \param v_prp property vector, resized with DATA_ON_DEVICE and handed to the kernel
	 * \param opt not used by this function
	 */
	static void run(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & o_part_loc,
	                const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts,
	                openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
	                openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
	                size_t opt)
	{
#if defined(CUDA_GPU) && defined(__NVCC__)

		// launch configuration computed on the list of entries to process
		auto launch_cfg = o_part_loc.getGPUIterator();

		// size of v_pos before the enlargement, handed to the kernel as last argument
		size_t size_before = v_pos.size();

		const auto n_added = o_part_loc.size();

		v_pos.resize(v_pos.size() + n_added,DATA_ON_DEVICE);
		v_prp.resize(v_prp.size() + n_added,DATA_ON_DEVICE);

		process_ghost_particles_local<with_pos,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>
		<<<launch_cfg.wthr,launch_cfg.thr>>>
		(o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),size_before);

#else

		std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;

#endif
	}
};
/*! \brief Generic case of the local ghost creation from the decomposition, used when no CUDA specialization applies
 *
 * The partial specialization with is_ok_cuda = true carries the real work;
 * this primary template only reports an error on the standard output and
 * leaves every argument untouched.
 */
template<unsigned int dim, typename St, typename prop, typename Memory, template <typename> class layout_base, bool is_ok_cuda>
struct local_ghost_from_dec_impl
{
	/*! \brief Print an error message, nothing else
	 *
	 * None of the parameters is read or written.
	 */
	static void run(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & o_part_loc,
	                const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts,
	                openfpm::vector<Box<dim, St>,Memory,typename layout_base<Box<dim,St>>::type,layout_base> & box_f_dev,
	                openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> & box_f_sv,
	                Vcluster<Memory> & v_cl,
	                openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
	                openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
	                size_t & g_m,
	                size_t opt)
	{
		const char * const what = " error, you are trying to use using Cuda functions for a non cuda enabled data-structures";

		std::cout << __FILE__ << ":" << __LINE__ << what << std::endl;
	}
};
template<unsigned int dim, typename St, typename prop, typename Memory, template <typename> class layout_base>
struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
{
static void run(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & o_part_loc,
const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts,
openfpm::vector<Box<dim, St>,Memory,typename layout_base<Box<dim,St>>::type,layout_base> & box_f_dev,
openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> & box_f_sv,
Vcluster<Memory> & v_cl,
openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
size_t & g_m,
size_t opt)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
o_part_loc.resize(g_m+1);
o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
o_part_loc.template hostToDevice(o_part_loc.size()-1,o_part_loc.size()-1);
// Label the internal (assigned) particles
auto ite = v_pos.getGPUIteratorTo(g_m);
// label particle processor
num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),v_pos.toKernel(),o_part_loc.toKernel());
openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> starts;
starts.resize(o_part_loc.size());
mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());
starts.template deviceToHost<0>(starts.size()-1,starts.size()-1);
size_t total = starts.template get<0>(starts.size()-1);
size_t old = v_pos.size();
v_pos.resize(v_pos.size() + total);
v_prp.resize(v_prp.size() + total);
// Label the internal (assigned) particles
ite = v_pos.getGPUIteratorTo(g_m);
shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
decltype(starts.toKernel()),decltype(shifts.toKernel()),