Commit 58a74a5b authored by incardon's avatar incardon

ghost_get ... still to fix

parent b8ba87a6
openfpm_data @ 37d1890f
Subproject commit 42183947ec6434fa7644185690b005c494b26676
Subproject commit 37d1890f1c6953c2b1212ce937a86721ba6bb6c9
......@@ -919,6 +919,44 @@ public:
return cart;
}
/*! \brief Create another object that contains the same information and acts in the same way,
*         but is parametrized with a different memory type and layout
*
* Every internal structure of this decomposition is copied into a freshly constructed
* object through the private_get_* accessors; the ie_ghost part is converted with
* ie_ghost::duplicate<Memory2,layout_base2>().
*
* NOTE(review): the ie_loc_ghost and nn_prcs bases are copied as ie_loc_ghost<dim,T> and
* nn_prcs<dim,T>; this assumes they do not depend on Memory/layout_base - confirm.
*
* \tparam Memory2 memory type used by the returned decomposition
* \tparam layout_base2 memory layout used by the returned decomposition
*
* \return a duplicated CartDecomposition object
*
*/
template<typename Memory2, template <typename> class layout_base2>
CartDecomposition<dim,T,Memory2,layout_base2,Distribution> duplicate_convert() const
{
	// The copy must have exactly the returned type: a CartDecomposition<dim,T> (default
	// Memory/layout) can neither be cast to ie_ghost<dim,T,Memory2,layout_base2> nor be
	// returned when Memory2/layout_base2/Distribution differ from the defaults
	CartDecomposition<dim,T,Memory2,layout_base2,Distribution> cart(v_cl);

	(static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this));
	(static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this));

	// ie_ghost::duplicate() is not const-qualified, so constness has to be removed explicitly
	ie_ghost<dim,T,Memory,layout_base> * ptr = static_cast<ie_ghost<dim,T,Memory,layout_base> *>(const_cast<CartDecomposition<dim,T,Memory,layout_base,Distribution> *>(this));
	(static_cast<ie_ghost<dim,T,Memory2,layout_base2>*>(&cart))->operator=(ptr->template duplicate<Memory2,layout_base2>());

	cart.private_get_sub_domains() = sub_domains;
	cart.private_get_box_nn_processor() = box_nn_processor;
	cart.private_get_fine_s() = fine_s;
	cart.private_get_gr() = gr;
	cart.private_get_gr_dist() = gr_dist;
	cart.private_get_dist() = dist;
	cart.private_get_commCostSet() = commCostSet;
	cart.private_get_cd() = cd;
	cart.private_get_domain() = domain;
	cart.private_get_sub_domains_global() = sub_domains_global;
	cart.private_get_ghost() = ghost;
	cart.private_get_bbox() = bbox;

	// per-dimension data: grid spacing and boundary conditions
	for (size_t i = 0 ; i < dim ; i++)
	{
		cart.private_get_spacing(i) = spacing[i];
		cart.private_get_bc(i) = this->bc[i];
	}

	return cart;
}
/*! \brief Copy the element
*
* \param cart element to copy
......@@ -1999,6 +2037,145 @@ public:
//! friend classes
friend extended_type;
/*! \brief Give direct, writable access to the internal member sub_domains
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to sub_domains
*
*/
openfpm::vector<SpaceBox<dim, T>> & private_get_sub_domains()
{
return sub_domains;
}
/*! \brief Give direct, writable access to the internal member box_nn_processor
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to box_nn_processor
*
*/
openfpm::vector<openfpm::vector<long unsigned int> > & private_get_box_nn_processor()
{
return box_nn_processor;
}
/*! \brief Give direct, writable access to the internal cell-list fine_s
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to fine_s
*
*/
CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> & private_get_fine_s()
{
return fine_s;
}
/*! \brief Give direct, writable access to the internal grid information gr
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to gr
*
*/
grid_sm<dim, void> & private_get_gr()
{
return gr;
}
/*! \brief Give direct, writable access to the internal grid information gr_dist
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to gr_dist
*
*/
grid_sm<dim, void> & private_get_gr_dist()
{
return gr_dist;
}
/*! \brief Give direct, writable access to the internal Distribution object dist
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to dist
*
*/
Distribution & private_get_dist()
{
return dist;
}
/*! \brief Give direct, writable access to the internal flag commCostSet
*
* The reference is non-const, so the caller can change the flag
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to commCostSet
*
*/
bool & private_get_commCostSet()
{
return commCostSet;
}
/*! \brief Give direct, writable access to the internal cell decomposer cd
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to cd
*
*/
CellDecomposer_sm<dim, T, shift<dim,T>> & private_get_cd()
{
return cd;
}
/*! \brief Give direct, writable access to the internal box domain
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to domain
*
*/
::Box<dim,T> & private_get_domain()
{
return domain;
}
/*! \brief Give direct, writable access to the internal member sub_domains_global
*
* The returned vector type is parametrized with this object's Memory and layout_base.
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to sub_domains_global
*
*/
openfpm::vector<Box_map<dim, T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> & private_get_sub_domains_global()
{
return sub_domains_global;
}
/*! \brief Give direct, writable access to one element of the internal array spacing
*
* No range check is performed on the index here; duplicate_convert() calls this
* with 0 <= i < dim.
*
* \param i index of the element of spacing to access
*
* \return a reference to spacing[i]
*
*/
T & private_get_spacing(int i)
{
return spacing[i];
}
/*! \brief Give direct, writable access to the internal member ghost
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to ghost
*
*/
Ghost<dim,T> & private_get_ghost()
{
return ghost;
}
/*! \brief Give direct, writable access to the internal box bbox
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate_convert() uses it to fill a newly constructed copy).
*
* \return a reference to bbox
*
*/
::Box<dim,T> & private_get_bbox()
{
return bbox;
}
/*! \brief Give direct, writable access to one element of the internal array bc
*
* No range check is performed on the index here; duplicate_convert() calls this
* with 0 <= i < dim.
*
* \param i index of the element of bc to access
*
* \return a reference to bc[i]
*
*/
size_t & private_get_bc(int i)
{
return bc[i];
}
};
......
......@@ -34,6 +34,7 @@ struct proc_box_id
}
};
/*! \brief structure that store and compute the internal and external local ghost box
*
* \tparam dim is the dimensionality of the physical domain we are going to decompose.
......@@ -517,6 +518,7 @@ public:
proc_int_box.swap(ie.proc_int_box);
vb_ext.swap(ie.vb_ext);
vb_int.swap(ie.vb_int);
vb_int_box.swap(ie.vb_int_box);
geo_cell.swap(ie.geo_cell);
shifts.swap(ie.shifts);
ids_p.swap(ie.ids_p);
......@@ -532,6 +534,7 @@ public:
proc_int_box = ie.proc_int_box;
vb_ext = ie.vb_ext;
vb_int = ie.vb_int;
vb_int_box = ie.vb_int_box;
geo_cell = ie.geo_cell;
shifts = ie.shifts;
ids_p = ie.ids_p;
......@@ -540,6 +543,31 @@ public:
return *this;
}
/*! \brief Build a copy of this structure that uses another Memory type and layout
*
* A default-constructed ie_ghost<dim,T,Memory2,layout_base2> is filled member by
* member through its private_* accessors with the content of this object.
*
* \tparam Memory2 memory type of the returned structure
* \tparam layout_base2 memory layout of the returned structure
*
* \return a structure with Memory type and layout changed
*
*/
template<typename Memory2, template <typename> class layout_base2>
inline ie_ghost<dim,T,Memory2,layout_base2> duplicate()
{
	ie_ghost<dim,T,Memory2,layout_base2> converted;

	// every assignment copies one internal member of this object into the converted one
	converted.private_get_box_nn_processor_int() = box_nn_processor_int;
	converted.private_get_proc_int_box() = proc_int_box;
	converted.private_get_vb_ext() = vb_ext;
	converted.private_get_vb_int() = vb_int;
	converted.private_get_vb_int_box() = vb_int_box;
	converted.private_geo_cell() = geo_cell;
	converted.private_get_shifts() = shifts;
	converted.private_get_ids_p() = ids_p;
	converted.private_get_ids() = ids;

	return converted;
}
/*! It return the shift vector
*
* Consider a domain with some ghost, at the border of the domain the
......@@ -1181,6 +1209,102 @@ public:
ids.clear();
}
/*! \brief Give direct, writable access to the internal member box_nn_processor_int
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to box_nn_processor_int
*
*/
inline openfpm::vector< openfpm::vector< Box_proc<dim,T> > > & private_get_box_nn_processor_int()
{
return box_nn_processor_int;
}
/*! \brief Give direct, writable access to the internal member proc_int_box
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to proc_int_box
*
*/
inline openfpm::vector< Box_dom<dim,T> > & private_get_proc_int_box()
{
return proc_int_box;
}
/*! \brief Give direct, writable access to the internal member vb_ext
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to vb_ext
*
*/
inline openfpm::vector<p_box<dim,T> > & private_get_vb_ext()
{
return vb_ext;
}
/*! \brief Give direct, writable access to the internal member vb_int
*
* The returned vector type is parametrized with this object's Memory and layout_base.
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to vb_int
*
*/
inline openfpm::vector<aggregate<unsigned int,unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int,unsigned int>>::type,layout_base> &
private_get_vb_int()
{
return vb_int;
}
/*! \brief Give direct, writable access to the internal member vb_int_box
*
* The returned vector type is parametrized with this object's Memory and layout_base.
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to vb_int_box
*
*/
inline openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> &
private_get_vb_int_box()
{
return vb_int_box;
}
/*! \brief Give direct, writable access to the internal cell-list geo_cell
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to geo_cell
*
*/
inline CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> &
private_geo_cell()
{
return geo_cell;
}
/*! \brief Give direct, writable access to the internal member shifts
*
* The returned vector type is parametrized with this object's Memory and layout_base.
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to shifts
*
*/
inline openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> &
private_get_shifts()
{
return shifts;
}
/*! \brief Give direct, writable access to the internal member ids_p
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to ids_p
*
*/
inline openfpm::vector<std::pair<size_t,size_t>> &
private_get_ids_p()
{
return ids_p;
}
/*! \brief Give direct, writable access to the internal member ids
*
* The reference is non-const, so the caller can modify or overwrite the member
* (duplicate() uses it to fill the converted copy).
*
* \return a reference to ids
*
*/
inline openfpm::vector<size_t> &
private_get_ids()
{
return ids;
}
/*! \brief toKernel() Convert this data-structure into a kernel usable data-structure
*
* \return
......
This diff is collapsed.
......@@ -390,7 +390,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
Ghost<3,float> g(0.1);
// Boundary conditions
size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
vector_dist_gpu<3,float,aggregate<float,float[3],float[3]>> vd(1000,domain,bc,g);
......@@ -497,14 +497,140 @@ BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
vd.deviceToHostPos();
vd.deviceToHostProp<0,1,2>();
vd.write("gpu_write_before_test");
// To test we copy on a cpu distributed vector and we do a map
vector_dist<3,float,aggregate<float,float[3],float[3]>> vd_cpu(vd.getDecomposition().duplicate_convert<HeapMemory,memory_traits_lin>(),0);
auto itc = vd.getDomainIterator();
while (itc.isNext())
{
auto p = itc.get();
vd_cpu.add();
vd_cpu.getLastPos()[0] = vd.getPos(p)[0];
vd_cpu.getLastPos()[1] = vd.getPos(p)[1];
vd_cpu.getLastPos()[2] = vd.getPos(p)[2];
vd_cpu.getLastProp<0>() = vd.getProp<0>(p);
vd_cpu.getLastProp<1>()[0] = vd.getProp<1>(p)[0];
vd_cpu.getLastProp<1>()[1] = vd.getProp<1>(p)[1];
vd_cpu.getLastProp<1>()[2] = vd.getProp<1>(p)[2];
vd_cpu.getLastProp<2>()[0] = vd.getProp<2>(p)[0];
vd_cpu.getLastProp<2>()[1] = vd.getProp<2>(p)[1];
vd_cpu.getLastProp<2>()[2] = vd.getProp<2>(p)[2];
++itc;
}
vd_cpu.ghost_get<0,1,2>();
vd.ghost_get<0,1,2>(RUN_ON_DEVICE);
vd.deviceToHostPos();
vd.deviceToHostProp<0,1,2>();
vd.write("gpu_write_after_test");
vd.write("write_test");
vd_cpu.write("write_test2");
match = true;
// Particle on the gpu ghost and cpu ghost are not ordered in the same way so we have to reorder
//! Flat copy of one particle (position and the three properties), used to sort the
//! ghost particles by position before they are compared
struct part
{
	//! particle position
	Point<3,float> xp;

	//! property 0
	float prp0;

	//! property 1
	float prp1[3];

	//! property 2
	float prp2[3];

	/*! \brief Lexicographic ordering on the position (x first, then y, then z)
	 *
	 * \param rhs particle to compare with
	 *
	 * \return true if this particle comes strictly before rhs
	 *
	 */
	bool operator<(const part & rhs) const
	{
		for (int d = 0 ; d < 3 ; d++)
		{
			if (xp.get(d) < rhs.xp.get(d))
			{return true;}

			if (xp.get(d) > rhs.xp.get(d))
			{return false;}

			// this coordinate does not decide the ordering, look at the next one
		}

		// no coordinate decided the ordering
		return false;
	}
};
openfpm::vector<part> cpu_sort;
openfpm::vector<part> gpu_sort;
cpu_sort.resize(vd_cpu.size_local_with_ghost() - vd_cpu.size_local());
gpu_sort.resize(vd.size_local_with_ghost() - vd.size_local());
size_t cnt = 0;
auto itc2 = vd.getGhostIterator();
while (itc2.isNext())
{
auto p = itc2.get();
cpu_sort.get(cnt).xp.get(0) = vd_cpu.getPos(p)[0];
gpu_sort.get(cnt).xp.get(0) = vd.getPos(p)[0];
cpu_sort.get(cnt).xp.get(1) = vd_cpu.getPos(p)[1];
gpu_sort.get(cnt).xp.get(1) = vd.getPos(p)[1];
cpu_sort.get(cnt).xp.get(2) = vd_cpu.getPos(p)[2];
gpu_sort.get(cnt).xp.get(2) = vd.getPos(p)[2];
cpu_sort.get(cnt).prp0 = vd_cpu.getProp<0>(p);
gpu_sort.get(cnt).prp0 = vd.getProp<0>(p);
cpu_sort.get(cnt).prp1[0] = vd_cpu.getProp<1>(p)[0];
gpu_sort.get(cnt).prp1[0] = vd.getProp<1>(p)[0];
cpu_sort.get(cnt).prp1[1] = vd_cpu.getProp<1>(p)[1];
gpu_sort.get(cnt).prp1[1] = vd.getProp<1>(p)[1];
cpu_sort.get(cnt).prp1[2] = vd_cpu.getProp<1>(p)[2];
gpu_sort.get(cnt).prp1[2] = vd.getProp<1>(p)[2];
cpu_sort.get(cnt).prp2[0] = vd_cpu.getProp<2>(p)[0];
gpu_sort.get(cnt).prp2[0] = vd.getProp<2>(p)[0];
cpu_sort.get(cnt).prp2[1] = vd_cpu.getProp<2>(p)[1];
gpu_sort.get(cnt).prp2[1] = vd.getProp<2>(p)[1];
cpu_sort.get(cnt).prp2[2] = vd_cpu.getProp<2>(p)[2];
gpu_sort.get(cnt).prp2[2] = vd.getProp<2>(p)[2];
++cnt;
++itc2;
}
cpu_sort.sort();
gpu_sort.sort();
for (size_t i = 0 ; i < cpu_sort.size() ; i++)
{
match &= cpu_sort.get(i).xp.get(0) == gpu_sort.get(i).xp.get(0);
match &= cpu_sort.get(i).xp.get(1) == gpu_sort.get(i).xp.get(1);
match &= cpu_sort.get(i).xp.get(2) == gpu_sort.get(i).xp.get(2);
match &= cpu_sort.get(i).prp0 == gpu_sort.get(i).prp0;
match &= cpu_sort.get(i).prp1[0] == gpu_sort.get(i).prp1[0];
match &= cpu_sort.get(i).prp1[1] == gpu_sort.get(i).prp1[1];
match &= cpu_sort.get(i).prp1[2] == gpu_sort.get(i).prp1[2];
match &= cpu_sort.get(i).prp2[0] == gpu_sort.get(i).prp2[0];
match &= cpu_sort.get(i).prp2[1] == gpu_sort.get(i).prp2[1];
match &= cpu_sort.get(i).prp2[2] == gpu_sort.get(i).prp2[2];
}
BOOST_REQUIRE_EQUAL(match,true);
}
......
......@@ -16,6 +16,7 @@
#endif
#include "Vector/util/vector_dist_funcs.hpp"
#include "cuda/vector_dist_comm_util_funcs.cuh"
#define SKIP_LABELLING 512
#define KEEP_PROPERTIES 512
......@@ -282,8 +283,8 @@ class vector_dist_comm
}
// move box_f_dev and box_f_sv to device
box_f_dev.template deviceToHost<0,1>();
box_f_sv.template deviceToHost<0>();
box_f_dev.template hostToDevice<0,1>();
box_f_sv.template hostToDevice<0>();
shift_box_ndec = dec.get_ndec();
}
......@@ -306,7 +307,10 @@ class vector_dist_comm
{
if (opt & RUN_ON_DEVICE)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,v_pos,v_prp,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
auto ite = o_part_loc.getGPUIterator();
......@@ -321,7 +325,7 @@ class vector_dist_comm
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
#endif*/
}
else
{
......@@ -344,7 +348,10 @@ class vector_dist_comm
{
if (opt & RUN_ON_DEVICE)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,v_pos,v_prp,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
auto ite = o_part_loc.getGPUIterator();
......@@ -354,7 +361,7 @@ class vector_dist_comm
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
#endif*/
}
else
{
......@@ -386,7 +393,10 @@ class vector_dist_comm
if (opt & RUN_ON_DEVICE)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,v_pos,v_prp,g_m,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
o_part_loc.resize(g_m+1);
o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
......@@ -424,7 +434,7 @@ class vector_dist_comm
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
#endif*/
}
else
......@@ -1130,7 +1140,11 @@ class vector_dist_comm
if (opt & RUN_ON_DEVICE)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,
Decomposition,std::is_same<Memory,CudaMemory>::value>
::run(dec,g_opart_device,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,g_m,opt);
/*#if defined(CUDA_GPU) && defined(__NVCC__)
openfpm::vector<aggregate<unsigned int>,
Memory,
......@@ -1208,7 +1222,7 @@ class vector_dist_comm
std::cout << __FILE__ << ":" << __LINE__ << " error: to use gpu computation you must compile vector_dist.hpp with NVCC" << std::endl;
#endif
#endif*/
}
else
{
......@@ -1244,10 +1258,10 @@ class vector_dist_comm
{
if (g_opart.get(i).size() != 0)
{
prc_sz.add(g_opart.get(i).size());
g_opart_f.add();
g_opart.get(i).swap(g_opart_f.last());
prc.add(dec.IDtoProc(i));
prc_sz.add(g_opart.get(i).size());
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment