Skip to content
Snippets Groups Projects
Commit 34e4e85b authored by Pietro Incardona's avatar Pietro Incardona
Browse files

map and ghost_get working with switched layout

parent d518b0dc
No related branches found
No related tags found
No related merge requests found
openfpm_vcluster @ 7c68ec7f
Subproject commit faa1d114c2d13e562d200c92e98c1ed7be306eeb
Subproject commit 7c68ec7f6572fbb003101e0dc950404574d6e693
......@@ -54,7 +54,7 @@ class vector_dist_comm
size_t v_sub_unit_factor = 64;
//! definition of the send vector for position
typedef openfpm::vector<Point<dim, St>, Memory> send_pos_vector;
typedef openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> send_pos_vector;
//! VCluster
Vcluster & v_cl;
......@@ -111,7 +111,7 @@ class vector_dist_comm
size_t lg_m;
//! Sending buffer
openfpm::vector<HeapMemory> hsmem;
openfpm::vector<Memory> hsmem;
//! Receiving buffer
openfpm::vector<HeapMemory> hrmem;
......@@ -267,7 +267,8 @@ class vector_dist_comm
* \param v_prp vector of particles properties
*
*/
void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp)
void local_ghost_from_opart(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp)
{
// get the shift vectors
const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
......@@ -294,7 +295,9 @@ class vector_dist_comm
* \param g_m ghost marker
*
*/
void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t g_m)
void local_ghost_from_dec(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
size_t g_m)
{
o_part_loc.clear();
......@@ -395,7 +398,10 @@ class vector_dist_comm
* \param opt options
*
*/
void add_loc_particles_bc(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp ,size_t & g_m, size_t opt)
void add_loc_particles_bc(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp ,
size_t & g_m,
size_t opt)
{
// Create the shift boxes
createShiftBox();
......@@ -408,9 +414,9 @@ class vector_dist_comm
else
{
if (opt & SKIP_LABELLING)
local_ghost_from_opart(v_pos,v_prp);
{local_ghost_from_opart(v_pos,v_prp);}
else
local_ghost_from_dec(v_pos,v_prp,g_m);
{local_ghost_from_dec(v_pos,v_prp,g_m);}
}
}
......@@ -420,7 +426,8 @@ class vector_dist_comm
* \param g_pos_send Send buffer to fill
*
*/
void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>> & v_pos,openfpm::vector<send_pos_vector> & g_pos_send)
void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<send_pos_vector> & g_pos_send)
{
// get the shift vectors
const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors();
......@@ -516,7 +523,7 @@ class vector_dist_comm
*
*
*/
void resize_retained_buffer(openfpm::vector<HeapMemory> & rt_buf, size_t nbf)
void resize_retained_buffer(openfpm::vector<Memory> & rt_buf, size_t nbf)
{
// Release all the buffer that are going to be deleted
for (size_t i = nbf ; i < rt_buf.size() ; i++)
......@@ -527,6 +534,76 @@ class vector_dist_comm
hsmem.resize(nbf);
}
/*! \brief Set the buffer for each property
*
*
*/
template<typename send_vector, typename v_mpl>
struct set_mem_retained_buffers_inte
{
openfpm::vector<send_vector> & g_send_prp;
size_t i;
openfpm::vector<Memory> & hsmem;
size_t j;
set_mem_retained_buffers_inte(openfpm::vector<send_vector> & g_send_prp, size_t i ,
openfpm::vector<Memory> & hsmem, size_t j)
:g_send_prp(g_send_prp),i(i),hsmem(hsmem),j(j){}
//! It call the setMemory function for each property
template<typename T>
inline void operator()(T& t)
{
typedef typename boost::mpl::at<v_mpl,T>::type prp_ms;
g_send_prp.get(i).template setMemory<prp_ms::value>(hsmem.get(j));
j++;
}
};
template<bool inte_or_lin,typename send_vector, typename v_mpl>
struct set_mem_retained_buffers
{
static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> & g_opart,
size_t i,
openfpm::vector<Memory> & hsmem,
size_t j)
{
// Set the memory for retain the send buffer
g_send_prp.get(i).setMemory(hsmem.get(j));
// resize the sending vector (No allocation is produced)
g_send_prp.get(i).resize(g_opart.get(i).size());
return j+1;
}
};
template<typename send_vector, typename v_mpl>
struct set_mem_retained_buffers<true,send_vector,v_mpl>
{
static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> & g_opart,
size_t i,
openfpm::vector<Memory> & hsmem,
size_t j)
{
set_mem_retained_buffers_inte<send_vector,v_mpl> smrbi(g_send_prp,i,hsmem,j);
boost::mpl::for_each_ref<v_mpl>(smrbi);
// resize the sending vector (No allocation is produced)
g_send_prp.get(i).resize(g_opart.get(i).size());
return smrbi.j;
}
};
/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
*
* \tparam send_vector type used to send data
......@@ -537,25 +614,33 @@ class vector_dist_comm
* \param g_send_prp Send buffer to fill
*
*/
template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp)
template<typename send_vector, typename prp_object, int ... prp>
void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
openfpm::vector<send_vector> & g_send_prp)
{
size_t factor = 1;
typedef typename to_boost_vmpl<prp...>::type v_mpl;
if (is_layout_inte<layout_base<prop>>::value == true) {factor *= sizeof...(prp);}
// create a number of send buffers equal to the near processors
g_send_prp.resize(g_opart.size());
resize_retained_buffer(hsmem,g_send_prp.size());
resize_retained_buffer(hsmem,g_send_prp.size()*factor);
for (size_t i = 0; i < g_send_prp.size(); i++)
for (size_t i = 0; i < hsmem.size(); i++)
{
// Buffer must retained and survive the destruction of the
// vector
if (hsmem.get(i).ref() == 0)
hsmem.get(i).incRef();
// Set the memory for retain the send buffer
g_send_prp.get(i).setMemory(hsmem.get(i));
{hsmem.get(i).incRef();}
}
// resize the sending vector (No allocation is produced)
g_send_prp.get(i).resize(g_opart.get(i).size());
size_t j = 0;
for (size_t i = 0; i < g_send_prp.size(); i++)
{
j = set_mem_retained_buffers<is_layout_inte<layout_base<prop>>::value,send_vector,v_mpl>::set_mem_retained_buffers_(g_send_prp,g_opart,i,hsmem,j);
}
// Fill the send buffer
......@@ -564,9 +649,9 @@ class vector_dist_comm
for (size_t j = 0; j < g_opart.get(i).size(); j++)
{
// source object type
typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
typedef decltype(v_prp.get(g_opart.get(i).template get<0>(j))) encap_src;
// destination object type
typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;
typedef decltype(g_send_prp.get(i).get(j)) encap_dst;
// Copy only the selected properties
object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(g_opart.get(i).template get<0>(j)), g_send_prp.get(i).get(j));
......@@ -731,7 +816,10 @@ class vector_dist_comm
* \param g_m ghost marker
*
*/
void labelParticlesGhost(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc, size_t & g_m)
void labelParticlesGhost(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
openfpm::vector<size_t> & prc,
size_t & g_m)
{
// Buffer that contain for each processor the id of the particle to send
g_opart.clear();
......@@ -946,25 +1034,28 @@ public:
* \param g_m marker between real and ghost particles
*
*/
template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = WITH_POSITION)
template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
size_t & g_m,
size_t opt = WITH_POSITION)
{
// Sending property object
typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
// send vector for each processor
typedef openfpm::vector<prp_object> send_vector;
typedef openfpm::vector<prp_object,Memory,typename layout_base<prp_object>::type,layout_base> send_vector;
if (!(opt & NO_POSITION))
v_pos.resize(g_m);
{v_pos.resize(g_m);}
// reset the ghost part
if (!(opt & SKIP_LABELLING))
v_prp.resize(g_m);
{v_prp.resize(g_m);}
// Label all the particles
if ((opt & SKIP_LABELLING) == false)
labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m);
{labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m);}
// Send and receive ghost particle information
{
......@@ -1003,13 +1094,13 @@ public:
if (opt & SKIP_LABELLING)
{
size_t opt_ = compute_options(opt);
v_cl.SSendRecv(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
v_cl.SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
else
{
prc_recv_get.clear();
recv_sz_get.clear();
v_cl.SSendRecv(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get);
v_cl.SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get);
}
// fill g_opart_sz
......
......@@ -38,10 +38,6 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
auto cell = cl.getCell(xp);
int s1 = cell.get(0);
int s2 = cell.get(1);
int s3 = cell.get(2);
Point<3,float> force1({0.0,0.0,0.0});
Point<3,float> force2({0.0,0.0,0.0});
......@@ -78,6 +74,172 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
vd.template getProp<2>(p)[2] = force2.get(2);
}
template<typename CellList_type>
__global__ void calculate_force_full_sort(vector_dist_ker<3, float, aggregate<float, float[3], float [3]>> vd,
CellList_type cl)
{
auto p = GET_PARTICLE(vd);
Point<3,float> xp = vd.getPos(p);
auto it = cl.getNNIterator(cl.getCell(xp));
auto cell = cl.getCell(xp);
Point<3,float> force1({0.0,0.0,0.0});
while (it.isNext())
{
auto q1 = it.get();
if (q1 == p) {++it; continue;}
Point<3,float> xq_1 = vd.getPos(q1);
Point<3,float> r1 = xq_1 - xp;
// Normalize
r1 /= r1.norm();
force1 += vd.template getProp<0>(q1)*r1;
++it;
}
vd.template getProp<1>(p)[0] = force1.get(0);
vd.template getProp<1>(p)[1] = force1.get(1);
vd.template getProp<1>(p)[2] = force1.get(2);
}
template<typename CellList_type, typename vector_type>
bool check_force(CellList_type & NN_cpu, vector_type & vd)
{
auto it6 = vd.getDomainIterator();
bool match = true;
while (it6.isNext())
{
auto p = it6.get();
Point<3,float> xp = vd.getPos(p);
// Calculate on CPU
Point<3,float> force({0.0,0.0,0.0});
auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
while (NNc.isNext())
{
auto q = NNc.get();
if (q == p.getKey()) {++NNc; continue;}
Point<3,float> xq_2 = vd.getPos(q);
Point<3,float> r2 = xq_2 - xp;
// Normalize
r2 /= r2.norm();
force += vd.template getProp<0>(q)*r2;
++NNc;
}
match &= fabs(vd.template getProp<1>(p)[0] - vd.template getProp<2>(p)[0]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[1] - vd.template getProp<2>(p)[1]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[2] - vd.template getProp<2>(p)[2]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[0] - force.get(0)) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[1] - force.get(1)) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[2] - force.get(2)) < 0.0001;
++it6;
}
return match;
}
BOOST_AUTO_TEST_CASE( vector_dist_gpu_ghost_get )
{
auto & v_cl = create_vcluster();
if (v_cl.size() > 16)
{return;}
Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
Ghost<3,float> g(0.1);
// Boundary conditions
size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
vector_dist_gpu<3,float,aggregate<float,float[3],float[3]>> vd(1000,domain,bc,g);
auto it = vd.getDomainIterator();
while (it.isNext())
{
auto p = it.get();
vd.getPos(p)[0] = (float)rand() / RAND_MAX;
vd.getPos(p)[1] = (float)rand() / RAND_MAX;
vd.getPos(p)[2] = (float)rand() / RAND_MAX;
vd.template getProp<0>(p) = vd.getPos(p)[0] + vd.getPos(p)[1] + vd.getPos(p)[2];
vd.template getProp<1>(p)[0] = vd.getPos(p)[0] + vd.getPos(p)[1];
vd.template getProp<1>(p)[1] = vd.getPos(p)[0] + vd.getPos(p)[2];
vd.template getProp<1>(p)[2] = vd.getPos(p)[1] + vd.getPos(p)[2];
vd.template getProp<2>(p)[0] = vd.getPos(p)[0] + 3.0*vd.getPos(p)[1];
vd.template getProp<2>(p)[1] = vd.getPos(p)[0] + 3.0*vd.getPos(p)[2];
vd.template getProp<2>(p)[2] = vd.getPos(p)[1] + 3.0*vd.getPos(p)[2];
++it;
}
// Ok we redistribute the particles (CPU based)
vd.map();
vd.template ghost_get<0,1,2>();
// Now we check the the ghost contain the correct information
bool check = true;
auto itg = vd.getDomainAndGhostIterator();
while (itg.isNext())
{
auto p = itg.get();
check &= (vd.template getProp<0>(p) == vd.getPos(p)[0] + vd.getPos(p)[1] + vd.getPos(p)[2]);
check &= (vd.template getProp<1>(p)[0] == vd.getPos(p)[0] + vd.getPos(p)[1]);
check &= (vd.template getProp<1>(p)[1] == vd.getPos(p)[0] + vd.getPos(p)[2]);
check &= (vd.template getProp<1>(p)[2] == vd.getPos(p)[1] + vd.getPos(p)[2]);
check &= (vd.template getProp<2>(p)[0] == vd.getPos(p)[0] + 3.0*vd.getPos(p)[1]);
check &= (vd.template getProp<2>(p)[1] == vd.getPos(p)[0] + 3.0*vd.getPos(p)[2]);
check &= (vd.template getProp<2>(p)[2] == vd.getPos(p)[1] + 3.0*vd.getPos(p)[2]);
++itg;
}
size_t tot_s = vd.size_local_with_ghost();
v_cl.sum(tot_s);
v_cl.execute();
// We check that we check something
BOOST_REQUIRE(tot_s > 1000);
}
BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
{
auto & v_cl = create_vcluster();
......@@ -108,7 +270,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++it;
}
// Ok we redistribute the particles
// Ok we redistribute the particles (CPU based)
vd.map();
size_t size_l = vd.size_local();
......@@ -145,6 +307,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
auto it3 = vd.getDomainIteratorGPU();
// offload to device
vd.hostToDevicePos();
initialize_props<<<it3.wthr,it3.thr>>>(vd.toKernel());
......@@ -170,6 +333,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++it4;
}
// here we do a ghost_get
vd.ghost_get<0>();
// we re-offload what we received
vd.hostToDevicePos();
vd.template hostToDeviceProp<0>();
auto NN = vd.getCellListGPU(0.1);
auto NN_cpu = vd.getCellList(0.1);
......@@ -179,50 +349,31 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
vd.template deviceToHostProp<1,2>();
auto it6 = vd.getDomainIterator();
bool match = true;
while (it6.isNext())
{
auto p = it6.get();
Point<3,float> xp = vd.getPos(p);
bool test = check_force(NN_cpu,vd);
BOOST_REQUIRE_EQUAL(test,true);
// Calculate on CPU
// We do exactly the same test as before, but now we completely use the sorted version
Point<3,float> force({0.0,0.0,0.0});
calculate_force_full_sort<decltype(NN.toKernel())><<<it5.wthr,it5.thr>>>(vd.toKernel_sorted(),NN.toKernel());
auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
vd.template deviceToHostProp<1>();
while (NNc.isNext())
{
auto q = NNc.get();
test = check_force(NN_cpu,vd);
BOOST_REQUIRE_EQUAL(test,true);
if (q == p.getKey()) {++NNc; continue;}
// check
Point<3,float> xq_2 = vd.getPos(q);
Point<3,float> r2 = xq_2 - xp;
// Now we do a ghost_get from CPU
// Normalize
// Than we offload on GPU
r2 /= r2.norm();
force += vd.template getProp<0>(q)*r2;
// We construct a Cell-list
++NNc;
}
// We calculate force on CPU and GPU to check if they match
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],vd.template getProp<2>(p)[0],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],vd.template getProp<2>(p)[1],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],vd.template getProp<2>(p)[2],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],force.get(0),0.01);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],force.get(1),0.01);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],force.get(2),0.01);
++it6;
}
}
BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment