Commit 34e4e85b authored by incardon

map and ghost_get working with switched layout

parent d518b0dc
openfpm_vcluster @ 7c68ec7f
Subproject commit faa1d114c2d13e562d200c92e98c1ed7be306eeb
Subproject commit 7c68ec7f6572fbb003101e0dc950404574d6e693
This diff is collapsed.
......@@ -38,10 +38,6 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
auto cell = cl.getCell(xp);
int s1 = cell.get(0);
int s2 = cell.get(1);
int s3 = cell.get(2);
Point<3,float> force1({0.0,0.0,0.0});
Point<3,float> force2({0.0,0.0,0.0});
......@@ -78,6 +74,172 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
vd.template getProp<2>(p)[2] = force2.get(2);
}
/*! \brief Kernel that accumulates, for one particle per thread, a force from its cell-list neighbours
 *
 * For the particle p selected by GET_PARTICLE, every neighbour q returned by the
 * neighbourhood iterator of cl (skipping q == p) contributes property 0 of q
 * multiplied by the unit vector pointing from p to q. The three components of the
 * accumulated vector are written into property 1 of p.
 *
 * \tparam CellList_type type of the cell-list object passed to the kernel
 *
 * \param vd particle set; positions and property 0 are read, property 1 is written
 *           (presumably the sorted view of the particles, as the kernel name suggests -- confirm with the caller)
 * \param cl cell-list used to enumerate the neighbours of each particle
 *
 */
template<typename CellList_type>
__global__ void calculate_force_full_sort(vector_dist_ker<3, float, aggregate<float, float[3], float [3]>> vd,
										  CellList_type cl)
{
	// NOTE(review): GET_PARTICLE is a macro defined outside this view; it is assumed to
	// yield the particle index handled by this thread -- confirm its exact expansion
	auto p = GET_PARTICLE(vd);

	Point<3,float> xp = vd.getPos(p);

	// Neighbourhood iterator of the cell containing p
	auto it = cl.getNNIterator(cl.getCell(xp));

	Point<3,float> force1({0.0,0.0,0.0});

	while (it.isNext())
	{
		auto q1 = it.get();

		// A particle does not interact with itself
		if (q1 == p) {++it; continue;}

		Point<3,float> xq_1 = vd.getPos(q1);
		Point<3,float> r1 = xq_1 - xp;

		// Normalize (not guarded against a zero norm, i.e. q1 at the same position as p)
		r1 /= r1.norm();

		force1 += vd.template getProp<0>(q1)*r1;

		++it;
	}

	vd.template getProp<1>(p)[0] = force1.get(0);
	vd.template getProp<1>(p)[1] = force1.get(1);
	vd.template getProp<1>(p)[2] = force1.get(2);
}
template<typename CellList_type, typename vector_type>
bool check_force(CellList_type & NN_cpu, vector_type & vd)
{
auto it6 = vd.getDomainIterator();
bool match = true;
while (it6.isNext())
{
auto p = it6.get();
Point<3,float> xp = vd.getPos(p);
// Calculate on CPU
Point<3,float> force({0.0,0.0,0.0});
auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
while (NNc.isNext())
{
auto q = NNc.get();
if (q == p.getKey()) {++NNc; continue;}
Point<3,float> xq_2 = vd.getPos(q);
Point<3,float> r2 = xq_2 - xp;
// Normalize
r2 /= r2.norm();
force += vd.template getProp<0>(q)*r2;
++NNc;
}
match &= fabs(vd.template getProp<1>(p)[0] - vd.template getProp<2>(p)[0]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[1] - vd.template getProp<2>(p)[1]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[2] - vd.template getProp<2>(p)[2]) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[0] - force.get(0)) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[1] - force.get(1)) < 0.0001;
match &= fabs(vd.template getProp<1>(p)[2] - force.get(2)) < 0.0001;
++it6;
}
return match;
}
/*! \brief Test map() followed by ghost_get<0,1,2>() on a vector_dist_gpu
 *
 * 1000 particles receive random positions and properties that are functions of the
 * position. The particles are redistributed, the ghost is filled, and the total
 * number of local + ghost particles summed over the processors is required to
 * exceed 1000.
 *
 */
BOOST_AUTO_TEST_CASE( vector_dist_gpu_ghost_get )
{
	auto & v_cl = create_vcluster();

	// Nothing is tested when running on more than 16 processors
	if (v_cl.size() > 16)
	{return;}

	// Unit cube
	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});

	// Ghost extension of 0.1
	Ghost<3,float> g(0.1);

	// Periodic in all the three directions
	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};

	vector_dist_gpu<3,float,aggregate<float,float[3],float[3]>> vd(1000,domain,bc,g);

	for (auto it = vd.getDomainIterator() ; it.isNext() ; ++it)
	{
		auto p = it.get();

		// Random coordinates, drawn in x,y,z order
		const float x = static_cast<float>(rand()) / RAND_MAX;
		const float y = static_cast<float>(rand()) / RAND_MAX;
		const float z = static_cast<float>(rand()) / RAND_MAX;

		vd.getPos(p)[0] = x;
		vd.getPos(p)[1] = y;
		vd.getPos(p)[2] = z;

		// Every property is a function of the position
		vd.template getProp<0>(p) = x + y + z;

		vd.template getProp<1>(p)[0] = x + y;
		vd.template getProp<1>(p)[1] = x + z;
		vd.template getProp<1>(p)[2] = y + z;

		vd.template getProp<2>(p)[0] = x + 3.0*y;
		vd.template getProp<2>(p)[1] = x + 3.0*z;
		vd.template getProp<2>(p)[2] = y + 3.0*z;
	}

	// Redistribute the particles (CPU based)
	vd.map();

	// Fill the ghost with the positions and all the three properties
	vd.template ghost_get<0,1,2>();

	// Compare, on domain and ghost particles, the properties with the same
	// functions of the position used to fill them.
	// NOTE(review): check is accumulated here but never asserted in this test -- confirm
	// whether a requirement on it is intended
	bool check = true;

	for (auto itg = vd.getDomainAndGhostIterator() ; itg.isNext() ; ++itg)
	{
		auto p = itg.get();

		const float x = vd.getPos(p)[0];
		const float y = vd.getPos(p)[1];
		const float z = vd.getPos(p)[2];

		check &= (vd.template getProp<0>(p) == x + y + z);

		check &= (vd.template getProp<1>(p)[0] == x + y);
		check &= (vd.template getProp<1>(p)[1] == x + z);
		check &= (vd.template getProp<1>(p)[2] == y + z);

		check &= (vd.template getProp<2>(p)[0] == x + 3.0*y);
		check &= (vd.template getProp<2>(p)[1] == x + 3.0*z);
		check &= (vd.template getProp<2>(p)[2] == y + 3.0*z);
	}

	// Total number of local + ghost particles across the processors
	size_t tot_s = vd.size_local_with_ghost();

	v_cl.sum(tot_s);
	v_cl.execute();

	// More than the 1000 created particles means ghost particles have been received
	BOOST_REQUIRE(tot_s > 1000);
}
BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
{
auto & v_cl = create_vcluster();
......@@ -108,7 +270,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++it;
}
// Ok we redistribute the particles
// Ok we redistribute the particles (CPU based)
vd.map();
size_t size_l = vd.size_local();
......@@ -145,6 +307,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
auto it3 = vd.getDomainIteratorGPU();
// offload to device
vd.hostToDevicePos();
initialize_props<<<it3.wthr,it3.thr>>>(vd.toKernel());
......@@ -170,6 +333,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++it4;
}
// here we do a ghost_get
vd.ghost_get<0>();
// we re-offload what we received
vd.hostToDevicePos();
vd.template hostToDeviceProp<0>();
auto NN = vd.getCellListGPU(0.1);
auto NN_cpu = vd.getCellList(0.1);
......@@ -179,50 +349,31 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
vd.template deviceToHostProp<1,2>();
auto it6 = vd.getDomainIterator();
bool match = true;
while (it6.isNext())
{
auto p = it6.get();
Point<3,float> xp = vd.getPos(p);
bool test = check_force(NN_cpu,vd);
BOOST_REQUIRE_EQUAL(test,true);
// Calculate on CPU
// We do exactly the same test as before, but now we completely use the sorted version
Point<3,float> force({0.0,0.0,0.0});
calculate_force_full_sort<decltype(NN.toKernel())><<<it5.wthr,it5.thr>>>(vd.toKernel_sorted(),NN.toKernel());
auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
vd.template deviceToHostProp<1>();
while (NNc.isNext())
{
auto q = NNc.get();
test = check_force(NN_cpu,vd);
BOOST_REQUIRE_EQUAL(test,true);
if (q == p.getKey()) {++NNc; continue;}
// check
Point<3,float> xq_2 = vd.getPos(q);
Point<3,float> r2 = xq_2 - xp;
// Now we do a ghost_get from CPU
// Normalize
// Then we offload on GPU
r2 /= r2.norm();
force += vd.template getProp<0>(q)*r2;
// We construct a Cell-list
++NNc;
}
// We calculate force on CPU and GPU to check if they match
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],vd.template getProp<2>(p)[0],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],vd.template getProp<2>(p)[1],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],vd.template getProp<2>(p)[2],0.001);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],force.get(0),0.01);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],force.get(1),0.01);
BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],force.get(2),0.01);
++it6;
}
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment