Commit 7f2973d5 authored by incardon's avatar incardon
Browse files

ghost local particles moving on with tests

parent e0df46eb
openfpm_data @ 42183947
Subproject commit c46dc01e03b1ea215239e8cb817ebff4c61a11db
Subproject commit 42183947ec6434fa7644185690b005c494b26676
......@@ -26,8 +26,7 @@ struct ID_operation
__device__ __host__ inline void op(unsigned int base, unsigned int n, unsigned int proc_act, unsigned int shift_act, unsigned int pi)
{
output.template get<0>(base + n) = proc_act;
output.template get<1>(base + n) = pi;
output.template get<2>(base + n) = shift_act;
output.template get<1>(base + n) = (unsigned long int)pi + (((unsigned long int)shift_act) << 32);
}
};
......
......@@ -66,16 +66,10 @@ class ie_ghost
//! Cell-list that store the geometrical information of the internal ghost boxes
CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> geo_cell;
//! Cell-list that store the geometrical information of the internal ghost boxes (on a processor based lavel)
CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> geo_cell_proc;
typedef openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> proc_boxes;
//! internal ghost Boxes for each processor
openfpm::vector<aggregate<proc_boxes,int>,Memory,typename layout_base<aggregate<proc_boxes,int>>::type,layout_base> vb_int_proc;
//! shift vectors
openfpm::vector<Point<dim,T>> shifts;
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> shifts;
//! Temporal buffers to return temporal information for ghost_processorID
openfpm::vector<std::pair<size_t,size_t>> ids_p;
......@@ -84,7 +78,7 @@ class ie_ghost
openfpm::vector<size_t> ids;
//! shift converter
shift_vect_converter<dim,T> sc_convert;
shift_vect_converter<dim,T,Memory,layout_base> sc_convert;
//! host to device transfer
bool host_dev_transfer = false;
......@@ -210,9 +204,6 @@ protected:
{
// Initialize the geo_cell structure
geo_cell.Initialize(domain,div,0);
// Initialize the geo_cell structure
geo_cell_proc.Initialize(domain,div,0);
}
/*! \brief Create the box_nn_processor_int (bx part) structure
......@@ -315,30 +306,6 @@ protected:
}
}
/*! \brief construct the vb_int_proc box
*
*
*/
void construct_vb_int_proc(const nn_prcs<dim,T> & nn_p)
{
vb_int_proc.resize_no_device(proc_int_box.size());
for (size_t i = 0 ; i < proc_int_box.size() ; i++)
{
vb_int_proc.template get<0>(i).resize(proc_int_box.get(i).ibx.size());
for (size_t j = 0 ; j < proc_int_box.get(i).ibx.size() ; j++)
{
for (size_t k = 0 ; k < dim ; k++)
{
vb_int_proc.template get<0>(i).template get<0>(j)[k] = proc_int_box.get(i).ibx.get(j).bx.getLow(k);
vb_int_proc.template get<0>(i).template get<1>(j)[k] = proc_int_box.get(i).ibx.get(j).bx.getHigh(k);
}
}
vb_int_proc.template get<1>(i) = nn_p.IDtoProc(i);
}
}
/*! \brief Create the box_nn_processor_int (nbx part) structure, the geo_cell list and proc_int_box
*
......@@ -470,24 +437,6 @@ protected:
geo_cell.addCell(cell,vb_int.size()-1);
// Check if p_id already exist at that cell
// and we add it only if does not exist
size_t nc = geo_cell_proc.getNelements(cell);
bool found = false;
for (size_t s = 0; s < nc ; s++)
{
if (geo_cell_proc.get(cell,s) == lc_proc)
{
found = true;
break;
}
}
if (found == false)
{geo_cell_proc.addCell(cell,lc_proc);}
++g_sub;
}
}
......@@ -495,8 +444,6 @@ protected:
}
}
construct_vb_int_proc(nn_p);
reorder_geo_cell();
}
......@@ -641,7 +588,7 @@ public:
* \return the shift vectors
*
*/
const openfpm::vector<Point<dim,T>> & getShiftVectors()
const openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & getShiftVectors()
{
return shifts;
}
......@@ -1244,13 +1191,9 @@ public:
if (host_dev_transfer == false)
{
geo_cell.hostToDevice();
geo_cell_proc.hostToDevice();
vb_int_box.template hostToDevice<0,1>();
vb_int.template hostToDevice<0,1,2>();
for (size_t i = 0 ; i < vb_int_proc.size() ; i++)
{vb_int_proc.template get<0>(i). template hostToDevice<0,1>();}
vb_int_proc.template hostToDevice<0,1>();
shifts.template hostToDevice<0>();
host_dev_transfer = true;
}
......
......@@ -16,7 +16,7 @@
* handle such case
*
*/
template<unsigned int dim, typename T>
template<unsigned int dim, typename T, typename Memory, template<typename> class layout_base>
class shift_vect_converter
{
//! Indicate which indexes are non_periodic
......@@ -33,7 +33,8 @@ class shift_vect_converter
* \param domain box that describe the domain
*
*/
void generateShiftVectors_ld(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts)
void generateShiftVectors_ld(const Box<dim,T> & domain, size_t (& bc)[dim],
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & shifts)
{
shifts.resize(openfpm::math::pow(3,dim));
......@@ -69,7 +70,8 @@ class shift_vect_converter
* \param domain box that describe the domain
*
*/
void generateShiftVectors_hd(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts)
void generateShiftVectors_hd(const Box<dim,T> & domain, size_t (& bc)[dim],
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & shifts)
{
// get the indexes of the free degree of freedom
for (size_t i = 0 ; i < dim ; i++)
......@@ -123,7 +125,8 @@ public:
* \param domain box that describe the domain
*
*/
void generateShiftVectors(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts)
void generateShiftVectors(const Box<dim,T> & domain, size_t (& bc)[dim],
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & shifts)
{
if (dim < 10)
{generateShiftVectors_ld(domain,bc,shifts);}
......
......@@ -18,7 +18,7 @@ BOOST_AUTO_TEST_CASE( shift_vect_converter_tests_use )
{
{
Box<3,double> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
shift_vect_converter<3,double> svc;
shift_vect_converter<3,double,HeapMemory,memory_traits_lin> svc;
size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
openfpm::vector<Point<3,double>> sv;
......@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE( shift_vect_converter_tests_use )
bc[17] = PERIODIC;
bc[23] = PERIODIC;
shift_vect_converter<50,double> svc;
shift_vect_converter<50,double,HeapMemory,memory_traits_lin> svc;
svc.generateShiftVectors(domain,bc,sv);
......
......@@ -6,11 +6,360 @@
#include "Vector/util/vector_dist_funcs.hpp"
#include "Decomposition/CartDecomposition.hpp"
#include "util/cuda/scan_cuda.cuh"
#include "util/cuda/moderngpu/kernel_scan.hxx"
#define SUB_UNIT_FACTOR 1024
BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
{
typedef aggregate<float,float[3],float[3][3]> prop;
openfpm::vector_gpu<prop> v_prp;
v_prp.resize(10000);
openfpm::vector_gpu<Point<3,float>> v_pos;
v_pos.resize(10000);
openfpm::vector_gpu<aggregate<unsigned int,unsigned int>> o_part_loc;
for (size_t i = 0 ; i < v_prp.size() ; i++)
{
v_pos.template get<0>(i)[0] = (float)rand()/RAND_MAX;
v_pos.template get<0>(i)[1] = (float)rand()/RAND_MAX;
v_pos.template get<0>(i)[2] = (float)rand()/RAND_MAX;
v_prp.template get<0>(i) = i+12345;
v_prp.template get<1>(i)[0] = i;
v_prp.template get<1>(i)[1] = i+20000;
v_prp.template get<1>(i)[2] = i+50000;
v_prp.template get<2>(i)[0][0] = i+60000;
v_prp.template get<2>(i)[0][1] = i+70000;
v_prp.template get<2>(i)[0][2] = i+80000;
v_prp.template get<2>(i)[1][0] = i+90000;
v_prp.template get<2>(i)[1][1] = i+100000;
v_prp.template get<2>(i)[1][2] = i+110000;
v_prp.template get<2>(i)[2][0] = i+120000;
v_prp.template get<2>(i)[2][1] = i+130000;
}
openfpm::vector_gpu<Box<3,float>> box_f_dev;
openfpm::vector_gpu<aggregate<unsigned int>> box_f_sv;
box_f_dev.resize(4);
box_f_sv.resize(4);
box_f_dev.template get<0>(0)[0] = 0.0;
box_f_dev.template get<0>(0)[1] = 0.0;
box_f_dev.template get<0>(0)[2] = 0.0;
box_f_dev.template get<1>(0)[0] = 0.5;
box_f_dev.template get<1>(0)[1] = 1.0;
box_f_dev.template get<1>(0)[2] = 1.0;
box_f_sv.template get<0>(0) = 0;
box_f_dev.template get<0>(1)[0] = 0.0;
box_f_dev.template get<0>(1)[1] = 0.0;
box_f_dev.template get<0>(1)[2] = 0.0;
box_f_dev.template get<1>(1)[0] = 0.3;
box_f_dev.template get<1>(1)[1] = 1.0;
box_f_dev.template get<1>(1)[2] = 1.0;
box_f_sv.template get<0>(1) = 1;
box_f_dev.template get<0>(2)[0] = 0.0;
box_f_dev.template get<0>(2)[1] = 0.0;
box_f_dev.template get<0>(2)[2] = 0.0;
box_f_dev.template get<1>(2)[0] = 0.2;
box_f_dev.template get<1>(2)[1] = 1.0;
box_f_dev.template get<1>(2)[2] = 1.0;
box_f_sv.template get<0>(2) = 2;
box_f_dev.template get<0>(3)[0] = 0.0;
box_f_dev.template get<0>(3)[1] = 0.0;
box_f_dev.template get<0>(3)[2] = 0.0;
box_f_dev.template get<1>(3)[0] = 0.1;
box_f_dev.template get<1>(3)[1] = 1.0;
box_f_dev.template get<1>(3)[2] = 1.0;
box_f_sv.template get<0>(3) = 3;
// Label the internal (assigned) particles
auto ite = v_pos.getGPUIteratorTo(v_pos.size());
o_part_loc.resize(v_pos.size()+1);
o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
o_part_loc.template hostToDevice<0>(o_part_loc.size()-1,o_part_loc.size()-1);
box_f_dev.hostToDevice<0,1>();
box_f_sv.hostToDevice<0>();
v_pos.hostToDevice<0>();
v_prp.hostToDevice<0,1,2>();
// label particle processor
num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),v_pos.toKernel(),o_part_loc.toKernel());
o_part_loc.deviceToHost<0>();
bool match = true;
for (size_t i = 0 ; i < v_pos.size() ; i++)
{
if (v_pos.template get<0>(i)[0] >= 0.5)
{match &= o_part_loc.template get<0>(i) == 0;}
else if (v_pos.template get<0>(i)[0] >= 0.3)
{match &= o_part_loc.template get<0>(i) == 1;}
else if (v_pos.template get<0>(i)[0] >= 0.2)
{match &= o_part_loc.template get<0>(i) == 2;}
else if (v_pos.template get<0>(i)[0] >= 0.1)
{match &= o_part_loc.template get<0>(i) == 3;}
else
{match &= o_part_loc.template get<0>(i) == 4;}
}
BOOST_REQUIRE_EQUAL(match,true);
openfpm::vector_gpu<aggregate<unsigned int>> starts;
starts.resize(o_part_loc.size());
auto & v_cl = create_vcluster();
mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());
starts.deviceToHost<0>(starts.size()-1,starts.size()-1);
size_t tot = starts.template get<0>(o_part_loc.size()-1);
openfpm::vector<Point<3,float>,CudaMemory,typename memory_traits_inte<Point<3,float>>::type,memory_traits_inte> shifts;
shifts.resize(4);
shifts.template get<0>(0)[0] = 10.0;
shifts.template get<0>(0)[1] = 0.0;
shifts.template get<0>(0)[2] = 0.0;
shifts.template get<0>(1)[0] = 20.0;
shifts.template get<0>(1)[1] = 0.0;
shifts.template get<0>(1)[2] = 0.0;
shifts.template get<0>(2)[0] = 30.0;
shifts.template get<0>(2)[1] = 0.0;
shifts.template get<0>(2)[2] = 0.0;
shifts.template get<0>(3)[0] = 40.0;
shifts.template get<0>(3)[1] = 0.0;
shifts.template get<0>(3)[2] = 0.0;
size_t old = v_pos.size();
v_pos.resize(v_pos.size() + tot);
shifts.template hostToDevice<0>();
openfpm::vector_gpu<aggregate<unsigned int,unsigned int>> o_part_loc2;
o_part_loc2.resize(tot);
shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
decltype(starts.toKernel()),decltype(shifts.toKernel()),
decltype(o_part_loc2.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),
starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old);
v_pos.deviceToHost<0>();
o_part_loc2.deviceToHost<0,1>();
size_t base = old;
size_t base_o = 0;
for (size_t i = 0 ; i < old ; i++)
{
if (v_pos.template get<0>(i)[0] >= 0.5)
{}
else if (v_pos.template get<0>(i)[0] >= 0.3)
{
for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
{
match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
match &= o_part_loc2.template get<0>(base_o) == i;
match &= o_part_loc2.template get<1>(base_o) == j;
base++;
base_o++;
}
}
else if (v_pos.template get<0>(i)[0] >= 0.2)
{
for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
{
match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
match &= o_part_loc2.template get<0>(base_o) == i;
match &= o_part_loc2.template get<1>(base_o) == j;
base++;
base_o++;
}
}
else if (v_pos.template get<0>(i)[0] >= 0.1)
{
for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
{
match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
match &= o_part_loc2.template get<0>(base_o) == i;
match &= o_part_loc2.template get<1>(base_o) == j;
base++;
base_o++;
}
}
else
{
for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
{
match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
match &= o_part_loc2.template get<0>(base_o) == i;
match &= o_part_loc2.template get<1>(base_o) == j;
base++;
base_o++;
}
}
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
{
typedef aggregate<float,float[3],float[3][3]> prop;
// Sending property object
typedef object<typename object_creator<typename prop::type, 0,1,2>::type> prp_object;
// send vector for each processor
typedef openfpm::vector<prp_object,CudaMemory,typename memory_traits_inte<prp_object>::type,memory_traits_inte> send_vector;
openfpm::vector<send_vector> g_send_prp;
auto & v_cl = create_vcluster();
// Vcluster
Vcluster<> & vcl = create_vcluster();
openfpm::vector_gpu<prop> v_prp;
v_prp.resize(10000);
openfpm::vector_gpu<aggregate<unsigned int,unsigned int,unsigned int>> g_opart_device;
for (size_t i = 0 ; i < v_prp.size() ; i++)
{
v_prp.template get<0>(i) = i+12345;
v_prp.template get<1>(i)[0] = i;
v_prp.template get<1>(i)[1] = i+20000;
v_prp.template get<1>(i)[2] = i+50000;
v_prp.template get<2>(i)[0][0] = i+60000;
v_prp.template get<2>(i)[0][1] = i+70000;
v_prp.template get<2>(i)[0][2] = i+80000;
v_prp.template get<2>(i)[1][0] = i+90000;
v_prp.template get<2>(i)[1][1] = i+100000;
v_prp.template get<2>(i)[1][2] = i+110000;
v_prp.template get<2>(i)[2][0] = i+120000;
v_prp.template get<2>(i)[2][1] = i+130000;
v_prp.template get<2>(i)[2][2] = i+140000;
}
v_prp.hostToDevice<0,1,2>();
g_opart_device.resize(2*10000*3);
for (size_t i = 0 ; i < 3 ; i++)
{
for (size_t j = 0 ; j < 10000 ; j++)
{
g_opart_device.template get<0>(i*2*10000 + j*2) = i;
g_opart_device.template get<0>(i*2*10000 + j*2+1) = i;
g_opart_device.template get<1>(i*2*10000 + j*2) = j;
g_opart_device.template get<1>(i*2*10000 + j*2+1) = j;
g_opart_device.template get<2>(i*2*10000 + j*2) = 0;
g_opart_device.template get<2>(i*2*10000 + j*2+1) = 0;
}
}
g_opart_device.hostToDevice<0,1,2>();
g_send_prp.resize(3);
bool match = true;
size_t offset = 0;
for (size_t i = 0 ; i < 3 ; i++)
{
g_send_prp.get(i).resize(2*10000);
auto ite = g_send_prp.get(i).getGPUIterator();
process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),0,1,2>
<<<ite.wthr,ite.thr>>>
(g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
v_prp.toKernel(),offset);
offset += g_send_prp.get(i).size();
///////////// TEST ////////////
g_send_prp.get(i).deviceToHost<0,1,2>();
for (size_t j = 0 ; j < 10000 ; j++)
{
match &= g_send_prp.get(i).template get<0>(2*j) == j+12345;
match &= g_send_prp.get(i).template get<1>(2*j)[0] == j;
match &= g_send_prp.get(i).template get<1>(2*j)[1] == j+20000;
match &= g_send_prp.get(i).template get<1>(2*j)[2] == j+50000;
match &= g_send_prp.get(i).template get<2>(2*j)[0][0] == j+60000;
match &= g_send_prp.get(i).template get<2>(2*j)[0][1] == j+70000;
match &= g_send_prp.get(i).template get<2>(2*j)[0][2] == j+80000;
match &= g_send_prp.get(i).template get<2>(2*j)[1][0] == j+90000;
match &= g_send_prp.get(i).template get<2>(2*j)[1][1] == j+100000;
match &= g_send_prp.get(i).template get<2>(2*j)[1][2] == j+110000;
match &= g_send_prp.get(i).template get<2>(2*j)[2][0] == j+120000;
match &= g_send_prp.get(i).template get<2>(2*j)[2][1] == j+130000;
match &= g_send_prp.get(i).template get<2>(2*j)[2][2] == j+140000;
match = g_send_prp.get(i).template get<0>(2*j+1) == j+12345;
match = g_send_prp.get(i).template get<1>(2*j+1)[0] == j;
match = g_send_prp.get(i).template get<1>(2*j+1)[1] == j+20000;
match = g_send_prp.get(i).template get<1>(2*j+1)[2] == j+50000;
match = g_send_prp.get(i).template get<2>(2*j+1)[0][0] == j+60000;
match = g_send_prp.get(i).template get<2>(2*j+1)[0][1] == j+70000;
match = g_send_prp.get(i).template get<2>(2*j+1)[0][2] == j+80000;
match = g_send_prp.get(i).template get<2>(2*j+1)[1][0] == j+90000;
match = g_send_prp.get(i).template get<2>(2*j+1)[1][1] == j+100000;
match = g_send_prp.get(i).template get<2>(2*j+1)[1][2] == j+110000;
match = g_send_prp.get(i).template get<2>(2*j+1)[2][0] == j+120000;
match = g_send_prp.get(i).template get<2>(2*j+1)[2][1] == j+130000;
match = g_send_prp.get(i).template get<2>(2*j+1)[2][2] == j+140000;
}
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
{
auto & v_cl = create_vcluster();
......@@ -114,9 +463,9 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
///////////////////////// we collect the processor and shift id //////////////////////////
openfpm::vector<aggregate<unsigned int,unsigned int,unsigned int>,
openfpm::vector<aggregate<unsigned int,long unsigned int>,
CudaMemory,
typename memory_traits_inte<aggregate<unsigned int,unsigned int,unsigned int>>::type,
typename memory_traits_inte<aggregate<unsigned int,long unsigned int>>::type,
memory_traits_inte> output;