Commit 1974b8bc authored by incardon

Fixing GPU tests for pdata

parent 11ddfc33
Pipeline #856 failed with stages
in 23 seconds
cmake_minimum_required(VERSION 3.8 FATAL_ERROR) cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(openfpm_pdata LANGUAGES C CXX) project(openfpm_pdata LANGUAGES C CXX)
enable_testing()
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/)
......
...@@ -215,6 +215,7 @@ do ...@@ -215,6 +215,7 @@ do
case $ac_useropt in case $ac_useropt in
debug) debug)
conf_options="$conf_options -DCMAKE_BUILD_TYPE=Debug" conf_options="$conf_options -DCMAKE_BUILD_TYPE=Debug"
debug_mode=1
;; ;;
se_class1) se_class1)
conf_options="$conf_options -DSE_CLASS1=ON" conf_options="$conf_options -DSE_CLASS1=ON"
...@@ -524,6 +525,10 @@ Try \`$0 --help' for more information" ...@@ -524,6 +525,10 @@ Try \`$0 --help' for more information"
esac esac
done done
# Default to an optimized build unless the "debug" option was selected during
# option parsing (that branch sets debug_mode=1 and already appends
# -DCMAKE_BUILD_TYPE=Debug to conf_options).
#
# A plain assignment is used on purpose: combining "+=" with "$conf_options" on
# the right-hand side would append a second copy of every option collected so
# far, and "+=" is a bash extension that a POSIX /bin/sh does not accept.
if [ x"$debug_mode" != x"1" ]; then
    conf_options="$conf_options -DCMAKE_BUILD_TYPE=Release"
fi
cd build cd build
## remove everything ## remove everything
......
cmake_minimum_required(VERSION 3.8 FATAL_ERROR) cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
########################### Executables ########################### Executables
if(CUDA_FOUND) if(CUDA_FOUND)
...@@ -16,6 +17,9 @@ endif() ...@@ -16,6 +17,9 @@ endif()
add_library(ofpm_pdata STATIC lib/pdata.cpp) add_library(ofpm_pdata STATIC lib/pdata.cpp)
# Register two CTest entries that launch the pdata binary through mpirun,
# once with 3 processes and once with 4.
# NOTE(review): "./pdata" is resolved relative to the test working directory,
# which CTest defaults to the build directory of this CMakeLists.txt - confirm
# the pdata executable is actually produced there.
# NOTE(review): "mpirun" is looked up on PATH when the test runs - confirm it
# belongs to the same MPI installation used to build the project.
add_test(NAME pdata_3_proc COMMAND mpirun -np 3 ./pdata)
add_test(NAME pdata_4_proc COMMAND mpirun -np 4 ./pdata)
########################### ###########################
if (CUDA_FOUND) if (CUDA_FOUND)
......
...@@ -105,7 +105,7 @@ public: ...@@ -105,7 +105,7 @@ public:
} }
CartDecomposition_gpu(const CartDecomposition_gpu<dim,T,Memory,layout_base> & dec) CartDecomposition_gpu(const CartDecomposition_gpu<dim,T,Memory,layout_base> & dec)
:ie_ghost_gpu<dim,T,Memory,layout_base>(dec),clk(dec.clk),domain(dec.domain) :ie_ghost_gpu<dim,T,Memory,layout_base>(dec),clk(dec.clk),domain(dec.domain),sub_domains_global(dec.sub_domains_global)
{ {
for (int s = 0 ; s < dim ; s++) for (int s = 0 ; s < dim ; s++)
{this->bc[s] = dec.bc[s];} {this->bc[s] = dec.bc[s];}
......
...@@ -41,7 +41,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb ...@@ -41,7 +41,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
// Vcluster // Vcluster
Vcluster<> & vcl = create_vcluster(); Vcluster<> & vcl = create_vcluster();
CartDecomposition<3, double> dec(vcl); CartDecomposition<3, double, CudaMemory,memory_traits_inte> dec(vcl);
size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
...@@ -87,6 +87,8 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb ...@@ -87,6 +87,8 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
mem2.allocate(2*sizeof(unsigned int)); mem2.allocate(2*sizeof(unsigned int));
test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer()); test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
mem2.deviceToHost();
unsigned int tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1]; unsigned int tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1];
openfpm::vector_gpu<aggregate<int,int>> vd; openfpm::vector_gpu<aggregate<int,int>> vd;
...@@ -119,12 +121,16 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb ...@@ -119,12 +121,16 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
test_proc_idbc<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem.getDevicePointer()); test_proc_idbc<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
mem.deviceToHost();
BOOST_REQUIRE(((unsigned int *)mem.getPointer())[0] < vcl.size()); BOOST_REQUIRE(((unsigned int *)mem.getPointer())[0] < vcl.size());
BOOST_REQUIRE(((unsigned int *)mem.getPointer())[1] < vcl.size()); BOOST_REQUIRE(((unsigned int *)mem.getPointer())[1] < vcl.size());
mem2.allocate(2*sizeof(unsigned int)); mem2.allocate(2*sizeof(unsigned int));
test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer()); test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
mem2.deviceToHost();
tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1]; tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1];
vd.resize(tot); vd.resize(tot);
...@@ -132,7 +138,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb ...@@ -132,7 +138,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1]) if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1])
{ {
if (vcl.rank() == ((unsigned int *)mem.getPointer())[2]) if (vcl.rank() == ((unsigned int *)mem.getPointer())[1])
{ {
BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] != 0); BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] != 0);
BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] == 0); BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] == 0);
......
...@@ -448,12 +448,17 @@ protected: ...@@ -448,12 +448,17 @@ protected:
reorder_geo_cell(); reorder_geo_cell();
} }
/*! \brief in this function we reorder the cells by processors /*! \brief in this function we reorder the list in each cells by processor id
* *
* In practice every processor in the list is ordered. the geo_cell give * suppose in one cell we have 7 boxes each box contain the processor id
* *
* 7 boxes the first 2 boxes are related to processor 0 and the next 2 to processor 4, the other 3 must me related * 1,5,9,5,1,1,6
* to another processor different from 0 and 4. This simplify the procedure to get a unique list of processor ids *
* after reorder we have the following sequence
*
* 1,1,1,5,5,6,9
*
* This simplify the procedure to get a unique list of processor ids
* indicating on which processor a particle must be replicated as ghost * indicating on which processor a particle must be replicated as ghost
* *
*/ */
......
...@@ -108,29 +108,39 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca ...@@ -108,29 +108,39 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
mem.fill(0); mem.fill(0);
prc_offset.resize(v_cl.size()); prc_offset.resize(v_cl.size());
ite = g_opart_device.getGPUIterator();
// Find the buffer bases // Find the buffer bases
find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())><<<ite.wthr,ite.thr>>> find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())><<<ite.wthr,ite.thr>>>
(g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel()); (g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());
// Transfer the number of offsets to the CPU // Transfer the number of offsets to the CPU
mem.deviceToHost(); mem.deviceToHost();
prc_offset.template deviceToHost<0,1>();
if (g_opart_device.size() != 0)
{g_opart_device.template deviceToHost<0>(g_opart_device.size()-1,g_opart_device.size()-1);}
int noff = *(int *)mem.getPointer(); int noff = *(int *)mem.getPointer();
// In this case we do not have communications at all // create the terminal of prc_offset
if (g_opart_device.size() == 0) prc_offset.resize(noff+1,DATA_ON_DEVICE);
{noff = -1;}
prc_offset.resize(noff+1); // Move the last processor index on device (id)
if (g_opart_device.size() != 0)
{g_opart_device.template deviceToHost<0>(g_opart_device.size()-1,g_opart_device.size()-1);}
prc_offset.template get<0>(prc_offset.size()-1) = g_opart_device.size(); prc_offset.template get<0>(prc_offset.size()-1) = g_opart_device.size();
if (g_opart_device.size() != 0) if (g_opart_device.size() != 0)
{prc_offset.template get<1>(prc_offset.size()-1) = g_opart_device.template get<0>(g_opart_device.size()-1);} {prc_offset.template get<1>(prc_offset.size()-1) = g_opart_device.template get<0>(g_opart_device.size()-1);}
else else
{prc_offset.template get<1>(prc_offset.size()-1) = 0;} {prc_offset.template get<1>(prc_offset.size()-1) = 0;}
prc_offset.template hostToDevice<0,1>(prc_offset.size()-1,prc_offset.size()-1);
// Here we reorder the offsets in ascending order
mergesort((int *)prc_offset.template getDeviceBuffer<0>(),(int *)prc_offset.template getDeviceBuffer<1>(), prc_offset.size(), mgpu::template less_t<int>(), v_cl.getmgpuContext());
prc_offset.template deviceToHost<0,1>();
// In this case we do not have communications at all
if (g_opart_device.size() == 0)
{noff = -1;}
prc.resize(noff+1); prc.resize(noff+1);
prc_sz.resize(noff+1); prc_sz.resize(noff+1);
...@@ -236,9 +246,9 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true> ...@@ -236,9 +246,9 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
auto ite = v_pos.getGPUIteratorTo(g_m); auto ite = v_pos.getGPUIteratorTo(g_m);
// label particle processor // label particle processor
num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())> num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),v_pos.toKernel(),o_part_loc.toKernel()); (box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),g_m);
starts.resize(o_part_loc.size()); starts.resize(o_part_loc.size());
mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext()); mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());
......
...@@ -98,9 +98,9 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles ) ...@@ -98,9 +98,9 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
v_prp.hostToDevice<0,1,2>(); v_prp.hostToDevice<0,1,2>();
// label particle processor // label particle processor
num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())> num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),v_pos.toKernel(),o_part_loc.toKernel()); (box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),v_pos.size());
o_part_loc.deviceToHost<0>(); o_part_loc.deviceToHost<0>();
...@@ -734,11 +734,16 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use ) ...@@ -734,11 +734,16 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
proc_id_out.resize(vg.size()); proc_id_out.resize(vg.size());
openfpm::vector_gpu<aggregate<int,int,int>> dev_counter; openfpm::vector_gpu<aggregate<int,int,int>> dev_counter;
dev_counter.resize(10);
dev_counter.fill<0>(0);
dev_counter.fill<1>(0);
dev_counter.fill<2>(0);
process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())> process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>
<<<ite.wthr,ite.thr>>> <<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank()); (dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank());
proc_id_out.deviceToHost<0>(); proc_id_out.deviceToHost<0>();
bool match = true; bool match = true;
...@@ -852,33 +857,33 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -852,33 +857,33 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
for (int i = 0 ; i < 10000 ; i++) // <------ particle id for (int i = 0 ; i < 10000 ; i++) // <------ particle id
{ {
v_pos.template get<0>(i)[0] = i; v_pos_out.template get<0>(i)[0] = i;
v_pos.template get<0>(i)[1] = i+10000; v_pos_out.template get<0>(i)[1] = i+10000;
v_pos.template get<0>(i)[2] = i+20000; v_pos_out.template get<0>(i)[2] = i+20000;
v_prp.template get<0>(i)[0] = i+60123; v_prp_out.template get<0>(i)[0] = i+60123;
v_prp.template get<0>(i)[1] = i+73543; v_prp_out.template get<0>(i)[1] = i+73543;
v_prp.template get<0>(i)[2] = i+82432; v_prp_out.template get<0>(i)[2] = i+82432;
v_prp.template get<1>(i)[0] = i+80123; v_prp_out.template get<1>(i)[0] = i+80123;
v_prp.template get<1>(i)[1] = i+93543; v_prp_out.template get<1>(i)[1] = i+93543;
v_prp.template get<1>(i)[2] = i+102432; v_prp_out.template get<1>(i)[2] = i+102432;
v_prp.template get<2>(i)[0] = i+110123; v_prp_out.template get<2>(i)[0] = i+110123;
v_prp.template get<2>(i)[1] = i+123543; v_prp_out.template get<2>(i)[1] = i+123543;
v_prp.template get<2>(i)[2] = i+132432; v_prp_out.template get<2>(i)[2] = i+132432;
v_prp_out.template get<0>(i)[0] = 0; v_prp.template get<0>(i)[0] = 0;
v_prp_out.template get<0>(i)[1] = 0; v_prp.template get<0>(i)[1] = 0;
v_prp_out.template get<0>(i)[2] = 0; v_prp.template get<0>(i)[2] = 0;
v_prp_out.template get<1>(i)[0] = 0; v_prp.template get<1>(i)[0] = 0;
v_prp_out.template get<1>(i)[1] = 0; v_prp.template get<1>(i)[1] = 0;
v_prp_out.template get<1>(i)[2] = 0; v_prp.template get<1>(i)[2] = 0;
v_prp_out.template get<2>(i)[0] = 0; v_prp.template get<2>(i)[0] = 0;
v_prp_out.template get<2>(i)[1] = 0; v_prp.template get<2>(i)[1] = 0;
v_prp_out.template get<2>(i)[2] = 0; v_prp.template get<2>(i)[2] = 0;
ns_to_s.template get<0>(i) = 10000-i-1; ns_to_s.template get<0>(i) = 10000-i-1;
} }
...@@ -895,7 +900,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -895,7 +900,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
v_pos_out.toKernel(),v_prp_out.toKernel(), v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel()); ns_to_s.toKernel());
v_prp_out.template deviceToHost<0,1,2>(); v_prp.template deviceToHost<0,1,2>();
bool match = true; bool match = true;
for (int i = 0 ; i < 10000 ; i++) // <------ particle id for (int i = 0 ; i < 10000 ; i++) // <------ particle id
...@@ -904,13 +909,13 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -904,13 +909,13 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1]; match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2]; match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
match &= v_prp_out.template get<1>(10000-i-1)[0] == 0; match &= v_prp.template get<1>(10000-i-1)[0] == 0;
match &= v_prp_out.template get<1>(10000-i-1)[1] == 0; match &= v_prp.template get<1>(10000-i-1)[1] == 0;
match &= v_prp_out.template get<1>(10000-i-1)[2] == 0; match &= v_prp.template get<1>(10000-i-1)[2] == 0;
match &= v_prp_out.template get<2>(10000-i-1)[0] == 0; match &= v_prp.template get<2>(10000-i-1)[0] == 0;
match &= v_prp_out.template get<2>(10000-i-1)[1] == 0; match &= v_prp.template get<2>(10000-i-1)[1] == 0;
match &= v_prp_out.template get<2>(10000-i-1)[2] == 0; match &= v_prp.template get<2>(10000-i-1)[2] == 0;
} }
BOOST_REQUIRE_EQUAL(match,true); BOOST_REQUIRE_EQUAL(match,true);
...@@ -919,8 +924,8 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -919,8 +924,8 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
v_pos_out.toKernel(),v_prp_out.toKernel(), v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel()); ns_to_s.toKernel());
v_prp_out.template deviceToHost<0,1,2>(); v_prp.template deviceToHost<0,1,2>();
v_pos_out.template deviceToHost<0>(); v_pos.template deviceToHost<0>();
for (int i = 0 ; i < 10000 ; i++) // <------ particle id for (int i = 0 ; i < 10000 ; i++) // <------ particle id
{ {
...@@ -937,9 +942,9 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -937,9 +942,9 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2]; match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
match &= v_pos_out.template get<0>(10000-i-1)[0] == 0; match &= v_pos.template get<0>(10000-i-1)[0] == 0;
match &= v_pos_out.template get<0>(10000-i-1)[1] == 0; match &= v_pos.template get<0>(10000-i-1)[1] == 0;
match &= v_pos_out.template get<0>(10000-i-1)[2] == 0; match &= v_pos.template get<0>(10000-i-1)[2] == 0;
} }
BOOST_REQUIRE_EQUAL(match,true); BOOST_REQUIRE_EQUAL(match,true);
...@@ -948,11 +953,13 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -948,11 +953,13 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
v_pos_out.toKernel(),v_prp_out.toKernel(), v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel()); ns_to_s.toKernel());
v_prp_out.template deviceToHost<0,1,2>(); v_prp.template deviceToHost<0,1,2>();
v_pos_out.template deviceToHost<0>(); v_pos.template deviceToHost<0>();
for (int i = 0 ; i < 10000 ; i++) // <------ particle id for (int i = 0 ; i < 10000 ; i++) // <------ particle id
{ {
match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0]; match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1]; match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2]; match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
...@@ -1010,8 +1017,8 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test) ...@@ -1010,8 +1017,8 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
v_prp.template get<2>(i)[2][2] = 340.0 + (float)rand()/RAND_MAX; v_prp.template get<2>(i)[2][2] = 340.0 + (float)rand()/RAND_MAX;
int seg = i / 10000; int seg = i / 10000;
m_opart.template get<0>(i) = seg; m_opart.template get<1>(i) = seg;
m_opart.template get<1>(i) = (9999 - i%10000) + seg * 10000; m_opart.template get<0>(i) = (9999 - i%10000) + seg * 10000;
} }
m_pos.resize(10); m_pos.resize(10);
...@@ -1045,24 +1052,24 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test) ...@@ -1045,24 +1052,24 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
for (size_t j = 0 ; j < m_pos.get(i).size() ; j++) for (size_t j = 0 ; j < m_pos.get(i).size() ; j++)
{ {
match &= (m_pos.get(i).template get<0>(j)[0] == v_pos.template get<0>(m_opart.template get<1>(offset+j))[0]); match &= (m_pos.get(i).template get<0>(j)[0] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[0]);
match &= (m_pos.get(i).template get<0>(j)[1] == v_pos.template get<0>(m_opart.template get<1>(offset+j))[1]); match &= (m_pos.get(i).template get<0>(j)[1] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[1]);
match &= (m_pos.get(i).template get<0>(j)[2] == v_pos.template get<0>(m_opart.template get<1>(offset+j))[2]); match &= (m_pos.get(i).template get<0>(j)[2] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[2]);
match &= (m_prp.get(i).template get<0>(j) == v_prp.template get<0>(m_opart.template get<1>(offset+j))); match &= (m_prp.get(i).template get<0>(j) == v_prp.template get<0>(m_opart.template get<0>(offset+j)));
match &= (m_prp.get(i).template get<1>(j)[0] == v_prp.template get<1>(m_opart.template get<1>(offset+j))[0]); match &= (m_prp.get(i).template get<1>(j)[0] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[0]);
match &= (m_prp.get(i).template get<1>(j)[1] == v_prp.template get<1>(m_opart.template get<1>(offset+j))[1]); match &= (m_prp.get(i).template get<1>(j)[1] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[1]);
match &= (m_prp.get(i).template get<2>(j)[0][0] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[0][0]); match &= (m_prp.get(i).template get<2>(j)[0][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][0]);
match &= (m_prp.get(i).template get<2>(j)[0][1] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[0][1]); match &= (m_prp.get(i).template get<2>(j)[0][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][1]);
match &= (m_prp.get(i).template get<2>(j)[0][2] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[0][2]); match &= (m_prp.get(i).template get<2>(j)[0][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][2]);
match &= (m_prp.get(i).template get<2>(j)[1][0] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[1][0]); match &= (m_prp.get(i).template get<2>(j)[1][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][0]);
match &= (m_prp.get(i).template get<2>(j)[1][1] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[1][1]); match &= (m_prp.get(i).template get<2>(j)[1][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][1]);
match &= (m_prp.get(i).template get<2>(j)[1][2] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[1][2]); match &= (m_prp.get(i).template get<2>(j)[1][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][2]);
match &= (m_prp.get(i).template get<2>(j)[2][0] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[2][0]); match &= (m_prp.get(i).template get<2>(j)[2][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][0]);
match &= (m_prp.get(i).template get<2>(j)[2][1] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[2][1]); match &= (m_prp.get(i).template get<2>(j)[2][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][1]);
match &= (m_prp.get(i).template get<2>(j)[2][2] == v_prp.template get<2>(m_opart.template get<1>(offset+j))[2][2]); match &= (m_prp.get(i).template get<2>(j)[2][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][2]);
} }
BOOST_REQUIRE_EQUAL(match,true); BOOST_REQUIRE_EQUAL(match,true);
......
...@@ -63,7 +63,7 @@ __global__ void merge_sort_part(vector_pos_type vd_pos, vector_prp_type vd_prp, ...@@ -63,7 +63,7 @@ __global__ void merge_sort_part(vector_pos_type vd_pos, vector_prp_type vd_prp,
vd_pos.template set<0>(p,v_pos_ord,nss.template get<0>(p)); vd_pos.template set<0>(p,v_pos_ord,nss.template get<0>(p));
} }
vd_prp.template set<prp...>(p,vd_prp_ord,nss.template get<0>(p)); vd_prp.template set<prp ...>(p,vd_prp_ord,nss.template get<0>(p));
} }
template<unsigned int dim, typename St, typename cartdec_gpu, typename particles_type, typename vector_out, typename prc_sz_type> template<unsigned int dim, typename St, typename cartdec_gpu, typename particles_type, typename vector_out, typename prc_sz_type>
...@@ -99,7 +99,7 @@ __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs ...@@ -99,7 +99,7 @@ __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs
{ {
int i = atomicAdd(cnt, 1); int i = atomicAdd(cnt, 1);
offs.template get<0>(i) = p+1; offs.template get<0>(i) = p+1;
offs.template get<1>(i) = vd.template get<1>(p); offs.template get<1>(i) = vd.template get<prp_off>(p);
} }
} }
...@@ -184,12 +184,13 @@ __global__ void process_ghost_particles_local(vector_g_opart_type g_opart, vecto ...@@ -184,12 +184,13 @@ __global__ void process_ghost_particles_local(vector_g_opart_type g_opart, vecto
v_prp.set(base+i,v_prp.get(pid)); v_prp.set(base+i,v_prp.get(pid));
} }
template<unsigned int dim, typename St, typename vector_of_box, typename vector_type, typename output_type>