Commit acabde02 authored by incardon

SparseGridGpu integration work, starting ghost_get

parent 94fea38d
......@@ -78,14 +78,17 @@ int main(int argc, char* argv[])
++it;
}
/* coverity[fun_call_w_exception] */
vd.write("Vector/vector_before_map",CSV_WRITER);
vd.map();
/* coverity[fun_call_w_exception] */
vd.write("Vector/vector_after_map",CSV_WRITER);
vd.ghost_get<0>();
/* coverity[fun_call_w_exception] */
vd.write("Vector/vector_ghost_fill",CSV_WRITER);
vd.getDecomposition().write("Vector/vect_decomposition");
......
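For orientation: the hunk above interleaves CSV snapshots with the standard vector_dist communication cycle. A minimal hedged recap of that cycle (vd and property index 0 as in the example above):

vd.map();          // move each particle to the processor that owns its position
vd.ghost_get<0>(); // fetch property 0 of the particles in the ghost layer
vd.write("Vector/vector_ghost_fill",CSV_WRITER); // snapshot after the exchange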
openfpm_data @ b4e88176
Subproject commit 1235bd2fee3b8e37245bc7b8b4cf509242202a1a
Subproject commit b4e88176c97436ccca4a5b1d47d8a43f59946418
......@@ -108,6 +108,8 @@ struct Box_sub
{
r_sub = (size_t)-1;
cmb.zero();
sub = (size_t)-1;
}
};
......
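The hunk above extends the reset so that sub, like r_sub, starts at the (size_t)-1 "unset" sentinel instead of staying uninitialized. A minimal sketch of the resulting invariant (the enclosing method signature is not visible in the diff; the reduced struct below is hypothetical):

struct Box_sub_sketch // hypothetical reduction of Box_sub
{
    size_t r_sub = (size_t)-1; // remote sub-domain id, (size_t)-1 == unset
    size_t sub   = (size_t)-1; // local sub-domain id, (size_t)-1 == unset
    // cmb (the sector combination) is zeroed alongside in the real struct
};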
......@@ -51,7 +51,7 @@ __device__ __host__ inline int processorID_impl(T2 & p,
#endif
/* coverity[negative_returns] */
// coverity[negative_returns]
return sub_domains_global.template get<1>(e);
}
......
......@@ -52,6 +52,17 @@ struct ite_gpu_dist
{return;}
#define GRID_ID_2_GLOBAL(ite_gpu) grid_key_dx<2,int> key;\
grid_key_dx<2,int> keyg;\
key.set_d(0,threadIdx.x + blockIdx.x * blockDim.x + ite_gpu.start.get(0));\
key.set_d(1,threadIdx.y + blockIdx.y * blockDim.y + ite_gpu.start.get(1));\
\
keyg.set_d(0,key.get(0) + ite_gpu.origin.get(0));\
keyg.set_d(1,key.get(1) + ite_gpu.origin.get(1));\
\
if (key.get(0) > ite_gpu.stop.get(0) || key.get(1) > ite_gpu.stop.get(1))\
{return;}
#endif
template<typename grid_type, typename ite_gpu_type,typename func_t,typename ... args_t>
......
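GRID_ID_2_GLOBAL hands a kernel body two keys at once: key, the thread's coordinate inside the local sub-grid (start plus the 2D thread id), and keyg, the same point shifted by origin into the global frame; threads past stop return early. A minimal hedged usage sketch (the function name and the property write are illustrative, and the init()/flush_block_insert() bookkeeping is omitted; the test kernels further down use the full pattern):

template<typename SparseGridGpu_type, typename ite_type>
__device__ void fill2D(SparseGridGpu_type & sg, ite_type & ite, float c)
{
    GRID_ID_2_GLOBAL(ite); // declares key (local) and keyg (global); out-of-range threads return
    sg.template insert<0>(key) = c + keyg.get(0) + keyg.get(1); // value derived from the global coordinate
}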
......@@ -736,10 +736,6 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi
pib.bid.last().sub_gdb_ext = j;
pib.bid.last().sub = i;
// if (use_bx_def == true)
// {pib.bid.last().k = -1;}
// else
// {pib.bid.last().k = dec.getLocalIGhostE(i,j);}
// these ghosts always lie in quadrant zero
pib.bid.last().cmb.zero();
......@@ -2711,7 +2707,10 @@ public:
ite_gpu_dist<dim> itd = ite;
for (int j = 0 ; j < dim ; j++)
{itd.origin.set_d(j,gdb_ext.get(i).origin.get(j));}
{
itd.origin.set_d(j,gdb_ext.get(i).origin.get(j));
itd.start_base.set_d(j,0);
}
grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),itd,func_t(),args...);
......
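For context, a reconstruction of the launch loop these lines live in (the loop bound is assumed; everything else follows the diff): each local grid gets a copy of the iterator whose origin is shifted into the global frame and whose start_base is reset to zero before the functor is applied.

for (size_t i = 0 ; i < loc_grid.size() ; i++) // loop bound assumed
{
    ite_gpu_dist<dim> itd = ite;
    for (int j = 0 ; j < dim ; j++)
    {
        itd.origin.set_d(j,gdb_ext.get(i).origin.get(j)); // local -> global shift
        itd.start_base.set_d(j,0);                        // index from the local grid's base
    }
    grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),itd,func_t(),args...);
}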
......@@ -969,7 +969,7 @@ public:
size_t req = 0;
// Create a packing request vector
// Calculate the size needed to pack all the data to send
for ( size_t i = 0 ; i < ig_box.size() ; i++ )
{
// for each ghost box
......@@ -986,12 +986,12 @@ public:
g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
// Pack a size_t for the internal ghost id
Packer<size_t,HeapMemory>::packRequest(req);
Packer<size_t,Memory>::packRequest(req);
// Create a sub grid iterator spanning the internal ghost layer
auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2());
// and pack the internal ghost grid
Packer<device_grid,HeapMemory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
// get the size to pack
Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
}
}
......@@ -1029,11 +1029,11 @@ public:
size_t g_id = ig_box.get(i).bid.get(j).g_id;
// Pack a size_t for the internal ghost id
Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
// Create a sub grid iterator spanning the internal ghost layer
auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2());
// and pack the internal ghost grid
Packer<device_grid,HeapMemory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
}
// send the request
......
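The two hunks above swap the serializer's allocator from HeapMemory to the Memory template parameter while keeping the two-phase protocol intact: packRequest accumulates the byte count, then pack serializes into a buffer pre-allocated to that size. A condensed sketch of the protocol (ExtPreAlloc, Pack_stat and the buffer setup are assumptions based on the OpenFPM packing idiom; the Packer calls follow the diff):

size_t req = 0;
Packer<size_t,Memory>::packRequest(req); // phase 1: bytes for the ghost id
Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req); // bytes for the ghost grid
// allocate once, then serialize in the same order
ExtPreAlloc<Memory> prAlloc_prp(req,mem); // mem: a Memory instance holding req bytes (assumed)
Pack_stat sts;
Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts); // phase 2: write the ghost id
Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);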
......@@ -6,66 +6,38 @@
BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite )
template<unsigned int p>
struct insert_kernel
struct insert_kernel2D
{
template<typename SparseGridGpu_type, typename ite_type>
__device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
{
GRID_ID_2_GLOBAL(ite);
sg.init();
const auto bDimX = blockDim.x;
const auto bDimY = blockDim.y;
const auto bDimZ = blockDim.z;
const auto bIdX = blockIdx.x;
const auto bIdY = blockIdx.y;
const auto bIdZ = blockIdx.z;
const auto tIdX = threadIdx.x;
const auto tIdY = threadIdx.y;
const auto tIdZ = threadIdx.z;
int x = bIdX * bDimX + tIdX;
int y = bIdY * bDimY + tIdY;
int z = bIdZ * bDimZ + tIdZ;
if (x+ite.start.get(0) > ite.stop.get(0))
{return;}
if (SparseGridGpu_type::d >= 2 && y+ite.start.get(1) > ite.stop.get(1))
{return;}
if (SparseGridGpu_type::d >= 3 && z+ite.start.get(2) > ite.stop.get(2))
{return;}
grid_key_dx<SparseGridGpu_type::d, size_t> coord;
grid_key_dx<SparseGridGpu_type::d, size_t> coord_glob;
if (SparseGridGpu_type::d >= 2)
{
coord.set_d(0,x+ite.start.get(0));
coord_glob.set_d(0,x+ite.start.get(0)+ite.origin.get(0));
coord.set_d(1,y+ite.start.get(1));
coord_glob.set_d(1,y+ite.start.get(1)+ite.origin.get(1));
}
else if (SparseGridGpu_type::d >= 3)
{
coord.set_d(0,x+ite.start.get(0));
coord_glob.set_d(0,x+ite.start.get(0)+ite.origin.get(0));
coord.set_d(1,y+ite.start.get(1));
coord_glob.set_d(1,y+ite.start.get(1)+ite.origin.get(1));
coord.set_d(2,z+ite.start.get(2));
coord_glob.set_d(2,z+ite.start.get(2)+ite.origin.get(2));
}
if (SparseGridGpu_type::d >= 2)
{sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1);}
else
{sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1) + coord_glob.get(2);}
sg.template insert<p>(key) = c + keyg.get(0) + keyg.get(1);
__syncthreads();
sg.flush_block_insert();
}
};
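Both insert kernels follow the same SparseGridGpu block-insert protocol; the skeleton, extracted for emphasis (value is a placeholder):

sg.init();                          // prepare this block's insert buffer
sg.template insert<p>(key) = value; // per-thread write
__syncthreads();                    // the whole block must arrive before flushing
sg.flush_block_insert();            // commit the block's insertions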
template<unsigned int p>
struct insert_kernel3D
{
template<typename SparseGridGpu_type, typename ite_type>
__device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
{
GRID_ID_3_GLOBAL(ite);
sg.init();
sg.template insert<p>(key) = c + keyg.get(0) + keyg.get(1) + keyg.get(2);
// Avoid compiler warnings about unused variables
y++;
z++;
__syncthreads();
sg.flush_block_insert();
}
};
......@@ -103,7 +75,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
float c = 5.0;
gdist.template iterateGridGPU<insert_kernel<0>>(it,c);
gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
gdist.template deviceToHost<0>();
......@@ -124,12 +96,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
}
else
{
if (gdist.template get<0>(p) != 666.0)
{
float f = gdist.template get<0>(p);
std::cout << "ERROR: " << gdist.template get<0>(p) << std::endl;
}
BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 666.0);
}
......@@ -137,8 +103,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
}
}
return;
//
c = 3.0;
......@@ -148,7 +112,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
auto it3 = gdist.getGridIterator(box3.getKP1(),box3.getKP2());
gdist.setInsertBuffer(128);
gdist.template iterateGridGPU<insert_kernel<0>>(it3,c);
gdist.template iterateGridGPU<insert_kernel2D<0>>(it3,c);
gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
gdist.template deviceToHost<0>();
......@@ -166,11 +130,13 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
if (box.isInside(p2_))
{
BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 5.0);
BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 7.0);
}
else if (box3.isInside(p2_))
{
BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 3.0);
float tst = c + p2.get(0) + p2.get(1);
BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), tst);
}
else
{
......@@ -211,7 +177,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output )
float c = 5.0;
gdist.template iterateGridGPU<insert_kernel<0>>(it,c);
gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
gdist.template deviceToHost<0>();
......@@ -226,4 +192,34 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output )
BOOST_REQUIRE_EQUAL(true,test);
}
BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
{
size_t sz[2] = {17,17};
periodicity<2> bc = {PERIODIC,PERIODIC};
Ghost<2,long int> g(1);
Box<2,float> domain({0.0,0.0},{1.0,1.0});
sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
gdist.template setBackgroundValue<0>(666);
/////// GPU insert + flush
Box<2,size_t> box({1,1},{1,1});
auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
/////// GPU Run kernel
gdist.setInsertBuffer(1);
float c = 5.0;
gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
// gdist.template ghost_get<0>();
}
BOOST_AUTO_TEST_SUITE_END()
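The new sgrid_gpu_test_ghost_get test stops right before the exchange: the ghost_get call is still commented out. A hedged sketch of the continuation one would expect once the exchange works (the expectation stated in the comment is an assumption, not part of the commit):

gdist.template ghost_get<0>();   // currently commented out in the test above
gdist.template deviceToHost<0>();
// A point inserted at {1,1} holds c + 1 + 1; with PERIODIC boundaries and a
// ghost of width 1, the same value should then be readable from the ghost
// layer of the neighboring processors.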
......@@ -29,20 +29,32 @@ void mp_test_template(vector_type & vd0, vector_type & vd1, vector_type & vd2, v
vd2.add();
vd3.add();
// coverity[dont_call]
vd0.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3;
// coverity[dont_call]
vd0.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3;
// coverity[dont_call]
vd0.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3;
// coverity[dont_call]
vd1.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3 + 0.1;
// coverity[dont_call]
vd1.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3 + 0.1;
// coverity[dont_call]
vd1.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3 + 0.1;
// coverity[dont_call]
vd2.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3 + 0.2;
// coverity[dont_call]
vd2.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3 + 0.2;
// coverity[dont_call]
vd2.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3 + 0.2;
// coverity[dont_call]
vd3.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3 + 0.3;
// coverity[dont_call]
vd3.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3 + 0.3;
// coverity[dont_call]
vd3.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3 + 0.3;
}
}
......
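The position initialization above is twelve near-identical statements. A compacted sketch that preserves the rand() call order (a hypothetical refactor, not part of the commit; 0.1*v only approximates the literal offsets in floating point):

vector_type* vds[4] = {&vd0,&vd1,&vd2,&vd3};
for (size_t v = 0 ; v < 4 ; v++)
{
    for (size_t d = 0 ; d < 3 ; d++)
    {
        // coverity[dont_call]
        vds[v]->getLastPos()[d] = ((float)rand())/RAND_MAX * 0.3 + 0.1*v;
    }
}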