Commit 91ac2cc0 authored by incardon's avatar incardon
Browse files

Adding test for convolution on GPU

parent 9a3e1191
Pipeline #1881 failed with stages
in 0 seconds
openfpm_data @ 66a3a95d
Subproject commit e91db651a61479ae33ef2a66c3cfce70315e16b6
Subproject commit 66a3a95d0f1868b244555f70dfdd579ed3f1ee1f
......@@ -209,9 +209,9 @@ class grid_dist_iterator_sub
* \return the actual key
*
*/
inline grid_dist_key_dx<dim> get()
inline grid_dist_key_dx<dim,typename device_grid::base_key> get()
{
return grid_dist_key_dx<dim>(g_c,a_it.get());
return grid_dist_key_dx<dim,typename device_grid::base_key>(g_c,a_it.get());
}
/*! \brief Convert a g_dist_key_dx into a global key
......
......@@ -2551,7 +2551,7 @@ public:
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, unsigned int N, typename lambda_f, typename ... ArgsT >
void conv2(int (& stencil)[N][dim], grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
void conv2(int (& stencil)[N][dim], grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
......@@ -2575,6 +2575,44 @@ public:
}
}
/*! \brief apply a convolution using the stencil N
*
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv2(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
Box<dim,long int> inte;
Box<dim,long int> base;
for (int j = 0 ; j < dim ; j++)
{
base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j));
base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j));
}
Box<dim,long int> dom = gdb_ext.get(i).Dbox;
bool overlap = dom.Intersect(base,inte);
if (overlap == true)
{
loc_grid.get(i).template conv2<prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size>(inte.getKP1(),inte.getKP2(),func,args...);
}
}
}
template<typename NNtype>
void findNeighbours()
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
loc_grid.get(i).findNeighbours();
}
}
/*! \brief apply a convolution using the stencil N
*
*
......
......@@ -287,9 +287,6 @@ void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2])
++it2;
}
gdist.write("after_ghost");
gdist.getDecomposition().write("sgrid_dec");
BOOST_REQUIRE_EQUAL(match,true);
}
......@@ -309,5 +306,82 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
}
BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_test )
{
size_t sz[2] = {164,164};
periodicity<2> bc = {PERIODIC,PERIODIC};
Ghost<2,long int> g(1);
Box<2,float> domain({0.0,0.0},{1.0,1.0});
sgrid_dist_id_gpu<2,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
gdist.template setBackgroundValue<0>(666);
gdist.template setBackgroundValue<1>(666);
gdist.template setBackgroundValue<2>(666);
gdist.template setBackgroundValue<3>(666);
/////// GPU insert + flush
Box<2,size_t> box({1,1},{sz[0],sz[1]});
/////// GPU Run kernel
float c = 5.0;
auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
auto it2 = gdist.getGridIterator(box.getKP1(),box.getKP2());
gdist.template iterateGridGPU<insert_kernel2D<1>>(it2,c+1000);
gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
// Now run the convolution
typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
gdist.template conv2<0,1,2,3,1>({2,2},{(int)sz[0]-2,(int)sz[1]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j){
u_out = u(i+1,j) - u(i-1,j) + u(i,j+1) - u(i,j-1);
v_out = v(i+1,j) - v(i-1,j) + v(i,j+1) - v(i,j-1);
});
gdist.deviceToHost<0,1,2,3>();
// Now we check that ghost is correct
auto it3 = gdist.getSubDomainIterator({2,2},{(int)sz[0]-2,(int)sz[1]-2});
bool match = true;
while (it3.isNext())
{
auto p = it3.get();
auto p_xp1 = p.move(0,1);
auto p_xm1 = p.move(0,-1);
auto p_yp1 = p.move(1,1);
auto p_ym1 = p.move(1,-1);
float sub1 = gdist.template get<2>(p);
float sub2 = gdist.template get<3>(p);
if (sub1 != 2.0 || sub2 != 4.0)
{
std::cout << sub1 << " " << sub2 << std::endl;
std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
break;
}
++it3;
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment