Skip to content
Snippets Groups Projects
Commit 91ac2cc0 authored by Pietro Incardona's avatar Pietro Incardona
Browse files

Adding test for convolution on GPU

parent 9a3e1191
No related branches found
No related tags found
No related merge requests found
Pipeline #1881 failed
openfpm_data @ 66a3a95d
Subproject commit e91db651a61479ae33ef2a66c3cfce70315e16b6
Subproject commit 66a3a95d0f1868b244555f70dfdd579ed3f1ee1f
......@@ -209,9 +209,9 @@ class grid_dist_iterator_sub
* \return the actual key
*
*/
inline grid_dist_key_dx<dim> get()
inline grid_dist_key_dx<dim,typename device_grid::base_key> get()
{
return grid_dist_key_dx<dim>(g_c,a_it.get());
return grid_dist_key_dx<dim,typename device_grid::base_key>(g_c,a_it.get());
}
/*! \brief Convert a g_dist_key_dx into a global key
......
......@@ -2551,7 +2551,7 @@ public:
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, unsigned int N, typename lambda_f, typename ... ArgsT >
void conv2(int (& stencil)[N][dim], grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
void conv2(int (& stencil)[N][dim], grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
......@@ -2575,6 +2575,44 @@ public:
}
}
/*! \brief Apply a two-source / two-destination convolution on a sub-range of the distributed grid
 *
 * For every local grid the requested range [start,stop] is shifted by the origin
 * stored in the matching gdb_ext entry, clipped against that entry's Dbox, and,
 * when the clipped range is not empty, forwarded to the local grid's conv2.
 *
 * \tparam prop_src1,prop_src2 forwarded unchanged to the local grid conv2 (presumably the source properties)
 * \tparam prop_dst1,prop_dst2 forwarded unchanged to the local grid conv2 (presumably the destination properties)
 * \tparam stencil_size forwarded unchanged to the local grid conv2
 *
 * \param start first corner of the requested range
 * \param stop second corner of the requested range
 * \param func functor forwarded to the local grid conv2
 * \param args additional arguments forwarded after func
 *
 */
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv2(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
	for (int g = 0 ; g < loc_grid.size() ; ++g)
	{
		// Requested range re-expressed relative to the origin of local grid g
		Box<dim,long int> requested;

		for (int d = 0 ; d < dim ; ++d)
		{
			const long int shift = (long int)gdb_ext.get(g).origin.get(d);

			requested.setLow(d,(long int)start.get(d) - shift);
			requested.setHigh(d,(long int)stop.get(d) - shift);
		}

		Box<dim,long int> local_domain = gdb_ext.get(g).Dbox;
		Box<dim,long int> clipped;

		const bool has_overlap = local_domain.Intersect(requested,clipped);

		// Nothing of the requested range falls into this local grid
		if (has_overlap != true)
		{continue;}

		loc_grid.get(g).template conv2<prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size>(clipped.getKP1(),clipped.getKP2(),func,args...);
	}
}
/*! \brief Call findNeighbours() on each local grid
 *
 * \tparam NNtype not referenced by this body
 *         NOTE(review): confirm whether it is meant to be forwarded to the local grid call
 *
 */
template<typename NNtype>
void findNeighbours()
{
	for (int g = 0 ; g < loc_grid.size() ; ++g)
	{loc_grid.get(g).findNeighbours();}
}
/*! \brief apply a convolution using the stencil N
*
*
......
......@@ -287,9 +287,6 @@ void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2])
++it2;
}
gdist.write("after_ghost");
gdist.getDecomposition().write("sgrid_dec");
BOOST_REQUIRE_EQUAL(match,true);
}
......@@ -309,5 +306,82 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
}
/*! Test conv2 on a 2D periodic distributed sparse grid on GPU
 *
 * Properties 0 and 1 are filled on device through insert_kernel2D, a 4-neighbour
 * difference is written into properties 2 and 3 with conv2, and the result is
 * checked on the host over the range {2,2}-{sz-2,sz-2}.
 */
BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_test )
{
	size_t sz[2] = {164,164};
	periodicity<2> bc = {PERIODIC,PERIODIC};

	Ghost<2,long int> g(1);

	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	sgrid_dist_id_gpu<2,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);

	gdist.template setBackgroundValue<0>(666);
	gdist.template setBackgroundValue<1>(666);
	gdist.template setBackgroundValue<2>(666);
	gdist.template setBackgroundValue<3>(666);

	/////// GPU insert + flush

	Box<2,size_t> box({1,1},{sz[0],sz[1]});

	/////// GPU Run kernel

	float c = 5.0;

	// Fill property 0 with parameter c
	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
	gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
	gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);

	// Fill property 1 with parameter c+1000
	auto it2 = gdist.getGridIterator(box.getKP1(),box.getKP2());
	gdist.template iterateGridGPU<insert_kernel2D<1>>(it2,c+1000);
	gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);

	// The stencil below reads the +-1 neighbours, so the ghost must be filled first
	gdist.template ghost_get<0,1>(RUN_ON_DEVICE);

	// Now run the convolution

	typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;

	gdist.template conv2<0,1,2,3,1>({2,2},{(int)sz[0]-2,(int)sz[1]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j){
		u_out = u(i+1,j) - u(i-1,j) + u(i,j+1) - u(i,j-1);
		v_out = v(i+1,j) - v(i-1,j) + v(i,j+1) - v(i,j-1);
	});

	gdist.deviceToHost<0,1,2,3>();

	// Now we check that the convolution output is correct

	auto it3 = gdist.getSubDomainIterator({2,2},{(int)sz[0]-2,(int)sz[1]-2});

	bool match = true;

	while (it3.isNext())
	{
		auto p = it3.get();

		// x-neighbours, only used for the diagnostic output below
		auto p_xp1 = p.move(0,1);
		auto p_xm1 = p.move(0,-1);

		float sub1 = gdist.template get<2>(p);
		float sub2 = gdist.template get<3>(p);

		// Expected values depend on what insert_kernel2D writes (not shown in this file section)
		if (sub1 != 2.0 || sub2 != 4.0)
		{
			std::cout << sub1 << " " << sub2 << std::endl;
			std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
			std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;

			// Record the failure: without this the final requirement below can never fail
			match = false;
			break;
		}

		++it3;
	}

	BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment