From b24aab2ca22d3d360230a2f9485a9c80383a62dd Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Wed, 8 Jun 2022 23:09:35 +0200 Subject: [PATCH] Adding conv_3 --- example/Performance/memBW/Makefile | 6 +- src/Grid/grid_dist_id.hpp | 31 ++++++ .../tests/sgrid_dist_id_gpu_unit_tests.cu | 103 ++++++++++++++++++ 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/example/Performance/memBW/Makefile b/example/Performance/memBW/Makefile index e78251aa5..bf092b814 100644 --- a/example/Performance/memBW/Makefile +++ b/example/Performance/memBW/Makefile @@ -14,19 +14,19 @@ ifdef HIP CUDA_CC_LINK=hipcc else ifdef CUDA_ON_CPU - CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) + CUDA_CC=mpic++ -g -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= CUDA_CC_LINK=mpic++ CUDA_OPTIONS=-D__NVCC__ -DCUDART_VERSION=11000 LIBS_SELECT=$(LIBS) else ifeq (, $(shell which nvcc)) - CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) + CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= CUDA_CC_LINK=mpic++ LIBS_SELECT=$(LIBS) else - CUDA_CC=nvcc -ccbin=mpic++ + CUDA_CC=nvcc -g -ccbin=mpic++ CUDA_CC_LINK=nvcc -ccbin=mpic++ LIBS_SELECT=$(LIBS_NVCC) endif diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index fb5383adb..428d38c4c 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -2834,6 +2834,37 @@ public: } } + /*! \brief apply a convolution on 2 property on GPU + * + * + */ + template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_src3, + unsigned int prop_dst1, unsigned int prop_dst2, unsigned int prop_dst3, + unsigned int stencil_size, typename lambda_f, typename ... ArgsT > + void conv3_b(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args) + { + for (int i = 0 ; i < loc_grid.size() ; i++) + { + Box<dim,long int> inte; + + Box<dim,long int> base; + for (int j = 0 ; j < dim ; j++) + { + base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j)); + base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j)); + } + + Box<dim,long int> dom = gdb_ext.get(i).Dbox; + + bool overlap = dom.Intersect(base,inte); + + if (overlap == true) + { + loc_grid.get(i).template conv3_b<prop_src1,prop_src2,prop_src3,prop_dst1,prop_dst2,prop_dst3,stencil_size>(inte.getKP1(),inte.getKP2(),func,args...); + } + } + } + template<typename NNtype> void findNeighbours() { diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index 3e516415a..a0582fd43 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -957,6 +957,109 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_b_test_3d ) BOOST_REQUIRE_EQUAL(match,true); } +BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv3_b_test_3d ) +{ + #ifdef CUDA_ON_CPU + size_t sz[3] = {20,20,20}; + #else + size_t sz[3] = {60,60,60}; + #endif + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + Ghost<3,long int> g(1); + + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + + sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float,float,float>> gdist(sz,domain,g,bc); + + gdist.template setBackgroundValue<0>(666); + gdist.template setBackgroundValue<1>(666); + gdist.template setBackgroundValue<2>(666); + gdist.template setBackgroundValue<3>(666); + gdist.template setBackgroundValue<4>(666); + gdist.template setBackgroundValue<5>(666); + + /////// GPU insert + flush + + Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1}); + + /////// GPU Run kernel + + float c = 5.0; + + typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT; + + gdist.addPoints(box.getKP1(),box.getKP2(), + [] __device__ (int i, int j, int k) + { + return true; + }, + [c] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<0>() = c + i + j + k; + data.template get<1>() = c + 1000 + i + j + k; + data.template get<2>() = c + 10000 + i + j + k; + } + ); + + gdist.template flush<smax_<0>,smax_<1>,smax_<2>>(flush_type::FLUSH_ON_DEVICE); + + gdist.template ghost_get<0,1,2>(RUN_ON_DEVICE); + + for (int i = 0 ; i < 10 ; i++) + { + gdist.template ghost_get<0,1,2>(RUN_ON_DEVICE); + } + + // Now run the convolution + + typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType; + + gdist.template conv3_b<0,1,2,3,4,5,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (float & u_out, float & v_out, float & m_out, CpBlockType & u, CpBlockType & v , CpBlockType & m, auto & block, int offset,int i, int j, int k){ + u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1) + block.template get<0>()[offset]; + v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1) + block.template get<1>()[offset]; + m_out = m(i+1,j,k) - m(i-1,j,k) + m(i,j+1,k) - m(i,j-1,k) + m(i,j,k+1) - m(i,j,k-1) + block.template get<2>()[offset]; + }); + + gdist.deviceToHost<0,1,2,3,4,5>(); + + // Now we check that ghost is correct + + auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2}); + + bool match = true; + + while (it3.isNext()) + { + auto p = it3.get(); + + auto p_xp1 = p.move(0,1); + auto p_xm1 = p.move(0,-1); + auto p_yp1 = p.move(1,1); + auto p_ym1 = p.move(1,-1); + auto p_zp1 = p.move(2,1); + auto p_zm1 = p.move(2,-1); + + float sub1 = gdist.template get<3>(p); + float sub2 = gdist.template get<4>(p); + float sub3 = gdist.template get<5>(p); + + if (sub1 != 6.0 + gdist.template get<0>(p) || sub2 != 6.0 + gdist.template get<1>(p) || sub3 != 6.0 + gdist.template get<2>(p)) + { + std::cout << sub1 << " " << sub2 << " " << sub3 << std::endl; + std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl; + std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl; + std::cout << gdist.template get<2>(p_xp1) << " " << gdist.template get<2>(p_xm1) << std::endl; + match = false; + break; + } + + ++it3; + } + + BOOST_REQUIRE_EQUAL(match,true); +} + BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove ) { size_t sz[3] = {60,60,60}; -- GitLab