From b8e2251bb308666030f72d9c17e1d1c31f29ff04 Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Tue, 3 Dec 2019 02:55:34 +0100 Subject: [PATCH] ghost_get for sgrid_dist_id on GPU working --- openfpm_data | 2 +- src/Grid/grid_dist_id.hpp | 34 +++---- src/Grid/grid_dist_id_comm.hpp | 8 ++ src/Grid/grid_dist_key.hpp | 16 +-- .../tests/sgrid_dist_id_gpu_unit_tests.cu | 98 +++++++++++++++++-- 5 files changed, 115 insertions(+), 43 deletions(-) diff --git a/openfpm_data b/openfpm_data index bcdcfeb82..aab95db8e 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit bcdcfeb82154f588c992b0c36ff1013a87a650e6 +Subproject commit aab95db8e340d0e9a8ce0094c0a5b832e0f05b8d diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index e3dcad616..504683791 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -124,8 +124,8 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi //! Structure that divide the space into cells CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm; - //! size of the insert pool on gpu - size_t gpu_insert_pool_size; + //! number of insert each GPU thread does + size_t gpu_n_insert_thread; //! Communicator class Vcluster<> & v_cl; @@ -2679,21 +2679,24 @@ public: #ifdef __NVCC__ - /*! \brief Set the size of the gpu insert buffer pool + /*! \brief Set the number inserts each GPU thread do * - * Indicate the maximum number of inserts each GPU block can do - * - * \param size of the insert pool + * \param n_ins number of insert per thread * */ - void setInsertBuffer(size_t n_pool) + void setNumberOfInsertPerThread(size_t n_ins) { - gpu_insert_pool_size = n_pool; + gpu_n_insert_thread = n_ins; } template<typename func_t,typename it_t, typename ... args_t> void iterateGridGPU(it_t & it, args_t ... args) { + // setGPUInsertBuffer must be called in anycase even with 0 points to insert + // the loop "it.isNextGrid()" does not guarantee to call it for all local grids + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).setGPUInsertBuffer(0ul,1ul);} + while(it.isNextGrid()) { Box<dim,size_t> b = it.getGridBox(); @@ -2702,7 +2705,7 @@ public: auto ite = loc_grid.get(i).getGridGPUIterator(b.getKP1int(),b.getKP2int()); - loc_grid.get(i).setGPUInsertBuffer(ite.nblocks(),gpu_insert_pool_size); + loc_grid.get(i).setGPUInsertBuffer(ite.nblocks(),ite.nthrs()); loc_grid.get(i).initializeGPUInsertBuffer(); ite_gpu_dist<dim> itd = ite; @@ -2713,23 +2716,12 @@ public: itd.start_base.set_d(j,0); } - grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),itd,func_t(),args...); + CUDA_LAUNCH((grid_apply_functor),ite,loc_grid.get(i).toKernel(),itd,func_t(),args...); it.nextGrid(); } } - template<typename func_t, typename ... args_t> - void iterateGPU(args_t ... args) - { - for (int i = 0 ; i < loc_grid.size() ; i++) - { - auto & sp = loc_grid.get(i); - - // TODO Launch a kernel on every sparse grid GPU - } - } - /*! \brief Move the memory from the device to host memory * */ diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index 2a11a3797..ea419e4e5 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -201,6 +201,9 @@ class grid_dist_id_comm const grid_sm<dim,void> & ginfo, bool use_bx_def) { + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).packReset();} + grid_key_dx<dim> cnt[1]; cnt[0].zero(); @@ -253,6 +256,11 @@ class grid_dist_id_comm } } } + + for (size_t i = 0 ; i < loc_grid.size() ; i++) + { + loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext()); + } } /*! \brief Sync the local ghost part diff --git a/src/Grid/grid_dist_key.hpp b/src/Grid/grid_dist_key.hpp index 96224dc12..666e2bd32 100644 --- a/src/Grid/grid_dist_key.hpp +++ b/src/Grid/grid_dist_key.hpp @@ -9,10 +9,10 @@ template<bool impl, typename grid_key_base, unsigned int dim> class move_impl { public: - static grid_dist_key_dx<dim> move(grid_key_base & key, size_t sub, size_t i, size_t s) + static grid_dist_key_dx<dim,grid_key_base> move(grid_key_base & key, size_t sub, size_t i, int s) { key.set_d(i,key.get(i) + s); - return grid_dist_key_dx<dim>(sub,key); + return grid_dist_key_dx<dim,grid_key_base>(sub,key); } }; @@ -20,7 +20,7 @@ template<typename grid_key_base, unsigned int dim> class move_impl<false,grid_key_base,dim> { public: - static grid_dist_key_dx<dim> move(grid_key_base & key, size_t sub, size_t i, size_t s) + static grid_dist_key_dx<dim> move(grid_key_base & key, size_t sub, size_t i, int s) { std::cout << __FILE__ << ":" << __LINE__ << " Error move a key is not supported" " directly acting on the grid key, please use the move function from the grid method" << std::endl; @@ -119,19 +119,13 @@ public: * \return new key * */ - inline grid_dist_key_dx<dim> move(size_t i,size_t s) const + inline grid_dist_key_dx<dim,base_key> move(size_t i,int s) const { -// std::is_same<base_key,grid_key_dx<dim>>::value - auto key = getKey(); - return move_impl<std::is_same<base_key,grid_key_dx<dim>>::value, + return move_impl<has_set_d<base_key>::value, decltype(this->getKey()), dim>::move(key,getSub(),i,s); - -/* grid_key_dx<dim> key = getKey(); - key.set_d(i,key.get(i) + s); - return grid_dist_key_dx<dim>(getSub(),key);*/ } /*! \brief Create a new key moving the old one diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index eb1ecd752..f4a3d0b87 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -71,8 +71,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base ) /////// GPU Run kernel - gdist.setInsertBuffer(1); - float c = 5.0; gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c); @@ -110,7 +108,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base ) Box<2,size_t> box3({3,3},{11,11}); auto it3 = gdist.getGridIterator(box3.getKP1(),box3.getKP2()); - gdist.setInsertBuffer(128); gdist.template iterateGridGPU<insert_kernel2D<0>>(it3,c); gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE); @@ -173,8 +170,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output ) /////// GPU Run kernel - gdist.setInsertBuffer(128); - float c = 5.0; gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c); @@ -192,9 +187,8 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output ) BOOST_REQUIRE_EQUAL(true,test); } -BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) +void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2]) { - size_t sz[2] = {17,17}; periodicity<2> bc = {PERIODIC,PERIODIC}; Ghost<2,long int> g(1); @@ -207,13 +201,11 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) /////// GPU insert + flush - Box<2,size_t> box({1,1},{15,15}); + Box<2,size_t> box({1,1},{sz2[0],sz2[1]}); auto it = gdist.getGridIterator(box.getKP1(),box.getKP2()); /////// GPU Run kernel - gdist.setInsertBuffer(225); - float c = 5.0; gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c); @@ -225,7 +217,93 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) gdist.template ghost_get<0>(RUN_ON_DEVICE); gdist.template deviceToHost<0>(); + + // Now we check that ghost is correct + + auto it2 = gdist.getDomainIterator(); + + bool match = true; + + while (it2.isNext()) + { + auto p = it2.get(); + + auto key = it2.getGKey(p); + + auto p_xp1 = p.move(0,1); + auto p_xm1 = p.move(0,-1); + auto p_yp1 = p.move(1,1); + auto p_ym1 = p.move(1,-1); + + auto key_xp1 = key.move(0,1); + auto key_xm1 = key.move(0,-1); + auto key_yp1 = key.move(1,1); + auto key_ym1 = key.move(1,-1); + + if (box.isInside(key_xp1.toPoint())) + { + match &= gdist.template get<0>(p_xp1) == c + key_xp1.get(0) + key_xp1.get(1); + + if (match == false) + { + std::cout << gdist.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl; + break; + } + } + + if (box.isInside(key_xm1.toPoint())) + { + match &= gdist.template get<0>(p_xm1) == c + key_xm1.get(0) + key_xm1.get(1); + + if (match == false) + { + std::cout << gdist.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl; + break; + } + } + + if (box.isInside(key_yp1.toPoint())) + { + match &= gdist.template get<0>(p_yp1) == c + key_yp1.get(0) + key_yp1.get(1); + + if (match == false) + { + std::cout << gdist.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl; + break; + } + } + + if (box.isInside(key_ym1.toPoint())) + { + match &= gdist.template get<0>(p_ym1) == c + key_ym1.get(0) + key_ym1.get(1); + + if (match == false) + { + std::cout << gdist.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl; + break; + } + } + + ++it2; + } + gdist.write("after_ghost"); + + BOOST_REQUIRE_EQUAL(match,true); +} + +BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) +{ + size_t sz[2] = {17,17}; + size_t sz6[2] = {15,15}; + sgrid_ghost_get(sz,sz6); + + size_t sz2[2] = {170,170}; + size_t sz3[2] = {15,15}; + sgrid_ghost_get(sz2,sz3); + + size_t sz4[2] = {168,168}; + sgrid_ghost_get(sz2,sz4); } -- GitLab