diff --git a/src/Amr/grid_dist_amr.hpp b/src/Amr/grid_dist_amr.hpp index 56803c19b189f13849751d02a43cc59a0db06fca..6ac740458cbbefa05dc21b0277a4134c7ce6d31b 100644 --- a/src/Amr/grid_dist_amr.hpp +++ b/src/Amr/grid_dist_amr.hpp @@ -115,18 +115,6 @@ class grid_dist_amr }; -/*! \brief It contain the offset necessary to move to coarser and finer level grids - * - */ -template<unsigned int dim> -struct offset_mv -{ - //! offset to move up on an upper grid (coarse) - Point<dim,long int> up; - - //! offset to move on the lower grid (finer) - Point<dim,long int> dw; -}; /*! \brief AMR Adaptive Multi Resolution Grid * @@ -466,6 +454,16 @@ public: return gd_array.get(lvl).getGridIterator(); } + /*! \brief Get an iterator to the grid of the selected level + * \param lvl level; start and stop are forwarded unchanged to getGridIterator(start,stop) of that level + * \return an iterator to the grid + * + */ + auto getGridIterator(size_t lvl, grid_key_dx<dim> & start, grid_key_dx<dim> & stop) -> decltype(gd_array.get(lvl).getGridIterator(start,stop)) + { + return gd_array.get(lvl).getGridIterator(start,stop); + } + #ifdef __NVCC__ /*! \brief Get an iterator to the grid @@ -718,6 +716,41 @@ public: return gd_array.get(lvl).remove(v1); } + /*! \brief construct level connections for padding particles + * + * Every level that has a level lvl+1 builds its down links towards it; the last level builds its up links towards level lvl-1; a single level builds nothing + */ + void construct_level_connections() + { + for (size_t lvl = 0 ; lvl < gd_array.size() ; lvl++) + { + /* every level that has a level lvl+1 below it links down */ + /* (guarding on lvl+1 keeps a single-level hierarchy from reading level 1) */ + if (lvl + 1 < gd_array.size()) + { + gd_array.get(lvl).construct_link_dw(gd_array.get(lvl+1),mv_off.get(lvl)); + } + /* the last level links up to level lvl-1; */ + /* level 0 of a single-level hierarchy has no level lvl-1 and is skipped */ + else if (lvl > 0) + {gd_array.get(lvl).construct_link_up(gd_array.get(lvl-1));} + } + } + + /*! \brief call tagBoundaries on the grid of every level + * + * \tparam stencil_type type of stencil + * + */ + template<typename stencil_type> + void tagBoundaries() + { + for (size_t lvl = 0 ; lvl < gd_array.size() ; lvl++) + { + gd_array.get(lvl).template tagBoundaries<stencil_type>(); + } + } + ////////////////////////////////////// /*! 
\brief It synchronize the ghost parts @@ -918,6 +951,32 @@ public: return ret; } + +#ifdef __NVCC__ + + /*! \brief Move the memory of every level from the device to host memory + * + */ + template<unsigned int ... prp> void deviceToHost() + { + for (size_t i = 0 ; i < gd_array.size() ; i++) + { + gd_array.get(i).template deviceToHost<prp ...>(); + } + } + + /*! \brief Move the memory of every level from the host to device memory + * + */ + template<unsigned int ... prp> void hostToDevice() + { + for (size_t i = 0 ; i < gd_array.size() ; i++) + { + gd_array.get(i).template hostToDevice<prp ...>(); + } + } + +#endif }; template<unsigned int dim, typename St, typename T> diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu index 267d9b1829a4eba4d74dc74062a3707cca9bcc7a..ff23114819bb222c3ab67b987aa2f9281b8e1457 100644 --- a/src/Amr/tests/amr_base_gpu_unit_tests.cu +++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu @@ -191,4 +191,138 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu ) BOOST_REQUIRE_EQUAL(count_c,correct_result_cell);*/ } +BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test ) +{ + auto & v_cl = create_vcluster(); + + // Domain + Box<2,float> domain({0.0,0.0},{1.0,1.0}); + + Ghost<2,long int> g(1); + sgrid_dist_amr_gpu<2,float,aggregate<float>> amr_g(domain,g); + + size_t g_sz[2] = {17,17}; + + size_t n_lvl = 3; + + amr_g.initLevels(n_lvl,g_sz); + + grid_key_dx<2> start({5,5}); + grid_key_dx<2> start_lvl_dw({9,9}); + grid_key_dx<2> stop_lvl_dw({12,12}); + grid_key_dx<2> start_lvl_dw2({19,19}); + grid_key_dx<2> stop_lvl_dw2({23,23}); + + auto it = amr_g.getGridIterator(0,start,start); + auto it2 = amr_g.getGridIterator(1,start_lvl_dw,stop_lvl_dw); + auto it3 = amr_g.getGridIterator(2,start_lvl_dw2,stop_lvl_dw2); +// it.setGPUInsertBuffer(4); + + auto & lvl_0 = amr_g.getDistGrid(0); + auto & lvl_1 = amr_g.getDistGrid(1); + auto & lvl_2 = amr_g.getDistGrid(2); + + // Add points in level 0 + + while (it.isNext()) + { + auto key = it.get_dist(); + + 
lvl_0.template insertFlush<0>(key) = 1.0; + + ++it; + } + + while (it2.isNext()) + { + auto key = it2.get_dist(); + + lvl_1.template insertFlush<0>(key) = 2.0; + + ++it2; + } + + while (it3.isNext()) + { + auto key = it3.get_dist(); + + lvl_2.template insertFlush<0>(key) = 3.0; + + ++it3; + } + + amr_g.hostToDevice<0>(); + amr_g.tagBoundaries<NNStar<2>>(); + amr_g.construct_level_connections(); + + ///////////////////////////////////////////////////////////// + + auto & lvl_zero_d = amr_g.getDistGrid(0); + auto & lvl_one_d = amr_g.getDistGrid(1); + auto & lvl_two_d = amr_g.getDistGrid(2); + + // For each local grid + + for (int i = 0 ; i < lvl_zero_d.getN_loc_grid() ; i++) + { + + // Check + auto & lvl_zero = lvl_zero_d.get_loc_grid(i); + auto & lvl_one = lvl_one_d.get_loc_grid(i); + auto & lvl_two = lvl_two_d.get_loc_grid(i); + + auto & offs_dw_link = lvl_zero.getDownLinksOffsets(); + auto & dw_links = lvl_zero.getDownLinks(); + + BOOST_REQUIRE_EQUAL(offs_dw_link.size(),1); + BOOST_REQUIRE_EQUAL(dw_links.size(),4); + + auto & indexL0 = lvl_zero.private_get_blockMap().getIndexBuffer(); + auto & indexL1 = lvl_one.private_get_blockMap().getIndexBuffer(); + auto & indexL2 = lvl_two.private_get_blockMap().getIndexBuffer(); + + auto & dataL0 = lvl_zero.private_get_blockMap().getDataBuffer(); + auto & dataL1 = lvl_one.private_get_blockMap().getDataBuffer(); + auto & dataL2 = lvl_two.private_get_blockMap().getDataBuffer(); + + dw_links.template deviceToHost<0,1>(); + + BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(0))[dw_links.template get<1>(0)],2); + BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(1))[dw_links.template get<1>(1)],2); + BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(2))[dw_links.template get<1>(2)],2); + BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(3))[dw_links.template get<1>(3)],2); + + auto & offs_dw_link_1 = lvl_one.getDownLinksOffsets(); + auto & dw_links_1 = 
lvl_one.getDownLinks(); + + BOOST_REQUIRE_EQUAL(offs_dw_link_1.size(),12); + BOOST_REQUIRE_EQUAL(dw_links_1.size(),9); + + dw_links_1.template deviceToHost<0,1>(); + + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(0))[dw_links_1.template get<1>(0)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(1))[dw_links_1.template get<1>(1)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(2))[dw_links_1.template get<1>(2)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(3))[dw_links_1.template get<1>(3)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(4))[dw_links_1.template get<1>(4)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(5))[dw_links_1.template get<1>(5)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(6))[dw_links_1.template get<1>(6)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(7))[dw_links_1.template get<1>(7)],3); + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(8))[dw_links_1.template get<1>(8)],3); + } + +/* grid_key_dx<2> k({8,8}); + grid_key_dx<2> k2({16,16}); + + lvl_zero.insertFlush<0>(k) = 1.0; + lvl_one.insertFlush<0>(k2) = 5.0; + + lvl_one.template hostToDevice<0>(); + lvl_one.tagBoundaries(v_cl.getmgpuContext());*/ + + ///////////////////////////////////////////////////////////// + + +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 909a6a881f3e399548f8da496bd706d6b1f7d54a..f7a2c3857982a40cd95f91ae5fc763f0280c01fc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -63,6 +63,9 @@ if (CUDA_FOUND) if (TEST_COVERAGE) target_compile_options(pdata PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage">) endif() + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + # target_compile_options(pdata PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -G>) + endif() 
endif() if(TEST_PERFORMANCE) diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 5046837912f1a84e4ae7c5513b8f3f5b677550b5..5246e6292ec847f5887bc8fd71a1a51feb6f46b1 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -29,6 +29,18 @@ #include "Grid/cuda/grid_dist_id_iterator_gpu.cuh" #endif +/*! \brief It contains the offsets necessary to move to coarser and finer level grids + * + */ +template<unsigned int dim> +struct offset_mv +{ + //! offset to move up on an upper grid (coarse) + Point<dim,long int> up; + + //! offset to move on the lower grid (finer) + Point<dim,long int> dw; +}; //! Internal ghost box sent to construct external ghost box into the other processors template<unsigned int dim> @@ -80,6 +92,8 @@ template<unsigned int dim, typename device_grid=grid_cpu<dim,T> > class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> { + typedef grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> self; + //! Domain Box<dim,St> domain; @@ -2030,7 +2044,8 @@ public: * * In case of dense grid this function is equivalent to get, in case of sparse * grid this function insert a grid point. When the point already exist it return - * a reference to the already existing point + * a reference to the already existing point. In case of massive insert Sparse grids + * it gives a reference to the inserted element in the insert buffer * * \tparam p property to get (is an integer) * \param v1 grid_key that identify the element in the grid @@ -2052,6 +2067,35 @@ public: } + /*! \brief insert an element in the grid + * + * In case of dense grid this function is equivalent to get, in case of sparse + * grid this function inserts a grid point. When the point already exists it returns + * a reference to the already existing point. 
In case of massive insert Sparse grids + * The point is inserted immediately and a reference to the inserted element is returned + * + * \warning This function is not fast: an unlucky insert can potentially cost O(N) where N is the number + * of points (worst case) + * + * \tparam p property to get (is an integer) + * \param v1 grid_key that identify the element in the grid + * + * \return a reference to the inserted element + * + */ + template <unsigned int p,typename bg_key>inline auto insertFlush(const grid_dist_key_dx<dim,bg_key> & v1) + -> typename std::add_lvalue_reference + < + decltype(loc_grid.get(v1.getSub()).template insertFlush<p>(v1.getKey())) + >::type + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + + return loc_grid.get(v1.getSub()).template insertFlush<p>(v1.getKey()); + } + /*! \brief Get the reference of the selected element * * \tparam p property to get (is an integer) @@ -2587,6 +2631,67 @@ public: {loc_grid.get(i).clear();} } + /*! \brief construct link between levels + * + * \param grid_up grid level up + * \param grid_dw grid level down + * + */ + void construct_link(self & grid_up, self & grid_dw) + { + for (int i = 0 ; i < loc_grid.size() ; i++) + { + loc_grid.get(i).construct_link(grid_up.get_loc_grid(i),grid_dw.get_loc_grid(i),v_cl.getmgpuContext()); + } + } + + /*! \brief construct link between current and the level down + * + * + * \param grid_dw grid level down + * \param mvof offsets indexed by local grid; the dw component is copied into a Point<dim,int> and passed to the local grid + */ + void construct_link_dw(self & grid_dw, openfpm::vector<offset_mv<dim>> & mvof) + { + for (int i = 0 ; i < loc_grid.size() ; i++) + { + Point<dim,int> p_dw; + for(int j = 0 ; j < dim ; j++) + {p_dw.get(j) = mvof.get(i).dw.get(j);} + + loc_grid.get(i).construct_link_dw(grid_dw.get_loc_grid(i),p_dw,v_cl.getmgpuContext()); + } + } + + /*! 
\brief construct link between current and the level up + * + * + * \param grid_up grid level up + * + */ + void construct_link_up(self & grid_up) + { + for (int i = 0 ; i < loc_grid.size() ; i++) + { + loc_grid.get(i).construct_link_up(grid_up.get_loc_grid(i),v_cl.getmgpuContext()); + } + } + + /*! \brief call tagBoundaries on every local grid + * + * + * \tparam stencil_type stencil type forwarded to tagBoundaries of each local grid + * + */ + template<typename stencil_type> + void tagBoundaries() + { + for (int i = 0 ; i < loc_grid.size() ; i++) + { + loc_grid.get(i).template tagBoundaries<stencil_type>(v_cl.getmgpuContext()); + } + } + /*! \brief It move all the grid parts that do not belong to the local processor to the respective processor * */ diff --git a/src/main.cpp b/src/main.cpp index 9131b24776789c4d75c6bba483d39980b6f13240..305cdf76381f307a2ee4fad09abfb1e9cc928289 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -34,8 +34,8 @@ void timeout_cycle() // initialization function: bool init_unit_test() { - std::thread to (timeout_cycle); - to.detach(); +// std::thread to (timeout_cycle); +// to.detach(); return true; }