From d490c151c9de809856d8e6d0300c39495ce3b7f7 Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Thu, 23 Jan 2020 00:57:11 +0100 Subject: [PATCH] Fixing AMR Gpu working --- CMakeLists.txt | 6 +- openfpm_data | 2 +- script/remove_old | 4 +- src/Amr/tests/amr_base_gpu_unit_tests.cu | 123 +++++++++++++++++++++-- src/Grid/grid_dist_id_comm.hpp | 4 + 5 files changed, 129 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 551c410b5..99ad9c12a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,8 +53,12 @@ if(ENABLE_GPU) message("CUDA is compatible") set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda ) FILE(WRITE cuda_options "-Xcudafe \"--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 \" --expt-extended-lambda") + elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 ) + message("CUDA is compatible") + set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda) + set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda") else() - message(FATAL_ERROR "CUDA is incompatible, version 9.2 and 10.1 is only supported") + message(FATAL_ERROR "CUDA is incompatible, only versions 9.2, 10.1 and 10.2 are supported") endif() endif() diff --git a/openfpm_data b/openfpm_data index 6ec119f43..58f3b70f2 160000 --- 
a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 6ec119f437933742b2814122b1eab93136def358 +Subproject commit 58f3b70f264af944182ff2182e168d68f196b7e8 diff --git a/script/remove_old b/script/remove_old index eb1fc09d4..dd11a7639 100755 --- a/script/remove_old +++ b/script/remove_old @@ -160,7 +160,7 @@ function remove_old() version=$(cat $1/HDF5/version) if [ x"$version" != x"1" ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" - echo -e "\033[1;34;5m HDF5 has been updated to version 1.8.19, the component will be updated automatically \033[0m" + echo -e "\033[1;34;5m HDF5 has been updated to version 1.10.6, the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" sleep 5 rm -rf $1/HDF5 @@ -171,7 +171,7 @@ function remove_old() version=$(cat $1/MPI/version) if [ x"$version" != x"4" ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" - echo -e "\033[1;34;5m MPI has been updated to version 3.1.3, the component will be updated automatically \033[0m" + echo -e "\033[1;34;5m MPI has been updated to version 4.0.2, the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" sleep 5 rm -rf $1/MPI/include diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu index ff2311481..63f0dea44 100644 --- a/src/Amr/tests/amr_base_gpu_unit_tests.cu +++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu @@ -311,18 +311,129 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test ) BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(8))[dw_links_1.template get<1>(8)],3); } -/* grid_key_dx<2> k({8,8}); - grid_key_dx<2> k2({16,16}); + 
///////////////////////////////////////////////////////////// +} + +BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test_more_dense ) +{ + auto & v_cl = create_vcluster(); + + // Domain + Box<2,float> domain({0.0,0.0},{1.0,1.0}); + + Ghost<2,long int> g(1); + sgrid_dist_amr_gpu<2,float,aggregate<float>> amr_g(domain,g); + + size_t g_sz[2] = {17,17}; + + size_t n_lvl = 3; + + amr_g.initLevels(n_lvl,g_sz); + + grid_key_dx<2> start({1,1}); + grid_key_dx<2> stop({15,15}); + grid_key_dx<2> start_lvl_dw({2,2}); + grid_key_dx<2> stop_lvl_dw({31,31}); + grid_key_dx<2> start_lvl_dw2({4,4}); + grid_key_dx<2> stop_lvl_dw2({63,63}); + + auto it = amr_g.getGridIterator(0,start,stop); + auto it2 = amr_g.getGridIterator(1,start_lvl_dw,stop_lvl_dw); + auto it3 = amr_g.getGridIterator(2,start_lvl_dw2,stop_lvl_dw2); +// it.setGPUInsertBuffer(4); + + auto & lvl_0 = amr_g.getDistGrid(0); + auto & lvl_1 = amr_g.getDistGrid(1); + auto & lvl_2 = amr_g.getDistGrid(2); + + // Add points in level 0 + + while (it.isNext()) + { + auto key = it.get_dist(); + + lvl_0.template insertFlush<0>(key) = 1.0; + + ++it; + } + + while (it2.isNext()) + { + auto key = it2.get_dist(); + + lvl_1.template insertFlush<0>(key) = 2.0; + + ++it2; + } + + while (it3.isNext()) + { + auto key = it3.get_dist(); - lvl_zero.insertFlush<0>(k) = 1.0; - lvl_one.insertFlush<0>(k2) = 5.0; + lvl_2.template insertFlush<0>(key) = 3.0; - lvl_one.template hostToDevice<0>(); - lvl_one.tagBoundaries(v_cl.getmgpuContext());*/ + ++it3; + } + + amr_g.hostToDevice<0>(); + amr_g.ghost_get<0>(); + amr_g.tagBoundaries<NNStar<2>>(); + amr_g.construct_level_connections(); + amr_g.deviceToHost<0>(); + amr_g.write("TESTOUT"); ///////////////////////////////////////////////////////////// + auto & lvl_zero_d = amr_g.getDistGrid(0); + auto & lvl_one_d = amr_g.getDistGrid(1); + auto & lvl_two_d = amr_g.getDistGrid(2); + + // For each local grid + + for (int i = 0 ; i < lvl_zero_d.getN_loc_grid() ; i++) + { + + // Check + auto & lvl_zero = 
lvl_zero_d.get_loc_grid(i); + auto & lvl_one = lvl_one_d.get_loc_grid(i); + auto & lvl_two = lvl_two_d.get_loc_grid(i); + + auto & offs_dw_link = lvl_zero.getDownLinksOffsets(); + auto & dw_links = lvl_zero.getDownLinks(); + + BOOST_REQUIRE_EQUAL(offs_dw_link.size(),(i+1)*56); + BOOST_REQUIRE_EQUAL(dw_links.size(),(i+1)*56*4); + auto & indexL0 = lvl_zero.private_get_blockMap().getIndexBuffer(); + auto & indexL1 = lvl_one.private_get_blockMap().getIndexBuffer(); + auto & indexL2 = lvl_two.private_get_blockMap().getIndexBuffer(); + + auto & dataL0 = lvl_zero.private_get_blockMap().getDataBuffer(); + auto & dataL1 = lvl_one.private_get_blockMap().getDataBuffer(); + auto & dataL2 = lvl_two.private_get_blockMap().getDataBuffer(); + + dw_links.template deviceToHost<0,1>(); + + for (int i = 0 ; i < dw_links.size(); i++) + { + BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(0))[dw_links.template get<1>(0)],2); + } + + auto & offs_dw_link_1 = lvl_one.getDownLinksOffsets(); + auto & dw_links_1 = lvl_one.getDownLinks(); + + BOOST_REQUIRE_EQUAL(offs_dw_link_1.size(),116); + BOOST_REQUIRE_EQUAL(dw_links_1.size(),116*4); + + dw_links_1.template deviceToHost<0,1>(); + + for (int i = 0 ; i < dw_links_1.size(); i++) + { + BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(0))[dw_links_1.template get<1>(0)],3); + } + } + + ///////////////////////////////////////////////////////////// } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index ea419e4e5..7354a4d94 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -374,6 +374,10 @@ class grid_dist_id_comm std::vector<size_t> & prp_recv, ExtPreAlloc<Memory> & prRecv_prp) { +#ifdef __NVCC__ + cudaDeviceSynchronize(); +#endif + if (device_grid::isCompressed() == false) { //! Receive the information from each processors -- GitLab