diff --git a/CMakeLists.txt b/CMakeLists.txt
index 551c410b50040dda7fb9d99e5dde6208bd2b5943..99ad9c12aa0fdc6802e44a7f3ae0a523613b8745 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,8 +53,12 @@ if(ENABLE_GPU)
 		message("CUDA is compatible")
 		set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda )
 		FILE(WRITE cuda_options "-Xcudafe \"--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 \" --expt-extended-lambda")
+	elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 )
+		message("CUDA is compatible")
+		set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda)
+		set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda")
 	else()
-		message(FATAL_ERROR "CUDA is incompatible, version 9.2 and 10.1 is only supported")
+		message(FATAL_ERROR "CUDA is incompatible, only versions 9.2, 10.1 and 10.2 are supported")
 	endif()
 endif()
 
diff --git a/openfpm_data b/openfpm_data
index 6ec119f437933742b2814122b1eab93136def358..58f3b70f264af944182ff2182e168d68f196b7e8 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 6ec119f437933742b2814122b1eab93136def358
+Subproject commit 58f3b70f264af944182ff2182e168d68f196b7e8
diff --git a/script/remove_old b/script/remove_old
index eb1fc09d4a9d9b12372dff72a9a77176eb3df678..dd11a7639e79bf61cf8fc4eda8cc85d19eb3dc58 100755
--- a/script/remove_old
+++ b/script/remove_old
@@ -160,7 +160,7 @@ function remove_old()
 	version=$(cat $1/HDF5/version)
 	if [ x"$version" != x"1" ]; then
 		echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m"
-		echo -e "\033[1;34;5m HDF5 has been updated to version 1.8.19, the component will be updated automatically \033[0m"
+		echo -e "\033[1;34;5m HDF5 has been updated to version 1.10.6, the component will be updated automatically \033[0m"
 		echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m"
 		sleep 5
 		rm -rf $1/HDF5
@@ -171,7 +171,7 @@ function remove_old()
 	version=$(cat $1/MPI/version)
 	if [ x"$version" != x"4" ]; then
 		echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m"
-		echo -e "\033[1;34;5m MPI has been updated to version 3.1.3, the component will be updated automatically \033[0m"
+		echo -e "\033[1;34;5m MPI has been updated to version 4.0.2, the component will be updated automatically \033[0m"
 		echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m"
 		sleep 5
 		rm -rf $1/MPI/include
diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu
index ff23114819bb222c3ab67b987aa2f9281b8e1457..63f0dea44c16db921be2696ce3159733bfc6b32e 100644
--- a/src/Amr/tests/amr_base_gpu_unit_tests.cu
+++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu
@@ -311,18 +311,129 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test )
 		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(8))[dw_links_1.template get<1>(8)],3);
 	}
 
-/* grid_key_dx<2> k({8,8});
-	grid_key_dx<2> k2({16,16});
+	/////////////////////////////////////////////////////////////
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test_more_dense )
+{
+	auto & v_cl = create_vcluster();
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	Ghost<2,long int> g(1);
+	sgrid_dist_amr_gpu<2,float,aggregate<float>> amr_g(domain,g);
+
+	size_t g_sz[2] = {17,17};
+
+	size_t n_lvl = 3;
+
+	amr_g.initLevels(n_lvl,g_sz);
+
+	grid_key_dx<2> start({1,1});
+	grid_key_dx<2> stop({15,15});
+	grid_key_dx<2> start_lvl_dw({2,2});
+	grid_key_dx<2> stop_lvl_dw({31,31});
+	grid_key_dx<2> start_lvl_dw2({4,4});
+	grid_key_dx<2> stop_lvl_dw2({63,63});
+
+	auto it = amr_g.getGridIterator(0,start,stop);
+	auto it2 = amr_g.getGridIterator(1,start_lvl_dw,stop_lvl_dw);
+	auto it3 = amr_g.getGridIterator(2,start_lvl_dw2,stop_lvl_dw2);
+//	it.setGPUInsertBuffer(4);
+
+	auto & lvl_0 = amr_g.getDistGrid(0);
+	auto & lvl_1 = amr_g.getDistGrid(1);
+	auto & lvl_2 = amr_g.getDistGrid(2);
+
+	// Add points in level 0
+
+	while (it.isNext())
+	{
+		auto key = it.get_dist();
+
+		lvl_0.template insertFlush<0>(key) = 1.0;
+
+		++it;
+	}
+
+	while (it2.isNext())
+	{
+		auto key = it2.get_dist();
+
+		lvl_1.template insertFlush<0>(key) = 2.0;
+
+		++it2;
+	}
+
+	while (it3.isNext())
+	{
+		auto key = it3.get_dist();
 
-	lvl_zero.insertFlush<0>(k) = 1.0;
-	lvl_one.insertFlush<0>(k2) = 5.0;
+		lvl_2.template insertFlush<0>(key) = 3.0;
 
-	lvl_one.template hostToDevice<0>();
-	lvl_one.tagBoundaries(v_cl.getmgpuContext());*/
+		++it3;
+	}
+
+	amr_g.hostToDevice<0>();
+	amr_g.ghost_get<0>();
+	amr_g.tagBoundaries<NNStar<2>>();
+	amr_g.construct_level_connections();
+	amr_g.deviceToHost<0>();
+	amr_g.write("TESTOUT");
 
 	/////////////////////////////////////////////////////////////
 
+	auto & lvl_zero_d = amr_g.getDistGrid(0);
+	auto & lvl_one_d = amr_g.getDistGrid(1);
+	auto & lvl_two_d = amr_g.getDistGrid(2);
+
+	// For each local grid
+
+	for (int i = 0 ; i < lvl_zero_d.getN_loc_grid() ; i++)
+	{
+
+		// Check
+		auto & lvl_zero = lvl_zero_d.get_loc_grid(i);
+		auto & lvl_one = lvl_one_d.get_loc_grid(i);
+		auto & lvl_two = lvl_two_d.get_loc_grid(i);
+
+		auto & offs_dw_link = lvl_zero.getDownLinksOffsets();
+		auto & dw_links = lvl_zero.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link.size(),(i+1)*56);
+		BOOST_REQUIRE_EQUAL(dw_links.size(),(i+1)*56*4);
+
+		auto & indexL0 = lvl_zero.private_get_blockMap().getIndexBuffer();
+		auto & indexL1 = lvl_one.private_get_blockMap().getIndexBuffer();
+		auto & indexL2 = lvl_two.private_get_blockMap().getIndexBuffer();
+
+		auto & dataL0 = lvl_zero.private_get_blockMap().getDataBuffer();
+		auto & dataL1 = lvl_one.private_get_blockMap().getDataBuffer();
+		auto & dataL2 = lvl_two.private_get_blockMap().getDataBuffer();
+
+		dw_links.template deviceToHost<0,1>();
+
+		for (int i = 0 ; i < dw_links.size(); i++)
+		{
+			BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(i))[dw_links.template get<1>(i)],2);
+		}
+
+		auto & offs_dw_link_1 = lvl_one.getDownLinksOffsets();
+		auto & dw_links_1 = lvl_one.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link_1.size(),116);
+		BOOST_REQUIRE_EQUAL(dw_links_1.size(),116*4);
+
+		dw_links_1.template deviceToHost<0,1>();
+
+		for (int i = 0 ; i < dw_links_1.size(); i++)
+		{
+			BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(i))[dw_links_1.template get<1>(i)],3);
+		}
+	}
+
+	/////////////////////////////////////////////////////////////
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
index ea419e4e54c6ea35e5dbde7d96a46df8ac7e0e05..7354a4d94656b95ee0600bc51fc533a0310bbce0 100644
--- a/src/Grid/grid_dist_id_comm.hpp
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -374,6 +374,10 @@ class grid_dist_id_comm
 							std::vector<size_t> & prp_recv,
 							ExtPreAlloc<Memory> & prRecv_prp)
 	{
+#ifdef __NVCC__
+		cudaDeviceSynchronize();
+#endif
+
 		if (device_grid::isCompressed() == false)
 		{
 			//! Receive the information from each processors