From d490c151c9de809856d8e6d0300c39495ce3b7f7 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Thu, 23 Jan 2020 00:57:11 +0100
Subject: [PATCH] Fix AMR GPU functionality

---
 CMakeLists.txt                           |   6 +-
 openfpm_data                             |   2 +-
 script/remove_old                        |   4 +-
 src/Amr/tests/amr_base_gpu_unit_tests.cu | 125 +++++++++++++++++++++--
 src/Grid/grid_dist_id_comm.hpp           |   5 +
 5 files changed, 132 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 551c410b5..99ad9c12a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,8 +53,12 @@ if(ENABLE_GPU)
                 message("CUDA is compatible")
                 set(WARNING_SUPPRESSION_AND_OPTION_NVCC  -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda )
 		FILE(WRITE cuda_options "-Xcudafe \"--display_error_number --diag_suppress=2915 --diag_suppress=2914  --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 \" --expt-extended-lambda")
+        elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 )
+                message("CUDA is compatible")
+                set(WARNING_SUPPRESSION_AND_OPTION_NVCC  -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda)
+                set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT  "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda")
 	else()
-		message(FATAL_ERROR "CUDA is incompatible, version 9.2 and 10.1 is only supported")
+		message(FATAL_ERROR "CUDA is incompatible, only versions 9.2, 10.1 and 10.2 are supported")
 	endif()
 endif()
 
diff --git a/openfpm_data b/openfpm_data
index 6ec119f43..58f3b70f2 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 6ec119f437933742b2814122b1eab93136def358
+Subproject commit 58f3b70f264af944182ff2182e168d68f196b7e8
diff --git a/script/remove_old b/script/remove_old
index eb1fc09d4..dd11a7639 100755
--- a/script/remove_old
+++ b/script/remove_old
@@ -160,7 +160,7 @@ function remove_old()
         version=$(cat $1/HDF5/version)
         if [ x"$version" != x"1"  ]; then
             echo -e "\033[1;34;5m  -------------------------------------------------------------------------------------- \033[0m"
-            echo -e "\033[1;34;5m  HDF5 has been updated to version 1.8.19, the component will be updated automatically   \033[0m"
+            echo -e "\033[1;34;5m  HDF5 has been updated to version 1.10.6, the component will be updated automatically   \033[0m"
             echo -e "\033[1;34;5m  -------------------------------------------------------------------------------------- \033[0m"
             sleep 5
 	    rm -rf $1/HDF5
@@ -171,7 +171,7 @@ function remove_old()
         version=$(cat $1/MPI/version)
         if [ x"$version" != x"4"  ]; then
             echo -e "\033[1;34;5m  -------------------------------------------------------------------------------------- \033[0m"
-            echo -e "\033[1;34;5m  MPI has been updated to version 3.1.3, the component will be updated automatically      \033[0m"
+            echo -e "\033[1;34;5m  MPI has been updated to version 4.0.2, the component will be updated automatically      \033[0m"
             echo -e "\033[1;34;5m  -------------------------------------------------------------------------------------- \033[0m"
             sleep 5
             rm -rf $1/MPI/include
diff --git a/src/Amr/tests/amr_base_gpu_unit_tests.cu b/src/Amr/tests/amr_base_gpu_unit_tests.cu
index ff2311481..63f0dea44 100644
--- a/src/Amr/tests/amr_base_gpu_unit_tests.cu
+++ b/src/Amr/tests/amr_base_gpu_unit_tests.cu
@@ -311,18 +311,131 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test )
 		BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(8))[dw_links_1.template get<1>(8)],3);
 	}
 
-/*	grid_key_dx<2> k({8,8});
-	grid_key_dx<2> k2({16,16});
+	/////////////////////////////////////////////////////////////
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu_link_test_more_dense )
+{
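+	// Denser variant of grid_dist_id_amr_gpu_link_test: fill levels 0-2 and
+	// check the down-links produced by construct_level_connections()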
+	auto & v_cl = create_vcluster();
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	Ghost<2,long int> g(1);
+	sgrid_dist_amr_gpu<2,float,aggregate<float>> amr_g(domain,g);
+
+	size_t g_sz[2] = {17,17};
+
+	size_t n_lvl = 3;
+
+	amr_g.initLevels(n_lvl,g_sz);
+
+	grid_key_dx<2> start({1,1});
+	grid_key_dx<2> stop({15,15});
+	grid_key_dx<2> start_lvl_dw({2,2});
+	grid_key_dx<2> stop_lvl_dw({31,31});
+	grid_key_dx<2> start_lvl_dw2({4,4});
+	grid_key_dx<2> stop_lvl_dw2({63,63});
+
+	auto it = amr_g.getGridIterator(0,start,stop);
+	auto it2 = amr_g.getGridIterator(1,start_lvl_dw,stop_lvl_dw);
+	auto it3 = amr_g.getGridIterator(2,start_lvl_dw2,stop_lvl_dw2);
+//	it.setGPUInsertBuffer(4);
+
+	auto & lvl_0 = amr_g.getDistGrid(0);
+	auto & lvl_1 = amr_g.getDistGrid(1);
+	auto & lvl_2 = amr_g.getDistGrid(2);
+
+	// Add points in level 0
+
+	while (it.isNext())
+	{
+		auto key = it.get_dist();
+
+		lvl_0.template insertFlush<0>(key) = 1.0;
+
+		++it;
+	}
+
+	while (it2.isNext())
+	{
+		auto key = it2.get_dist();
+
+		lvl_1.template insertFlush<0>(key) = 2.0;
+
+		++it2;
+	}
+
+	while (it3.isNext())
+	{
+		auto key = it3.get_dist();
 
-	lvl_zero.insertFlush<0>(k) = 1.0;
-	lvl_one.insertFlush<0>(k2) = 5.0;
+		lvl_2.template insertFlush<0>(key) = 3.0;
 
-	lvl_one.template hostToDevice<0>();
-	lvl_one.tagBoundaries(v_cl.getmgpuContext());*/
+		++it3;
+	}
+
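+	// move the inserted points to the device, update ghosts, tag boundaries
+	// and build the inter-level link connections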
+	amr_g.hostToDevice<0>();
+	amr_g.ghost_get<0>();
+	amr_g.tagBoundaries<NNStar<2>>();
+	amr_g.construct_level_connections();
+	amr_g.deviceToHost<0>();
+	amr_g.write("TESTOUT");
 
 	/////////////////////////////////////////////////////////////
 
+	auto & lvl_zero_d = amr_g.getDistGrid(0);
+	auto & lvl_one_d = amr_g.getDistGrid(1);
+	auto & lvl_two_d = amr_g.getDistGrid(2);
+
+	// For each local grid
+
+	for (int i = 0 ; i < lvl_zero_d.getN_loc_grid() ; i++)
+	{
+
+		// Check
+		auto & lvl_zero = lvl_zero_d.get_loc_grid(i);
+		auto & lvl_one = lvl_one_d.get_loc_grid(i);
+		auto & lvl_two = lvl_two_d.get_loc_grid(i);
+
+		auto & offs_dw_link = lvl_zero.getDownLinksOffsets();
+		auto & dw_links = lvl_zero.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link.size(),(i+1)*56);
+		BOOST_REQUIRE_EQUAL(dw_links.size(),(i+1)*56*4);
 
+		auto & indexL0 = lvl_zero.private_get_blockMap().getIndexBuffer();
+		auto & indexL1 = lvl_one.private_get_blockMap().getIndexBuffer();
+		auto & indexL2 = lvl_two.private_get_blockMap().getIndexBuffer();
+
+		auto & dataL0 = lvl_zero.private_get_blockMap().getDataBuffer();
+		auto & dataL1 = lvl_one.private_get_blockMap().getDataBuffer();
+		auto & dataL2 = lvl_two.private_get_blockMap().getDataBuffer();
+
+		dw_links.template deviceToHost<0,1>();
+
+		for (int j = 0 ; j < dw_links.size(); j++)
+		{
+			BOOST_REQUIRE_EQUAL(dataL1.template get<0>(dw_links.template get<0>(j))[dw_links.template get<1>(j)],2);
+		}
+
+		auto & offs_dw_link_1 = lvl_one.getDownLinksOffsets();
+		auto & dw_links_1 = lvl_one.getDownLinks();
+
+		BOOST_REQUIRE_EQUAL(offs_dw_link_1.size(),116);
+		BOOST_REQUIRE_EQUAL(dw_links_1.size(),116*4);
+
+		dw_links_1.template deviceToHost<0,1>();
+
+		for (int j = 0 ; j < dw_links_1.size(); j++)
+		{
+			BOOST_REQUIRE_EQUAL(dataL2.template get<0>(dw_links_1.template get<0>(j))[dw_links_1.template get<1>(j)],3);
+		}
+	}
+
+	/////////////////////////////////////////////////////////////
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
index ea419e4e5..7354a4d94 100644
--- a/src/Grid/grid_dist_id_comm.hpp
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -374,6 +374,11 @@ class grid_dist_id_comm
 			    		 std::vector<size_t> & prp_recv,
 						 ExtPreAlloc<Memory> & prRecv_prp)
 	{
+#ifdef __NVCC__
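+		// wait for any in-flight GPU work (kernels, async copies) to finish
+		// before the receive phase begins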
+		cudaDeviceSynchronize();
+#endif
+
 		if (device_grid::isCompressed() == false)
 		{
 			//! Receive the information from each processors
-- 
GitLab