diff --git a/CMakeLists.txt b/CMakeLists.txt index f3f6d117817746f1d68547d9c52bc8dd89dab794..c04fdabf87ba6b768ce16d760257ac2e2661b9ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,14 +46,14 @@ if(ENABLE_GPU) if (CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2) message("CUDA is compatible") set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda) + FILE(WRITE cuda_options " -Xcudafe \"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda ") elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 ) message("CUDA is compatible") - set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda) - + set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda ) + FILE(WRITE cuda_options "-Xcudafe \"--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 \" --expt-extended-lambda") else() - message(FATAL_ERROR "CUDA is incompatible, version 9.2 is only supported") + message(FATAL_ERROR "CUDA is incompatible, version 9.2 and 10.1 is only supported") endif() - endif() diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index cba88837ebd873397ef7c85fccd99421cc3b4aac..c734351b77a1a317d7f3ea19034ab763ae52593c 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu 
+++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -42,7 +42,7 @@ #define PRINT_STACKTRACE #define STOP_ON_ERROR #define OPENMPI -#define SE_CLASS1 +//#define SE_CLASS1 #include "Vector/vector_dist.hpp" #include @@ -97,7 +97,7 @@ const real_number MassBound = 0.0000767656; #ifdef TEST_RUN const real_number t_end = 0.001; #else -const real_number t_end = 1.50; +const real_number t_end = 0.10; #endif // Gravity acceleration diff --git a/install b/install index 74c6ed283d9aacbdd65a059584837e9357413642..f69f7762de9648b7b37c074eb4f5b0de3d671b9f 100755 --- a/install +++ b/install @@ -426,7 +426,7 @@ else echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $lin_alg_lib -ldl" >> example.mk echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk fi -echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk +echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) "$(cat cuda_options)" -I. 
-I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk cp example.mk src/example.mk cp example.mk example/example.mk diff --git a/openfpm_data b/openfpm_data index 5651520e8f4efeb48e0395726699b07a16ed4736..f853fc87dc8edc9e77727f0034453afa7f5dbb4a 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 5651520e8f4efeb48e0395726699b07a16ed4736 +Subproject commit f853fc87dc8edc9e77727f0034453afa7f5dbb4a diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index 3c7b5e9e146644c23e0739ac4ad50caae9257532..79214427fd8af33072586ae6405441b9228ba96b 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -311,7 +311,7 @@ public: // We reduce the size of the cells by a factor 8 in 3d 4 in 2d for (size_t i = 0 ; i < dim ; i++) - {div_g[i] = gr.size(i)/2;} + {div_g[i] = (gr.size(i) == 1)?1:gr.size(i)/2;} fine_s.Initialize(domain,div_g); } diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index 437f1624e82dfe10907252d457db4da90829c4ac..4b29e35bad539c319c911309a4b48c8a2098a32e 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -308,7 +308,8 @@ void check_cell_list_cpu_and_gpu(vector_type & vd, CellList_type & NN, CellList_ BOOST_REQUIRE_EQUAL(test,true); } -BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) +template +void vector_dist_gpu_test_impl() { auto & v_cl = create_vcluster(); @@ -415,7 +416,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) vd.hostToDevicePos(); vd.template 
hostToDeviceProp<0>(); - auto NN = vd.getCellListGPU(0.1); + auto NN = vd.template getCellListGPU(0.1); auto NN_cpu = vd.getCellList(0.1); check_cell_list_cpu_and_gpu(vd,NN,NN_cpu); @@ -425,6 +426,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) check_cell_list_cpu_and_gpu(vd,NN_up,NN_cpu); } +BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) +{ + vector_dist_gpu_test_impl>>(); +} + +BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sparse) +{ + vector_dist_gpu_test_impl>(); +} + template void vdist_calc_gpu_test() { diff --git a/src/Vector/tests/vector_dist_unit_test.cpp b/src/Vector/tests/vector_dist_unit_test.cpp index f0114132c59bdcfff93539380e75ac0778fb25ca..a4780586f63e0adc0cedaae6e7a1908d5700e1be 100644 --- a/src/Vector/tests/vector_dist_unit_test.cpp +++ b/src/Vector/tests/vector_dist_unit_test.cpp @@ -1928,6 +1928,28 @@ BOOST_AUTO_TEST_CASE( vector_of_vector_dist ) BOOST_REQUIRE_EQUAL(cnt,4*4096ul); } +BOOST_AUTO_TEST_CASE( vector_high_dimension ) +{ + // Here we define our domain: a 10D box with intervals from 0 to 1.0 along every dimension + Box<10,double> domain; + + for (size_t i = 0 ; i < 10 ; i++) + { + domain.setLow(i,0.0); + domain.setHigh(i,1.0); + } + + // Here we define the boundary conditions of our problem + size_t bc[10]; + for (size_t i = 0 ; i < 10 ; i++) + {bc[i] = NON_PERIODIC;}; + + // extended boundary around the domain, and the processor domain + Ghost<10,double> g(0.0); + + // we check that the constructor does not get stuck + vector_dist<10,double, aggregate > vd(16,domain,bc,g); +} BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 4c839d93aca6c6569b36877ab544b839bb8ea3a9..4ce394c1a920083e8674d1c80590ec6f9e4c32bb 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -40,6 +40,8 @@ #define DEC_GRAN(gr) ((size_t)gr << 32) +template using CELLLIST_GPU_SPARSE = CellList_gpu,unsigned int,int,true>; + #define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error"); #ifdef 
SE_CLASS3 @@ -57,16 +59,16 @@ #define ID true // Perform a ghost get or a ghost put -#define GET 1 -#define PUT 2 +constexpr int GET = 1; +constexpr int PUT = 2; // Write the particles with ghost -#define NO_GHOST 0 -#define WITH_GHOST 2 +constexpr int NO_GHOST = 0; +constexpr int WITH_GHOST = 2; -#define GCL_NON_SYMMETRIC 0 -#define GCL_SYMMETRIC 1 -#define GCL_HILBERT 2 +constexpr int GCL_NON_SYMMETRIC = 0; +constexpr int GCL_SYMMETRIC = 1; +constexpr int GCL_HILBERT = 2; template struct gcl_standard_no_symmetric_impl @@ -1157,7 +1159,8 @@ public: * \return the Cell list * */ - CellList_gpu> getCellListGPU(St r_cut, bool no_se3 = false) + template>> + CellType getCellListGPU(St r_cut, bool no_se3 = false) { #ifdef SE_CLASS3 if (no_se3 == false) @@ -1171,7 +1174,7 @@ public: Ghost g = getDecomposition().getGhost(); g.magnify(1.013); - return getCellListGPU(r_cut, g,no_se3); + return getCellListGPU(r_cut, g,no_se3); } @@ -1190,7 +1193,8 @@ public: * \return the CellList * */ - CellList_gpu> getCellListGPU(St r_cut, const Ghost & enlarge, bool no_se3 = false) + template>> + CellType getCellListGPU(St r_cut, const Ghost & enlarge, bool no_se3 = false) { #ifdef SE_CLASS3 if (no_se3 == false) @@ -1206,7 +1210,7 @@ public: // Processor bounding box cl_param_calculate(pbox, div, r_cut, enlarge); - CellList_gpu> cell_list(pbox,div); + CellType cell_list(pbox,div); v_prp_out.resize(v_pos.size()); v_pos_out.resize(v_pos.size()); @@ -1949,7 +1953,8 @@ public: * \parameter Cell-list from which has been constructed the sorted vector * */ - template void merge_sort(CellList_gpu> & cl, size_t n_thr = 1024) + template + void merge_sort(CellList_gpu,id_1,id_2,is_sparse> & cl, size_t n_thr = 1024) { #if defined(__NVCC__)