diff --git a/example/Grid/3_gray_scott_3d_vectorization/Makefile b/example/Grid/3_gray_scott_3d_vectorization/Makefile index 9e12917167a0342e130e67484a87fbe9a0d8549f..73b29934dc6d916dd375c3aaa4d724ece064ee8c 100644 --- a/example/Grid/3_gray_scott_3d_vectorization/Makefile +++ b/example/Grid/3_gray_scott_3d_vectorization/Makefile @@ -10,10 +10,10 @@ OBJ = main.o update_new.o mpif90 -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none -mavx -O3 -c -g -o $@ $< %.o: %.cpp - $(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/where/is/vc/installation/include + $(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/home/i-bird/VCDEVEL/include gray_scott: $(OBJ) - $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) -L/where/is/vc/installation/lib # -lVc (Add -lVc if you use VCDevel) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) -L/home/i-bird/VCDEVEL/lib -lVc #(Add -lVc if you use VCDevel) all: gray_scott diff --git a/example/Grid/3_gray_scott_3d_vectorization/main.cpp b/example/Grid/3_gray_scott_3d_vectorization/main.cpp index be485b82cb5161dd6ab0e4ce63b18ad843a76b14..76ecebd402527bef5fa392ebda0e8052a6d95f26 100644 --- a/example/Grid/3_gray_scott_3d_vectorization/main.cpp +++ b/example/Grid/3_gray_scott_3d_vectorization/main.cpp @@ -2,7 +2,7 @@ #include "data_type/aggregate.hpp" #include "timer.hpp" -#define FORTRAN_UPDATE +//#define FORTRAN_UPDATE #ifndef FORTRAN_UPDATE #include "Vc/Vc" @@ -119,12 +119,15 @@ void step(grid_dist_id<3, double, aggregate<double>> & OldU, grid_key_dx<3> (& star_stencil_3D)[7], double uFactor_s, double vFactor_s, double deltaT, double F, double K) { + timer tt; + tt.start(); + #ifndef FORTRAN_UPDATE //! 
\cond [cpp_update] \endcond - Vc::double uFactor = uFactor_s; - Vc::double vFactor = vFactor_s; + Vc::double_v uFactor = uFactor_s; + Vc::double_v vFactor = vFactor_s; WHILE_M(OldU,star_stencil_3D) auto & U_old = GET_GRID_M(OldU); @@ -221,6 +224,9 @@ void step(grid_dist_id<3, double, aggregate<double>> & OldU, //! \cond [fort_update] \endcond #endif + + tt.stop(); + std::cout << tt.getwct() << std::endl; } //! \cond [vectorization] \endcond diff --git a/example/Vector/3_molecular_dynamic_gpu/main.cu b/example/Vector/3_molecular_dynamic_gpu/main.cu index 7cdb05315c7196368cfc229a037f0654a9df665b..f0b28e427ca111fa376bfd0a69248d2a1389de40 100644 --- a/example/Vector/3_molecular_dynamic_gpu/main.cu +++ b/example/Vector/3_molecular_dynamic_gpu/main.cu @@ -1,3 +1,5 @@ +#define SCAN_WITH_CUB + /*! * \page Vector_3_md_dyn_gpu Vector 3 molecular dynamic on GPU * diff --git a/script/pre_req b/script/pre_req index 42347be5f94723c7857fdad7d7af526defc2679b..4a471cad694d8ea62b625c11e974ab3ca0b85848 100755 --- a/script/pre_req +++ b/script/pre_req @@ -307,34 +307,6 @@ if [ x"$MPI_valid" == x"yes" ]; then exit 1 fi fi - - #### Detect a potential dangerous situation ### - #### In which g++ is different from mpic++ ### - - output_mpi=$(mpic++ --version) - output_gcc=$($dgc_compiler --version) - - if [ x"$output_mpi" != x"$output_gcc" ]; then - echo -e "\033[91;5;1m MPI dangerous installation \033[0m" - echo -e "Performing a \033[1m \"mpic++ --version\" \033[0m, we detect that your installed mpic++ does not wrap the standard command g++" - echo "In general we strongly disencourage to override the default compiler(s), this configuration not only is not supported by OpenFPM, but can lead potentialy to several problems at system level. 
For the following reasons" - echo -e "\033[1m 1) All system wide dependencies become potentially useless \033[0m" - echo -e "\033[1m 2) A package maneger (apt-get or brew) installing a packege from source can potentialy generate incompatible system packages \033[0m" - echo -e "\033[1m 3) It is not easy to discover which compiler has been used to compile one dependency \033[0m" - echo -e "For just the time of the installation the installer will realign mpic++ to g++, unfortunately this could be not enough to ensure that all the dependencies has been compiled with the same compiler" - echo -e "\033[1m Installation will resume in 20 seconds \033[0m" - export OMPI_CXX=g++ - sleep 20 - fi - - output_mpi=$(mpic++ --version) - output_gcc=$($dgc_compiler --version) - - if [ x"$output_mpi" != x"$output_gcc" ]; then - echo -e "\033[91;5;1m MPI dangerous installation \033[0m" - echo -e "\033[1m The realign operation failed the installation will continue \033[0m" - sleep 10 - fi fi } diff --git a/script/remove_old b/script/remove_old index eb1fc09d4a9d9b12372dff72a9a77176eb3df678..1c522ee58408a4fd4d2519cbcb7a51ab3c7b0e55 100755 --- a/script/remove_old +++ b/script/remove_old @@ -77,6 +77,7 @@ function remove_old() if [ -d $1/BOOST ]; then is_update=$(cat $1/BOOST/include/boost/version.hpp | grep "#define BOOST_VERSION 106800") + is_update="$is_update$(cat $1/BOOST/include/boost/version.hpp | grep "#define BOOST_VERSION 107200")" if [ x"$is_update" == x"" ]; then echo -e "\033[1;34;5m --------------------------------------------------------------------------- \033[0m" echo -e "\033[1;34;5m Boost has been updated to 1.68, the component will be updated automatically \033[0m" @@ -143,7 +144,7 @@ function remove_old() if [ -d $1/PETSC ]; then version=$(cat $1/PETSC/version) - if [ x"$version" != x"2" ]; then + if [ "${version:-0}" -lt 2 ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" echo -e "\033[1;34;5m 
PETSC has been updated to version 3.10.2, the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" @@ -158,7 +159,7 @@ function remove_old() if [ -d $1/HDF5 ]; then version=$(cat $1/HDF5/version) - if [ x"$version" != x"1" ]; then + if [ "${version:-0}" -lt 1 ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" echo -e "\033[1;34;5m HDF5 has been updated to version 1.8.19, the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" @@ -169,7 +170,7 @@ function remove_old() if [ -d $1/MPI ]; then version=$(cat $1/MPI/version) - if [ x"$version" != x"4" ]; then + if [ "${version:-0}" -lt 4 ]; then echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" echo -e "\033[1;34;5m MPI has been updated to version 3.1.3, the component will be updated automatically \033[0m" echo -e "\033[1;34;5m -------------------------------------------------------------------------------------- \033[0m" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2b96289c84e09b071174997af7a22ec971e44d42..d8054ffc4a0e1bbdc8bce121250bbe4abc5b80eb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,6 +58,9 @@ target_link_libraries(pdata ${HDF5_LIBRARIES}) target_link_libraries(pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES}) target_link_libraries(pdata ${PETSC_LIBRARIES}) +add_definitions(-DSCAN_WITH_CUB) +#add_definitions(-DMAKE_CELLLIST_DETERMINISTIC) + if (TEST_COVERAGE) target_link_libraries(pdata -lgcov --coverage) endif() diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 59ac4db5008f13202e3ed23e74be90a0671df2df..f53edca4fde06d372e38e0b7c6a3e88f7799fb3f 100644 --- a/src/Grid/grid_dist_id.hpp 
+++ b/src/Grid/grid_dist_id.hpp @@ -1783,9 +1783,9 @@ public: this->map_(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global); loc_grid_old.clear(); + gdb_ext_old.clear(); } - inline void save(const std::string & filename) const { HDF5_writer<GRID_DIST> h5s; @@ -1799,8 +1799,16 @@ public: h5l.load<device_grid>(filename,loc_grid_old,gdb_ext_old); - // Map the distributed grid - map(); + if (v_cl.size() != 1) + { + // Map the distributed grid + map(); + } + else + { + loc_grid.swap(loc_grid_old); + gdb_ext_old.swap(gdb_ext); + } } /*! \brief Get the internal local ghost box diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index b14748c72799346521b6027d8139f6f680e608a4..ae1f8f7946ca9dad20d4d93628e1d795904aa771 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -721,7 +721,6 @@ public: { auto key = it.get(); grid_key_dx<dim> key2 = key - start; - std::string str = key.to_string(); gr_send.get_o(key2) = gr.get_o(key); diff --git a/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp b/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp index 6275c92f02dc43b4426a7db46204bbe518799d06..ae7bbf7c9e1d3f17e3ff7d9bda28c3812a6311f9 100644 --- a/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp +++ b/src/Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp @@ -145,5 +145,137 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) } +BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_2GB_save_test ) +{ + float ghost_part = 0.0; + + // Domain + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + + Vcluster<> & v_cl = create_vcluster(); + + // Skip this test on big scale + if (v_cl.getProcessingUnits() != 1) + return; + + // grid size + size_t sz[3]; + sz[0] = 970; + sz[1] = 650; + sz[2] = 512; + + // Ghost + Ghost<3,float> g(ghost_part); + + // Distributed grid with id decomposition + grid_dist_id<3, float, aggregate<double>> g_dist(sz,domain,g); + + // get the decomposition + auto & dec = 
g_dist.getDecomposition(); + + // check the consistency of the decomposition + bool val = dec.check_consistency(); + BOOST_REQUIRE_EQUAL(val,true); + + size_t count = 0; + + auto it = g_dist.getDomainIterator(); + + while (it.isNext()) + { + //key + auto key = it.get(); + + auto keyg = g_dist.getGKey(key); + + g_dist.template get<0>(key) = keyg.get(0); + + ++it; + count++; + } + + openfpm::vector<size_t> count_total; + v_cl.allGather(count,count_total); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < count_total.size(); i++) + {sum += count_total.get(i);} + + timer t; + t.start(); + // Save the grid + g_dist.save("grid_dist_2GB_id.h5" + std::to_string(v_cl.getProcessingUnits())); + t.stop(); +} + +BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_2GB_load_test ) +{ + float ghost_part = 0.0; + + // Domain + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + + Vcluster<> & v_cl = create_vcluster(); + + // Skip this test on big scale + if (v_cl.getProcessingUnits() != 1) + return; + + // grid size + size_t sz[3]; + sz[0] = 970; + sz[1] = 650; + sz[2] = 512; + + // Ghost + Ghost<3,float> g(ghost_part); + + // Distributed grid with id decomposition + grid_dist_id<3, float, aggregate<double>> g_dist(sz,domain,g); + + g_dist.load("grid_dist_2GB_id.h5" + std::to_string(v_cl.getProcessingUnits())); + + auto it = g_dist.getDomainIterator(); + + size_t count = 0; + + bool match = true; + while (it.isNext()) + { + //key + auto key = it.get(); + + //BOOST_CHECK_CLOSE(g_dist.template get<0>(key),1,0.0001); + //std::cout << "Element: " << g_dist.template get<0>(key) << std::endl; + + auto keyg = g_dist.getGKey(key); + + match &= g_dist.template get<0>(key) == keyg.get(0); + + if (match == false) + { + int debug = 0; + debug++; + } + + ++it; + count++; + } + + openfpm::vector<size_t> count_total; + v_cl.allGather(count,count_total); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < count_total.size(); i++) + sum += count_total.get(i); + + 
BOOST_REQUIRE_EQUAL(sum, (size_t)970*650*512); + BOOST_REQUIRE_EQUAL(match,true); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh index 4c1e87f4f02da17e2c293557b4f671b96632c6e1..b0b1a8ababe174c5ad20da397d937be6d286d43e 100644 --- a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh +++ b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh @@ -10,6 +10,15 @@ #include "util/cuda/scan_ofp.cuh" +#define NO_POSITION 1 +#define WITH_POSITION 2 +#define NO_CHANGE_ELEMENTS 4 + +#define BIND_DEC_TO_GHOST 1 + +#define RUN_ON_DEVICE 1024 +#define MAP_LOCAL 2 + #define SKIP_LABELLING 512 #define KEEP_PROPERTIES 512 @@ -199,7 +208,8 @@ struct local_ghost_from_opart_impl<with_pos,dim,St,prop,Memory,layout_base,true> size_t old = v_pos.size(); - v_pos.resize(v_pos.size() + o_part_loc.size(),DATA_ON_DEVICE); + if (!(opt & NO_POSITION)) + {v_pos.resize(v_pos.size() + o_part_loc.size(),DATA_ON_DEVICE);} if (!(opt & SKIP_LABELLING)) { diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 050fc8af2d2123d1072c2b19955efd81e1ddfc36..3b3ede8902555ed7ce7f6425576082d144f5ca9a 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -20,15 +20,6 @@ #include "cuda/vector_dist_comm_util_funcs.cuh" #include "util/cuda/scan_ofp.cuh" -#define NO_POSITION 1 -#define WITH_POSITION 2 -#define NO_CHANGE_ELEMENTS 4 - -#define BIND_DEC_TO_GHOST 1 - -#define RUN_ON_DEVICE 1024 -#define MAP_LOCAL 2 - /*! \brief compute the communication options from the ghost_get/put options * *