diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a3c20f34557077e042f61d3cf6a76a1f760fc02..e15cb00b421267ad3f17310e0b1c313d6fd4290a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,11 +19,15 @@ set(ENV{HDF5_ROOT} ${HDF5_ROOT}) set(OPENFPM_PDATA_DIR CACHE PATH "OpenFPM_pdata dirs") set(PETSC_ROOT CACHE PATH "If compiling with linear algebra indicate the PETSC root directory") set(ENV{PETSC_DIR} ${PETSC_ROOT}) +set(HIP_ENABLE CACHE BOOL "Enable HIP compiler") +set(AMD_ARCH_COMPILE "gfx900" CACHE STRING "AMD gpu architecture used to compile kernels") if (ENABLE_GPU) - set(CUDA_ON_CPU OFF) - enable_language(CUDA) - find_package(CUDA) + set(CUDA_ON_CPU OFF) + if (NOT HIP_ENABLE) + enable_language(CUDA) + find_package(CUDA) + endif() endif() set (CMAKE_CXX_STANDARD 14) @@ -41,6 +45,16 @@ find_package(MPI REQUIRED) find_package(HDF5 REQUIRED) find_package(TinyObjLoader) find_package(PETSc) +find_package(HIP) + +if(HIP_FOUND) + set(DEFINE_HIP_GPU "#define HIP_GPU") + set(DEFINE_CUDIFY_USE_HIP "#define CUDIFY_USE_HIP") +endif() + +if(HIP_FOUND) + set(DEFINE_CUDA_GPU "#define CUDA_GPU") +endif() if(CUDA_FOUND) set(OPENFPM_INIT_FILE "initialize/initialize_wrapper_cuda.cu") diff --git a/build.sh b/build.sh index 55b11b81cdec43c08949b9b4fb5ea370a3c7c813..d03d3ef014785d4c63e5adbea9606615cfb03913 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,5 @@ #! /bin/bash - workspace=$1 hostname=$(hostname) branch=$3 diff --git a/configure b/configure index b34c8afb1697fe5f827dd7ce3f712409fe65d6ea..31f1735d38c3980de785b285dde79423e54d748f 100755 --- a/configure +++ b/configure @@ -122,7 +122,10 @@ with_petsc with_eigen with_vcdevel enable_gpu +enable_hip enable_asan +enable_garbageinj +enable_garbageinjv ' rm -rf build @@ -251,11 +254,25 @@ do else conf_options="$conf_options -DCMAKE_CUDA_HOST_COMPILER=$(which $CXX)" fi + if [ x"$CXXCUDA" == x"" ]; then + conf_options="$conf_options" + else + conf_options="$conf_options -DCMAKE_CUDA_COMPILER=$(which $CXXCUDA)" + fi conf_options="$conf_options -DENABLE_GPU=ON" ;; + hip) + conf_options="$conf_options -DHIP_ENABLE=ON" + ;; asan) conf_options="$conf_options -DENABLE_ASAN=ON" ;; + garbageinj) + conf_options="$conf_options -DENABLE_GARBAGE_INJECTOR=ON" + ;; + garbageinjv) + conf_options="$conf_options -DENABLE_VCLUSTER_GARBAGE_INJECTOR=ON" + ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ' ;; @@ -573,6 +590,7 @@ cd build ## remove enerything echo "Calling cmake ../. $conf_options" +printf "cmake ../. $conf_options" > cmake_build_options rm ../error_code DYLD_LIBRARY_PATH=$ld_lib_pathopt cmake ../. $conf_options if [ $? != 0 ]; then @@ -601,6 +619,7 @@ clean: install: \$(MAKE) -C build \$@ + script/install_parallel_debugger pdata: \$(MAKE) -C build \$@ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8b87de0d1856c8a21e9ee1f1d8881481c13d72d..eb50c2f1c660f526c88f34e0efbdef3b4ba5d9c3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,12 +6,41 @@ if (CUDA_ON_CPU) add_definitions(-DCUDA_ON_CPU) endif() +if (CUDA_FOUND OR CUDA_ON_CPU OR HIP_FOUND) -add_executable(io main.cpp + set(CUDA_SOURCES HDF5_wr/HDF5_writer_cuda.cu) + +endif() + +if ( HIP_ENABLE AND HIP_FOUND ) + + list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND HIP_HIPCC_FLAGS -O0) + endif() + + set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) + + list(APPEND HIP_HIPCC_FLAGS -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0) + + set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX) + + hip_add_executable(io main.cpp + MetaParser/MetaParser_unit_test.cpp + ${CUDA_SOURCES} + ObjReader/ObjReader_unit_test.cpp) + + +else() + + add_executable(io main.cpp MetaParser/MetaParser_unit_test.cpp - HDF5_wr/HDF5_writer_cuda.cu + ${CUDA_SOURCES} ObjReader/ObjReader_unit_test.cpp) +endif() + add_dependencies(io ofpmmemory) add_dependencies(io vcluster) @@ -71,7 +100,7 @@ endif() # will also build with -std=c++11 target_compile_features(io PUBLIC cxx_std_11) target_link_libraries(io ${MPI_C_LIBRARIES}) - +target_link_libraries(io ${MPI_CXX_LIBRARIES}) if (TEST_COVERAGE) target_link_libraries(io -lgcov) endif() diff --git a/src/RawReader/InitGridWithPixel.hpp b/src/RawReader/InitGridWithPixel.hpp index bd53a1ce70550f7a1c50de06000aa8c88dc4e2f8..1bd71814ab9e460047aa00cfe4b77f04e2ffbcec 100644 --- a/src/RawReader/InitGridWithPixel.hpp +++ b/src/RawReader/InitGridWithPixel.hpp @@ -9,7 +9,7 @@ * * @details Can be run in parallel. * - * @author Justina Stark + * @author Justina Stark & Pietro Incardona * @date November 2019 - August 2020 */ #ifndef IMAGE_BASED_RECONSTRUCTION_GETINITIALGRID_HPP @@ -18,6 +18,7 @@ #include <iostream> #include <typeinfo> #include <cmath> +#include <sys/stat.h> #include "Vector/vector_dist.hpp" #include "Grid/grid_dist_id.hpp" @@ -26,6 +27,14 @@ #include "level_set/redistancing_Sussman/HelpFunctionsForGrid.hpp" typedef signed char BYTE; + +inline bool exists_test (const std::string& name) { + struct stat buffer; + return (stat (name.c_str(), &buffer) == 0); +} + + + /**@brief Read the number of pixels per dimension from a csv-file in order to create a grid with the same size. * * @param path_to_file Std::string containing the path to the csv file that holds the image/volume size in @@ -35,13 +44,15 @@ typedef signed char BYTE; std::vector<size_t> get_size(const std::string & path_to_file) { std::vector<size_t> stack_dimst_1d; - // stream input csv file and error check - std::ifstream file(path_to_file); - if (!file) - { - std::cout << "Error opening file" << std::endl; - exit (EXIT_FAILURE); + // check if file exists and stream input csv file + if(!exists_test(path_to_file)){ + std::cout << "------------------------------------------------------------------------" << std::endl; + std::cout << "Error: file " << path_to_file << " does not exist. Aborting..." << std::endl; + std::cout << "------------------------------------------------------------------------" << std::endl; + abort(); } + std::ifstream file(path_to_file); + // get its size std::streampos fileSize; @@ -83,7 +94,14 @@ void load_pixel_onto_grid(grid_type & grid, std::string file_name, std::vector < constexpr size_t x = 0; constexpr size_t y = 1; constexpr size_t z = 2; - + + // check if file exists and stream input file + if(!exists_test(file_name)){ + std::cout << "------------------------------------------------------------------------" << std::endl; + std::cout << "Error: file " << file_name << " does not exist. Aborting..." << std::endl; + std::cout << "------------------------------------------------------------------------" << std::endl; + abort(); + } std::ifstream file_stream (file_name, std::ifstream::binary); auto & v_cl = create_vcluster(); @@ -121,17 +139,24 @@ void load_pixel_onto_grid(grid_type & grid, std::string file_name, std::vector < auto key = dom.get(); auto gkey = grid.getGKey(key); + // In case a patch starts within a group of nodes to which same pixel-value should be assigned, get the + // respective rest-offset + size_t rest_offset = (size_t) (fmod(gkey.get(0), refinement[x])); // get the remainder + + // get l as the length of one x-line of the original image stack for the specific patch on the processor - auto & gbox = grid.getLocalGridsInfo(); - auto & DomBox = gbox.get(key.getSub()).Dbox; - int l = (size_t) std::round((DomBox.getHigh(0) - DomBox.getLow(0) + 1) / refinement[x]); + auto & gbox = grid.getLocalGridsInfo(); + auto & DomBox = gbox.get(key.getSub()).Dbox; + size_t patch_size = DomBox.getHigh(0) - DomBox.getLow(0) + 1; + + size_t l = (size_t) ceil( (patch_size + rest_offset) / refinement[x]); // in case that the grid has a different resolution than the underlying image stack: // create a key which is used to get the offset for the file reading // the indices in this key are corrected by the refinement factor for (size_t d = 0; d < grid_type::dims; d++) { - gkey.set_d(d, std::round(gkey.get(d) / refinement[d])); + gkey.set_d(d, floor(gkey.get(d) / refinement[d])); } // the offset matches the pixel from the image stack to the corresponding current position of the iterator @@ -147,10 +172,13 @@ void load_pixel_onto_grid(grid_type & grid, std::string file_name, std::vector < // run over a whole grid-line in x and assign pixel values from pixel_line to grid nodes // if the grid is finer in x as the image stack, the same pixel value from pixel_line is // assigned refinement[x] times - for (size_t i = 0; i < l * refinement[x]; i++) + for (size_t k = 0; k < patch_size; ++k) { auto key = dom.get(); - grid.template get<Phi_0>(key) = (double) pixel_line[(size_t)floor(i / refinement[x])]; + // get the correct index of the pixel to be read from pixel_line by considering a potential rest-offset, + // when the patch divides group of nodes that belong to the same pixel + size_t i = (size_t) floor((k + rest_offset) / refinement[x]); + grid.template get<Phi_0>(key) = (double) pixel_line[i]; ++dom; } // now one grid line in x is finished and the iterator dom has advanced accordingly s.t. next loop continues diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in index 85840022fcec2c75463a60a05aa6c09b76ea448e..89dc2f504342560b87f489780adc1563f3f4fed3 100644 --- a/src/config/config_cmake.h.in +++ b/src/config/config_cmake.h.in @@ -1,3 +1,9 @@ +/* HIP GPU support */ +${DEFINE_HIP_GPU} + +/* HIP Cudify GPU support */ +${DEFINE_CUDIFY_USE_HIP} + /* Coverty scan */ ${DEFINE_COVERTY_SCAN} diff --git a/src/main.cpp b/src/main.cpp index 176bf030ab86272271f3feea368596579bb02788..d73982ffad94993a966141815b8e09a216f0651c 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,7 +8,6 @@ #define BOOST_TEST_DYN_LINK #include <boost/test/unit_test.hpp> - // initialization function: bool init_unit_test() {