From 5edbb9578d6de07f5435fe6c4efaf283dfd719dc Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Thu, 30 Aug 2018 18:10:14 +0200 Subject: [PATCH] Adding GPU_branch_start --- CMakeFiles/FindEigen3.cmake | 108 ++ CMakeFiles/FindLibHilbert.cmake | 99 ++ CMakeFiles/FindMetis.cmake | 187 +++ CMakeFiles/FindPETSc.cmake | 212 +++ CMakeFiles/FindParMetis.cmake | 164 +++ .../Vortex_in_cell/main_vic_petsc_double.cpp | 1287 +++++++++++++++++ example/Vector/7_SPH_dlb_opt/main_dbg.cpp | 1254 ++++++++++++++++ openfpm_data | 2 +- script/install_EIGEN.sh | 15 +- script/install_OPENBLAS.sh | 4 +- script/remove_old | 9 +- src/Decomposition/CartDecomposition.hpp | 50 +- .../cuda/CartDecomposition_gpu.cuh | 27 +- .../cuda/decomposition_cuda_tests.cu | 66 - src/Decomposition/cuda/ie_ghost_gpu.cuh | 126 ++ src/Decomposition/ie_ghost.hpp | 131 +- src/Makefile.am | 2 +- src/Vector/cuda/vector_dist_cuda_func_test.cu | 189 +++ src/Vector/cuda/vector_dist_cuda_funcs.cuh | 41 + src/Vector/cuda/vector_dist_gpu_unit_tests.cu | 7 +- src/Vector/vector_dist.hpp | 4 +- src/Vector/vector_dist_comm.hpp | 123 +- src/Vector/vector_dist_kernel.hpp | 114 ++ src/config/config_cmake.h.in | 159 ++ src/main_single.cpp | 32 + 25 files changed, 4213 insertions(+), 199 deletions(-) create mode 100644 CMakeFiles/FindEigen3.cmake create mode 100644 CMakeFiles/FindLibHilbert.cmake create mode 100644 CMakeFiles/FindMetis.cmake create mode 100644 CMakeFiles/FindPETSc.cmake create mode 100644 CMakeFiles/FindParMetis.cmake create mode 100644 example/Numerics/Vortex_in_cell/main_vic_petsc_double.cpp create mode 100644 example/Vector/7_SPH_dlb_opt/main_dbg.cpp create mode 100644 src/Decomposition/cuda/ie_ghost_gpu.cuh create mode 100644 src/Vector/vector_dist_kernel.hpp create mode 100644 src/config/config_cmake.h.in create mode 100644 src/main_single.cpp diff --git a/CMakeFiles/FindEigen3.cmake b/CMakeFiles/FindEigen3.cmake new file mode 100644 index 00000000..a2857c10 --- /dev/null +++ 
b/CMakeFiles/FindEigen3.cmake @@ -0,0 +1,108 @@ +# - Try to find Eigen3 lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(Eigen3 3.1.2) +# to require version 3.1.2 or newer of Eigen3. +# +# Once done this will define +# +# EIGEN3_FOUND - system has eigen lib with correct version +# EIGEN3_INCLUDE_DIR - the eigen include directory +# EIGEN3_VERSION - eigen version +# +# and the following imported target: +# +# Eigen3::Eigen - The header-only Eigen library +# +# This module reads hints about search locations from +# the following environment variables: +# +# EIGEN3_ROOT +# EIGEN3_ROOT_DIR + +# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org> +# Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael@free.fr> +# Copyright (c) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> +# Redistribution and use is allowed according to the terms of the 2-clause BSD license. + +if(NOT Eigen3_FIND_VERSION) + if(NOT Eigen3_FIND_VERSION_MAJOR) + set(Eigen3_FIND_VERSION_MAJOR 2) + endif(NOT Eigen3_FIND_VERSION_MAJOR) + if(NOT Eigen3_FIND_VERSION_MINOR) + set(Eigen3_FIND_VERSION_MINOR 91) + endif(NOT Eigen3_FIND_VERSION_MINOR) + if(NOT Eigen3_FIND_VERSION_PATCH) + set(Eigen3_FIND_VERSION_PATCH 0) + endif(NOT Eigen3_FIND_VERSION_PATCH) + + set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") +endif(NOT Eigen3_FIND_VERSION) + +macro(_eigen3_check_version) + file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) + + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") + set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") + set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match 
"${_eigen3_version_header}") + set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") + + set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) + if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK FALSE) + else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK TRUE) + endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + + if(NOT EIGEN3_VERSION_OK) + + message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " + "but at least version ${Eigen3_FIND_VERSION} is required") + endif(NOT EIGEN3_VERSION_OK) +endmacro(_eigen3_check_version) + +if (EIGEN3_INCLUDE_DIR) + + # in cache already + _eigen3_check_version() + set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) + set(Eigen3_FOUND ${EIGEN3_VERSION_OK}) + +else (EIGEN3_INCLUDE_DIR) + + # search first if an Eigen3Config.cmake is available in the system, + # if successful this would set EIGEN3_INCLUDE_DIR and the rest of + # the script will work as usual + find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) + + if(NOT EIGEN3_INCLUDE_DIR) + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + HINTS + ENV EIGEN3_ROOT + ENV EIGEN3_ROOT_DIR + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + endif(NOT EIGEN3_INCLUDE_DIR) + + if(EIGEN3_INCLUDE_DIR) + _eigen3_check_version() + endif(EIGEN3_INCLUDE_DIR) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) + + mark_as_advanced(EIGEN3_INCLUDE_DIR) + +endif(EIGEN3_INCLUDE_DIR) + +if(EIGEN3_FOUND AND NOT TARGET Eigen3::Eigen) + add_library(Eigen3::Eigen INTERFACE IMPORTED) + set_target_properties(Eigen3::Eigen PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${EIGEN3_INCLUDE_DIR}") +endif() + diff --git a/CMakeFiles/FindLibHilbert.cmake b/CMakeFiles/FindLibHilbert.cmake new file mode 100644 index 00000000..9b6f2b35 --- 
/dev/null +++ b/CMakeFiles/FindLibHilbert.cmake @@ -0,0 +1,99 @@ +# - Try to find LibHilbert +# Once done this will define +# +# LIBHILBERT_FOUND - system has LibHilbert +# LIBHILBERT_INCLUDE_DIRS - include directories for LibHilbert +# LIBHILBERT_LIBRARY_DIRS - library directories for LibHilbert +# LIBHILBERT_LIBRARIES - libraries for LibHilbert +# +#============================================================================= +# Copyright (C) 2010-2016 Pietro Incardona +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+#============================================================================= + + +if (LIBHILBERT_FOUND) + return() +endif() + +add_library(libhilbert INTERFACE IMPORTED) + +# Add libraries (static) +set(_libs "-L${LIBHILBERT_ROOT}/lib -llibhilbert") +set_property(TARGET libhilbert PROPERTY INTERFACE_LINK_LIBRARIES "${_libs}") + + +# Create LibHilbert test program +set(LIBHILBERT_TEST_LIB_CPP + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/libhilbert_test_lib.cpp") + + +file(WRITE ${LIBHILBERT_TEST_LIB_CPP} " +extern \"C\" +{ +#include \"hilbertKey.h\" +} + int main() +{ + //An integer to handle errors + int err; + + //Array to handle output + uint64_t nextCoord[2]; + + //Get the coordinates of the next cell + getIntCoordFromHKey(nextCoord, 4, 2, 0, &err); + + return 0; +} +") + +# Try to run test program (static linking) +try_run( + LIBHILBERT_TEST_LIB_EXITCODE + LIBHILBERT_TEST_LIB_COMPILED + ${CMAKE_CURRENT_BINARY_DIR} + ${LIBHILBERT_TEST_LIB_CPP} + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES:STRING=${LIBHILBERT_ROOT}/include" + "-DLINK_LIBRARIES:STRING=${LIBHILBERT_ROOT}/lib" + LINK_LIBRARIES libhilbert + COMPILE_OUTPUT_VARIABLE LIBHILBERT_TEST_LIB_COMPILE_OUTPUT + RUN_OUTPUT_VARIABLE LIBHILBERT_TEST_LIB_OUTPUT) + +if (LIBHILBERT_TEST_LIB_COMPILED AND LIBHILBERT_TEST_LIB_EXITCODE EQUAL 0) + message(STATUS "Test LibHilbert_TEST_RUNS static linking - Success") + set(LIBHILBERT_TEST_RUNS TRUE) + set(LIBHILBERT_FOUND TRUE) + set(LIBHILBERT_INCLUDE_DIRS ${LIBHILBERT_ROOT}/include) + set(LIBHILBERT_LIBRARY_DIRS ${LIBHILBERT_ROOT}/lib) + set(LIBHILBERT_LIBRARIES -llibhilbert) +else() + message(STATUS "Test LibHilbert_TEST_RUNS static linking - Failed") + set(LIBHILBERT_TEST_RUNS FALSE) +endif() + diff --git a/CMakeFiles/FindMetis.cmake b/CMakeFiles/FindMetis.cmake new file mode 100644 index 00000000..cab0b085 --- /dev/null +++ b/CMakeFiles/FindMetis.cmake @@ -0,0 +1,187 @@ +# -*- mode: cmake -*- +# +# METIS Find Module for Femus +# Shamelessly stolen from Amanzi open source code 
https://software.lanl.gov/ascem/trac +# +# Usage: +# Control the search through METIS_DIR or setting environment variable +# METIS_ROOT to the METIS installation prefix. +# +# This module does not search default paths! +# +# Following variables are set: +# METIS_FOUND (BOOL) Flag indicating if METIS was found +# METIS_INCLUDE_DIR (PATH) Path to the METIS include file +# METIS_INCLUDE_DIRS (LIST) List of all required include files +# METIS_LIBRARY_DIR (PATH) Path to the METIS library +# METIS_LIBRARY (FILE) METIS library +# METIS_LIBRARIES (LIST) List of all required METIS libraries +# +# ############################################################################# + +# Standard CMake modules see CMAKE_ROOT/Modules +include(FindPackageHandleStandardArgs) + +# Amanzi CMake functions see <root>/tools/cmake for source +#include(PrintVariable) + +if ( METIS_LIBRARIES AND METIS_INCLUDE_DIRS ) + + # Do nothing. Variables are set. No need to search again + +else(METIS_LIBRARIES AND METIS_INCLUDE_DIRS) + + # Cache variables + if(METIS_DIR) + set(METIS_DIR "${METIS_DIR}" CACHE PATH "Path to search for METIS include and library files") + endif() + + if(METIS_INCLUDE_DIR) + set(METIS_INCLUDE_DIR "${METIS_INCLUDE_DIR}" CACHE PATH "Path to search for METIS include files") + endif() + + if(METIS_LIBRARY_DIR) + set(METIS_LIBRARY_DIR "${METIS_LIBRARY_DIR}" CACHE PATH "Path to search for METIS library files") + endif() + + + # Search for include files + # Search order preference: + # (1) METIS_INCLUDE_DIR - check existence of path AND if the include files exist + # (2) METIS_DIR/<include> + # (3) Default CMake paths See cmake --html-help=out.html file for more information. 
+ # + set(metis_inc_names "metis.h") + + if (METIS_INCLUDE_DIR) + + if (EXISTS "${METIS_INCLUDE_DIR}") + + find_path(metis_test_include_path + NAMES ${metis_inc_names} + HINTS ${METIS_INCLUDE_DIR} + NO_DEFAULT_PATH) + if(NOT metis_test_include_path) + message(SEND_ERROR "Can not locate ${metis_inc_names} in ${METIS_INCLUDE_DIR}") + endif() + set(METIS_INCLUDE_DIR "${metis_test_include_path}") + + else() + message(SEND_ERROR "METIS_INCLUDE_DIR=${METIS_INCLUDE_DIR} does not exist") + set(METIS_INCLUDE_DIR "METIS_INCLUDE_DIR-NOTFOUND") + endif() + + else() + +# Metis sometimes puts the include files in a subdir called Lib + + set(metis_inc_suffixes "include" "Lib") + if(METIS_DIR) + + if (EXISTS "${METIS_DIR}" ) + + find_path(METIS_INCLUDE_DIR + NAMES ${metis_inc_names} + HINTS ${METIS_DIR} + PATH_SUFFIXES ${metis_inc_suffixes} + NO_DEFAULT_PATH) + + else() + message(SEND_ERROR "METIS_DIR=${METIS_DIR} does not exist") + set(METIS_INCLUDE_DIR "METIS_INCLUDE_DIR-NOTFOUND") + endif() + + + else() + + find_path(METIS_INCLUDE_DIR + NAMES ${metis_inc_names} + PATH_SUFFIXES ${metis_inc_suffixes}) + + endif() + + endif() + + + # Search for libraries + # Search order preference: + # (1) METIS_LIBRARY_DIR - check existence of path AND if the library file exists + # (2) METIS_DIR/<lib,Lib> + # (3) Default CMake paths See cmake --html-help=out.html file for more information. 
+ # + set(metis_lib_names "metis") + + + if (METIS_LIBRARY_DIR) + + if (EXISTS "${METIS_LIBRARY_DIR}") + + find_library(METIS_LIBRARY + NAMES ${metis_lib_names} + HINTS ${METIS_LIBRARY_DIR} + NO_DEFAULT_PATH) + else() + set(METIS_LIBRARY "METIS_LIBRARY-NOTFOUND") + endif() + + else() + + list(APPEND metis_lib_suffixes "lib" "Lib") + if(METIS_DIR) + + if (EXISTS "${METIS_DIR}" ) + + find_library(METIS_LIBRARY + NAMES ${metis_lib_names} + HINTS ${METIS_DIR} + PATH_SUFFIXES ${metis_lib_suffixes} + NO_DEFAULT_PATH) + + else() + set(METIS_LIBRARY "METIS_LIBRARY-NOTFOUND") + endif() + + + else() + + find_library(METIS_LIBRARY + NAMES ${metis_lib_names} + PATH_SUFFIXES ${metis_lib_suffixes}) + + endif() + + endif() + + if ( NOT METIS_LIBRARY ) + endif() + + + # Define prerequisite packages + set(METIS_INCLUDE_DIRS ${METIS_INCLUDE_DIR}) + set(METIS_LIBRARIES ${METIS_LIBRARY}) + + +endif(METIS_LIBRARIES AND METIS_INCLUDE_DIRS ) + +# Send useful message if everything is found +find_package_handle_standard_args(METIS DEFAULT_MSG + METIS_LIBRARIES + METIS_INCLUDE_DIRS) + +# find_package_handle_standard_args should set METIS_FOUND but it does not! 
+if ( METIS_LIBRARIES AND METIS_INCLUDE_DIRS) + set(METIS_FOUND TRUE) +else() + set(METIS_FOUND FALSE) +endif() + +# Define the version + +mark_as_advanced( + METIS_INCLUDE_DIR + METIS_INCLUDE_DIRS + METIS_LIBRARY + METIS_LIBRARIES + METIS_LIBRARY_DIR +) + diff --git a/CMakeFiles/FindPETSc.cmake b/CMakeFiles/FindPETSc.cmake new file mode 100644 index 00000000..ca71d4a6 --- /dev/null +++ b/CMakeFiles/FindPETSc.cmake @@ -0,0 +1,212 @@ +# - Try to find PETSc +# Once done this will define +# +# PETSC_FOUND - system has PETSc +# PETSC_INCLUDE_DIRS - include directories for PETSc +# PETSC_LIBRARY_DIRS - library directories for PETSc +# PETSC_LIBRARIES - libraries for PETSc +# PETSC_STATIC_LIBRARIES - libraries for PETSc (static linking, undefined if not required) +# PETSC_VERSION - version for PETSc +# PETSC_VERSION_MAJOR - First number in PETSC_VERSION +# PETSC_VERSION_MINOR - Second number in PETSC_VERSION +# PETSC_VERSION_SUBMINOR - Third number in PETSC_VERSION +# PETSC_INT_SIZE - sizeof(PetscInt) +# +#============================================================================= +# Copyright (C) 2010-2016 Garth N. Wells, Anders Logg and Johannes Ring +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +# Outline: +# 1. Get flags from PETSc-generated pkg-config file +# 2. Test compile and run program using shared library linking +# 3. If shared library linking fails, test with static library linking + +# Load pkg-config module (provided by CMake) +find_package(PkgConfig REQUIRED) + +# Find PETSc pkg-config file. Note: craypetsc_real is on Cray systems +set(ENV{PKG_CONFIG_PATH} "$ENV{CRAY_PETSC_PREFIX_DIR}/lib/pkgconfig:$ENV{PETSC_DIR}/$ENV{PETSC_ARCH}/lib/pkgconfig:$ENV{PETSC_DIR}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") +pkg_search_module(PETSC craypetsc_real PETSc) + +# Extract major, minor, etc from version string +if (PETSC_VERSION) + string(REPLACE "." 
";" VERSION_LIST ${PETSC_VERSION}) + list(GET VERSION_LIST 0 PETSC_VERSION_MAJOR) + list(GET VERSION_LIST 1 PETSC_VERSION_MINOR) + list(GET VERSION_LIST 2 PETSC_VERSION_SUBMINOR) +endif() + +# Configure PETSc IMPORT (this involves creating an 'imported' target +# and attaching 'properties') +if (PETSC_FOUND AND NOT TARGET PETSC::petsc) + add_library(PETSC::petsc INTERFACE IMPORTED) + + # Add include paths + set_property(TARGET PETSC::petsc PROPERTY + INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS}) + + # Add libraries + unset(_libs) + foreach (lib ${PETSC_LIBRARIES}) + find_library(LIB_${lib} NAMES ${lib} PATHS ${PETSC_LIBRARY_DIRS} NO_DEFAULT_PATH) + list(APPEND _libs ${LIB_${lib}}) + endforeach() + set_property(TARGET PETSC::petsc PROPERTY INTERFACE_LINK_LIBRARIES "${_libs}") +endif() + +# Configure PETSc 'static' IMPORT (this involves creating an +# 'imported' target and attaching 'properties') +if (PETSC_FOUND AND NOT TARGET PETSC::petsc_static) + add_library(PETSC::petsc_static INTERFACE IMPORTED) + + # Add libraries (static) + unset(_libs) + foreach (lib ${PETSC_STATIC_LIBRARIES}) + find_library(LIB_${lib} ${lib} HINTS ${PETSC_STATIC_LIBRARY_DIRS}) + list(APPEND _libs ${LIB_${lib}}) + endforeach() + set_property(TARGET PETSC::petsc_static PROPERTY INTERFACE_LINK_LIBRARIES "${_libs}") +endif() + +# Attempt to build and run PETSc test program +if (DOLFIN_SKIP_BUILD_TESTS) + + # Assume PETSc works + set(PETSC_TEST_RUNS TRUE) + +elseif (PETSC_FOUND) + + # Create PETSc test program + set(PETSC_TEST_LIB_CPP + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/petsc_test_lib.cpp") + file(WRITE ${PETSC_TEST_LIB_CPP} " +#include \"petscts.h\" +#include \"petsc.h\" +int main() +{ + PetscErrorCode ierr; + TS ts; + int argc = 0; + char** argv = NULL; + ierr = PetscInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);CHKERRQ(ierr); + ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); + ierr = TSSetFromOptions(ts);CHKERRQ(ierr); + ierr = TSDestroy(&ts);CHKERRQ(ierr); + ierr = 
PetscFinalize();CHKERRQ(ierr); + return 0; +} +") + + # Add MPI variables if MPI has been found + if (MPI_C_FOUND) + set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${MPI_C_INCLUDE_PATH}) + set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${MPI_C_LIBRARIES}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${MPI_C_COMPILE_FLAGS}") + endif() + + # Try to run test program (shared linking) + try_run( + PETSC_TEST_LIB_EXITCODE + PETSC_TEST_LIB_COMPILED + ${CMAKE_CURRENT_BINARY_DIR} + ${PETSC_TEST_LIB_CPP} + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}" + "-DLINK_LIBRARIES:STRING=${CMAKE_REQUIRED_LIBRARIES}" + LINK_LIBRARIES PETSC::petsc + COMPILE_OUTPUT_VARIABLE PETSC_TEST_LIB_COMPILE_OUTPUT + RUN_OUTPUT_VARIABLE PETSC_TEST_LIB_OUTPUT) + + # Check program output + if (PETSC_TEST_LIB_COMPILED AND PETSC_TEST_LIB_EXITCODE EQUAL 0) + + message(STATUS "Test PETSC_TEST_RUNS with shared library linking - Success") + set(PETSC_TEST_RUNS TRUE) + + # Static libraries not required, so unset + set_property(TARGET PETSC::petsc_static PROPERTY INTERFACE_LINK_LIBRARIES) + + else() + + message(STATUS "Test PETSC_TEST_RUNS with shared library linking - Failed") + + # Add MPI variables if MPI has been found + if (MPI_C_FOUND) + set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${MPI_C_INCLUDE_PATH}) + set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${MPI_C_LIBRARIES}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${MPI_C_COMPILE_FLAGS}") + endif() + + # Try to run test program (static linking) + try_run( + PETSC_TEST_LIB_EXITCODE + PETSC_TEST_LIB_COMPILED + ${CMAKE_CURRENT_BINARY_DIR} + ${PETSC_TEST_LIB_CPP} + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}" + "-DLINK_LIBRARIES:STRING=${CMAKE_REQUIRED_LIBRARIES}" + LINK_LIBRARIES PETSC::petsc PETSC::petsc_static + COMPILE_OUTPUT_VARIABLE PETSC_TEST_LIB_COMPILE_OUTPUT + RUN_OUTPUT_VARIABLE PETSC_TEST_LIB_OUTPUT) + + if (PETSC_TEST_LIB_COMPILED AND 
PETSC_TEST_LIB_EXITCODE EQUAL 0) + message(STATUS "Test PETSC_TEST_RUNS static linking - Success") + set(PETSC_TEST_RUNS TRUE) + else() + message(STATUS "Test PETSC_TEST_RUNS static linking - Failed") + set(PETSC_TEST_RUNS FALSE) + endif() + + endif() +endif() + +# Check sizeof(PetscInt) +if (PETSC_INCLUDE_DIRS) + include(CheckTypeSize) + set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${PETSC_INCLUDE_DIRS}) + set(CMAKE_EXTRA_INCLUDE_FILES petscsys.h) + check_type_size("PetscInt" PETSC_INT_SIZE) + + unset(CMAKE_EXTRA_INCLUDE_FILES) + unset(CMAKE_REQUIRED_INCLUDES) +endif() + +# Standard package handling +include(FindPackageHandleStandardArgs) +if (PETSC_FOUND) + find_package_handle_standard_args(PETSc + REQUIRED_VARS PETSC_FOUND PETSC_TEST_RUNS VERSION_VAR PETSC_VERSION + FAIL_MESSAGE "PETSc could not be configured.") +else() + find_package_handle_standard_args(PETSc + REQUIRED_VARS PETSC_FOUND + FAIL_MESSAGE "PETSc could not be found. Be sure to set PETSC_DIR.") +endif() + + diff --git a/CMakeFiles/FindParMetis.cmake b/CMakeFiles/FindParMetis.cmake new file mode 100644 index 00000000..8c7ca8b5 --- /dev/null +++ b/CMakeFiles/FindParMetis.cmake @@ -0,0 +1,164 @@ +# - Try to find ParMETIS +# Once done this will define +# +# PARMETIS_FOUND - system has ParMETIS +# PARMETIS_INCLUDE_DIRS - include directories for ParMETIS +# PARMETIS_LIBRARIES - libraries for ParMETIS +# +# Variables used by this module. 
They can change the default behaviour and +# need to be set before calling find_package: +# +# PARMETIS_DIR - Prefix directory of the ParMETIS installation +# PARMETIS_INCLUDE_DIR - Include directory of the ParMETIS installation +# (set only if different from ${PARMETIS_DIR}/include) +# PARMETIS_LIB_DIR - Library directory of the ParMETIS installation +# (set only if different from ${PARMETIS_DIR}/lib) +# PARMETIS_TEST_RUNS - Skip tests building and running a test +# executable linked against ParMETIS libraries +# PARMETIS_LIB_SUFFIX - Also search for non-standard library names with the +# given suffix appended + +#============================================================================= +# Copyright (C) 2010-2012 Garth N. Wells, Anders Logg, Johannes Ring +# and Florian Rathgeber. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +if(NOT PARMETIS_INCLUDE_DIR) + find_path(PARMETIS_INCLUDE_DIR parmetis.h + HINTS ${PARMETIS_INCLUDE_DIR} ENV PARMETIS_INCLUDE_DIR ${PARMETIS_DIR} ENV PARMETIS_DIR + PATH_SUFFIXES include + DOC "Directory where the ParMETIS header files are located" + ) +endif() + +if(NOT METIS_INCLUDE_DIR) + find_path(METIS_INCLUDE_DIR metis.h + HINTS ${METIS_INCLUDE_DIR} ENV METIS_INCLUDE_DIR ${METIS_DIR} ENV METIS_DIR + PATH_SUFFIXES include + DOC "Directory where the METIS header files are located" + ) +endif() + +if(PARMETIS_LIBRARIES) + set(PARMETIS_LIBRARY ${PARMETIS_LIBRARIES}) +endif() +if(NOT PARMETIS_LIBRARY) + find_library(PARMETIS_LIBRARY + NAMES parmetis parmetis${PARMETIS_LIB_SUFFIX} + HINTS ${PARMETIS_LIB_DIR} ENV PARMETIS_LIB_DIR ${PARMETIS_DIR} ENV PARMETIS_DIR + PATH_SUFFIXES lib + DOC "Directory where the ParMETIS library is located" + ) +endif() + +if(METIS_LIBRARIES) + set(METIS_LIBRARY ${METIS_LIBRARIES}) +endif() +if(NOT METIS_LIBRARY) + find_library(METIS_LIBRARY + NAMES metis metis${PARMETIS_LIB_SUFFIX} + HINTS ${PARMETIS_LIB_DIR} ENV PARMETIS_LIB_DIR ${PARMETIS_DIR} ENV PARMETIS_DIR + PATH_SUFFIXES lib + DOC "Directory where the METIS library is located" + ) +endif() + +# Get ParMETIS version +if(NOT PARMETIS_VERSION_STRING AND PARMETIS_INCLUDE_DIR AND EXISTS "${PARMETIS_INCLUDE_DIR}/parmetis.h") + set(version_pattern "^#define[\t 
]+PARMETIS_(MAJOR|MINOR)_VERSION[\t ]+([0-9\\.]+)$") + file(STRINGS "${PARMETIS_INCLUDE_DIR}/parmetis.h" parmetis_version REGEX ${version_pattern}) + + foreach(match ${parmetis_version}) + if(PARMETIS_VERSION_STRING) + set(PARMETIS_VERSION_STRING "${PARMETIS_VERSION_STRING}.") + endif() + string(REGEX REPLACE ${version_pattern} "${PARMETIS_VERSION_STRING}\\2" PARMETIS_VERSION_STRING ${match}) + set(PARMETIS_VERSION_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + endforeach() + unset(parmetis_version) + unset(version_pattern) +endif() + +# Try compiling and running test program +if(PARMETIS_INCLUDE_DIR AND METIS_INCLUDE_DIR AND + PARMETIS_LIBRARY AND METIS_LIBRARY) + + # Test requires MPI + find_package(MPI QUIET REQUIRED) + + # Set flags for building test program + set(CMAKE_REQUIRED_FLAGS "${MPI_C_COMPILE_FLAGS}") + # Ideally this would be used, but it unfortunately is not supported + #set(CMAKE_REQUIRED_LINKER_FLAGS "${MPI_C_LINK_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}") + set(CMAKE_REQUIRED_INCLUDES + ${PARMETIS_INCLUDE_DIR} ${METIS_INCLUDE_DIR} ${MPI_C_INCLUDE_PATH}) + set(CMAKE_REQUIRED_LIBRARIES + ${PARMETIS_LIBRARY} ${METIS_LIBRARY} ${MPI_C_LIBRARIES}) + + # Build and run test program + include(CheckCSourceRuns) + check_c_source_runs(" +#include \"mpi.h\" +#define METIS_EXPORT +#include \"parmetis.h\" +int main( int argc, char* argv[] ) +{ + // FIXME: Find a simple but sensible test for ParMETIS + MPI_Init( &argc, &argv ); + MPI_Finalize(); + return 0; +} +" PARMETIS_TEST_RUNS) + + unset(CMAKE_REQUIRED_FLAGS) + #unset(CMAKE_REQUIRED_LINKER_FLAGS) + unset(CMAKE_REQUIRED_INCLUDES) + unset(CMAKE_REQUIRED_LIBRARIES) +endif() + +# Standard package handling +include(FindPackageHandleStandardArgs) +if(CMAKE_VERSION VERSION_GREATER 2.8.2) + find_package_handle_standard_args(ParMETIS + REQUIRED_VARS PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR PARMETIS_TEST_RUNS + VERSION_VAR PARMETIS_VERSION_STRING) +else() + find_package_handle_standard_args(ParMETIS + REQUIRED_VARS PARMETIS_LIBRARY 
PARMETIS_INCLUDE_DIR PARMETIS_TEST_RUNS) +endif() + +if(PARMETIS_FOUND) + set(PARMETIS_LIBRARIES ${PARMETIS_LIBRARY} ${METIS_LIBRARY}) + set(PARMETIS_INCLUDE_DIRS ${PARMETIS_INCLUDE_DIR} ${METIS_INCLUDE_DIR}) +else() + unset(METIS_LIBRARY CACHE) + unset(METIS_INCLUDE_DIR CACHE) +endif() + +mark_as_advanced(PARMETIS_INCLUDE_DIR METIS_INCLUDE_DIR + PARMETIS_LIBRARY METIS_LIBRARY) + diff --git a/example/Numerics/Vortex_in_cell/main_vic_petsc_double.cpp b/example/Numerics/Vortex_in_cell/main_vic_petsc_double.cpp new file mode 100644 index 00000000..733ce842 --- /dev/null +++ b/example/Numerics/Vortex_in_cell/main_vic_petsc_double.cpp @@ -0,0 +1,1287 @@ +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Vortex in Cell 3D ring with ringlets # {#vic_ringlets} + * + * In this example we solve the Navier-Stokes equation in the vortex formulation in 3D + * for an incompressible fluid. (bold symbols are vectorial quantity) + * + * ## Numerical method ## {#num_vic_mt} + * + * In this code we solve the Navier-stokes equation for incompressible fluid in the + * vorticity formulation. We first recall the Navier-stokes equation in vorticity formulation + * + * \f$ \nabla \times \boldsymbol u = - \boldsymbol w \f$ + * + * \f$ \frac{\displaystyle D \boldsymbol w}{\displaystyle dt} = ( \boldsymbol w \cdot \vec \nabla) \boldsymbol u + \nu \nabla^{2} \boldsymbol w \f$ (5) + * + * Where \f$w\f$ is the vorticity and \f$u\f$ is the velocity of the fluid. + * With high Reynold number \f$Re = \frac{uL}{\nu}\f$ and the term \f$uL\f$ significantly + * smaller than the reynold number we have that \f$\nu\f$ is small and the term \f$\nu \nabla^{2} w\f$ is + * negligible. The algorithm can be expressed with the following pseudo code. 
+ * + * \verbatim + + 1) Initialize the vortex ring on grid + 2) Do a Helmholtz-Hodge projection to make the vorticity divergence free + 3) Initialize particles on the same position as the grid or remesh + + while (t < t_end) do + 4) Interpolate vorticity from the particles to mesh + 5) calculate velocity u from the vorticity w + 6) calculate the right-hand-side on grid and interpolate on particles + 7) interpolate velocity u to particles + 8) move particles according to the velocity + 9) interpolate the vorticity into mesh and reinitialize the particles + in a grid like position + end while + + \endverbatim + * + * This pseudo code shows how to solve the equation above using Euler integration. + * In case of Runge-Kutta of order two the pseudo code changes into + * + * + * \verbatim + + 1) Initialize the vortex ring on grid + 2) Do a Helmholtz-Hodge projection to make the vorticity divergence free + 3) Initialize particles on the same position as the grid or remesh + + while (t < t_end) do + 4) Interpolate vorticity from the particles to mesh + 5) calculate velocity u from the vorticity w + 6) calculate the right-hand-side on grid and interpolate on particles + 7) interpolate velocity u to particles + 8) move particles according to the velocity and save the old position in x_old + + 9) Interpolate vorticity on mesh on the particles + 10) calculate velocity u from the vorticity w + 11) calculate the right-hand-side on grid and interpolate on particles + 12) interpolate velocity u to particles + 13) move particles according to the velocity starting from x_old + 14) interpolate the vorticity into mesh and reinitialize the particles + in a grid like position + end while + + \endverbatim + * + * In the following we explain how each step is implemented in the code + * + * ## Inclusion ## {#num_vic_inc} + * + * This example code needs several components. First, because it is a particle + * mesh example we have to activate **grid_dist_id.hpp** and **vector_dist_id.hpp**. 
+ * Because we use a finite-difference scheme and linear-algebra to calculate the + * velocity out of the vorticity, we have to include **FDScheme.hpp** to produce + * from the finite difference scheme a matrix that represents the linear-system + * to solve. **SparseMatrix.hpp** is the Sparse-Matrix that will contain the linear + * system to solve in order to get the velocity out of the vorticity. + * **Vector.hpp** is the data-structure that contains the solution of the + * linear system. **petsc_solver.hpp** is the library to use in order to invert the linear system. + * Because we have to interpolate between particles and grid we have to include + * **interpolate.hpp**; as interpolation kernel we use the mp4, so we include the + * **mp4_kernel.hpp** + * + * For convenience we also define the particles type and the grid type and some + * convenient constants + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp inclusion + * + * + * + */ + +//#define SE_CLASS1 +//#define PRINT_STACKTRACE + +//! 
\cond [inclusion] \endcond + +#include "interpolation/interpolation.hpp" +#include "Grid/grid_dist_id.hpp" +#include "Vector/vector_dist.hpp" +#include "Matrix/SparseMatrix.hpp" +#include "Vector/Vector.hpp" +#include "FiniteDifference/FDScheme.hpp" +#include "Solvers/petsc_solver.hpp" +#include "interpolation/mp4_kernel.hpp" +#include "Solvers/petsc_solver_AMG_report.hpp" + +constexpr int x = 0; +constexpr int y = 1; +constexpr int z = 2; + +// The type of the grids +typedef grid_dist_id<3,double,aggregate<double[3]>> grid_type; + +// The type of the particles +typedef vector_dist<3,double,aggregate<double[3],double[3],double[3],double[3],double[3]>> particles_type; + +// radius of the torus +double ringr1 = 1.0; +// radius of the core of the torus +double sigma = 1.0/3.523; +// Reynold number +double tgtre = 7500.0; +// Noise factor for the ring vorticity on z +double ringnz = 0.01; + +// Kinematic viscosity +double nu = 1.0/tgtre; + +// Time step +double dt = 0.025; + +// All the properties by index +constexpr unsigned int vorticity = 0; +constexpr unsigned int velocity = 0; +constexpr unsigned int p_vel = 1; +constexpr int rhs_part = 2; +constexpr unsigned int old_vort = 3; +constexpr unsigned int old_pos = 4; + +//! \cond [inclusion] \endcond + +template<typename grid> void calc_and_print_max_div_and_int(grid & g_vort) +{ + //! 
\cond [sanity_int_div] \endcond + + g_vort.template ghost_get<vorticity>(); + auto it5 = g_vort.getDomainIterator(); + + double max_vort = 0.0; + + double int_vort[3] = {0.0,0.0,0.0}; + + while (it5.isNext()) + { + auto key = it5.get(); + + double tmp; + + tmp = 1.0f/g_vort.spacing(x)/2.0f*(g_vort.template get<vorticity>(key.move(x,1))[x] - g_vort.template get<vorticity>(key.move(x,-1))[x] ) + + 1.0f/g_vort.spacing(y)/2.0f*(g_vort.template get<vorticity>(key.move(y,1))[y] - g_vort.template get<vorticity>(key.move(y,-1))[y] ) + + 1.0f/g_vort.spacing(z)/2.0f*(g_vort.template get<vorticity>(key.move(z,1))[z] - g_vort.template get<vorticity>(key.move(z,-1))[z] ); + + int_vort[x] += g_vort.template get<vorticity>(key)[x]; + int_vort[y] += g_vort.template get<vorticity>(key)[y]; + int_vort[z] += g_vort.template get<vorticity>(key)[z]; + + if (tmp > max_vort) + max_vort = tmp; + + ++it5; + } + + Vcluster & v_cl = create_vcluster(); + v_cl.max(max_vort); + v_cl.sum(int_vort[0]); + v_cl.sum(int_vort[1]); + v_cl.sum(int_vort[2]); + v_cl.execute(); + + if (v_cl.getProcessUnitID() == 0) + {std::cout << "Max div for vorticity " << max_vort << " Integral: " << int_vort[0] << " " << int_vort[1] << " " << int_vort[2] << std::endl;} + + //! \cond [sanity_int_div] \endcond +} + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 1: Initialization of the vortex ring # {#vic_ring_init} + * + * In this function we initialize the vortex ring. The vortex ring is + * initialized accordingly to these formula. 
+ * + * \f$ w(t = 0) = \frac{\Gamma}{\pi \sigma^{2}} e^{-(s/ \sigma)^2} \f$ + * + * \f$ s^2 = (z-z_c)^{2} + ((x-x_c)^2 + (y-y_c)^2 - R^2) \f$ + * + * \f$ \Gamma = \nu Re \f$ + * + * With this initialization the vortex ring look like the one in figure + * + * \image html int_vortex_arrow_small.jpg "Vortex ring initialization the arrow indicate the direction where the vortex point while the colors indicate the magnitude from blue (low) to red (high)" + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp init_vort + * + * + * + */ + +//! \cond [init_vort] \endcond + +/* + * gr is the grid where we are initializing the vortex ring + * domain is the simulation domain + * + */ +void init_ring(grid_type & gr, const Box<3,double> & domain) +{ + // To add some noise to the vortex ring we create two random + // vector + constexpr int nk = 32; + + double ak[nk]; + double bk[nk]; + + for (size_t i = 0 ; i < nk ; i++) + { + ak[i] = rand()/RAND_MAX; + bk[i] = rand()/RAND_MAX; + } + + // We calculate the circuitation gamma + double gamma = nu * tgtre; + double rinv2 = 1.0f/(sigma*sigma); + double max_vorticity = gamma*rinv2/M_PI; + + // We go through the grid to initialize the vortex + auto it = gr.getDomainIterator(); + + while (it.isNext()) + { + auto key_d = it.get(); + auto key = it.getGKey(key_d); + + double tx = (key.get(x)-2)*gr.spacing(x) + domain.getLow(x); + double ty = (key.get(y)-2)*gr.spacing(y) + domain.getLow(y); + double tz = (key.get(z)-2)*gr.spacing(z) + domain.getLow(z); + double theta1 = atan2((ty-2.5f),(tz-2.5f)); + + + + double noise = 0.0f; + // for (int kk=1 ; kk < nk; kk++) + // noise = noise + sin(kk*(theta1+2.0f*M_PI*ak[kk])) + cos(kk*(theta1+2.0f*M_PI*bk[kk])); + + double rad1r = sqrt((ty-2.5f)*(ty-2.5f) + (tz-2.5f)*(tz-2.5f)) - ringr1*(1.0f + ringnz * noise); + double rad1t = tx - 1.0f; + double rad1sq = rad1r*rad1r + rad1t*rad1t; + double radstr = -exp(-rad1sq*rinv2)*rinv2*gamma/M_PI; + gr.template get<vorticity>(key_d)[x] = 0.0f; + gr.template 
get<vorticity>(key_d)[y] = -radstr * cos(theta1); + gr.template get<vorticity>(key_d)[z] = radstr * sin(theta1); + + + ++it; + } +} + +//! \cond [init_vort] \endcond + +//! \cond [poisson_syseq] \endcond + +// Specification of the poisson equation for the helmotz-hodge projection +// 3D (dims = 3). The field is a scalar value (nvar = 1), bournary are periodic +// type of the the space is double. Final grid that will store \phi, the result (grid_dist_id<.....>) +// The other indicate which kind of Matrix to use to construct the linear system and +// which kind of vector to construct for the right hand side. Here we use a PETSC Sparse Matrix +// and PETSC vector. NORMAL_GRID indicate that is a standard grid (non-staggered) +struct poisson_nn_helm +{ + static const unsigned int dims = 3; + static const unsigned int nvar = 1; + static const bool boundary[]; + typedef double stype; + typedef grid_dist_id<3,double,aggregate<double>> b_grid; + typedef SparseMatrix<double,int,PETSC_BASE> SparseMatrix_type; + typedef Vector<double,PETSC_BASE> Vector_type; + static const int grid_type = NORMAL_GRID; +}; + +const bool poisson_nn_helm::boundary[] = {PERIODIC,PERIODIC,PERIODIC}; + +//! \cond [poisson_syseq] \endcond + + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 2: Helmotz-hodge projection # {#vic_hlm_proj} + * + * The Helnotz hodge projection is required in order to make the vorticity divergent + * free. The Helmotz-holde projection work in this way. A field can be divided into + * a curl-free part and a divergent-free part. + * + * \f$ w = w_{rot} + w_{div} \f$ + * + * with + * + * \f$ \vec \nabla \times w_{rot} = 0 \f$ + * + * \f$ \nabla \cdot w_{div} = 0 \f$ + * + * To have a vorticity divergent free we have to get the component (3) \f$w_{div} = w - w_{rot}\f$. 
+ *
+ * In particular it holds
+ *
+ * \f$ \nabla \cdot w = \nabla \cdot w_{rot} \f$
+ *
+ * Because \f$ \vec \nabla \times w_{rot} = 0 \f$ we can introduce a field \f$ \psi \f$
+ * such that
+ *
+ * (2) \f$ w_{rot} = \vec \nabla \psi \f$
+ *
+ * Taking \f$ \nabla \cdot \vec \nabla \psi \f$ we obtain
+ *
+ * \f$ \nabla \cdot \vec \nabla \psi = \nabla^{2} \psi = \nabla \cdot w_{rot} = \nabla \cdot w \f$
+ *
+ * so we arrive at this equation
+ *
+ * (1) \f$ \nabla^{2} \psi = \nabla \cdot w \f$
+ *
+ * Solving the equation for \f$ \psi \f$ we can obtain \f$ w_{rot} \f$ taking the gradient of \f$ \psi \f$,
+ * and finally correct \f$ w \f$ obtaining \f$ w_{div} \f$
+ *
+ * The **helmotz_hodge_projection** function does this correction to the vorticity
+ *
+ * In particular it solves equation (1), calculates \f$ w_{rot} \f$
+ * using (2) and corrects the vorticity using (3)
+ *
+ *
+ * ## Poisson equation ##
+ *
+ * To solve a Poisson equation on a grid using finite-differences, we need to create
+ * an object that carries information about the system of equations
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp poisson_syseq
+ *
+ * Once this object is created we can define the equation we are trying to solve.
+ * In particular the code below defines the left-hand-side of the equation \f$ \nabla^{2} \psi \f$
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp poisson_obj_eq
+ *
+ * Before constructing the linear system we also calculate the divergence of the
+ * vorticity \f$ \nabla \cdot w \f$ that will be the right-hand-side
+ * of the equation
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp calc_div_vort
+ *
+ * Finally we can create the object FDScheme using the object **poisson_nn_helm**
+ * as template variable. In addition to the constructor we have to specify the maximum extension of the stencil, the domain and the
+ * grid that will store the result. At this point we can impose an equation to construct
+ * our SparseMatrix. In this example we are imposing the Poisson equation with right-hand
+ * side equal to the divergence of the vorticity (note: to avoid creating another field we use
+ * \f$ \psi \f$ to temporarily store the divergence of the vorticity). Imposing the
+ * equations produces an invertible SparseMatrix **A** and a right-hand-side Vector **b**.
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp create_fdscheme
+ *
+ * Because we need \f$ x = A^{-1}b \f$, we have to invert and solve a linear system.
+ * In this case we use the Conjugate-Gradient method, an iterative solver. Such a method
+ * is controlled by two parameters. One is the tolerance that determines when the
+ * method has converged; the second one is the maximum number of iterations, to avoid that
+ * the method goes into an infinite loop. After we set the parameters of the solver we can get
+ * the solution **x**. Finally we copy back the solution **x** into the grid \f$ \psi \f$.
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp solve_petsc
+ *
+ * ### Note ###
+ *
+ * Because we are solving the Poisson equation with periodic boundary conditions the Matrix has
+ * determinant equal to zero. This means that \f$ \psi \f$ has no unique solution (if it has one at all).
+ * In order to recover one, we have to ensure that the integral of the right-hand side, or vorticity,
+ * is zero. (In our case it is.) We have to ensure that across time the integral of the
+ * vorticity is conserved. (In our case it is: if we consider \f$ \nu = 0 \f$ and \f$
+ * \nabla \cdot w = 0 \f$ we can rewrite (5) in a conservative way \f$ \frac{Dw}{dt} = div(w \otimes v) \f$ ).
+ * It is also good to notice that the solution you get is the one with \f$ \int w = 0 \f$
+ *
+ *
+ *
+ * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp solve_petsc
+ *
+ * ## Correction ## {#vort_correction}
+ *
+ * After we get our solution for \f$ \psi \f$ we can calculate the correction of the vorticity
+ * by taking the gradient of \f$ \psi \f$.
+ * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp vort_correction + * + * We also do a sanity check and we control that the vorticity remain + * divergent-free. Getting the maximum value of the divergence and printing out + * its value + * + * + */ + +/* + * gr vorticity grid where we apply the correction + * domain simulation domain + * + */ +void helmotz_hodge_projection(grid_type & gr, const Box<3,double> & domain, petsc_solver<double> & solver, petsc_solver<double>::return_type & x_ , bool init) +{ + /////////////////////////////////////////////////////////////// + + //! \cond [poisson_obj_eq] \endcond + + // Convenient constant + constexpr unsigned int phi = 0; + + // We define a field phi_f + typedef Field<phi,poisson_nn_helm> phi_f; + + // We assemble the poisson equation doing the + // poisson of the Laplacian of phi using Laplacian + // central scheme (where the both derivative use central scheme, not + // forward composed backward like usually) + typedef Lap<phi_f,poisson_nn_helm,CENTRAL_SYM> poisson; + + //! \cond [poisson_obj_eq] \endcond + + //! \cond [calc_div_vort] \endcond + + // ghost get + gr.template ghost_get<vorticity>(); + + // ghost size of the psi function + Ghost<3,long int> g(2); + + // Here we create a distributed grid to store the result of the helmotz projection + grid_dist_id<3,double,aggregate<double>> psi(gr.getDecomposition(),gr.getGridInfo().getSize(),g); + + // Calculate the divergence of the vortex field + auto it = gr.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + psi.template get<phi>(key) = 1.0f/gr.spacing(x)/2.0f*(gr.template get<vorticity>(key.move(x,1))[x] - gr.template get<vorticity>(key.move(x,-1))[x] ) + + 1.0f/gr.spacing(y)/2.0f*(gr.template get<vorticity>(key.move(y,1))[y] - gr.template get<vorticity>(key.move(y,-1))[y] ) + + 1.0f/gr.spacing(z)/2.0f*(gr.template get<vorticity>(key.move(z,1))[z] - gr.template get<vorticity>(key.move(z,-1))[z] ); + + ++it; + } + + //! 
\cond [calc_div_vort] \endcond + + + calc_and_print_max_div_and_int(gr); + + + //! \cond [create_fdscheme] \endcond + + // In order to create a matrix that represent the poisson equation we have to indicate + // we have to indicate the maximum extension of the stencil and we we need an extra layer + // of points in case we have to impose particular boundary conditions. + Ghost<3,long int> stencil_max(2); + + // Here we define our Finite difference disctetization scheme object + FDScheme<poisson_nn_helm> fd(stencil_max, domain, psi); + + // impose the poisson equation, using right hand side psi on the full domain (psi.getDomainIterator) + // the template paramereter instead define which property of phi carry the righ-hand-side + // in this case phi has only one property, so the property 0 carry the right hand side + fd.template impose_dit<0>(poisson(),psi,psi.getDomainIterator()); + + //! \cond [create_fdscheme] \endcond + + //! \cond [solve_petsc] \endcond + + timer tm_solve; + if (init == true) + { + // Set the conjugate-gradient as method to solve the linear solver + solver.setSolver(KSPBCGS); + + // Set the absolute tolerance to determine that the method is converged + solver.setAbsTol(0.001); + + // Set the maximum number of iterations + solver.setMaxIter(500); + + solver.setPreconditioner(PCHYPRE_BOOMERAMG); + solver.setPreconditionerAMG_nl(6); + solver.setPreconditionerAMG_maxit(1); + solver.setPreconditionerAMG_relax("SOR/Jacobi"); + solver.setPreconditionerAMG_cycleType("V",0,4); + solver.setPreconditionerAMG_coarsenNodalType(0); + solver.setPreconditionerAMG_coarsen("HMIS"); + + tm_solve.start(); + + // Give to the solver A and b, return x, the solution + solver.solve(fd.getA(),x_,fd.getB()); + + tm_solve.stop(); + } + else + { + tm_solve.start(); + solver.solve(x_,fd.getB()); + tm_solve.stop(); + } + + // copy the solution x to the grid psi + fd.template copy<phi>(x_,psi); + + //! \cond [solve_petsc] \endcond + + //! 
\cond [vort_correction] \endcond + + psi.template ghost_get<phi>(); + + // Correct the vorticity to make it divergence free + + auto it2 = gr.getDomainIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + gr.template get<vorticity>(key)[x] -= 1.0f/2.0f/psi.spacing(x)*(psi.template get<phi>(key.move(x,1))-psi.template get<phi>(key.move(x,-1))); + gr.template get<vorticity>(key)[y] -= 1.0f/2.0f/psi.spacing(y)*(psi.template get<phi>(key.move(y,1))-psi.template get<phi>(key.move(y,-1))); + gr.template get<vorticity>(key)[z] -= 1.0f/2.0f/psi.spacing(z)*(psi.template get<phi>(key.move(z,1))-psi.template get<phi>(key.move(z,-1))); + + ++it2; + } + + //! \cond [vort_correction] \endcond + + calc_and_print_max_div_and_int(gr); +} + + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 3: Remeshing vorticity # {#vic_remesh_vort} + * + * After that we initialized the vorticity on the grid, we initialize the particles + * in a grid like position and we interpolate the vorticity on particles. Because + * of the particles position being in a grid-like position and the symmetry of the + * interpolation kernels, the re-mesh step simply reduce to initialize the particle + * in a grid like position and assign the property vorticity of the particles equal to the + * grid vorticity. + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp remesh_part + * + */ + +//! \cond [remesh_part] \endcond + +void remesh(particles_type & vd, grid_type & gr,Box<3,double> & domain) +{ + // Remove all particles because we reinitialize in a grid like position + vd.clear(); + + // Get a grid iterator + auto git = vd.getGridIterator(gr.getGridInfo().getSize()); + + // For each grid point + while (git.isNext()) + { + // Get the grid point in global coordinates (key). 
p is in local coordinates + auto p = git.get(); + auto key = git.get_dist(); + + // Add the particle + vd.add(); + + // Assign the position to the particle in a grid like way + vd.getLastPos()[x] = gr.spacing(x)*p.get(x) + domain.getLow(x); + vd.getLastPos()[y] = gr.spacing(y)*p.get(y) + domain.getLow(y); + vd.getLastPos()[z] = gr.spacing(z)*p.get(z) + domain.getLow(z); + + // Initialize the vorticity + vd.template getLastProp<vorticity>()[x] = gr.template get<vorticity>(key)[x]; + vd.template getLastProp<vorticity>()[y] = gr.template get<vorticity>(key)[y]; + vd.template getLastProp<vorticity>()[z] = gr.template get<vorticity>(key)[z]; + + // next grid point + ++git; + } + + // redistribute the particles + vd.map(); +} + +//! \cond [remesh_part] \endcond + + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 5: Compute velocity from vorticity # {#vic_vel_from_vort} + * + * Computing the velocity from vorticity is done in the following way. Given + * + * \f$ \vec \nabla \times u = -w \f$ + * + * We intrododuce the stream line function defined as + * + * \f$ \nabla \times \phi = u \f$ (7) + * + * \f$ \nabla \cdot \phi = 0 \f$ + * + * We obtain + * + * \f$ \nabla \times \nabla \times \phi = -w = \vec \nabla (\nabla \cdot \phi) - \nabla^{2} \phi \f$ + * + * Because the divergence of \f$ \phi \f$ is defined to be zero we have + * + * \f$ \nabla^{2} \phi = w \f$ + * + * The velocity can be recovered by the equation (7) + * + * Putting into code what explained before, we again generate a poisson + * object + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp poisson_obj_eq + * + * In order to calculate the velocity out of the vorticity, we solve a poisson + * equation like we did in helmotz-projection equation, but we do it for each + * component \f$ i \f$ of the vorticity. Qnce we have the solution in **psi_s** + * we copy the result back into the grid **gr_ps**. 
We than calculate the + * quality of the solution printing the norm infinity of the residual and + * finally we save in the grid vector vield **phi_v** the compinent \f$ i \f$ + * (Copy from phi_s to phi_v is necessary because in phi_s is not a grid + * and cannot be used as a grid like object) + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp solve_poisson_comp + * + * We save the component \f$ i \f$ of \f$ \phi \f$ into **phi_v** + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp copy_to_phi_v + * + * Once we filled phi_v we can implement (7) and calculate the curl of **phi_v** + * to recover the velocity v + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp curl_phi_v + * + * + */ + +void comp_vel(Box<3,double> & domain, grid_type & g_vort,grid_type & g_vel, petsc_solver<double>::return_type (& phi_s)[3],petsc_solver<double> & solver) +{ + //! \cond [poisson_obj_eq] \endcond + + // Convenient constant + constexpr unsigned int phi = 0; + + // We define a field phi_f + typedef Field<phi,poisson_nn_helm> phi_f; + + // We assemble the poisson equation doing the + // poisson of the Laplacian of phi using Laplacian + // central scheme (where the both derivative use central scheme, not + // forward composed backward like usually) + typedef Lap<phi_f,poisson_nn_helm,CENTRAL_SYM> poisson; + + //! \cond [poisson_obj_eq] \endcond + + // Maximum size of the stencil + Ghost<3,long int> g(2); + + // Here we create a distributed grid to store the result + grid_dist_id<3,double,aggregate<double>> gr_ps(g_vort.getDecomposition(),g_vort.getGridInfo().getSize(),g); + grid_dist_id<3,double,aggregate<double[3]>> phi_v(g_vort.getDecomposition(),g_vort.getGridInfo().getSize(),g); + + // We calculate and print the maximum divergence of the vorticity + // and the integral of it + calc_and_print_max_div_and_int(g_vort); + + // For each component solve a poisson + for (size_t i = 0 ; i < 3 ; i++) + { + //! 
\cond [solve_poisson_comp] \endcond + + // Copy the vorticity component i in gr_ps + auto it2 = gr_ps.getDomainIterator(); + + // calculate the velocity from the curl of phi + while (it2.isNext()) + { + auto key = it2.get(); + + // copy + gr_ps.get<vorticity>(key) = g_vort.template get<vorticity>(key)[i]; + + ++it2; + } + + // Maximum size of the stencil + Ghost<3,long int> stencil_max(2); + + // Finite difference scheme + FDScheme<poisson_nn_helm> fd(stencil_max, domain, gr_ps); + + // impose the poisson equation using gr_ps = vorticity for the right-hand-side (on the full domain) + fd.template impose_dit<phi>(poisson(),gr_ps,gr_ps.getDomainIterator()); + + solver.setAbsTol(0.01); + + // Get the vector that represent the right-hand-side + Vector<double,PETSC_BASE> & b = fd.getB(); + + timer tm_solve; + tm_solve.start(); + + // Give to the solver A and b in this case we are giving + // an intitial guess phi_s calculated in the previous + // time step + solver.solve(phi_s[i],b); + + tm_solve.stop(); + + // Calculate the residual + + solError serr; + serr = solver.get_residual_error(phi_s[i],b); + + Vcluster & v_cl = create_vcluster(); + if (v_cl.getProcessUnitID() == 0) + {std::cout << "Solved component " << i << " Error: " << serr.err_inf << std::endl;} + + // copy the solution to grid + fd.template copy<phi>(phi_s[i],gr_ps); + + //! \cond [solve_poisson_comp] \endcond + + //! \cond [copy_to_phi_v] \endcond + + auto it3 = gr_ps.getDomainIterator(); + + // calculate the velocity from the curl of phi + while (it3.isNext()) + { + auto key = it3.get(); + + phi_v.get<velocity>(key)[i] = gr_ps.get<phi>(key); + + ++it3; + } + + //! \cond [copy_to_phi_v] \endcond + } + + //! 
\cond [curl_phi_v] \endcond + + phi_v.ghost_get<phi>(); + + double inv_dx = 0.5f / g_vort.spacing(0); + double inv_dy = 0.5f / g_vort.spacing(1); + double inv_dz = 0.5f / g_vort.spacing(2); + + auto it3 = phi_v.getDomainIterator(); + + // calculate the velocity from the curl of phi + while (it3.isNext()) + { + auto key = it3.get(); + + double phi_xy = (phi_v.get<phi>(key.move(y,1))[x] - phi_v.get<phi>(key.move(y,-1))[x])*inv_dy; + double phi_xz = (phi_v.get<phi>(key.move(z,1))[x] - phi_v.get<phi>(key.move(z,-1))[x])*inv_dz; + double phi_yx = (phi_v.get<phi>(key.move(x,1))[y] - phi_v.get<phi>(key.move(x,-1))[y])*inv_dx; + double phi_yz = (phi_v.get<phi>(key.move(z,1))[y] - phi_v.get<phi>(key.move(z,-1))[y])*inv_dz; + double phi_zx = (phi_v.get<phi>(key.move(x,1))[z] - phi_v.get<phi>(key.move(x,-1))[z])*inv_dx; + double phi_zy = (phi_v.get<phi>(key.move(y,1))[z] - phi_v.get<phi>(key.move(y,-1))[z])*inv_dy; + + g_vel.template get<velocity>(key)[x] = phi_zy - phi_yz; + g_vel.template get<velocity>(key)[y] = phi_xz - phi_zx; + g_vel.template get<velocity>(key)[z] = phi_yx - phi_xy; + + ++it3; + } + + //! \cond [curl_phi_v] \endcond +} + + +template<unsigned int prp> void set_zero(grid_type & gr) +{ + auto it = gr.getDomainGhostIterator(); + + // calculate the velocity from the curl of phi + while (it.isNext()) + { + auto key = it.get(); + + gr.template get<prp>(key)[0] = 0.0; + gr.template get<prp>(key)[1] = 0.0; + gr.template get<prp>(key)[2] = 0.0; + + ++it; + } +} + + +template<unsigned int prp> void set_zero(particles_type & vd) +{ + auto it = vd.getDomainIterator(); + + // calculate the velocity from the curl of phi + while (it.isNext()) + { + auto key = it.get(); + + vd.template getProp<prp>(key)[0] = 0.0; + vd.template getProp<prp>(key)[1] = 0.0; + vd.template getProp<prp>(key)[2] = 0.0; + + ++it; + } +} + +/*! 
\page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 6: Compute right hand side # {#vic_rhs_calc} + * + * Computing the right hand side is performed calculating the term + * \f$ (w \cdot \nabla) u \f$. For the nabla operator we use second + * order finite difference central scheme. The commented part is the + * term \f$ \nu \nabla^{2} w \f$ that we said to neglect + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp calc_rhs + * + */ + +//! \cond [calc_rhs] \endcond + +// Calculate the right hand side of the vorticity formulation +template<typename grid> void calc_rhs(grid & g_vort, grid & g_vel, grid & g_dwp) +{ + // usefull constant + constexpr int rhs = 0; + + // calculate several pre-factors for the stencil finite + // difference + double fac1 = 2.0f*nu/(g_vort.spacing(0)); + double fac2 = 2.0f*nu/(g_vort.spacing(1)); + double fac3 = 2.0f*nu/(g_vort.spacing(2)); + + double fac4 = 0.5f/(g_vort.spacing(0)); + double fac5 = 0.5f/(g_vort.spacing(1)); + double fac6 = 0.5f/(g_vort.spacing(2)); + + auto it = g_dwp.getDomainIterator(); + + g_vort.template ghost_get<vorticity>(); + + while (it.isNext()) + { + auto key = it.get(); + + g_dwp.template get<rhs>(key)[x] = /*fac1*(g_vort.template get<vorticity>(key.move(x,1))[x]+g_vort.template get<vorticity>(key.move(x,-1))[x])+ + fac2*(g_vort.template get<vorticity>(key.move(y,1))[x]+g_vort.template get<vorticity>(key.move(y,-1))[x])+ + fac3*(g_vort.template get<vorticity>(key.move(z,1))[x]+g_vort.template get<vorticity>(key.move(z,-1))[x])- + 2.0f*(fac1+fac2+fac3)*g_vort.template get<vorticity>(key)[x]+*/ + fac4*g_vort.template get<vorticity>(key)[x]* + (g_vel.template get<velocity>(key.move(x,1))[x] - g_vel.template get<velocity>(key.move(x,-1))[x])+ + fac5*g_vort.template get<vorticity>(key)[y]* + (g_vel.template get<velocity>(key.move(y,1))[x] - g_vel.template get<velocity>(key.move(y,-1))[x])+ + fac6*g_vort.template get<vorticity>(key)[z]* + (g_vel.template get<velocity>(key.move(z,1))[x] - g_vel.template 
get<velocity>(key.move(z,-1))[x]); + + g_dwp.template get<rhs>(key)[y] = /*fac1*(g_vort.template get<vorticity>(key.move(x,1))[y]+g_vort.template get<vorticity>(key.move(x,-1))[y])+ + fac2*(g_vort.template get<vorticity>(key.move(y,1))[y]+g_vort.template get<vorticity>(key.move(y,-1))[y])+ + fac3*(g_vort.template get<vorticity>(key.move(z,1))[y]+g_vort.template get<vorticity>(key.move(z,-1))[y])- + 2.0f*(fac1+fac2+fac3)*g_vort.template get<vorticity>(key)[y]+*/ + fac4*g_vort.template get<vorticity>(key)[x]* + (g_vel.template get<velocity>(key.move(x,1))[y] - g_vel.template get<velocity>(key.move(x,-1))[y])+ + fac5*g_vort.template get<vorticity>(key)[y]* + (g_vel.template get<velocity>(key.move(y,1))[y] - g_vel.template get<velocity>(key.move(y,-1))[y])+ + fac6*g_vort.template get<vorticity>(key)[z]* + (g_vel.template get<velocity>(key.move(z,1))[y] - g_vel.template get<velocity>(key.move(z,-1))[y]); + + + g_dwp.template get<rhs>(key)[z] = /*fac1*(g_vort.template get<vorticity>(key.move(x,1))[z]+g_vort.template get<vorticity>(key.move(x,-1))[z])+ + fac2*(g_vort.template get<vorticity>(key.move(y,1))[z]+g_vort.template get<vorticity>(key.move(y,-1))[z])+ + fac3*(g_vort.template get<vorticity>(key.move(z,1))[z]+g_vort.template get<vorticity>(key.move(z,-1))[z])- + 2.0f*(fac1+fac2+fac3)*g_vort.template get<vorticity>(key)[z]+*/ + fac4*g_vort.template get<vorticity>(key)[x]* + (g_vel.template get<velocity>(key.move(x,1))[z] - g_vel.template get<velocity>(key.move(x,-1))[z])+ + fac5*g_vort.template get<vorticity>(key)[y]* + (g_vel.template get<velocity>(key.move(y,1))[z] - g_vel.template get<velocity>(key.move(y,-1))[z])+ + fac6*g_vort.template get<vorticity>(key)[z]* + (g_vel.template get<velocity>(key.move(z,1))[z] - g_vel.template get<velocity>(key.move(z,-1))[z]); + + ++it; + } +} + +//! \cond [calc_rhs] \endcond + +/*! 
\page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 8: Runge-Kutta # {#vic_runge_kutta1} + * + * Here we do the first step of the runge kutta update. In particular we + * update the vorticity and position of the particles. The right-hand-side + * of the vorticity update is calculated on the grid and interpolated + * on the particles. The Runge-Kutta of order two + * require the following update for the vorticity and position as first step + * + * \f$ \boldsymbol w = \boldsymbol w + \frac{1}{2} \boldsymbol {rhs} \delta t \f$ + * + * \f$ \boldsymbol x = \boldsymbol x + \frac{1}{2} \boldsymbol u \delta t \f$ + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp runge_kutta_1 + * + */ + +//! \cond [runge_kutta_1] \endcond + +void rk_step1(particles_type & particles) +{ + auto it = particles.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + // Save the old vorticity + particles.getProp<old_vort>(key)[x] = particles.getProp<vorticity>(key)[x]; + particles.getProp<old_vort>(key)[y] = particles.getProp<vorticity>(key)[y]; + particles.getProp<old_vort>(key)[z] = particles.getProp<vorticity>(key)[z]; + + particles.getProp<vorticity>(key)[x] = particles.getProp<vorticity>(key)[x] + 0.5f * dt * particles.getProp<rhs_part>(key)[x]; + particles.getProp<vorticity>(key)[y] = particles.getProp<vorticity>(key)[y] + 0.5f * dt * particles.getProp<rhs_part>(key)[y]; + particles.getProp<vorticity>(key)[z] = particles.getProp<vorticity>(key)[z] + 0.5f * dt * particles.getProp<rhs_part>(key)[z]; + + ++it; + } + + auto it2 = particles.getDomainIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + // Save the old position + particles.getProp<old_pos>(key)[x] = particles.getPos(key)[x]; + particles.getProp<old_pos>(key)[y] = particles.getPos(key)[y]; + particles.getProp<old_pos>(key)[z] = particles.getPos(key)[z]; + + particles.getPos(key)[x] = particles.getPos(key)[x] + 0.5f * dt * particles.getProp<p_vel>(key)[x]; + 
particles.getPos(key)[y] = particles.getPos(key)[y] + 0.5f * dt * particles.getProp<p_vel>(key)[y]; + particles.getPos(key)[z] = particles.getPos(key)[z] + 0.5f * dt * particles.getProp<p_vel>(key)[z]; + + ++it2; + } + + particles.map(); +} + +//! \cond [runge_kutta_1] \endcond + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 13: Runge-Kutta # {#vic_runge_kutta2} + * + * Here we do the second step of the Runge-Kutta update. In particular we + * update the vorticity and position of the particles. The right-hand-side + * of the vorticity update is calculated on the grid and interpolated + * on the particles. The Runge-Kutta of order two + * require the following update for the vorticity and position as first step + * + * \f$ \boldsymbol w = \boldsymbol w + \frac{1}{2} \boldsymbol {rhs} \delta t \f$ + * + * \f$ \boldsymbol x = \boldsymbol x + \frac{1}{2} \boldsymbol u \delta t \f$ + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp runge_kutta_2 + * + */ + +//! \cond [runge_kutta_2] \endcond + +void rk_step2(particles_type & particles) +{ + auto it = particles.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + particles.getProp<vorticity>(key)[x] = particles.getProp<old_vort>(key)[x] + dt * particles.getProp<rhs_part>(key)[x]; + particles.getProp<vorticity>(key)[y] = particles.getProp<old_vort>(key)[y] + dt * particles.getProp<rhs_part>(key)[y]; + particles.getProp<vorticity>(key)[z] = particles.getProp<old_vort>(key)[z] + dt * particles.getProp<rhs_part>(key)[z]; + + ++it; + } + + auto it2 = particles.getDomainIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + particles.getPos(key)[x] = particles.getProp<old_pos>(key)[x] + dt * particles.getProp<p_vel>(key)[x]; + particles.getPos(key)[y] = particles.getProp<old_pos>(key)[y] + dt * particles.getProp<p_vel>(key)[y]; + particles.getPos(key)[z] = particles.getProp<old_pos>(key)[z] + dt * particles.getProp<p_vel>(key)[z]; + + ++it2; + } + + 
particles.map(); +} + + + +//! \cond [runge_kutta_2] \endcond + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Step 4-5-6-7: Do step # {#vic_do_step} + * + * The do step function assemble multiple steps some of them already explained. + * First we interpolate the vorticity from particles to mesh + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp do_step_p2m + * + * than we calculate velocity out of vorticity and the right-hand-side + * recalling step 5 and 6 + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp step_56 + * + * Finally we interpolate velocity and right-hand-side back to particles + * + * \snippet Numerics/Vortex_in_cell/main_vic_petsc.cpp inte_m2p + * + */ + +template<typename grid, typename vector> void do_step(vector & particles, + grid & g_vort, + grid & g_vel, + grid & g_dvort, + Box<3,double> & domain, + interpolate<particles_type,grid_type,mp4_kernel<double>> & inte, + petsc_solver<double>::return_type (& phi_s)[3], + petsc_solver<double> & solver) +{ + constexpr int rhs = 0; + + //! \cond [do_step_p2m] \endcond + + set_zero<vorticity>(g_vort); + inte.template p2m<vorticity,vorticity>(particles,g_vort); + + g_vort.template ghost_put<add_,vorticity>(); + + //! \cond [do_step_p2m] \endcond + + //! \cond [step_56] \endcond + + // Calculate velocity from vorticity + comp_vel(domain,g_vort,g_vel,phi_s,solver); + calc_rhs(g_vort,g_vel,g_dvort); + + //! \cond [step_56] \endcond + + //! \cond [inte_m2p] \endcond + + g_dvort.template ghost_get<rhs>(); + g_vel.template ghost_get<velocity>(); + set_zero<rhs_part>(particles); + set_zero<p_vel>(particles); + inte.template m2p<rhs,rhs_part>(g_dvort,particles); + inte.template m2p<velocity,p_vel>(g_vel,particles); + + //! 
\cond [inte_m2p] \endcond +} + +template<typename vector, typename grid> void check_point_and_save(vector & particles, + grid & g_vort, + grid & g_vel, + grid & g_dvort, + size_t i) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() < 24) + { + particles.write("part_out_" + std::to_string(i),VTK_WRITER | FORMAT_BINARY); + g_vort.template ghost_get<vorticity>(); + g_vel.template ghost_get<velocity>(); + g_vel.write("grid_velocity_" + std::to_string(i), VTK_WRITER | FORMAT_BINARY); + g_vort.write("grid_vorticity_" + std::to_string(i), VTK_WRITER | FORMAT_BINARY); + } + + // In order to reduce the size of the saved data we apply a threshold. + // We only save particles with vorticity higher than 0.1 + vector_dist<3,double,aggregate<double>> part_save(particles.getDecomposition(),0); + + auto it_s = particles.getDomainIterator(); + + while (it_s.isNext()) + { + auto p = it_s.get(); + + double vort_magn = sqrt(particles.template getProp<vorticity>(p)[0] * particles.template getProp<vorticity>(p)[0] + + particles.template getProp<vorticity>(p)[1] * particles.template getProp<vorticity>(p)[1] + + particles.template getProp<vorticity>(p)[2] * particles.template getProp<vorticity>(p)[2]); + + if (vort_magn < 0.1) + { + ++it_s; + continue; + } + + part_save.add(); + + part_save.getLastPos()[0] = particles.getPos(p)[0]; + part_save.getLastPos()[1] = particles.getPos(p)[1]; + part_save.getLastPos()[2] = particles.getPos(p)[2]; + + part_save.template getLastProp<0>() = vort_magn; + + ++it_s; + } + + part_save.map(); + + // Save and HDF5 file for checkpoint restart + particles.save("check_point"); +} + +/*! \page Vortex_in_cell_petsc Vortex in Cell 3D + * + * # Main # {#vic_main} + * + * The main function as usual call the function **openfpm_init** it define + * the domain where the simulation take place. 
The ghost size of the grid + * the size of the grid in grid units on each direction, the periodicity + * of the domain, in this case PERIODIC in each dimension and we create + * our basic data structure for the simulation. A grid for the vorticity + * **g_vort** a grid for the velocity **g_vel** a grid for the right-hand-side + * of the vorticity update, and the **particles** vector. Additionally + * we define the data structure **phi_s[3]** that store the velocity solution + * in the previous time-step. keeping track of the previous solution for + * the velocity help the interative-solver to find the solution more quickly. + * Using the old velocity configuration as initial guess the solver will + * converge in few iterations refining the old one. + * + */ +int main(int argc, char* argv[]) +{ + // Initialize + openfpm_init(&argc,&argv); + { + // Domain, a rectangle + Box<3,double> domain({0.0,0.0,0.0},{22.0,5.57,5.57}); + + // Ghost (Not important in this case but required) + Ghost<3,long int> g(2); + + // Grid points on x=128 y=64 z=64 + long int sz[] = {512,64,64}; + size_t szu[] = {(size_t)sz[0],(size_t)sz[1],(size_t)sz[2]}; + + periodicity<3> bc = {{PERIODIC,PERIODIC,PERIODIC}}; + + grid_type g_vort(szu,domain,g,bc); + grid_type g_vel(g_vort.getDecomposition(),szu,g); + grid_type g_dvort(g_vort.getDecomposition(),szu,g); + particles_type particles(g_vort.getDecomposition(),0); + + // It store the solution to compute velocity + // It is used as initial guess every time we call the solver + Vector<double,PETSC_BASE> phi_s[3]; + Vector<double,PETSC_BASE> x_; + + // Parallel object + Vcluster & v_cl = create_vcluster(); + + // print out the spacing of the grid + if (v_cl.getProcessUnitID() == 0) + {std::cout << "SPACING " << g_vort.spacing(0) << " " << g_vort.spacing(1) << " " << g_vort.spacing(2) << std::endl;} + + // initialize the ring step 1 + init_ring(g_vort,domain); + + x_.resize(g_vort.size(),g_vort.getLocalDomainSize()); + x_.setZero(); + + // Create a 
PETSC solver to get the solution x + petsc_solver<double> solver; + + // Do the helmotz projection step 2 + helmotz_hodge_projection(g_vort,domain,solver,x_,true); + + // initialize the particle on a mesh step 3 + remesh(particles,g_vort,domain); + + // Get the total number of particles + size_t tot_part = particles.size_local(); + v_cl.sum(tot_part); + v_cl.execute(); + + // Now we set the size of phi_s + phi_s[0].resize(g_vort.size(),g_vort.getLocalDomainSize()); + phi_s[1].resize(g_vort.size(),g_vort.getLocalDomainSize()); + phi_s[2].resize(g_vort.size(),g_vort.getLocalDomainSize()); + + // And we initially set it to zero + phi_s[0].setZero(); + phi_s[1].setZero(); + phi_s[2].setZero(); + + // We create the interpolation object outside the loop cycles + // to avoid to recreate it at every cycle. Internally this object + // create some data-structure to optimize the conversion particle + // position to sub-domain. If there are no re-balancing it is safe + // to reuse-it + interpolate<particles_type,grid_type,mp4_kernel<double>> inte(particles,g_vort); + + // With more than 24 core we rely on the HDF5 checkpoint restart file + if (v_cl.getProcessingUnits() < 24) + {g_vort.write("grid_vorticity_init", VTK_WRITER | FORMAT_BINARY);} + + + // Before entering the loop we check if we want to restart from + // a previous simulation + size_t i = 0; + + // if we have an argument + if (argc > 1) + { + // take that argument + std::string restart(argv[1]); + + // convert it into number + i = std::stoi(restart); + + // load the file + particles.load("check_point_" + std::to_string(i)); + + // Print to inform that we are restarting from a + // particular position + if (v_cl.getProcessUnitID() == 0) + {std::cout << "Restarting from " << i << std::endl;} + } + + // Time Integration + for ( ; i < 10001 ; i++) + { + // do step 4-5-6-7 + do_step(particles,g_vort,g_vel,g_dvort,domain,inte,phi_s,solver); + + // do step 8 + rk_step1(particles); + + // do step 9-10-11-12 + 
do_step(particles,g_vort,g_vel,g_dvort,domain,inte,phi_s,solver); + + // do step 13 + rk_step2(particles); + + // so step 14 + set_zero<vorticity>(g_vort); + inte.template p2m<vorticity,vorticity>(particles,g_vort); + g_vort.template ghost_put<add_,vorticity>(); + + // helmotz-hodge projection + helmotz_hodge_projection(g_vort,domain,solver,x_,false); + + remesh(particles,g_vort,domain); + + // print the step number + if (v_cl.getProcessUnitID() == 0) + {std::cout << "Step " << i << std::endl;} + + // every 100 steps write the output + if (i % 100 == 0) {check_point_and_save(particles,g_vort,g_vel,g_dvort,i);} + + } + } + + openfpm_finalize(); +} + diff --git a/example/Vector/7_SPH_dlb_opt/main_dbg.cpp b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp new file mode 100644 index 00000000..d49c96ad --- /dev/null +++ b/example/Vector/7_SPH_dlb_opt/main_dbg.cpp @@ -0,0 +1,1254 @@ +/*! + * \page Vector_7_sph_dlb_dbg Vector 7 SPH Dam break simulation (Debugging video) + * + * + * [TOC] + * + * + * # SPH with Dynamic load Balancing (Debugging video) # {#SPH_dlb} + * + * \htmlonly + * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/> + * \endhtmlonly + * + * ## Inclusion ## {#e7_sph_inclusion} + * + * + * \snippet Vector/7_SPH_dlb/main.cpp inclusion + * + */ + +//#define SE_CLASS1 +//#define STOP_ON_ERROR + +#include "Vector/vector_dist.hpp" +#include <math.h> +#include "Draw/DrawParticles.hpp" + + +// A constant to indicate boundary particles +#define BOUNDARY 0 + +// A constant to indicate fluid particles +#define FLUID 1 + +size_t cnt = 0; + +// initial spacing between particles dp in the formulas +const double dp = 0.0085; +// Maximum height of the fluid water +// is going to be calculated and filled later on +double h_swl = 0.0; + +// c_s in the formulas (constant used to calculate the sound speed) +const double coeff_sound = 20.0; + +// gamma in the formulas +const double gamma_ = 7.0; + +// sqrt(3.0*dp*dp) support of the kernel +const 
double H = 0.0147224318643; + +// Eta in the formulas +const double Eta2 = 0.01 * H*H; + +// alpha in the formula +const double visco = 0.1; + +// cbar in the formula (calculated later) +double cbar = 0.0; + +// Mass of the fluid particles +const double MassFluid = 0.000614125; + +// Mass of the boundary particles +const double MassBound = 0.000614125; + +// End simulation time +const double t_end = 1.5; + +// Gravity acceleration +const double gravity = 9.81; + +// Reference densitu 1000Kg/m^3 +const double rho_zero = 1000.0; + +// Filled later require h_swl, it is b in the formulas +double B = 0.0; + +// Constant used to define time integration +const double CFLnumber = 0.2; + +// Minimum T +const double DtMin = 0.00001; + +// Minimum Rho allowed +const double RhoMin = 700.0; + +// Maximum Rho allowed +const double RhoMax = 1300.0; + +// Filled in initialization +double max_fluid_height = 0.0; + +// Properties + +// FLUID or BOUNDARY +const size_t type = 0; + +// Density +const int rho = 1; + +// Density at step n-1 +const int rho_prev = 2; + +// Pressure +const int Pressure = 3; + +// Delta rho calculated in the force calculation +const int drho = 4; + +// calculated force +const int force = 5; + +// velocity +const int velocity = 6; + +// velocity at previous step +const int velocity_prev = 7; + +struct nb_f +{ + size_t id; + double fact; + + // Used to reorder the neighborhood particles by id + bool operator<(const struct nb_f & pag) const + { + return (id < pag.id); + } +}; + +// Type of the vector containing particles +typedef vector_dist<3,double,aggregate<size_t,double, double, double, double, double[3], double[3], double[3],openfpm::vector<nb_f>, openfpm::vector<nb_f>, double[3], double , size_t>> particles; +// | | | | | | | | +// | | | | | | | | +// type density density Pressure delta force velocity velocity +// at n-1 density at n - 1 + +struct ModelCustom +{ + template<typename Decomposition, typename vector> inline void addComputation(Decomposition & 
dec, const vector & vd, size_t v, size_t p) + { + if (vd.template getProp<type>(p) == FLUID) + dec.addComputationCost(v,4); + else + dec.addComputationCost(v,3); + } + + template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v) + { + dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v)); + } +}; + +inline void EqState(particles & vd) +{ + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto a = it.get(); + + double rho_a = vd.template getProp<rho>(a); + double rho_frac = rho_a / rho_zero; + + vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0); + + ++it; + } +} + + + +const double a2 = 1.0/M_PI/H/H/H; + +inline double Wab(double r) +{ + r /= H; + + if (r < 1.0) + return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2; + else if (r < 2.0) + return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2; + else + return 0.0; +} + + + +const double c1 = -3.0/M_PI/H/H/H/H; +const double d1 = 9.0/4.0/M_PI/H/H/H/H; +const double c2 = -3.0/4.0/M_PI/H/H/H/H; +const double a2_4 = 0.25*a2; +// Filled later +double W_dap = 0.0; + +inline void DWab(Point<3,double> & dx, Point<3,double> & DW, double r, bool print) +{ + const double qq=r/H; + + if (qq < 1.0) + { + double qq2 = qq * qq; + double fac = (c1*qq + d1*qq2)/r; + + DW.get(0) = fac*dx.get(0); + DW.get(1) = fac*dx.get(1); + DW.get(2) = fac*dx.get(2); + } + else if (qq < 2.0) + { + double wqq = (2.0 - qq); + double fac = c2 * wqq * wqq / r; + + DW.get(0) = fac * dx.get(0); + DW.get(1) = fac * dx.get(1); + DW.get(2) = fac * dx.get(2); + } + else + { + DW.get(0) = 0.0; + DW.get(1) = 0.0; + DW.get(2) = 0.0; + } +} + + +// Tensile correction +inline double Tensile(double r, double rhoa, double rhob, double prs1, double prs2) +{ + const double qq=r/H; + //-Cubic Spline kernel + double wab; + if(r>H) + { + double wqq1=2.0f-qq; + double wqq2=wqq1*wqq1; + + wab=a2_4*(wqq2*wqq1); + } + 
else + { + double wqq2=qq*qq; + double wqq3=wqq2*qq; + + wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3); + } + + //-Tensile correction. + double fab=wab*W_dap; + fab*=fab; fab*=fab; //fab=fab^4 + const double tensilp1=(prs1/(rhoa*rhoa))*(prs1>0? 0.01: -0.2); + const double tensilp2=(prs2/(rhob*rhob))*(prs2>0? 0.01: -0.2); + + return (fab*(tensilp1+tensilp2)); +} + + +inline double Pi(const Point<3,double> & dr, double rr2, Point<3,double> & dv, double rhoa, double rhob, double massb, double & visc) +{ + const double dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2); + const double dot_rr2 = dot/(rr2+Eta2); + visc=std::max(dot_rr2,visc); + + if(dot < 0) + { + const float amubar=H*dot_rr2; + const float robar=(rhoa+rhob)*0.5f; + const float pi_visc=(-visco*cbar*amubar/robar); + + return pi_visc; + } + else + return 0.0; +} + + +template<typename VerletList> inline double calc_forces(particles & vd, VerletList & NN, double & max_visc, double r_gskin) +{ + auto NN2 = vd.getCellList(r_gskin); + + // Reset the ghost + auto itg = vd.getDomainAndGhostIterator(); + while (itg.isNext()) + { + auto p = itg.get(); + // Reset force + + if (p.getKey() < vd.size_local()) + { + // Reset the force counter (- gravity on zeta direction) + vd.template getProp<force>(p)[0] = 0.0; + vd.template getProp<force>(p)[1] = 0.0; + vd.template getProp<force>(p)[2] = -gravity; + vd.template getProp<drho>(p) = 0.0; + + + // Reset the force counter (- gravity on zeta direction) + vd.template getProp<10>(p)[0] = 0.0; + vd.template getProp<10>(p)[1] = 0.0; + vd.template getProp<10>(p)[2] = -gravity; + vd.template getProp<11>(p) = 0.0; + } + else + { + vd.getProp<force>(p)[0] = 0.0; + vd.getProp<force>(p)[1] = 0.0; + vd.getProp<force>(p)[2] = 0.0; + + vd.getProp<drho>(p) = 0.0; + } + + + vd.getProp<8>(p).clear(); + vd.getProp<9>(p).clear(); + + ++itg; + } + + // Get an iterator over particles + auto part = vd.getParticleIteratorCRS(NN.getInternalCellList()); + + double visc = 0; + + // For 
each particle ... + while (part.isNext()) + { + // ... a + auto a = part.get(); + + // Get the position xp of the particle + Point<3,double> xa = vd.getPos(a); + + // Take the mass of the particle dependently if it is FLUID or BOUNDARY + double massa = (vd.getProp<type>(a) == FLUID)?MassFluid:MassBound; + + // Get the density of the of the particle a + double rhoa = vd.getProp<rho>(a); + + // Get the pressure of the particle a + double Pa = vd.getProp<Pressure>(a); + + // Get the Velocity of the particle a + Point<3,double> va = vd.getProp<velocity>(a); + + // We threat FLUID particle differently from BOUNDARY PARTICLES ... + if (vd.getProp<type>(a) != FLUID) + { + // If it is a boundary particle calculate the delta rho based on equation 2 + // This require to run across the neighborhoods particles of a + auto Np = NN.template getNNIterator<NO_CHECK>(a); + + // For each neighborhood particle + while (Np.isNext() == true) + { + // ... q + auto b = Np.get(); + + // Get the position xp of the particle + Point<3,double> xb = vd.getPos(b); + + // if (p == q) skip this particle + if (a == b) {++Np; continue;}; + + // get the mass of the particle + double massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; + + // Get the velocity of the particle b + Point<3,double> vb = vd.getProp<velocity>(b); + + // Get the pressure and density of particle b + double Pb = vd.getProp<Pressure>(b); + double rhob = vd.getProp<rho>(b); + + // Get the distance between p and q + Point<3,double> dr = xa - xb; + // take the norm of this vector + double r2 = norm2(dr); + + // If the particles interact ... + if (r2 < 4.0*H*H) + { + // ... 
calculate delta rho + double r = sqrt(r2); + + Point<3,double> dv = va - vb; + + Point<3,double> DW; + DWab(dr,DW,r,false); + + const double dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2); + const double dot_rr2 = dot/(r2+Eta2); + max_visc=std::max(dot_rr2,max_visc); + + double scal = (dv.get(0)*DW.get(0)+dv.get(1)*DW.get(1)+dv.get(2)*DW.get(2)); + + vd.getProp<drho>(a) += massb*scal; + vd.getProp<drho>(b) += massa*scal; + + struct nb_f nna; + nna.id = b; + nna.fact = 0.0; + + struct nb_f nnb; + + // If it is a fluid we have to calculate force + if (vd.getProp<type>(b) == FLUID) + { + Point<3,double> v_rel = va - vb; + double factor = - ((vd.getProp<Pressure>(a) + vd.getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb) + Pi(dr,r2,v_rel,rhoa,rhob,massb,visc)); + + vd.getProp<force>(b)[0] -= massa * factor * DW.get(0); + vd.getProp<force>(b)[1] -= massa * factor * DW.get(1); + vd.getProp<force>(b)[2] -= massa * factor * DW.get(2); + + nnb.id = vd.getProp<12>(a); + nnb.fact = -massa * factor * DW.get(2); + } + else + { + struct nb_f nnb; + nnb.id = vd.getProp<12>(a); + nnb.fact = 0.0; + } + + vd.getProp<9>(a).add(nna); + vd.getProp<9>(b).add(nnb); + + } + + ++Np; + } + + ////////////////////// NN2 + + // Get an iterator over the neighborhood particles of p + auto Np2 = NN2.template getNNIterator<NO_CHECK>(NN2.getCell(vd.getPos(a))); + + // For each neighborhood particle + while (Np2.isNext() == true) + { + // ... 
q + auto b = Np2.get(); + + // Get the position xp of the particle + Point<3,double> xb = vd.getPos(b); + + // if (p == q) skip this particle + if (a == b) {++Np2; continue;}; + + // get the mass of the particle + double massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; + + // Get the velocity of the particle b + Point<3,double> vb = vd.getProp<velocity>(b); + + // Get the pressure and density of particle b + double Pb = vd.getProp<Pressure>(b); + double rhob = vd.getProp<rho>(b); + + // Get the distance between p and q + Point<3,double> dr = xa - xb; + // take the norm of this vector + double r2 = norm2(dr); + + // If the particles interact ... + if (r2 < 4.0*H*H) + { + // ... calculate delta rho + double r = sqrt(r2); + + Point<3,double> dv = va - vb; + + Point<3,double> DW; + DWab(dr,DW,r,false); + + const double dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2); + const double dot_rr2 = dot/(r2+Eta2); + + vd.getProp<11>(a) += massb*(dv.get(0)*DW.get(0)+dv.get(1)*DW.get(1)+dv.get(2)*DW.get(2)); + + + struct nb_f nna; + nna.id = vd.getProp<12>(b); + nna.fact = 0.0; + + vd.getProp<8>(a).add(nna); + } + + ++Np2; + } + + } + else + { + // If it is a fluid particle calculate based on equation 1 and 2 + + // Get an iterator over the neighborhood particles of p + auto Np = NN.template getNNIterator<NO_CHECK>(a); + + // For each neighborhood particle + while (Np.isNext() == true) + { + // ... 
q + auto b = Np.get(); + + // Get the position xp of the particle + Point<3,double> xb = vd.getPos(b); + + // if (p == q) skip this particle + if (a == b) {++Np; continue;}; + + double massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; + Point<3,double> vb = vd.getProp<velocity>(b); + double Pb = vd.getProp<Pressure>(b); + double rhob = vd.getProp<rho>(b); + + // Get the distance between p and q + Point<3,double> dr = xa - xb; + // take the norm of this vector + double r2 = norm2(dr); + + // if they interact + if (r2 < 4.0*H*H) + { + double r = sqrt(r2); + + Point<3,double> v_rel = va - vb; + + Point<3,double> DW; + DWab(dr,DW,r,false); + + double factor = - ((vd.getProp<Pressure>(a) + vd.getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb) + Pi(dr,r2,v_rel,rhoa,rhob,massb,visc)); + + vd.getProp<force>(a)[0] += massb * factor * DW.get(0); + vd.getProp<force>(a)[1] += massb * factor * DW.get(1); + vd.getProp<force>(a)[2] += massb * factor * DW.get(2); + + vd.getProp<force>(b)[0] -= massa * factor * DW.get(0); + vd.getProp<force>(b)[1] -= massa * factor * DW.get(1); + vd.getProp<force>(b)[2] -= massa * factor * DW.get(2); + + double scal = (v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2)); + + vd.getProp<drho>(a) += massb*scal; + vd.getProp<drho>(b) += massa*scal; + + + if (vd.getProp<12>(a) == 15604 && vd.getProp<12>(b) == 15601) + { + std::cout << "DETECTED ERROR " << std::endl; + } + + if (vd.getProp<12>(b) == 15604 && vd.getProp<12>(a) == 15601) + { + std::cout << "DETECTED ERROR " << create_vcluster().getProcessUnitID() << " a " << a << " b " << b << " " << vd.size_local_with_ghost() << std::endl; + } + + + struct nb_f nna; + nna.id = vd.getProp<12>(b); + nna.fact = massb * factor * DW.get(2); + + struct nb_f nnb; + nnb.id = vd.getProp<12>(a); + nnb.fact = -massa * factor * DW.get(2); + + vd.getProp<9>(a).add(nna); + vd.getProp<9>(b).add(nnb); + } + + ++Np; + } + + + ///////////////////// NN2 + + + 
////////////////////// NN2 + + // Get an iterator over the neighborhood particles of p + auto Np2 = NN2.template getNNIterator<NO_CHECK>(NN2.getCell(vd.getPos(a))); + + if (a == 0 && create_vcluster().getProcessUnitID() == 0) + { +// std::cout << "Particle 0" << std::endl; + } + + // For each neighborhood particle + while (Np2.isNext() == true) + { + // ... q + auto b = Np2.get(); + + // Get the position xp of the particle + Point<3,double> xb = vd.getPos(b); + + // if (p == q) skip this particle + if (a == b) {++Np2; continue;}; + + double massb = (vd.getProp<type>(b) == FLUID)?MassFluid:MassBound; + Point<3,double> vb = vd.getProp<velocity>(b); + double Pb = vd.getProp<Pressure>(b); + double rhob = vd.getProp<rho>(b); + + // Get the distance between p and q + Point<3,double> dr = xa - xb; + // take the norm of this vector + double r2 = norm2(dr); + + // if they interact + if (r2 < 4.0*H*H) + { + double r = sqrt(r2); + + Point<3,double> v_rel = va - vb; + + Point<3,double> DW; + DWab(dr,DW,r,false); + + double factor = - massb*((vd.getProp<Pressure>(a) + vd.getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb) + Pi(dr,r2,v_rel,rhoa,rhob,massb,visc)); + + vd.getProp<10>(a)[0] += factor * DW.get(0); + vd.getProp<10>(a)[1] += factor * DW.get(1); + vd.getProp<10>(a)[2] += factor * DW.get(2); + + vd.getProp<11>(a) += massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2)); + + struct nb_f nna; + nna.id = vd.getProp<12>(b); + nna.fact = factor * DW.get(2); + + vd.getProp<8>(a).add(nna); + } + + ++Np2; + } + } + + ++part; + } + + vd.template ghost_put<add_,drho>(); + vd.template ghost_put<add_,force>(); + vd.template ghost_put<merge_,9>(); + vd.template ghost_put<add_,10>(); + vd.template ghost_put<add_,11>(); + + auto part3 = vd.getDomainIterator(); + + while (part3.isNext()) + { + auto a = part3.get(); + + if (vd.getProp<force>(a)[0] != vd.getProp<10>(a)[0] || + vd.getProp<force>(a)[1] != vd.getProp<10>(a)[1] || + 
vd.getProp<force>(a)[2] != vd.getProp<10>(a)[2]) + { + if (create_vcluster().getProcessUnitID() == 0) + { + std::cout << "LISTS " << vd.getProp<12>(a) << " " << vd.getProp<8>(a).size() << " " << vd.getProp<9>(a).size() << std::endl; + std::cout << "ERROR: " << vd.getProp<force>(a)[2] << " " << vd.getProp<10>(a)[2] << " part: " << a.getKey() << std::endl; + + // Print factors and sum + + + vd.getProp<8>(a).sort(); + vd.getProp<9>(a).sort(); + + double sum1 = 0.0; + double sum2 = 0.0; + + for (size_t i = 0 ; i < vd.getProp<8>(a).size() ; i++) + { + + std::cout << "FACT: " << vd.getProp<8>(a).get(i).fact << " " << vd.getProp<9>(a).get(i).fact << " " << vd.getProp<8>(a).get(i).id << " " << vd.getProp<9>(a).get(i).id << std::endl; + + sum1 += vd.getProp<8>(a).get(i).fact; + sum2 += vd.getProp<9>(a).get(i).fact; + } + + std::cout << "sum: " << sum1 << " " << sum2 << std::endl; + } + + break; + } + + vd.getProp<8>(a).sort(); + vd.getProp<9>(a).sort(); + + if (create_vcluster().getProcessUnitID() == 0) + { + if (vd.getProp<8>(a).size() != vd.getProp<9>(a).size()) + { + std::cout << "ERROR: " << vd.getProp<12>(a) << " " << vd.getProp<8>(a).size() << " " << vd.getProp<9>(a).size() << std::endl; + + for (size_t i = 0 ; i < vd.getProp<8>(a).size() ; i++) + { + std::cout << "NNNN: " << vd.getProp<9>(a).get(i).id << " " << vd.getProp<8>(a).get(i).id << std::endl; + } + + break; + } + } + +// double sum1 = 0.0; +// double sum2 = 0.0; + +/* for (size_t i = 0 ; i < vd.getProp<8>(a).size() ; i++) + { + if (vd.getProp<8>(a).get(i).id != vd.getProp<9>(a).get(i).id) + { + std::cout << "CAZZO " << std::endl; + } + + + sum1 += vd.getProp<8>(a).get(i).fact; + sum2 += vd.getProp<9>(a).get(i).fact; + }*/ + +/* if (sum1 != sum2) + { + std::cout << "PORCA TROIA: " << std::endl; + + break; + }*/ + + ++part3; + } +} + + +void max_acceleration_and_velocity(particles & vd, double & max_acc, double & max_vel) +{ + // Calculate the maximum acceleration + auto part = vd.getDomainIterator(); + + 
while (part.isNext()) + { + auto a = part.get(); + + Point<3,double> acc(vd.getProp<force>(a)); + double acc2 = norm2(acc); + + Point<3,double> vel(vd.getProp<velocity>(a)); + double vel2 = norm2(vel); + + if (vel2 >= max_vel) + max_vel = vel2; + + if (acc2 >= max_acc) + max_acc = acc2; + + ++part; + } + max_acc = sqrt(max_acc); + max_vel = sqrt(max_vel); + + Vcluster & v_cl = create_vcluster(); + v_cl.max(max_acc); + v_cl.max(max_vel); + v_cl.execute(); +} + + +double calc_deltaT(particles & vd, double ViscDtMax) +{ + double Maxacc = 0.0; + double Maxvel = 0.0; + max_acceleration_and_velocity(vd,Maxacc,Maxvel); + + //-dt1 depends on force per unit mass. + const double dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<int>::max(); + + //-dt2 combines the Courant and the viscous time-step controls. + const double dt_cv = H/(std::max(cbar,Maxvel*10.) + H*ViscDtMax); + + //-dt new value of time step. + double dt=double(CFLnumber)*std::min(dt_f,dt_cv); + if(dt<double(DtMin)) + dt=double(DtMin); + + return dt; +} + + +openfpm::vector<size_t> to_remove; + + +void verlet_int(particles & vd, double dt, double & max_disp) +{ + // list of the particle to remove + to_remove.clear(); + + // particle iterator + auto part = vd.getDomainIterator(); + + double dt205 = dt*dt*0.5; + double dt2 = dt*2.0; + + max_disp = 0; + + // For each particle ... + while (part.isNext()) + { + // ... 
a + auto a = part.get(); + + // if the particle is boundary + if (vd.template getProp<type>(a) == BOUNDARY) + { + // Update rho + double rhop = vd.template getProp<rho>(a); + + // Update only the density + vd.template getProp<velocity>(a)[0] = 0.0; + vd.template getProp<velocity>(a)[1] = 0.0; + vd.template getProp<velocity>(a)[2] = 0.0; + vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); + + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + continue; + } + + //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion. + double dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205; + double dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205; + double dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205; + + vd.getPos(a)[0] += dx; + vd.getPos(a)[1] += dy; + vd.getPos(a)[2] += dz; + + double d2 = dx*dx + dy*dy + dz*dz; + + max_disp = (max_disp > d2)?max_disp:d2; + + double velX = vd.template getProp<velocity>(a)[0]; + double velY = vd.template getProp<velocity>(a)[1]; + double velZ = vd.template getProp<velocity>(a)[2]; + double rhop = vd.template getProp<rho>(a); + + vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2; + vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2; + vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2; + vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); + + // Check if the particle go out of range in space and in density + if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || + vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || 
vd.getPos(a)[2] > 0.000263878+0.903944 || + vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) + { + to_remove.add(a.getKey()); + + + // if we remove something the verlet are not anymore correct and must be reconstructed + max_disp = 100.0; + } + + vd.template getProp<velocity_prev>(a)[0] = velX; + vd.template getProp<velocity_prev>(a)[1] = velY; + vd.template getProp<velocity_prev>(a)[2] = velZ; + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + } + + Vcluster & v_cl = create_vcluster(); + v_cl.max(max_disp); + v_cl.execute(); + + max_disp = sqrt(max_disp); + + // remove the particles + vd.remove(to_remove,0); + + // increment the iteration counter + cnt++; +} + +void euler_int(particles & vd, double dt, double & max_disp) +{ + // list of the particle to remove + to_remove.clear(); + + // particle iterator + auto part = vd.getDomainIterator(); + + double dt205 = dt*dt*0.5; + double dt2 = dt*2.0; + + max_disp = 0; + + // For each particle ... + while (part.isNext()) + { + // ... a + auto a = part.get(); + + // if the particle is boundary + if (vd.template getProp<type>(a) == BOUNDARY) + { + // Update rho + double rhop = vd.template getProp<rho>(a); + + // Update only the density + vd.template getProp<velocity>(a)[0] = 0.0; + vd.template getProp<velocity>(a)[1] = 0.0; + vd.template getProp<velocity>(a)[2] = 0.0; + vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); + + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + continue; + } + + //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion. 
+ double dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205; + double dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205; + double dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205; + + vd.getPos(a)[0] += dx; + vd.getPos(a)[1] += dy; + vd.getPos(a)[2] += dz; + + double d2 = dx*dx + dy*dy + dz*dz; + + max_disp = (max_disp > d2)?max_disp:d2; + + double velX = vd.template getProp<velocity>(a)[0]; + double velY = vd.template getProp<velocity>(a)[1]; + double velZ = vd.template getProp<velocity>(a)[2]; + double rhop = vd.template getProp<rho>(a); + + vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt; + vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt; + vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt; + vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); + + // Check if the particle go out of range in space and in density + if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || + vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || + vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) + { + to_remove.add(a.getKey()); + + // if we remove something the verlet are not anymore correct and must be reconstructed + max_disp = 100.0; + } + + vd.template getProp<velocity_prev>(a)[0] = velX; + vd.template getProp<velocity_prev>(a)[1] = velY; + vd.template getProp<velocity_prev>(a)[2] = velZ; + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + } + + // remove the particles + vd.remove(to_remove,0); + + Vcluster & v_cl = create_vcluster(); + v_cl.max(max_disp); + v_cl.execute(); + + max_disp = 
sqrt(max_disp); + + // increment the iteration counter + cnt++; +} + + +int main(int argc, char* argv[]) +{ + // initialize the library + openfpm_init(&argc,&argv); + + // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y + Box<3,double> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.5025}); + size_t sz[3] = {207,90,66}; + + // Fill W_dap + W_dap = 1.0/Wab(H/1.5); + + // Here we define the boundary conditions of our problem + size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + + double skin = 0.25 * 2*H; + double r_gskin = 2*H + skin; + + // extended boundary around the domain, and the processor domain + // by the support of the cubic kernel + Ghost<3,double> g(r_gskin); + + particles vd(0,domain,bc,g,BIND_DEC_TO_GHOST); + + // You can ignore all these dp/2.0 is a trick to reach the same initialization + // of Dual-SPH that use a different criteria to draw particles + Box<3,double> fluid_box({dp/2.0,dp/2.0,dp/2.0},{0.4+dp/2.0,0.67-dp/2.0,0.3+dp/2.0}); + + // return an iterator to the fluid particles to add to vd + auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box); + + // here we fill some of the constants needed by the simulation + max_fluid_height = fluid_it.getBoxMargins().getHigh(2); + h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2); + B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_; + cbar = coeff_sound * sqrt(gravity * h_swl); + + // for each particle inside the fluid box ... + while (fluid_it.isNext()) + { + // ... add a particle ... + vd.add(); + + // ... and set it position ... + vd.getLastPos()[0] = fluid_it.get().get(0); + vd.getLastPos()[1] = fluid_it.get().get(1); + vd.getLastPos()[2] = fluid_it.get().get(2); + + // and its type. 
+ vd.template getLastProp<type>() = FLUID; + + // We also initialize the density of the particle and the hydro-static pressure given by + // + // rho_zero*g*h = P + // + // rho_p = (P/B + 1)^(1/Gamma) * rho_zero + // + + vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2)); + + vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero; + vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>(); + vd.template getLastProp<velocity>()[0] = 0.0; + vd.template getLastProp<velocity>()[1] = 0.0; + vd.template getLastProp<velocity>()[2] = 0.0; + + vd.template getLastProp<velocity_prev>()[0] = 0.0; + vd.template getLastProp<velocity_prev>()[1] = 0.0; + vd.template getLastProp<velocity_prev>()[2] = 0.0; + + // next fluid particle + ++fluid_it; + } + + + // Recipient + Box<3,double> recipient1({0.0,0.0,0.0},{1.6+dp/2.0,0.67+dp/2.0,0.4+dp/2.0}); + Box<3,double> recipient2({dp,dp,dp},{1.6-dp/2.0,0.67-dp/2.0,0.4+dp/2.0}); + + Box<3,double> obstacle1({0.9,0.24-dp/2.0,0.0},{1.02+dp/2.0,0.36,0.45+dp/2.0}); + Box<3,double> obstacle2({0.9+dp,0.24+dp/2.0,0.0},{1.02-dp/2.0,0.36-dp,0.45-dp/2.0}); + Box<3,double> obstacle3({0.9+dp,0.24,0.0},{1.02,0.36,0.45}); + + openfpm::vector<Box<3,double>> holes; + holes.add(recipient2); + holes.add(obstacle1); + auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1); + + while (bound_box.isNext()) + { + vd.add(); + + vd.getLastPos()[0] = bound_box.get().get(0); + vd.getLastPos()[1] = bound_box.get().get(1); + vd.getLastPos()[2] = bound_box.get().get(2); + + vd.template getLastProp<type>() = BOUNDARY; + vd.template getLastProp<rho>() = rho_zero; + vd.template getLastProp<rho_prev>() = rho_zero; + vd.template getLastProp<velocity>()[0] = 0.0; + vd.template getLastProp<velocity>()[1] = 0.0; + vd.template getLastProp<velocity>()[2] = 0.0; + + vd.template getLastProp<velocity_prev>()[0] = 0.0; + vd.template 
getLastProp<velocity_prev>()[1] = 0.0; + vd.template getLastProp<velocity_prev>()[2] = 0.0; + + ++bound_box; + } + + auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1); + + while (obstacle_box.isNext()) + { + vd.add(); + + vd.getLastPos()[0] = obstacle_box.get().get(0); + vd.getLastPos()[1] = obstacle_box.get().get(1); + vd.getLastPos()[2] = obstacle_box.get().get(2); + + vd.template getLastProp<type>() = BOUNDARY; + vd.template getLastProp<rho>() = rho_zero; + vd.template getLastProp<rho_prev>() = rho_zero; + vd.template getLastProp<velocity>()[0] = 0.0; + vd.template getLastProp<velocity>()[1] = 0.0; + vd.template getLastProp<velocity>()[2] = 0.0; + + vd.template getLastProp<velocity_prev>()[0] = 0.0; + vd.template getLastProp<velocity_prev>()[1] = 0.0; + vd.template getLastProp<velocity_prev>()[2] = 0.0; + + ++obstacle_box; + } + + vd.map(); + + // Now that we fill the vector with particles + ModelCustom md; + + vd.addComputationCosts(md); + vd.getDecomposition().decompose(); + vd.map(); + + size_t loc = vd.size_local(); + openfpm::vector<size_t> gt; + + Vcluster & v_cl = create_vcluster(); + v_cl.allGather(loc,gt); + v_cl.execute(); + + + auto debug_it = vd.getDomainIterator(); + + + size_t tot = 0; + + + for (size_t i ; i < create_vcluster().getProcessUnitID() ; i++) + tot += gt.get(i); + + while(debug_it.isNext()) + { + auto a = debug_it.get(); + + vd.getProp<12>(a) = tot; + tot++; + + ++debug_it; + } + + vd.ghost_get<type,rho,Pressure,velocity,12>(); + + auto NN = vd.getVerletCrs(r_gskin); + + // Evolve + + size_t write = 0; + size_t it = 0; + size_t it_reb = 0; + double t = 0.0; + double tot_disp = 0.0; + double max_disp; + while (t <= t_end) + { + Vcluster & v_cl = create_vcluster(); + timer it_time; + + it_reb++; +// if (2*tot_disp >= skin) +// { + vd.map(); + + vd.getDecomposition().write("DecBEFORE"); + + if (it_reb > 5) + { + ModelCustom md; + vd.addComputationCosts(md); + vd.getDecomposition().decompose(); + + vd.map(); + + 
it_reb = 0; + + if (v_cl.getProcessUnitID() == 0) + std::cout << "REBALANCED " << std::endl; + + } + + vd.getDecomposition().write("DecAFTER"); + + // Calculate pressure from the density + EqState(vd); + + vd.ghost_get<type,rho,Pressure,velocity,12>(); + + //vd.updateVerlet(NN,r_gskin,VL_CRS_SYMMETRIC); + NN = vd.getVerletCrs(r_gskin); + + vd.write("Debug"); + + tot_disp = 0.0; + + if (v_cl.getProcessUnitID() == 0) + std::cout << "RECONSTRUCT Verlet " << std::endl; +// } +// else +// { +// // Calculate pressure from the density +// EqState(vd); +// +// vd.ghost_get<type,rho,Pressure,velocity>(SKIP_LABELLING); +// } + + double max_visc = 0.0; + + // Calc forces + calc_forces(vd,NN,max_visc,r_gskin); + + // Get the maximum viscosity term across processors + v_cl.max(max_visc); + v_cl.execute(); + + // Calculate delta t integration + double dt = calc_deltaT(vd,max_visc); + + // VerletStep or euler step + it++; + if (it < 40) + verlet_int(vd,dt,max_disp); + else + { + euler_int(vd,dt,max_disp); + it = 0; + } + + tot_disp += max_disp; + t += dt; + + if (write < t*100) + { +// vd.deleteGhost(); + vd.write("Geometry",write); +// vd.ghost_get<type,rho,Pressure,velocity>(SKIP_LABELLING); + write++; + + if (v_cl.getProcessUnitID() == 0) + std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << v_cl.getProcessUnitID() << " TOT disp: " << tot_disp << " " << cnt << std::endl; + } + else + { + if (v_cl.getProcessUnitID() == 0) + std::cout << "TIME: " << t << " " << it_time.getwct() << " " << v_cl.getProcessUnitID() << " TOT disp: " << tot_disp << " " << cnt << std::endl; + } + } + + openfpm_finalize(); +} + diff --git a/openfpm_data b/openfpm_data index 0261ed4b..8d7bbbfa 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 0261ed4b804309864d899bedec46a67e5e6cfe95 +Subproject commit 8d7bbbfa94b4ef203f6e3e7af5d3bcdf3eea5707 diff --git a/script/install_EIGEN.sh b/script/install_EIGEN.sh index d01089a0..0b67506e 100755 --- 
a/script/install_EIGEN.sh +++ b/script/install_EIGEN.sh @@ -17,16 +17,19 @@ if [ ! -d "$1/SUITESPARSE" ]; then exit 1 fi -wget http://ppmcore.mpi-cbg.de/upload/eigen-3.3.1.tar.bz2 -rm -rf eigen-eigen-f562a193118d -tar -xf eigen-3.3.1.tar.bz2 +rm -rf eigen-3.3.5.tar.bz2 +wget http://ppmcore.mpi-cbg.de/upload/eigen-3.3.5.tar.bz2 +rm -rf eigen-eigen-b3f3d4950030/ +tar -xf eigen-3.3.5.tar.bz2 -cd eigen-eigen-f562a193118d +cd eigen-eigen-b3f3d4950030/ mkdir $1/EIGEN/ mv Eigen $1/EIGEN/Eigen cd .. -rm -rf eigen-eigen-f562a193118d +rm -rf eigen-eigen-b3f3d4950030/ + +touch $1/EIGEN/signature_of_eigen3_matrix_library # Mark the installation -echo 1 > $1/EIGEN/version +echo 2 > $1/EIGEN/version diff --git a/script/install_OPENBLAS.sh b/script/install_OPENBLAS.sh index 6b6925f6..09129796 100755 --- a/script/install_OPENBLAS.sh +++ b/script/install_OPENBLAS.sh @@ -7,8 +7,8 @@ if [ -d "$1/OPENBLAS" ]; then exit 0 fi +rm -rf OpenBLAS-0.2.20.tar.gz wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.2.20.tar.gz -rm -rf OpenBLAS-0.2.20 tar -xf OpenBLAS-0.2.20.tar.gz cd OpenBLAS-0.2.20 @@ -26,7 +26,7 @@ make install PREFIX=$1/OPENBLAS if [ ! 
"$(ls -A $1/OPENBLAS)" ]; then rm -rf $1/OPENBLAS else - rm -rf OpenBLAS-0.2.19 + rm -rf OpenBLAS-0.2.20 echo 1 > $1/OPENBLAS/version exit 0 fi diff --git a/script/remove_old b/script/remove_old index 459682d9..4e39fa1b 100755 --- a/script/remove_old +++ b/script/remove_old @@ -201,6 +201,7 @@ function remove_old() rm -rf $1/MPI/share rm -rf $1/MPI rm -rf $1/HDF5 + rm -rf $1/ZLIB rm -rf $1/PARMETIS rm -rf $1/PETSC rm -rf $1/TRILINOS @@ -212,10 +213,10 @@ function remove_old() if [ -d $1/EIGEN ]; then version=$(cat $1/EIGEN/version) - if [ x"$version" != x"1" ]; then - echo -e "\033[1;34;5m ---------------------------------------------------------------------- \033[0m" - echo -e "\033[1;34;5m EIGEN has been updated, the component will be updated automatically \033[0m" - echo -e "\033[1;34;5m ---------------------------------------------------------------------- \033[0m" + if [ x"$version" != x"2" ]; then + echo -e "\033[1;34;5m -------------------------------------------------------------------------------- \033[0m" + echo -e "\033[1;34;5m EIGEN has been updated to 3.3.5 , the component will be updated automatically \033[0m" + echo -e "\033[1;34;5m -------------------------------------------------------------------------------- \033[0m" sleep 5 rm -rf $1/EIGEN/Eigen rm -rf $1/EIGEN diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index c651d93a..e6706447 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -130,7 +130,7 @@ template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n */ template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename Distribution> -class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>, public domain_nn_calculator_cart<dim> +class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public 
ie_ghost<dim,T,Memory,layout_base>, public domain_nn_calculator_cart<dim> { public: @@ -458,10 +458,10 @@ public: {div[i] = (size_t) ((bound.getHigh(i) - bound.getLow(i)) / cd.getCellBox().getP2()[i]);} // Initialize the geo_cell structure - ie_ghost<dim,T>::Initialize_geo_cell(bound,div); + ie_ghost<dim,T,Memory,layout_base>::Initialize_geo_cell(bound,div); // Initialize shift vectors - ie_ghost<dim,T>::generateShiftVectors(domain,bc); + ie_ghost<dim,T,Memory,layout_base>::generateShiftVectors(domain,bc); } } @@ -643,8 +643,8 @@ public: // Intersect all the local sub-domains with the sub-domains of the contiguous processors // create the internal structures that store ghost information - ie_ghost<dim, T>::create_box_nn_processor_ext(v_cl, ghost, sub_domains, box_nn_processor, *this); - ie_ghost<dim, T>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this); + ie_ghost<dim, T,Memory,layout_base>::create_box_nn_processor_ext(v_cl, ghost, sub_domains, box_nn_processor, *this); + ie_ghost<dim, T,Memory,layout_base>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this); ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc); } @@ -776,9 +776,9 @@ public: * \return processor id * */ - inline static size_t id(p_box<dim, T> & p, size_t b_id) + template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id) { - return p.proc; + return p.template get<proc_>(); } }; @@ -796,9 +796,9 @@ public: * \return local processor id * */ - inline static size_t id(p_box<dim, T> & p, size_t b_id) + template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id) { - return p.lc_proc; + return p.template get<lc_proc_>(); } }; @@ -816,9 +816,9 @@ public: * \return shift_id id * */ - inline static size_t id(p_box<dim,T> & p, size_t b_id) + template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id) { - return p.shift_id; + return p.template get<shift_id_>(); } 
}; @@ -925,7 +925,7 @@ public: (static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this)); (static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this)); - (static_cast<ie_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_ghost<dim,T>>(*this)); + (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(&cart))->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(*this)); cart.sub_domains = sub_domains; cart.box_nn_processor = box_nn_processor; @@ -961,7 +961,7 @@ public: { static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart)); static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart)); - static_cast<ie_ghost<dim,T>*>(this)->operator=(static_cast<ie_ghost<dim,T>>(cart)); + static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(cart)); sub_domains = cart.sub_domains; box_nn_processor = cart.box_nn_processor; @@ -1001,7 +1001,7 @@ public: { static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart)); static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart)); - static_cast<ie_ghost<dim,T>*>(this)->operator=(static_cast<ie_ghost<dim,T>>(cart)); + static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(cart)); sub_domains.swap(cart.sub_domains); box_nn_processor.swap(cart.box_nn_processor); @@ -1314,7 +1314,7 @@ public: fine_s.clear(); loc_box.clear(); nn_prcs<dim, T>::reset(); - ie_ghost<dim, T>::reset(); + ie_ghost<dim,T,Memory,layout_base>::reset(); ie_loc_ghost<dim, T>::reset(); } @@ -1550,11 +1550,6 @@ public: return sub_domains; } -/* openfpm::vector<openfpm::vector<SpaceBox<dim, T>>> & getSubDomainsGlobal() - { - return sub_domains_global; - }*/ - /*! 
\brief Check if the particle is local * * \warning if the particle id outside the domain the result is unreliable @@ -1803,7 +1798,7 @@ public: vtk_box1.write(output + std::string("subdomains_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk")); nn_prcs<dim, T>::write(output); - ie_ghost<dim, T>::write(output, v_cl.getProcessUnitID()); + ie_ghost<dim,T,Memory,layout_base>::write(output, v_cl.getProcessUnitID()); ie_loc_ghost<dim, T>::write(output, v_cl.getProcessUnitID()); return true; @@ -1856,9 +1851,9 @@ public: for (size_t p = 0; p<nn_prcs < dim, T>::getNNProcessors(); p++) { - for (size_t i = 0; i<ie_ghost < dim, T>::getProcessorNEGhost(p); i++) + for (size_t i = 0; i<ie_ghost <dim,T,Memory,layout_base>::getProcessorNEGhost(p); i++) { - std::cout << ie_ghost<dim, T>::getProcessorEGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T>::IDtoProc(p) << " id=" << ie_ghost<dim, T>::getProcessorEGhostId(p, i) << "\n"; + std::cout << ie_ghost<dim,T,Memory,layout_base>::getProcessorEGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T>::IDtoProc(p) << " id=" << ie_ghost<dim,T,Memory,layout_base>::getProcessorEGhostId(p, i) << "\n"; } } @@ -1866,9 +1861,9 @@ public: for (size_t p = 0; p<nn_prcs < dim, T>::getNNProcessors(); p++) { - for (size_t i = 0; i<ie_ghost < dim, T>::getProcessorNIGhost(p); i++) + for (size_t i = 0; i<ie_ghost<dim,T,Memory,layout_base>::getProcessorNIGhost(p); i++) { - std::cout << ie_ghost<dim, T>::getProcessorIGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T>::IDtoProc(p) << " id=" << ie_ghost<dim, T>::getProcessorIGhostId(p, i) << "\n"; + std::cout << ie_ghost<dim,T,Memory,layout_base>::getProcessorIGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T>::IDtoProc(p) << " id=" << ie_ghost<dim,T,Memory,layout_base>::getProcessorIGhostId(p, i) << "\n"; } } } @@ -1888,7 +1883,7 @@ public: if (static_cast<nn_prcs<dim,T>*>(this)->is_equal(static_cast<nn_prcs<dim,T>&>(cart)) == false) return false; - if 
(static_cast<ie_ghost<dim,T>*>(this)->is_equal(static_cast<ie_ghost<dim,T>&>(cart)) == false) + if (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->is_equal(static_cast<ie_ghost<dim,T,Memory,layout_base>&>(cart)) == false) return false; if (sub_domains != cart.sub_domains) @@ -1934,7 +1929,7 @@ public: if (static_cast<nn_prcs<dim,T>*>(this)->is_equal(static_cast<nn_prcs<dim,T>&>(cart)) == false) return false; - if (static_cast<ie_ghost<dim,T>*>(this)->is_equal_ng(static_cast<ie_ghost<dim,T>&>(cart)) == false) + if (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->is_equal_ng(static_cast<ie_ghost<dim,T,Memory,layout_base>&>(cart)) == false) return false; if (sub_domains != cart.sub_domains) @@ -2024,6 +2019,7 @@ public: for (int i = 0 ; i < dim ; i++) {bc_[i] = this->periodicity(i);} CartDecomposition_gpu<dim,T,Memory,layout_base> cdg(fine_s.toKernel(), + ie_ghost<dim,T,Memory,layout_base>::toKernel(), sub_domains_global.toKernel(), getDomain(), bc_); diff --git a/src/Decomposition/cuda/CartDecomposition_gpu.cuh b/src/Decomposition/cuda/CartDecomposition_gpu.cuh index a6592116..2b4505c7 100644 --- a/src/Decomposition/cuda/CartDecomposition_gpu.cuh +++ b/src/Decomposition/cuda/CartDecomposition_gpu.cuh @@ -8,24 +8,7 @@ #ifndef CARTDECOMPOSITION_GPU_HPP_ #define CARTDECOMPOSITION_GPU_HPP_ -#ifdef __NVCC__ - -template<typename cartdec_gpu, typename particles_type, typename vector_out> -__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output , int rank) -{ - int p = threadIdx.x + blockIdx.x * blockDim.x; - - if (p >= parts.size()) return; - - Point<3,float> xp = parts.template get<0>(p); - - int pr = cdg.processorIDBC(xp); - - output.template get<1>(p) = (pr == rank)?-1:pr; - output.template get<0>(p) = p; -} - -#endif +#include "ie_ghost_gpu.cuh" template<typename T2, typename fine_s_type, typename vsub_domain_type> __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vsub_domain_type & 
sub_domains_global) @@ -62,7 +45,7 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs } template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base> -class CartDecomposition_gpu +class CartDecomposition_gpu : public ie_ghost_gpu<dim,T,Memory,layout_base> { CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> clk; @@ -93,17 +76,18 @@ class CartDecomposition_gpu public: CartDecomposition_gpu(CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> clk, + ie_ghost_gpu<dim,T,Memory,layout_base> ieg, openfpm::vector_gpu_ker<Box_map<dim, T>,layout_base> sub_domains_global, const Box<dim,T> & domain, const int (& bc)[dim]) - :clk(clk),domain(domain),sub_domains_global(sub_domains_global) + :ie_ghost_gpu<dim,T,Memory,layout_base>(ieg),clk(clk),domain(domain),sub_domains_global(sub_domains_global) { for (int s = 0 ; s < dim ; s++) {this->bc[s] = bc[s];} } CartDecomposition_gpu(const CartDecomposition_gpu<dim,T,Memory,layout_base> & dec) - :clk(dec.clk),domain(dec.domain) + :ie_ghost_gpu<dim,T,Memory,layout_base>(dec),clk(dec.clk),domain(dec.domain) { for (int s = 0 ; s < dim ; s++) {this->bc[s] = dec.bc[s];} @@ -125,7 +109,6 @@ public: return processorID_impl(pt,clk,sub_domains_global); } - }; #endif /* CARTDECOMPOSITION_GPU_HPP_ */ diff --git a/src/Decomposition/cuda/decomposition_cuda_tests.cu b/src/Decomposition/cuda/decomposition_cuda_tests.cu index b8322092..84ac2397 100644 --- a/src/Decomposition/cuda/decomposition_cuda_tests.cu +++ b/src/Decomposition/cuda/decomposition_cuda_tests.cu @@ -9,70 +9,4 @@ BOOST_AUTO_TEST_SUITE( decomposition_to_gpu_test ) -BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use ) -{ - auto & v_cl = create_vcluster(); - - // Vcluster - Vcluster<> & vcl = create_vcluster(); - - CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl); - - // Physical domain - Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 }); - 
size_t div[3]; - - // Get the number of processor and calculate the number of sub-domain - // for each processor (SUB_UNIT_FACTOR=64) - size_t n_proc = vcl.getProcessingUnits(); - size_t n_sub = n_proc * SUB_UNIT_FACTOR; - - // Set the number of sub-domains on each dimension (in a scalable way) - for (int i = 0; i < 3; i++) - { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} - - // Define ghost - Ghost<3, float> g(0.01); - - // Boundary conditions - size_t bc[] = { PERIODIC, PERIODIC, PERIODIC }; - - // Decompose - dec.setParameters(div,box,bc,g); - dec.decompose(); - - openfpm::vector_gpu<Point<3,float>> vg; - vg.resize(10000); - - for (size_t i = 0 ; i < 10000 ; i++) - { - vg.template get<0>(i)[0] = (float)rand()/RAND_MAX; - vg.template get<0>(i)[1] = (float)rand()/RAND_MAX; - vg.template get<0>(i)[2] = (float)rand()/RAND_MAX; - } - - vg.hostToDevice<0>(); - - // process on GPU the processor ID for each particles - - auto ite = vg.getGPUIterator(); - - openfpm::vector_gpu<aggregate<int,int>> proc_id_out; - proc_id_out.resize(vg.size()); - - process_id_proc_each_part<decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())> - <<<ite.wthr,ite.thr>>> - (dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),v_cl.rank()); - - proc_id_out.deviceToHost<0>(); - - bool match = true; - for (size_t i = 0 ; i < proc_id_out.size() ; i++) - { - Point<3,float> xp = vg.template get<0>(i); - - match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp); - } -} - BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Decomposition/cuda/ie_ghost_gpu.cuh b/src/Decomposition/cuda/ie_ghost_gpu.cuh new file mode 100644 index 00000000..e0a6f8fb --- /dev/null +++ b/src/Decomposition/cuda/ie_ghost_gpu.cuh @@ -0,0 +1,126 @@ +/* + * ie_ghost_gpu.cuh + * + * Created on: Aug 24, 2018 + * Author: i-bird + */ + +#ifndef IE_GHOST_GPU_CUH_ +#define IE_GHOST_GPU_CUH_ + +#include "data_type/aggregate.hpp" + +constexpr unsigned int lc_proc_ = 0; +constexpr unsigned int 
proc_ = 1; +constexpr unsigned int shift_id_ = 2; + +template<unsigned int dim, typename T, typename cell_list_type, typename vb_int_box_type, typename vb_int_type> +__device__ __host__ inline unsigned int ghost_processorID_N_impl(const Point<dim,T> & p, cell_list_type & geo_cell, vb_int_box_type & vb_int_box, vb_int_type & vb_int) +{ + unsigned int cell = geo_cell.getCell(p); + unsigned int sz = geo_cell.getNelements(cell); + + unsigned int n = 0; + + for (int i = 0 ; i < sz ; i++) + { + unsigned int bid = geo_cell.get(cell,i); + + if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true) + {n++;} + } + + return n; +} + +/*! \brief structure that store and compute the internal and external local ghost box. Version usable in kernel + * + * \tparam dim is the dimensionality of the physical domain we are going to decompose. + * \tparam T type of the space we decompose, Real, Integer, Complex ... + * + * \see CartDecomposition + * + */ +template<unsigned int dim, typename T, typename Memory, template<typename> class layout_base> +class ie_ghost_gpu +{ + + //! Cell-list that store the geometrical information of the internal ghost boxes + CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> geo_cell; + + //! internal ghost box + openfpm::vector_gpu_ker<Box<dim, T>,layout_base> vb_int_box; + + //! 
internal ghost box + openfpm::vector_gpu_ker<aggregate<unsigned int,unsigned int,unsigned int>,layout_base> vb_int; + + // maximum rank + unsigned int max_rank; + +public: + + + ie_ghost_gpu(CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> geo_cell, + openfpm::vector_gpu_ker<Box<dim, T>,layout_base> vb_int_box, + openfpm::vector_gpu_ker<aggregate<unsigned int,unsigned int,unsigned int>,layout_base> vb_int, + unsigned int max_rank) + :geo_cell(geo_cell),vb_int_box(vb_int_box),vb_int(vb_int),max_rank(max_rank) + { + + } + + ie_ghost_gpu(const ie_ghost_gpu<dim,T,Memory,layout_base> & ieg) + :geo_cell(ieg.geo_cell),vb_int_box(ieg.vb_int_box),vb_int(ieg.vb_int),max_rank(ieg.max_rank) + {} + + /*! \brief Get the cell from the particle position + * + * \param p position of the particle + * + */ + __device__ inline unsigned int ghost_processorID_cell(const Point<dim,T> & p) + { + return geo_cell.getCell(p); + } + + /*! \brief Get the number of processor a particle must sent + * + * \param p position of the particle + * + */ + __device__ inline unsigned int ghost_processorID_N(const Point<dim,T> & p) + { + return ghost_processorID_N_impl(p,geo_cell,vb_int_box,vb_int); + } + + /*! 
\brief Get the number of processor a particle must sent + * + * \param p position of the particle + * + */ + template<typename output_type> __device__ inline void ghost_processor_ID(const Point<dim,T> & p, output_type & output, unsigned int base, unsigned int pi) + { + unsigned int cell = geo_cell.getCell(p); + unsigned int sz = geo_cell.getNelements(cell); + + unsigned int n = 0; + + for (int i = 0 ; i < sz ; i++) + { + unsigned int bid = geo_cell.get(cell,i); + + if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true) + { + output.template get<0>(base+n) = vb_int.template get<proc_>(bid); + output.template get<1>(base+n) = pi; + + n++; + } + } + } + +}; + + + +#endif /* IE_GHOST_GPU_CUH_ */ diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp index ed5a3341..b122ee4b 100755 --- a/src/Decomposition/ie_ghost.hpp +++ b/src/Decomposition/ie_ghost.hpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "nn_processor.hpp" #include "Decomposition/shift_vect_converter.hpp" +#include "Decomposition/cuda/ie_ghost_gpu.cuh" /*! \brief structure that store and compute the internal and external local ghost box @@ -21,7 +22,7 @@ * \see CartDecomposition * */ -template<unsigned int dim, typename T> +template<unsigned int dim, typename T, typename Memory, template<typename> class layout_base > class ie_ghost { //! for each sub-domain (first vector), contain the list (nested vector) of the neighborhood processors @@ -37,10 +38,21 @@ class ie_ghost openfpm::vector<p_box<dim,T> > vb_ext; //! Internal ghost boxes for this processor domain - openfpm::vector<p_box<dim,T> > vb_int; + openfpm::vector<aggregate<unsigned int,unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int,unsigned int>>::type,layout_base> vb_int; + + //! Internal ghost boxes for this processor domain + openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> vb_int_box; //! 
Cell-list that store the geometrical information of the internal ghost boxes - CellList<dim,T,Mem_fast<>,shift<dim,T>> geo_cell; + CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> geo_cell; + + //! Cell-list that store the geometrical information of the internal ghost boxes (on a processor based lavel) + CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> geo_cell_proc; + + typedef openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> proc_boxes; + + //! internal ghost Boxes for each processor + openfpm::vector<aggregate<proc_boxes>,Memory,typename layout_base<aggregate<proc_boxes>>::type,layout_base> vb_int_proc; //! shift vectors openfpm::vector<Point<dim,T>> shifts; @@ -54,6 +66,9 @@ class ie_ghost //! shift converter shift_vect_converter<dim,T> sc_convert; + //! host to device transfer + bool host_dev_transfer = false; + /*! \brief Given a local sub-domain i, it give the id of such sub-domain in the sent list * for the processor p_id * @@ -175,6 +190,9 @@ protected: { // Initialize the geo_cell structure geo_cell.Initialize(domain,div,0); + + // Initialize the geo_cell structure + geo_cell_proc.Initialize(domain,div,0); } /*! \brief Create the box_nn_processor_int (bx part) structure @@ -275,8 +293,33 @@ protected: } } } + +// construct_vb_int_proc(); } + /*! \brief construct the vb_int_proc box + * + * + */ +/* void construct_vb_int_proc() + { + vb_int_proc.resize(proc_int_box.size()); + + for (size_t i = 0 ; i < proc_int_box.size() ; i++) + { + vb_int_proc.template get<0>(i).resize(proc_int_box.get(i).ibx.size()); + + for (size_t j = 0 ; j < proc_int_box.get(i).ibx.size() ; j++) + { + for (size_t k = 0 ; k < dim ; k++) + { + vb_int_proc.template get<0>(i).template get<0>(j)[k] = proc_int_box.get(i).ibx.get(j).bx.getLow(k); + vb_int_proc.template get<0>(i).template get<1>(j)[k] = proc_int_box.get(i).ibx.get(j).bx.getHigh(k); + } + } + } + }*/ + /*! 
\brief Create the box_nn_processor_int (nbx part) structure, the geo_cell list and proc_int_box * * This structure store for each sub-domain of this processors the boxes that come from the intersection @@ -336,20 +379,24 @@ protected: n_sub.enlarge(ghost); // Intersect with the local sub-domain - p_box<dim,T> b_int; - bool intersect = n_sub.Intersect(l_sub,b_int.box); + Box<dim,T> b_int; + bool intersect = n_sub.Intersect(l_sub,b_int); // store if it intersect if (intersect == true) { + vb_int.add(); + + size_t last = vb_int.size() - 1; + // the box fill with the processor id - b_int.proc = p_id; + vb_int.template get<proc_>(last) = p_id; // fill the local processor id - b_int.lc_proc = lc_proc; + vb_int.template get<lc_proc_>(last) = lc_proc; // fill the shift id - b_int.shift_id = convertShift(nn_p_box_pos.get(k)); + vb_int.template get<shift_id_>(last) = convertShift(nn_p_box_pos.get(k)); // // Updating @@ -364,12 +411,12 @@ protected: // add the box to the near processor sub-domain intersections openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).nbx; - p_box_int.add(b_int.box); - vb_int.add(b_int); + p_box_int.add(b_int); + vb_int_box.add(b_int); // store the box in proc_int_box storing from which sub-domain they come from Box_sub<dim,T> sb; - sb.bx = b_int.box; + sb.bx = b_int; sb.sub = i; sb.r_sub = r_sub.get(k); sb.cmb = nn_p_box_pos.get(k); @@ -388,8 +435,8 @@ protected: // update the geo_cell list // get the cells this box span - const grid_key_dx<dim> p1 = geo_cell.getCellGrid(b_int.box.getP1()); - const grid_key_dx<dim> p2 = geo_cell.getCellGrid(b_int.box.getP2()); + const grid_key_dx<dim> p1 = geo_cell.getCellGrid(b_int.getP1()); + const grid_key_dx<dim> p2 = geo_cell.getCellGrid(b_int.getP2()); // Get the grid and the sub-iterator auto & gi = geo_cell.getGrid(); @@ -400,6 +447,7 @@ protected: { auto key = g_sub.get(); geo_cell.addCell(gi.LinId(key),vb_int.size()-1); + geo_cell_proc.addCell(gi.LinId(key),p_id); ++g_sub; } } 
@@ -415,19 +463,19 @@ public: ie_ghost() {}; //! Copy constructor - ie_ghost(const ie_ghost<dim,T> & ie) + ie_ghost(const ie_ghost<dim,T,Memory,layout_base> & ie) { - this->operator =(ie); + this->operator=(ie); } //! Copy constructor - ie_ghost(ie_ghost<dim,T> && ie) + ie_ghost(ie_ghost<dim,T,Memory,layout_base> && ie) { this->operator=(ie); } //! Copy operator - inline ie_ghost<dim,T> & operator=(ie_ghost<dim,T> && ie) + inline ie_ghost<dim,T,Memory,layout_base> & operator=(ie_ghost<dim,T,Memory,layout_base> && ie) { box_nn_processor_int.swap(ie.box_nn_processor_int); proc_int_box.swap(ie.proc_int_box); @@ -442,7 +490,7 @@ public: } //! Copy operator - inline ie_ghost<dim,T> & operator=(const ie_ghost<dim,T> & ie) + inline ie_ghost<dim,T,Memory,layout_base> & operator=(const ie_ghost<dim,T,Memory,layout_base> & ie) { box_nn_processor_int = ie.box_nn_processor_int; proc_int_box = ie.proc_int_box; @@ -682,9 +730,9 @@ public: * \return the internal ghost box * */ - inline const ::Box<dim,T> & getIGhostBox(size_t b_id) const + inline ::Box<dim,T> getIGhostBox(size_t b_id) const { - return vb_int.get(b_id).box; + return vb_int_box.get(b_id); } /*! \brief Given the internal ghost box id, it return the near processor at witch belong @@ -697,7 +745,7 @@ public: */ inline size_t getIGhostBoxProcessor(size_t b_id) const { - return vb_int.get(b_id).proc; + return vb_int.template get<proc_>(b_id); } /*! \brief Get the number of the calculated external ghost boxes @@ -759,6 +807,16 @@ public: return geo_cell.getCellIterator(geo_cell.getCell(p)); } + /*! \brief Get the number of processor a particle must sent + * + * \param p position of the particle + * + */ + inline unsigned int ghost_processorID_N(const Point<dim,T> & p) + { + return ghost_processorID_N_impl(p,geo_cell,vb_int_box,vb_int); + } + /*! 
\brief Given a position it return if the position belong to any neighborhood processor ghost * (Internal ghost) * @@ -790,7 +848,7 @@ public: { size_t bid = cell_it.get(); - if (vb_int.get(bid).box.isInsideNP(p) == true) + if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true) { ids_p.add(std::pair<size_t,size_t>(id1::id(vb_int.get(bid),bid),id2::id(vb_int.get(bid),bid))); } @@ -839,7 +897,7 @@ public: { size_t bid = cell_it.get(); - if (vb_int.get(bid).box.isInsideNP(p) == true) + if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true) { ids.add(id::id(vb_int.get(bid),bid)); } @@ -883,7 +941,7 @@ public: { size_t bid = cell_it.get(); - if (vb_int.get(bid).box.isInsideNP(p) == true) + if (Box<dim,T>(vb_int_box.get(bid)).isInsideNP(p) == true) { ids_p.add(std::pair<size_t,size_t>(id1::id(vb_int.get(bid),bid),id2::id(vb_int.get(bid),bid))); } @@ -990,7 +1048,7 @@ public: * \return true if they are equal * */ - bool is_equal(ie_ghost<dim,T> & ig) + bool is_equal(ie_ghost<dim,T,Memory,layout_base> & ig) { if (getNEGhostBox() != ig.getNEGhostBox()) return false; @@ -1055,7 +1113,7 @@ public: * \return true if they are equal * */ - bool is_equal_ng(ie_ghost<dim,T> & ig) + bool is_equal_ng(ie_ghost<dim,T,Memory,layout_base> & ig) { return true; } @@ -1074,6 +1132,27 @@ public: ids_p.clear(); ids.clear(); } + + /*! 
\brief toKernel() Convert this data-structure into a kernel usable data-structure + * + * \return + * + */ + ie_ghost_gpu<dim,T,Memory,layout_base> toKernel() + { + if (host_dev_transfer == false) + { + geo_cell.hostToDevice(); + vb_int_box.template hostToDevice<0,1>(); + vb_int.template hostToDevice<0,1,2>(); + + host_dev_transfer = true; + } + + ie_ghost_gpu<dim,T,Memory,layout_base> igg(geo_cell.toKernel(),vb_int_box.toKernel(),vb_int.toKernel(),create_vcluster().size()); + + return igg; + } }; diff --git a/src/Makefile.am b/src/Makefile.am index 70e6b590..01822b3f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -17,7 +17,7 @@ pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis actual_test_SOURCES = Vector/cuda/vector_dist_cuda_func_test.cu Vector/cuda/vector_dist_gpu_unit_tests.cu vector_ main_single.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp actual_test_CXXFLAGS = -Wno-unknown-pragmas $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs -actual_test_CFLAGS = $(CUDA_CFLAGS) -IDIOCANE +actual_test_CFLAGS = $(CUDA_CFLAGS) actual_test_LDADD = $(LINKLIBS) -lparmetis -lmetis diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index 6b3454e7..6d165d69 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -4,9 +4,198 @@ #include "Vector/map_vector.hpp" #include "Vector/cuda/vector_dist_cuda_funcs.cuh" #include "Vector/util/vector_dist_funcs.hpp" +#include "Decomposition/CartDecomposition.hpp" +#include "util/cuda/scan_cuda.cuh" + +#define SUB_UNIT_FACTOR 1024 
BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test ) +BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use ) +{ + auto & v_cl = create_vcluster(); + + // Vcluster + Vcluster<> & vcl = create_vcluster(); + + CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl); + + // Physical domain + Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 }); + size_t div[3]; + + // Get the number of processor and calculate the number of sub-domain + // for each processor (SUB_UNIT_FACTOR=64) + size_t n_proc = vcl.getProcessingUnits(); + size_t n_sub = n_proc * SUB_UNIT_FACTOR; + + // Set the number of sub-domains on each dimension (in a scalable way) + for (int i = 0; i < 3; i++) + { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} + + // Define ghost + Ghost<3, float> g(0.1); + + // Boundary conditions + size_t bc[] = { PERIODIC, PERIODIC, PERIODIC }; + + // Decompose + dec.setParameters(div,box,bc,g); + dec.decompose(); + + // Get the local boxes + + int nsub = dec.getNSubDomain(); + int n_part = 10000 / nsub; + + openfpm::vector_gpu<Point<3,float>> vg; + vg.resize(nsub*n_part); + + for (size_t k = 0 ; k < nsub ; k++) + { + SpaceBox<3,float> sp = dec.getSubDomain(k); + + for (size_t j = 0 ; j < n_part ; j++) + { + vg.template get<0>(k*n_part+j)[0] = (sp.getHigh(0) - sp.getLow(0))*((float)rand()/RAND_MAX) + sp.getLow(0); + vg.template get<0>(k*n_part+j)[1] = (sp.getHigh(1) - sp.getLow(1))*((float)rand()/RAND_MAX) + sp.getLow(1); + vg.template get<0>(k*n_part+j)[2] = (sp.getHigh(2) - sp.getLow(2))*((float)rand()/RAND_MAX) + sp.getLow(2); + } + } + + vg.hostToDevice<0>(); + + // process on GPU the processor ID for each particles + + auto ite = vg.getGPUIterator(); + + openfpm::vector_gpu<aggregate<unsigned int>> proc_id_out; + proc_id_out.resize(vg.size()+1); + proc_id_out.template get<0>(proc_id_out.size()-1) = 0; + proc_id_out.template hostToDevice(proc_id_out.size()-1,proc_id_out.size()-1); + + 
num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())> + <<<ite.wthr,ite.thr>>> + (dec.toKernel(),vg.toKernel(),proc_id_out.toKernel()); + + proc_id_out.deviceToHost<0>(); + + bool match = true; + for (size_t i = 0 ; i < vg.size() ; i++) + { + Point<3,float> xp = vg.template get<0>(i); + + match &= proc_id_out.template get<0>(i) == dec.ghost_processorID_N(xp); + } + + BOOST_REQUIRE_EQUAL(match,true); + + ////////////////////////// We now create the scan ////////////////////////////////////// + + openfpm::vector<aggregate<unsigned int>, + CudaMemory, + typename memory_traits_inte<aggregate<unsigned int>>::type, + memory_traits_inte> starts; + + starts.resize(proc_id_out.size()); + + // scan + scan<unsigned int,unsigned int>(proc_id_out,starts); + starts.deviceToHost<0>(starts.size()-1,starts.size()-1); + + size_t sz = starts.template get<0>(starts.size()-1); + + ///////////////////////// We now test ////////////////////////// + + openfpm::vector<aggregate<unsigned int,unsigned int>, + CudaMemory, + typename memory_traits_inte<aggregate<unsigned int,unsigned int>>::type, + memory_traits_inte> output; + + output.resize(sz); + + ite = vg.getGPUIterator(); + + // we compute processor id for each particle + proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(starts.toKernel()),decltype(output.toKernel())> + <<<ite.wthr,ite.thr>>> + (dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel()); + + output.template deviceToHost<0,1>(); + + for (size_t i = 0 ; i < output.size() ; i++) + { + std::cout << output.template get<0>(i) << " " << output.template get<1>(i) << std::endl; + } +} + +BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use ) +{ + auto & v_cl = create_vcluster(); + + // Vcluster + Vcluster<> & vcl = create_vcluster(); + + CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl); + + // Physical domain + Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 
1.0, 1.0 }); + size_t div[3]; + + // Get the number of processor and calculate the number of sub-domain + // for each processor (SUB_UNIT_FACTOR=64) + size_t n_proc = vcl.getProcessingUnits(); + size_t n_sub = n_proc * SUB_UNIT_FACTOR; + + // Set the number of sub-domains on each dimension (in a scalable way) + for (int i = 0; i < 3; i++) + { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));} + + // Define ghost + Ghost<3, float> g(0.01); + + // Boundary conditions + size_t bc[] = { PERIODIC, PERIODIC, PERIODIC }; + + // Decompose + dec.setParameters(div,box,bc,g); + dec.decompose(); + + openfpm::vector_gpu<Point<3,float>> vg; + vg.resize(10000); + + for (size_t i = 0 ; i < 10000 ; i++) + { + vg.template get<0>(i)[0] = (float)rand()/RAND_MAX; + vg.template get<0>(i)[1] = (float)rand()/RAND_MAX; + vg.template get<0>(i)[2] = (float)rand()/RAND_MAX; + } + + vg.hostToDevice<0>(); + + // process on GPU the processor ID for each particles + + auto ite = vg.getGPUIterator(); + + openfpm::vector_gpu<aggregate<int,int>> proc_id_out; + proc_id_out.resize(vg.size()); + + process_id_proc_each_part<decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())> + <<<ite.wthr,ite.thr>>> + (dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),v_cl.rank()); + + proc_id_out.deviceToHost<0>(); + + bool match = true; + for (size_t i = 0 ; i < proc_id_out.size() ; i++) + { + Point<3,float> xp = vg.template get<0>(i); + + match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp); + } +} + + BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test ) { openfpm::vector_gpu<aggregate<int,int>> vgp; diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index 7c3b3c6d..31dfc6e4 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -10,6 +10,47 @@ #include "Vector/util/vector_dist_funcs.hpp" +template<unsigned int dim, typename St, typename 
decomposition_type, typename vector_type, typename start_type, typename output_type> +__global__ void proc_label_id_ghost(decomposition_type dec,vector_type vd, start_type starts, output_type out) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size()) return; + + Point<dim,St> xp = vd.template get<0>(p); + + unsigned int base = starts.template get<0>(p); + + dec.ghost_processor_ID(xp,out,base,p); +} + +template<unsigned int dim, typename St, typename decomposition_type, typename vector_type, typename output_type> +__global__ void num_proc_ghost_each_part(decomposition_type dec, vector_type vd, output_type out) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size()) return; + + Point<dim,St> xp = vd.template get<0>(p); + + out.template get<0>(p) = dec.ghost_processorID_N(xp); +} + +template<typename cartdec_gpu, typename particles_type, typename vector_out> +__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output , int rank) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= parts.size()) return; + + Point<3,float> xp = parts.template get<0>(p); + + int pr = cdg.processorIDBC(xp); + + output.template get<1>(p) = (pr == rank)?-1:pr; + output.template get<0>(p) = p; +} + template<typename vector_type,typename vector_type_offs> __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs offs) { diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index 26dbe011..b3886ee4 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -5,6 +5,8 @@ #include <Vector/vector_dist.hpp> #include "Vector/tests/vector_dist_util_unit_tests.hpp" +#define SUB_UNIT_FACTOR 1024 + BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test ) void print_test(std::string test, size_t sz) @@ -375,7 +377,6 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test) } - BOOST_AUTO_TEST_CASE( 
vector_dist_map_on_gpu_test) { auto & v_cl = create_vcluster(); @@ -421,7 +422,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test) vd.hostToDeviceProp<0,1,2>(); // Ok we redistribute the particles (GPU based) - vd.map(MAP_ON_DEVICE); + vd.map(RUN_ON_DEVICE); // Reset the host part @@ -494,6 +495,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test) BOOST_REQUIRE_EQUAL(l_cnt,vd.size_local()); vd.write("gpu_write_test"); + + vd.ghost_get<0,1,2>(RUN_ON_DEVICE); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 05f15a77..bd73bc48 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -2394,7 +2394,7 @@ public: * \return Particle iterator * */ - template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::loc_index> getParticleIteratorCRS(vrl & NN) + template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::local_index_type> getParticleIteratorCRS(vrl & NN) { #ifdef SE_CLASS1 if (!(opt & BIND_DEC_TO_GHOST)) @@ -2405,7 +2405,7 @@ public: #endif // First we check that - return openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::loc_index>(NN.getParticleSeq()); + return openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::local_index_type>(NN.getParticleSeq()); } /*! \brief Return from which cell we have to start in case of CRS interation diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 4419fa2c..bd07859e 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -11,6 +11,7 @@ #if defined(CUDA_GPU) && defined(__NVCC__) #include "util/cuda/moderngpu/kernel_mergesort.hxx" #include "Vector/cuda/vector_dist_cuda_funcs.cuh" +#include "util/cuda/scan_cuda.cuh" #endif #include "Vector/util/vector_dist_funcs.hpp" @@ -24,7 +25,7 @@ #define BIND_DEC_TO_GHOST 1 -#define MAP_ON_DEVICE 1024 +#define RUN_ON_DEVICE 1024 #define MAP_LOCAL 2 /*! 
\brief compute the communication options from the ghost_get/put options @@ -164,7 +165,7 @@ class vector_dist_comm openfpm::vector<size_t> & prc_r, size_t opt) { - if (opt & MAP_ON_DEVICE) + if (opt & RUN_ON_DEVICE) { size_t prev_off = 0; for (size_t i = 0; i < prc_sz.size() ; i++) @@ -673,7 +674,7 @@ class vector_dist_comm * \param m_pos sending buffer for position * \param m_prp sending buffer for properties * \param offset from where start the list of the particles that migrate in o_part - * This parameter is used only in case of MAP_ON_DEVICE option + * This parameter is used only in case of RUN_ON_DEVICE option * */ void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos, @@ -696,7 +697,7 @@ class vector_dist_comm cnt.get(i) = 0; } - if (opt & MAP_ON_DEVICE) + if (opt & RUN_ON_DEVICE) { #if defined(CUDA_GPU) && defined(__NVCC__) @@ -736,7 +737,7 @@ class vector_dist_comm #else - std::cout << __FILE__ << ":" << __LINE__ << " error MAP_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl; #endif } @@ -823,7 +824,7 @@ class vector_dist_comm openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_sz, size_t opt) { - if (opt == MAP_ON_DEVICE) + if (opt == RUN_ON_DEVICE) { #ifdef __NVCC__ @@ -855,6 +856,7 @@ class vector_dist_comm // get also the last element from lbl_p; lbl_p.template deviceToHost<1>(lbl_p.size()-1,lbl_p.size()-1); + mem.deviceToHost(); int noff = *(int *)mem.getPointer(); prc_sz.resize(noff+1); prc_sz.template get<0>(prc_sz.size()-1) = lbl_p.size(); @@ -862,7 +864,7 @@ class vector_dist_comm #else - std::cout << __FILE__ << ":" << __LINE__ << " error, it seems you tried to call map 
with MAP_ON_DEVICE option, this requires to compile the program with NVCC" << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << " error, it seems you tried to call map with RUN_ON_DEVICE option, this requires to compile the program with NVCC" << std::endl; #endif } @@ -930,57 +932,98 @@ class vector_dist_comm * \param v_prp vector of particle properties * \param prc for each particle it label the processor id (the owner of the particle, or where it should go the particle) * \param g_m ghost marker + * \param opt ghost_get options * */ void labelParticlesGhost(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos, openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, openfpm::vector<size_t> & prc, - size_t & g_m) + size_t & g_m, + size_t opt) { // Buffer that contain for each processor the id of the particle to send g_opart.clear(); g_opart.resize(dec.getNNProcessors()); prc_g_opart.clear(); - // Iterate over all particles - auto it = v_pos.getIteratorTo(g_m); - while (it.isNext()) + if (opt & RUN_ON_DEVICE) { - auto key = it.get(); +#if defined(CUDA_GPU) && defined(__NVCC__) - // Given a particle, it return which processor require it (first id) and shift id, second id - // For an explanation about shifts vectors please consult getShiftVector in ie_ghost - const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE); + openfpm::vector<aggregate<unsigned int>, + Memory, + typename layout_base<aggregate<unsigned int>>::type, + layout_base> proc_id_out; + proc_id_out.resize(v_pos.size()); - for (size_t i = 0; i < vp_id.size(); i++) - { - // processor id - size_t p_id = vp_id.get(i).first; + auto ite = v_pos.getGPUIterator(); - // add particle to communicate - g_opart.get(p_id).add(); - g_opart.get(p_id).last().template get<0>() = key; - 
g_opart.get(p_id).last().template get<1>() = vp_id.get(i).second; - } + // First we have to see how many entry each particle produce + num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())> + <<<ite.wthr,ite.thr>>> + (dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel()); - ++it; - } + openfpm::vector<aggregate<unsigned int>, + Memory, + typename layout_base<aggregate<unsigned int>>::type, + layout_base> starts; + + // scan + scan<unsigned int,unsigned int>(proc_id_out,starts); + + // we compute processor id for each particle - // remove all zero entry and construct prc (the list of the sending processors) - openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart_f; - // count the non zero element - for (size_t i = 0 ; i < g_opart.size() ; i++) + // we do a sort + +#else + + std::cout << __FILE__ << ":" << __LINE__ << " error: to use gpu computation you must compile vector_dist.hpp with NVCC" << std::endl; + +#endif + } + else { - if (g_opart.get(i).size() != 0) + // Iterate over all particles + auto it = v_pos.getIteratorTo(g_m); + while (it.isNext()) + { + auto key = it.get(); + + // Given a particle, it return which processor require it (first id) and shift id, second id + // For an explanation about shifts vectors please consult getShiftVector in ie_ghost + const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE); + + for (size_t i = 0; i < vp_id.size(); i++) + { + // processor id + size_t p_id = vp_id.get(i).first; + + // add particle to communicate + g_opart.get(p_id).add(); + g_opart.get(p_id).last().template get<0>() = key; + g_opart.get(p_id).last().template get<1>() = vp_id.get(i).second; + } + + ++it; + } + + // remove all zero entry and construct prc (the list of the sending processors) + 
openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart_f; + + // count the non zero element + for (size_t i = 0 ; i < g_opart.size() ; i++) { - g_opart_f.add(); - g_opart.get(i).swap(g_opart_f.last()); - prc.add(dec.IDtoProc(i)); + if (g_opart.get(i).size() != 0) + { + g_opart_f.add(); + g_opart.get(i).swap(g_opart_f.last()); + prc.add(dec.IDtoProc(i)); + } } - } - g_opart.swap(g_opart_f); + g_opart.swap(g_opart_f); + } } /*! \brief Call-back to allocate buffer to receive incoming elements (particles) @@ -1159,7 +1202,7 @@ public: // Label all the particles if ((opt & SKIP_LABELLING) == false) - {labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m);} + {labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m,opt);} // Send and receive ghost particle information { @@ -1344,14 +1387,14 @@ public: fill_send_map_buf(v_pos,v_prp, prc_sz_r, m_pos, m_prp,prc_sz,opt); size_t opt_ = 0; - if (opt & MAP_ON_DEVICE) + if (opt & RUN_ON_DEVICE) { #if defined(CUDA_GPU) && defined(__NVCC__) - // Before doing the communication on MAP_ON_DEVICE we have to be sure that the previous kernels complete + // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete cudaDeviceSynchronize(); opt_ |= MPI_GPU_DIRECT; #else - std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option MAP_ON_DEVICE you must compile with NVCC" << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl; #endif } diff --git a/src/Vector/vector_dist_kernel.hpp b/src/Vector/vector_dist_kernel.hpp new file mode 100644 index 00000000..2155a26e --- /dev/null +++ b/src/Vector/vector_dist_kernel.hpp @@ -0,0 +1,114 @@ +/* + * vector_dist_gpu.hpp + * + * Created on: Jul 28, 2018 + * Author: i-bird + */ + +#ifndef VECTOR_DIST_GPU_HPP_ +#define VECTOR_DIST_GPU_HPP_ + +#ifdef CUDA_GPU + +#define POS_PROP -1 + +#define GET_PARTICLE(vd) blockDim.x*blockIdx.x + threadIdx.x; if 
(blockDim.x*blockIdx.x + threadIdx.x > vd.size()) {return;}; + +template<unsigned int dim, + typename St, + typename prop> +class vector_dist_ker +{ + //! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle + int g_m = 0; + + //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor + //! the second element contain unassigned particles + openfpm::vector_gpu_ker<Point<dim,St>,memory_traits_inte> v_pos; + + //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor + //! the second element contain unassigned particles + openfpm::vector_gpu_ker<prop,memory_traits_inte> v_prp; + +public: + + //! space type + typedef St stype; + + //! dimensions of space + static const unsigned int dims = dim; + + vector_dist_ker(const openfpm::vector_gpu_ker<Point<dim,St>,memory_traits_inte> & v_pos, const openfpm::vector_gpu_ker<prop,memory_traits_inte> & v_prp) + :v_pos(v_pos),v_prp(v_prp) + {} + + /*! \brief return the number of particles + * + * \return the number of particles + * + */ + __device__ int size() {return v_pos.size();} + + /*! \brief Get the position of an element + * + * see the vector_dist iterator usage to get an element key + * + * \param vec_key element + * + * \return the position of the element in space + * + */ + __device__ inline auto getPos(int vec_key) -> decltype(v_pos.template get<0>(vec_key)) + { + return v_pos.template get<0>(vec_key); + } + + /*! \brief Get the position of an element + * + * see the vector_dist iterator usage to get an element key + * + * \param vec_key element + * + * \return the position of the element in space + * + */ + __device__ inline auto getPos(int vec_key) const -> decltype(v_pos.template get<0>(vec_key)) + { + return v_pos.template get<0>(vec_key); + } + + /*! 
\brief Get the property of an element + * + * see the vector_dist iterator usage to get an element key + * + * \tparam id property id + * \param vec_key vector element + * + * \return return the selected property of the vector element + * + */ + template<unsigned int id> __device__ inline auto getProp(int vec_key) -> decltype(v_prp.template get<id>(vec_key)) + { + return v_prp.template get<id>(vec_key); + } + + /*! \brief Get the property of an element + * + * see the vector_dist iterator usage to get an element key + * + * \tparam id property id + * \param vec_key vector element + * + * \return return the selected property of the vector element + * + */ + template<unsigned int id> __device__ inline auto getProp(int vec_key) const -> decltype(v_prp.template get<id>(vec_key)) + { + return v_prp.template get<id>(vec_key); + } + +}; + +#endif + +#endif /* VECTOR_DIST_GPU_HPP_ */ diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in new file mode 100644 index 00000000..a0d62c9e --- /dev/null +++ b/src/config/config_cmake.h.in @@ -0,0 +1,159 @@ +/* Coverty scan */ +${DEFINE_COVERTY_SCAN} + +/* GPU support */ +${DEFINE_CUDA_GPU} + +/* Debug */ +${DEFINE_DEBUG} /**/ + +/* Debug */ +${DEFINE_DEBUG_MODE} /**/ + +/* Define to dummy `main' function (if any) required to link to the Fortran + libraries. */ +${DEFINE_F77_DUMMY_MAIN} + +/* Define if F77 and FC dummy `main' functions are identical. */ +${DEFINE_FC_DUMMY_MAIN_EQ_F77} + +/* Define if you have a BLAS library. 
*/ +${DEFINE_HAVE_BLAS} + +/* define if the Boost library is available */ +${DEFINE_HAVE_BOOST} + +/* define if the Boost::IOStreams library is available */ +${DEFINE_HAVE_BOOST_IOSTREAMS} /**/ + +/* define if the Boost::PROGRAM_OPTIONS library is available */ +${DEFINE_HAVE_BOOST_PROGRAM_OPTIONS} /**/ + +/* define if the Boost::Unit_Test_Framework library is available */ +${DEFINE_HAVE_BOOST_UNIT_TEST_FRAMEWORK} /**/ + +/* Have clock time */ +${DEFINE_HAVE_CLOCK_GETTIME} /**/ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +${DEFINE_HAVE_DLFCN_H} + +/* Define if you have EIGEN library. */ +${DEFINE_HAVE_EIGEN} + +/* Define to 1 if you have the <Eigen/Dense> header file. */ +${DEFINE_HAVE_EIGEN_DENSE} + +/* Define to 1 if you have the <Eigen/LU> header file. */ +${DEFINE_HAVE_EIGEN_LU} + +/* Defined if you have HDF5 support */ +${DEFINE_HAVE_HDF5} + +/* Define to 1 if you have the <inttypes.h> header file. */ +${DEFINE_HAVE_INTTYPES_H} + +/* Define if you have LAPACK library */ +${DEFINE_HAVE_LAPACK} + +/* Define if you have LIBHILBERT library */ +${DEFINE_HAVE_LIBHILBERT} + +/* Have quad math lib */ +${DEFINE_HAVE_LIBQUADMATH} + +/* Define to 1 if you have the <memory.h> header file. */ +${DEFINE_HAVE_MEMORY_H} + +/* Define if you have METIS library */ +${DEFINE_HAVE_METIS} + +/* MPI Enabled */ +${DEFINE_HAVE_MPI} + +/* We have OSX */ +${DEFINE_HAVE_OSX} + +/* Define if you have PARMETIS library */ +${DEFINE_HAVE_PARMETIS} + +/* Define if you have PETSC library */ +${DEFINE_HAVE_PETSC} + +/* Define to 1 if you have the <stdint.h> header file. */ +${DEFINE_HAVE_STDINT_H} + +/* Define to 1 if you have the <stdlib.h> header file. */ +${DEFINE_HAVE_STDLIB_H} + +/* Define to 1 if you have the <strings.h> header file. */ +${DEFINE_HAVE_STRINGS_H} + +/* Define to 1 if you have the <string.h> header file. */ +${DEFINE_HAVE_STRING_H} + +/* Define if you have SUITESPARSE library. 
*/ +${DEFINE_HAVE_SUITESPARSE} + +/* Define to 1 if you have the <sys/stat.h> header file. */ +${DEFINE_HAVE_SYS_STAT_H} + +/* Define to 1 if you have the <sys/types.h> header file. */ +${DEFINE_HAVE_SYS_TYPES_H} + +/* Define to 1 if you have the <unistd.h> header file. */ +${DEFINE_HAVE_UNISTD_H} + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* NVCC compiling */ +${DEFINE_NVCC} /**/ + +/* Name of package */ +#define PACKAGE "openfpm_pdata" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "BUG-REPORT-ADDRESS" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "OpenFPM_pdata" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "OpenFPM_pdata 1.0.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "openfpm_pdata" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0.0" + +/* Test performance mode */ +${DEFINE_PERFORMANCE_TEST} + +/* Security enhancement class 1 */ +${DEFINE_SE_CLASS1} + +/* Security enhancement class 2 */ +${DEFINE_SE_CLASS2} + +/* Security enhancement class 3 */ +${DEFINE_SE_CLASS3} + +/* Define to 1 if you have the ANSI C header files. 
*/ +${DEFINE_STDC_HEADERS} + +/* If an error occur stop the program */ +${DEFINE_STOP_ON_ERROR} + +/* Test coverage mode */ +${DEFINE_TEST_COVERAGE_MODE} + +/* when an error accur continue but avoid unsafe operation */ +/* #undef THROW_ON_ERROR */ + +/* Version number of package */ +#define VERSION "1.0.0" diff --git a/src/main_single.cpp b/src/main_single.cpp new file mode 100644 index 00000000..ac34002d --- /dev/null +++ b/src/main_single.cpp @@ -0,0 +1,32 @@ +#include <iostream> + +size_t debug_tot_call = 0; + +#define PRINT_STACKTRACE +#define CHECKFOR_POSNAN +#define CHECKFOR_POSINF +#define CHECKFOR_PROPNAN +#define CHECKFOR_PROPINF + +#define BOOST_DISABLE_ASSERTS + + +#include "config.h" +#undef VERSION + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +// initialization function: +bool init_unit_test() +{ + return true; +} + +// entry point: +int main(int argc, char* argv[]) +{ + return boost::unit_test::unit_test_main( &init_unit_test, argc, argv ); +} + +#include "unit_test_init_cleanup.hpp" -- GitLab