Commit fd33db21 authored by incardon's avatar incardon
Browse files

Merging done

parents 3cc71e2a 7d95adac
......@@ -41,14 +41,18 @@ set(CUDA_ON_CPU CACHE BOOL "Make Cuda work on heap")
set(CPACK_RUN_INSTALL_DEPENDENCIES CACHE BOOL "Set to true if we are creating deb or RPM packages")
set(ENABLE_GARBAGE_INJECTOR CACHE BOOL "Enable the injector of garbage in the memory allocator")
set(ENABLE_VCLUSTER_GARBAGE_INJECTOR CACHE BOOL "Enable the injector of garbage in the vcluster memory buffers")
set(HIP_ENABLE CACHE BOOL "Enable HIP compiler")
set(AMD_ARCH_COMPILE "gfx900" CACHE STRING "AMD gpu architecture used to compile kernels")
# Enabling a real GPU takes precedence over CUDA_ON_CPU
if (ENABLE_GPU)
set(CUDA_ON_CPU OFF)
set(CUDA_ON_CPU OFF)
# Test CLang
find_package(CUDA)
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
enable_language(CUDA)
if (NOT HIP_ENABLE)
find_package(CUDA)
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
enable_language(CUDA)
endif()
endif()
set (CMAKE_CXX_STANDARD 14)
......@@ -83,6 +87,20 @@ find_package(Eigen3)
find_package(SuiteSparse OPTIONAL_COMPONENTS UMFPACK)
find_package(Vc)
find_package(OpenMP)
find_package(HIP)
# HIP detection result.
# When HIP is found, define the HIP_GPU, CUDIFY_USE_HIP and CUDA_GPU
# preprocessor lines (held in the DEFINE_* variables) and record the outcome
# in the `hip_enabled` file: 1 when HIP was found, 0 otherwise.
if(NOT HIP_FOUND)
    file(WRITE hip_enabled 0)
else()
    set(DEFINE_HIP_GPU "#define HIP_GPU")
    set(DEFINE_CUDIFY_USE_HIP "#define CUDIFY_USE_HIP")
    # CUDA_GPU is defined for HIP builds as well
    set(DEFINE_CUDA_GPU "#define CUDA_GPU")
    file(WRITE hip_enabled 1)
endif()
if (OPENMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
......@@ -95,7 +113,8 @@ if(PROFILE_WITH_SCOREP)
set(CMAKE_CUDA_COMPILER_LAUNCHER "scorep")
endif()
if(ENABLE_GPU AND CUDA_FOUND)
if(ENABLE_GPU AND (CUDA_FOUND OR HIP_FOUND))
set(OPENFPM_INIT_FILE "initialize/initialize_wrapper_cuda.cu")
else()
set(OPENFPM_INIT_FILE "initialize/initialize_wrapper_cpu.cpp")
......@@ -339,3 +358,4 @@ cpack_add_component(OpenFPM
DISPLAY_NAME OpenFPM
DESCRITION OpenFPM binary files)
......@@ -122,6 +122,7 @@ with_petsc
with_eigen
with_vcdevel
enable_gpu
enable_hip
enable_asan
enable_garbageinj
enable_garbageinjv
......@@ -248,11 +249,7 @@ do
conf_options="$conf_options -DTEST_PERFORMANCE=ON"
;;
gpu)
if [ x"$CXX" == x"" ]; then
conf_options="$conf_options"
else
conf_options="$conf_options -DCMAKE_CUDA_HOST_COMPILER=$(which $CXX)"
fi
conf_options="$conf_options -DCMAKE_CUDA_HOST_COMPILER=$(which mpic++)"
if [ x"$CXXCUDA" == x"" ]; then
conf_options="$conf_options"
else
......@@ -260,6 +257,9 @@ do
fi
conf_options="$conf_options -DENABLE_GPU=ON"
;;
hip)
conf_options="$conf_options -DHIP_ENABLE=ON -DENABLE_GPU=ON"
;;
asan)
conf_options="$conf_options -DENABLE_ASAN=ON"
;;
......
......@@ -2,24 +2,30 @@ include ../../example.mk
LIBS_CUDA_ON_CPU=$(LIBS)
CUDA_CC=
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
CC=mpic++
ifdef HIP
CUDA_CC=hipcc
CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
LIBS_SELECT=$(LIBS)
CC=hipcc
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
else
CUDA_CC=nvcc -ccbin=mpic++
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
else
CUDA_CC=nvcc -ccbin=mpic++
endif
LIBS_SELECT=$(LIBS)
endif
LIBS_SELECT=$(LIBS)
endif
CC=mpic++
OBJ = main.o
gpu_fstep:
......
......@@ -5,25 +5,33 @@ include ../../example.mk
CUDA_CC=
CUDA_CC_LINK=
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
CC=mpic++
ifdef HIP
CUDA_CC=hipcc
CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
LIBS_SELECT=$(LIBS)
CC=hipcc
CUDA_CC_LINK=hipcc
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
else
CUDA_CC=nvcc -ccbin=mpic++
CUDA_CC_LINK=nvcc -ccbin=mpic++
endif
LIBS_SELECT=$(LIBS)
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
else
CUDA_CC=nvcc -ccbin=mpic++
CUDA_CC_LINK=nvcc -ccbin=mpic++
endif
LIBS_SELECT=$(LIBS)
endif
endif
CC=mpic++
LDIR =
......
......@@ -794,7 +794,7 @@ int main(int argc, char* argv[])
// You can ignore all these dp/2.0 offsets: they are a trick to reach the same initialization
// as Dual-SPH, which uses a different criterion to draw particles
Box<3,real_number> fluid_box({dp/2.0,dp/2.0,dp/2.0},{0.4+dp/2.0,0.67-dp/2.0,0.3+dp/2.0});
Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
// return an iterator to the fluid particles to add to vd
auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
......@@ -843,12 +843,12 @@ int main(int argc, char* argv[])
}
// Recipient
Box<3,real_number> recipient1({0.0,0.0,0.0},{1.6+dp/2.0,0.67+dp/2.0,0.4+dp/2.0});
Box<3,real_number> recipient2({dp,dp,dp},{1.6-dp/2.0,0.67-dp/2.0,0.4+dp/2.0});
Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
Box<3,real_number> obstacle1({0.9,0.24-dp/2.0,0.0},{1.02+dp/2.0,0.36,0.45+dp/2.0});
Box<3,real_number> obstacle2({0.9+dp,0.24+dp/2.0,0.0},{1.02-dp/2.0,0.36-dp,0.45-dp/2.0});
Box<3,real_number> obstacle3({0.9+dp,0.24,0.0},{1.02,0.36,0.45});
Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
openfpm::vector<Box<3,real_number>> holes;
holes.add(recipient2);
......
......@@ -4,24 +4,32 @@ include ../../example.mk
### internally the example disables its code with the preprocessor if it is not compiled with nvcc
CUDA_CC=
CUDA_CC_LINK=
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
ifdef HIP
CUDA_CC=hipcc
CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
LIBS_SELECT=$(LIBS)
CC=hipcc
CUDA_CC_LINK=hipcc
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=
else
CUDA_CC=nvcc -ccbin=mpic++
CUDA_CC_LINK=nvcc -ccbin=mpic++
CUDA_OPTIONS=-use_fast_math -arch=sm_61 -lineinfo
endif
LIBS_SELECT=$(LIBS)
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS_CUDA_ON_CPU)
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=
else
CUDA_CC=nvcc -ccbin=mpic++
CUDA_CC_LINK=nvcc -ccbin=mpic++
CUDA_OPTIONS=-use_fast_math -arch=sm_61 -lineinfo
endif
LIBS_SELECT=$(LIBS)
endif
endif
......
......@@ -250,6 +250,7 @@ inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_num
DW.get(2) = factor * dx.get(2);
}
// Tensile correction
inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
{
......@@ -348,12 +349,12 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
// if (p == q) skip this particle this condition should be done in the r^2 = 0
if (a == b) {++Np; continue;};
unsigned int typeb = vd.template getProp<type>(b);
unsigned int typeb = vd.template getProp<type>(b);
real_number massb = (typeb == FLUID)?MassFluid:MassBound;
Point<3,real_number> vb = vd.template getProp<velocity>(b);
real_number Pb = vd.template getProp<Pressure>(b);
real_number rhob = vd.template getProp<rho>(b);
real_number massb = (typeb == FLUID)?MassFluid:MassBound;
Point<3,real_number> vb = vd.template getProp<velocity>(b);
real_number Pb = vd.template getProp<Pressure>(b);
real_number rhob = vd.template getProp<rho>(b);
// Get the distance between p and q
Point<3,real_number> dr = xa - xb;
......@@ -720,7 +721,7 @@ int main(int argc, char* argv[])
// initialize the library
openfpm_init(&argc,&argv);
#ifndef CUDA_ON_CPU
#if !defined(CUDA_ON_CPU) && !defined(__HIP__)
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
#endif
......@@ -728,11 +729,11 @@ int main(int argc, char* argv[])
openfpm::vector<openfpm::vector<real_number>> press_t;
openfpm::vector<Point<3,real_number>> probes;
probes.add({0.8779,0.3,0.02});
probes.add({0.754,0.31,0.02});
probes.add({0.8779f,0.3f,0.02f});
probes.add({0.754f,0.31f,0.02f});
// Here we define our domain: a 3D box spanning [-0.05,1.7010] in x, [-0.05,0.7065] in y and [-0.05,0.511] in z
Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511});
Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
size_t sz[3] = {413,179,133};
// Fill W_dap
......
......@@ -416,8 +416,11 @@ fi
install_base=$(cat install_dir)
openmp_flags="$(cat openmp_flags)"
cuda_include_dirs=$(cat cuda_include)
mpi_include_dirs=$(cat mpi_include)
mpi_libs=$(cat mpi_libs)
hip_enabled=$(cat hip_enabled)
if [ x"$hip_enabled" == x"1" ]; then
mpi_include_dirs=$(cat mpi_include)
mpi_libs=$(cat mpi_libs)
fi
cuda_on_cpu=$(cat cuda_on_cpu)
optional_boost=$(cat optional_boost_libs)
......@@ -429,19 +432,19 @@ elif [ -d "$i_dir/HDF5/lib64" ]; then
hdf5_lib_dir=-L$i_dir/HDF5/lib64
fi
echo "INCLUDE_PATH=$cuda_include_dirs -Wno-deprecated-declarations $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/VCDEVEL/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "LIBS_PATH=$openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/VCDEVEL/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib $hdf5_lib_dir -L$i_dir/LIBHILBERT/lib $lin_alg_dir " >> example.mk
echo "INCLUDE_PATH=$mpi_include_dirs $cuda_include_dirs -Wno-deprecated-declarations $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/VCDEVEL/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "LIBS_PATH=$mpi_libs $openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/VCDEVEL/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib $hdf5_lib_dir -L$i_dir/LIBHILBERT/lib $lin_alg_dir " >> example.mk
if [ x"$cuda_on_cpu" == x"YES" ]; then
echo "CUDA_ON_CPU=YES" >> example.mk
fi
if [ x"$gpu_support" == x"1" ]; then
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -ldl -lboost_filesystem -lboost_system" >> example.mk
echo "LIBS=$mpi_libs -lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -ldl -lboost_filesystem -lboost_system" >> example.mk
echo "LIBS_CUDA_ON_CPU=-lvcluster_cuda_on_cpu -lofpmmemory_cuda_on_cpu -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -lboost_filesystem -lboost_system -lboost_context" >> example.mk
else
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $lin_alg_lib -ldl -lboost_filesystem -lboost_system $optional_boost" >> example.mk
echo "LIBS_CUDA_ON_CPU=\$(LIBS)" >> example.mk
fi
echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) "$(cat cuda_options)" -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk
echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) "$(cat cuda_options)" $mpi_include_dirs -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk
cp example.mk src/example.mk
cp example.mk example/example.mk
......
openfpm_data @ 6e44b2c6
Subproject commit f9b6976f588efefd5973fd3e2093e1adc476dbd6
Subproject commit 6e44b2c6cc0dddca29958c443b63ec5eb4fad2e5
......@@ -4,9 +4,9 @@ function set_mpi()
{
if [ x"$MPI_valid" == x"yes" ]; then
if [ $is_mpi_openmpi -eq 1 ]; then
configure_options="$configure_options CXX=mpic++ --with-mpivendor=openmpi"
configure_options="$configure_options --with-mpivendor=openmpi"
else
configure_options="$configure_options CXX=mpic++ "
configure_options="$configure_options "
fi
else
if [ x"$platform" == x"cygwin" ]; then
......@@ -19,7 +19,7 @@ function set_mpi()
fi
MPI_installed=1
export PATH="$1/MPI/bin:$PATH"
configure_options="$configure_options CXX=mpic++ --with-mpivendor=openmpi"
configure_options="$configure_options --with-mpivendor=openmpi"
fi
}
......@@ -43,52 +43,4 @@ function get_openmpi_compilation_options()
done
}
# Probe, one at a time, which of the candidate OpenMPI configure options are
# accepted when configuring the OpenMPI 3.1.3 sources with the given compilers.
#
# Arguments:
#   $1  base directory; OpenMPI is configured with --prefix=$1/MPI
#   $4  C compiler        (passed as CC=)
#   $5  C++ compiler      (passed as CXX=)
#   $6  Fortran 77 compiler (passed as F77=)
#   $7  Fortran compiler  (passed as FC=)
#   ($2 and $3 are not used by this function)
#
# Reads:  openmpi_compilation_options  candidate options to test
# Writes: openmpi_working_options      space-separated options for which
#                                      ./configure exited successfully
#
# Returns 1 without running ./configure when the OpenMPI source directory
# cannot be entered.
function test_working_mpi_options()
{
    script/download_MPI.sh

    # Never run ./configure (or the final "cd ..") from the wrong directory.
    cd openmpi-3.1.3 || return 1

    openmpi_working_options=()

    for opt in ${openmpi_compilation_options[@]}; do
        # Options that this function sets itself, or that must be avoided
        # (prefix, --with-cuda, Fortran toggle, compiler selections), are skipped.
        case "$opt" in
            --with-cuda*|--prefix*|--enable-mpi-fortran*|CC*|CXX*|FC*|F77*)
                continue
                ;;
        esac

        echo "Testing ./configure --prefix=$1/MPI --enable-mpi-fortran=yes CC=$4 CXX=$5 F77=$6 FC=$7 $openmpi_working_options $opt"

        # $openmpi_working_options is intentionally left unquoted: it is a
        # space-separated list that must be split into individual arguments.
        if ./configure --prefix="$1/MPI" --enable-mpi-fortran=yes CC="$4" CXX="$5" F77="$6" FC="$7" $openmpi_working_options $opt; then
            openmpi_working_options="$openmpi_working_options $opt"
        fi
    done

    echo "OpenMPI working options: $openmpi_working_options"

    cd ..
}
......@@ -5,7 +5,7 @@ add_definitions(-DSCAN_WITH_CUB)
########################### Executables
if(CUDA_FOUND OR CUDA_ON_CPU)
if(CUDA_FOUND OR CUDA_ON_CPU OR HIP_FOUND)
set(CUDA_SOURCES
Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
Vector/cuda/vector_dist_gpu_MP_tests.cu
......@@ -23,15 +23,54 @@ endif()
if (CUDA_ON_CPU)
add_definitions(-DCUDA_ON_CPU)
set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX)
set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS "-D__NVCC__ -DCUDART_VERSION=11000")
set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS "-D__NVCC__ -DCUDART_VERSION=11000")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
add_definitions("-x c++")
endif()
endif()
if ( HIP_ENABLE AND HIP_FOUND )
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND HIP_HIPCC_FLAGS -O0)
endif()
list(APPEND HIP_HIPCC_FLAGS -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0)
set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX)
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
hip_add_executable(pdata ${CUDA_SOURCES} ${OPENFPM_INIT_FILE} main.cpp
Amr/grid_dist_amr_unit_tests.cpp
Amr/tests/amr_base_unit_tests.cpp
Debug/debug_test.cpp
Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp
Grid/tests/grid_dist_id_unit_test.cpp
Grid/tests/sgrid_dist_id_unit_tests.cpp
Grid/tests/grid_dist_id_dlb_unit_test.cpp
Grid/tests/staggered_grid_dist_unit_test.cpp
Vector/tests/vector_dist_cell_list_tests.cpp
Vector/tests/vector_dist_complex_prp_unit_test.cpp
Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp
Vector/tests/vector_dist_MP_unit_tests.cpp
Vector/tests/vector_dist_NN_tests.cpp
Vector/tests/vector_dist_unit_test.cpp
pdata_performance.cpp
Decomposition/tests/CartDecomposition_unit_test.cpp
Decomposition/tests/shift_vect_converter_tests.cpp
Vector/performance/vector_dist_performance_util.cpp
lib/pdata.cpp
test_multiple_o.cpp
)
hip_add_library(ofpm_pdata STATIC lib/pdata.cpp)
else()
add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp
Amr/grid_dist_amr_unit_tests.cpp
Amr/tests/amr_base_unit_tests.cpp
Debug/debug_test.cpp
......@@ -52,6 +91,10 @@ add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp
Vector/performance/vector_dist_performance_util.cpp
lib/pdata.cpp test_multiple_o.cpp)
add_library(ofpm_pdata STATIC lib/pdata.cpp)
endif()
add_dependencies(pdata ofpmmemory)
add_dependencies(pdata vcluster)
......@@ -81,7 +124,6 @@ if (ENABLE_ASAN)
add_definitions(-DENABLE_ASAN)
endif()
add_library(ofpm_pdata STATIC lib/pdata.cpp)
add_test(NAME pdata_3_proc COMMAND mpirun -np 3 ./pdata)
add_test(NAME pdata_4_proc COMMAND mpirun -np 4 ./pdata)
......
......@@ -8,7 +8,7 @@
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
#include "Vector/util/vector_dist_funcs.hpp"
#include "Decomposition/CartDecomposition.hpp"
#include "util/cuda/scan_cuda.cuh"
//#include "util/cuda/scan_cuda.cuh"
#include "Vector/vector_dist.hpp"
#include "util/cuda/scan_ofp.cuh"
......
......@@ -2043,7 +2043,7 @@ public:
* \return an iterator
*
*/
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = 1024) const
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
#ifdef SE_CLASS3
se3.getIterator();
......@@ -2057,7 +2057,7 @@ public:
* \return an iterator
*
*/
ite_gpu<1> getDomainAndGhostIteratorGPU(size_t n_thr = 1024) const
ite_gpu<1> getDomainAndGhostIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
#ifdef SE_CLASS3
se3.getIterator();
......@@ -2072,7 +2072,7 @@ public:
*
*/
template<unsigned int ... prp,typename id_1, typename id_2, bool is_sparse>
void merge_sort(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>,id_1,id_2,is_sparse> & cl, size_t n_thr = 1024)
void merge_sort(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>,id_1,id_2,is_sparse> & cl, size_t n_thr = default_kernel_wg_threads_)
{
#if defined(__NVCC__)
......@@ -2159,7 +2159,7 @@ public:
* \param cl Cell-list from which the sorted vector has been constructed
*
*/
template<unsigned int ... prp> void merge_sort_with_pos(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>> & cl, size_t n_thr = 1024)
template<unsigned int ... prp> void merge_sort_with_pos(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>> & cl, size_t n_thr = default_kernel_wg_threads_)
{
#if defined(__NVCC__)
......@@ -2181,7 +2181,7 @@ public:
* \return an iterator
*
*/
auto getDomainIteratorDevice(size_t n_thr = 1024) const -> decltype(this->getDomainIteratorGPU(n_thr))
auto getDomainIteratorDevice(size_t n_thr = default_kernel_wg_threads_) const -> decltype(this->getDomainIteratorGPU(n_thr))
{
return this->getDomainIteratorGPU(n_thr);
}
......@@ -2194,7 +2194,7 @@ public:
* \return an iterator
*
*/
auto getDomainIteratorDevice(size_t n_thr = 1024) const -> decltype(this->getDomainIterator())
auto getDomainIteratorDevice(size_t n_thr = default_kernel_wg_threads_) const -> decltype(this->getDomainIterator())
{
return this->getDomainIterator();
}
......
......@@ -257,7 +257,7 @@ public:
* \return an iterator
*
*/
__host__ ite_gpu<1> getDomainIteratorGPU(size_t n_thr = 1024) const
__host__ ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
return v_pos.getGPUIteratorTo(g_m,n_thr);
}
......
......@@ -4,6 +4,13 @@ ${DEFINE_COVERTY_SCAN}
/* GPU support */
${DEFINE_CUDA_GPU}
/* HIP GPU support */
${DEFINE_HIP_GPU}
/* HIP Cudify GPU support */
${DEFINE_CUDIFY_USE_HIP}
/* Debug */
${DEFINE_DEBUG} /**/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment