Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sbalzarini Lab
S
Software
P
Parallel Computing
OpenFPM
openfpm_devices
Commits
c4610f53
Commit
c4610f53
authored
Apr 20, 2021
by
incardon
Browse files
Merge remote-tracking branch 'origin/hip_conversion2'
parents
c8c1f419
f831b228
Pipeline
#2929
passed with stages
in 13 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
c4610f53
...
...
@@ -2,8 +2,10 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project
(
openfpm_devices LANGUAGES C CXX
)
set
(
BOOST_INCLUDE
${
Boost_INCLUDE_DIR
}
CACHE PATH
"Include directory for BOOST"
)
set
(
HIP_ENABLE CACHE BOOL
"Enable HIP compiler"
)
set
(
AMD_ARCH_COMPILE
"gfx900"
CACHE STRING
"AMD gpu architecture used to compile kernels"
)
list
(
APPEND CMAKE_MODULE_PATH
${
CMAKE_CURRENT_LIST_DIR
}
/cmake_modules/
)
list
(
APPEND CMAKE_MODULE_PATH
${
CMAKE_CURRENT_LIST_DIR
}
/cmake_modules/
/opt/rocm/hip/cmake
)
set
(
CMAKE_CXX_STANDARD 14
)
set
(
CMAKE_CUDA_STANDARD 14
)
...
...
@@ -15,35 +17,49 @@ set(CUDA_ON_CPU CACHE BOOL "Make Cuda work on heap")
# Save Boost_LIBRARIES before alpaka fuck-up Boost
set
(
Boost_LIBRARIES_BCK
${
Boost_LIBRARIES
}
)
if
(
ENABLE_GPU
)
enable_language
(
CUDA
)
find_package
(
CUDA
)
if
(
CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 9.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111
\"
--expt-extended-lambda "
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1
)
message
(
"CUDA is compatible 10.1"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 10.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0
)
message
(
"CUDA is compatible 11.0"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 1
)
message
(
"CUDA is compatible 11.1"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 11.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
if
(
HIP_ENABLE AND NOT HIP_FOUND
)
find_package
(
HIP
)
if
(
NOT HIP_FOUND
)
message
(
FATAL_ERROR
"HIP has not been found"
)
else
()
message
(
FATAL_ERROR
"CUDA is incompatible, version 9.2 10.1 10.2 11.1 and 11.2 is only supported"
)
message
(
"HIP has been found"
)
endif
()
if
(
HIP_ENABLE
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
endif
()
endif
()
if
(
ENABLE_GPU
)
if
(
NOT HIP_ENABLE
)
enable_language
(
CUDA
)
find_package
(
CUDA
)
if
(
CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 9.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111
\"
--expt-extended-lambda "
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1
)
message
(
"CUDA is compatible 10.1"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 10.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0
)
message
(
"CUDA is compatible 11.0"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 1
)
message
(
"CUDA is compatible 11.1"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
elseif
(
CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 2
)
message
(
"CUDA is compatible 11.2"
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe
"--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128"
--expt-extended-lambda PARENT_SCOPE
)
set
(
WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT
"-Xcudafe
\"
--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128
\"
--expt-extended-lambda"
PARENT_SCOPE
)
else
()
message
(
FATAL_ERROR
"CUDA is incompatible, version 9.2 10.1 10.2 11.1 and 11.2 is only supported"
)
endif
()
endif
()
else
()
find_package
(
alpaka
)
...
...
@@ -60,6 +76,11 @@ if(CUDA_ON_CPU)
set
(
DEFINE_CUDA_GPU
"#define CUDA_GPU"
)
endif
()
if
(
HIP_FOUND
)
set
(
DEFINE_HIP_GPU
"#define HIP_GPU"
)
set
(
DEFINE_CUDIFY_USE_HIP
"#define CUDIFY_USE_HIP"
)
endif
()
if
(
BOOST_FOUND
)
set
(
DEFINE_HAVE_BOOST
"#define HAVE_BOOST"
)
set
(
DEFINE_HAVE_BOOST_IOSTREAMS
"#define HAVE_BOOST_IOSTREAMS"
)
...
...
src/CMakeLists.txt
View file @
c4610f53
if
(
CUDA_FOUND OR CUDA_ON_CPU
)
if
(
CUDA_FOUND OR CUDA_ON_CPU
OR HIP_FOUND
)
set
(
CUDA_SOURCES memory/CudaMemory.cu
)
else
()
set
(
CUDA_SOURCES
)
...
...
@@ -13,10 +13,37 @@ if (CUDA_ON_CPU)
endif
()
endif
()
add_executable
(
mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
if
(
HIP_ENABLE AND HIP_FOUND
)
add_library
(
ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
add_library
(
ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
set_source_files_properties
(
${
CUDA_SOURCES
}
PROPERTIES LANGUAGE CXX
)
hip_add_library
(
ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
hip_add_library
(
ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
set
(
CMAKE_CXX_COMPILER
${
HIP_HIPCC_EXECUTABLE
}
)
hip_add_executable
(
mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
set
(
CMAKE_SHARED_LIBRARY_CXX_FLAGS
" "
)
set_property
(
TARGET ofpmmemory PROPERTY POSITION_INDEPENDENT_CODE ON
)
set_property
(
TARGET ofpmmemory_dl PROPERTY POSITION_INDEPENDENT_CODE ON
)
set_property
(
TARGET ofpmmemory PROPERTY CMAKE_CXX_FLAGS
"-Xcompiler -fPIC"
)
set_property
(
TARGET ofpmmemory_dl PROPERTY CMAKE_CXX_FLAGS
"-Xcompiler -fPIC"
)
set_property
(
TARGET ofpmmemory PROPERTY NO_SONAME ON
)
set_property
(
TARGET ofpmmemory_dl PROPERTY NO_SONAME ON
)
else
()
add_executable
(
mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
add_library
(
ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
add_library
(
ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
endif
()
if
(
HIP_FOUND
)
SET
(
CMAKE_EXE_LINKER_FLAGS
"--amdgpu-target=
${
AMD_ARCH_COMPILE
}
"
)
SET
(
CMAKE_SHARED_LINKER_FLAGS
"--amdgpu-target=
${
AMD_ARCH_COMPILE
}
"
)
endif
()
if
(
CUDA_FOUND AND NOT CUDA_ON_CPU
)
add_library
(
ofpmmemory_cuda_on_cpu STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp
${
CUDA_SOURCES
}
)
...
...
src/config/config_cmake.h.in
View file @
c4610f53
...
...
@@ -4,6 +4,12 @@ ${DEFINE_COVERTY_SCAN}
/* GPU support */
${DEFINE_CUDA_GPU}
/* HIP GPU support */
${DEFINE_HIP_GPU}
/* HIP Cudify GPU support */
${DEFINE_CUDIFY_USE_HIP}
/* Debug */
${DEFINE_DEBUG} /**/
...
...
src/cuda_macro.h
View file @
c4610f53
...
...
@@ -7,6 +7,17 @@
#include <iostream>
#if defined(__HIP__)
#define CUDA_SAFE_CALL(call) {\
hipError_t err = call;\
if (hipSuccess != err) {\
std::cerr << "HIP error in file "<< __FILE__ << " in line " << __LINE__ << ": " << hipGetErrorString(err);\
}\
}
#elif defined(CUDA_GPU)
#define CUDA_SAFE_CALL(call) {\
cudaError_t err = call;\
if (cudaSuccess != err) {\
...
...
@@ -14,4 +25,6 @@
}\
}
#endif
src/memory/CudaMemory.cu
View file @
c4610f53
...
...
@@ -17,7 +17,9 @@ bool CudaMemory::flush()
{
//! copy from host to device memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
dm
,
hm
,
sz
,
hipMemcpyHostToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
dm
,
hm
,
sz
,
cudaMemcpyHostToDevice
));
#else
memcpy
(
dm
,
hm
,
sz
);
...
...
@@ -39,8 +41,10 @@ bool CudaMemory::allocate(size_t sz)
//! Allocate the device memory
if
(
dm
==
NULL
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMalloc
(
&
dm
,
sz
));
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMalloc
(
&
dm
,
sz
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMalloc
(
&
dm
,
sz
));
#else
if
(
sz
!=
0
)
{
...
...
@@ -79,7 +83,9 @@ void CudaMemory::destroy()
if
(
dm
!=
NULL
)
{
//! Release the allocated memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipFree
(
dm
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaFree
(
dm
));
#else
delete
[]
(
unsigned
char
*
)
dm
;
...
...
@@ -90,7 +96,9 @@ void CudaMemory::destroy()
if
(
hm
!=
NULL
)
{
//! we invalidate hm
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipHostFree
(
hm
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaFreeHost
(
hm
));
#else
delete
[]
(
unsigned
char
*
)
hm
;
...
...
@@ -111,7 +119,9 @@ void CudaMemory::destroy()
*/
void
CudaMemory
::
deviceToDevice
(
void
*
ptr
,
size_t
start
,
size_t
stop
,
size_t
offset
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(((
unsigned
char
*
)
dm
)
+
offset
,((
unsigned
char
*
)
ptr
)
+
start
,(
stop
-
start
),
hipMemcpyDeviceToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(((
unsigned
char
*
)
dm
)
+
offset
,((
unsigned
char
*
)
ptr
)
+
start
,(
stop
-
start
),
cudaMemcpyDeviceToDevice
));
#else
memcpy
(((
unsigned
char
*
)
dm
)
+
offset
,((
unsigned
char
*
)
ptr
)
+
start
,(
stop
-
start
));
...
...
@@ -127,7 +137,9 @@ void CudaMemory::allocate_host(size_t sz) const
{
if
(
hm
==
NULL
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipHostMalloc
(
&
hm
,
sz
,
hipHostMallocMapped
))
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaHostAlloc
(
&
hm
,
sz
,
cudaHostAllocMapped
))
#else
hm
=
new
unsigned
char
[
sz
];
...
...
@@ -154,11 +166,13 @@ bool CudaMemory::copyFromPointer(const void * ptr)
// get the device pointer
void
*
dvp
;
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipHostGetDevicePointer
(
&
dvp
,
hm
,
0
));
// memory copy
memcpy
(
dvp
,
ptr
,
sz
);
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaHostGetDevicePointer
(
&
dvp
,
hm
,
0
));
// memory copy
memcpy
(
dvp
,
ptr
,
sz
);
#else
memcpy
(
hm
,
ptr
,
sz
);
...
...
@@ -186,7 +200,9 @@ bool CudaMemory::copyDeviceToDevice(const CudaMemory & m)
}
//! Copy the memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
dm
,
m
.
dm
,
m
.
sz
,
hipMemcpyDeviceToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
dm
,
m
.
dm
,
m
.
sz
,
cudaMemcpyDeviceToDevice
));
#else
memcpy
(
dm
,
m
.
dm
,
m
.
sz
);
...
...
@@ -268,7 +284,9 @@ bool CudaMemory::resize(size_t sz)
{
if
(
this
->
sz
<
sz
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMalloc
(
&
tdm
,
sz
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMalloc
(
&
tdm
,
sz
));
#else
tdm
=
new
unsigned
char
[
sz
];
...
...
@@ -278,14 +296,18 @@ bool CudaMemory::resize(size_t sz)
#endif
#ifdef GARBAGE_INJECTOR
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemset
(
tdm
,
-
1
,
sz
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemset
(
tdm
,
-
1
,
sz
));
#endif
#endif
}
//! copy from the old buffer to the new one
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
tdm
,
dm
,
CudaMemory
::
size
(),
hipMemcpyDeviceToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
tdm
,
dm
,
CudaMemory
::
size
(),
cudaMemcpyDeviceToDevice
));
#else
memcpy
(
tdm
,
dm
,
CudaMemory
::
size
());
...
...
@@ -296,7 +318,9 @@ bool CudaMemory::resize(size_t sz)
{
if
(
this
->
sz
<
sz
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipHostMalloc
(
&
thm
,
sz
,
hipHostMallocMapped
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaHostAlloc
(
&
thm
,
sz
,
cudaHostAllocMapped
));
#else
thm
=
new
unsigned
char
[
sz
];
...
...
@@ -307,7 +331,9 @@ bool CudaMemory::resize(size_t sz)
}
//! copy from the old buffer to the new one
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
thm
,
hm
,
CudaMemory
::
size
(),
hipMemcpyHostToHost
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
thm
,
hm
,
CudaMemory
::
size
(),
cudaMemcpyHostToHost
));
#else
memcpy
(
thm
,
hm
,
CudaMemory
::
size
());
...
...
@@ -356,7 +382,9 @@ void CudaMemory::deviceToHost()
allocate_host
(
sz
);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
hm
,
dm
,
sz
,
hipMemcpyDeviceToHost
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
hm
,
dm
,
sz
,
cudaMemcpyDeviceToHost
));
#else
memcpy
(
hm
,
dm
,
sz
);
...
...
@@ -378,7 +406,9 @@ void CudaMemory::deviceToHost(CudaMemory & mem)
{
resize
(
mem
.
sz
);}
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
mem
.
hm
,
dm
,
mem
.
sz
,
hipMemcpyDeviceToHost
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
mem
.
hm
,
dm
,
mem
.
sz
,
cudaMemcpyDeviceToHost
));
#else
memcpy
(
mem
.
hm
,
dm
,
mem
.
sz
);
...
...
@@ -400,7 +430,9 @@ void CudaMemory::hostToDevice(CudaMemory & mem)
{
resize
(
mem
.
sz
);}
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
dm
,
mem
.
hm
,
mem
.
sz
,
hipMemcpyHostToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
dm
,
mem
.
hm
,
mem
.
sz
,
cudaMemcpyHostToDevice
));
#else
memcpy
(
dm
,
mem
.
hm
,
mem
.
sz
);
...
...
@@ -414,7 +446,9 @@ void CudaMemory::hostToDevice(size_t start, size_t stop)
allocate_host
(
sz
);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(((
unsigned
char
*
)
dm
)
+
start
,((
unsigned
char
*
)
hm
)
+
start
,(
stop
-
start
),
hipMemcpyHostToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(((
unsigned
char
*
)
dm
)
+
start
,((
unsigned
char
*
)
hm
)
+
start
,(
stop
-
start
),
cudaMemcpyHostToDevice
));
#else
memcpy
(((
unsigned
char
*
)
dm
)
+
start
,((
unsigned
char
*
)
hm
)
+
start
,(
stop
-
start
));
...
...
@@ -433,7 +467,9 @@ void CudaMemory::deviceToHost(size_t start, size_t stop)
allocate_host
(
sz
);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(((
unsigned
char
*
)
hm
)
+
start
,((
unsigned
char
*
)
dm
)
+
start
,(
stop
-
start
),
hipMemcpyDeviceToHost
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(((
unsigned
char
*
)
hm
)
+
start
,((
unsigned
char
*
)
dm
)
+
start
,(
stop
-
start
),
cudaMemcpyDeviceToHost
));
#else
memcpy
(((
unsigned
char
*
)
hm
)
+
start
,((
unsigned
char
*
)
dm
)
+
start
,(
stop
-
start
));
...
...
@@ -463,7 +499,9 @@ const void * CudaMemory::getPointer() const
*/
void
CudaMemory
::
fill
(
unsigned
char
c
)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemset
(
dm
,
c
,
size
()));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemset
(
dm
,
c
,
size
()));
#else
memset
(
dm
,
c
,
size
());
...
...
@@ -495,7 +533,9 @@ void CudaMemory::hostToDevice()
allocate_host
(
sz
);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL
(
hipMemcpy
(
dm
,
hm
,
sz
,
hipMemcpyHostToDevice
));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL
(
cudaMemcpy
(
dm
,
hm
,
sz
,
cudaMemcpyHostToDevice
));
#else
memcpy
(
dm
,
hm
,
sz
);
...
...
src/memory/CudaMemory.cuh
View file @
c4610f53
...
...
@@ -33,11 +33,7 @@
#include "memory.hpp"
#include <iostream>
#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
#include <cuda_runtime.h>
#else
#include "util/cuda_util.hpp"
#endif
extern
size_t
TotCudaMemoryAllocated
;
...
...
src/util/cuda_launch.hpp
View file @
c4610f53
...
...
@@ -11,7 +11,12 @@
#include "config.h"
#include "cuda_kernel_error_checker.hpp"
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU) && !defined(__HIP__)
constexpr
int
default_kernel_wg_threads_
=
1024
;
#include "cub/util_type.cuh"
#include "cub/block/block_scan.cuh"
#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
...
...
src/util/cuda_util.hpp
View file @
c4610f53
...
...
@@ -9,7 +9,16 @@
#define OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_
#include "config.h"
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#if defined(__HIP__)
// If hip fill NVCC think it is Nvidia compiler
#ifdef __NVCC__
#undef __NVCC__
#include <hip/hip_runtime.h>
#define __NVCC__
#else
#include <hip/hip_runtime.h>
#endif
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#include <cuda_runtime.h>
#endif
...
...
@@ -33,9 +42,6 @@
#endif
#ifdef CUDA_ON_CPU
#define CUDA_SAFE(cuda_call) \
cuda_call;
#ifdef __shared__
#undef __shared__
...
...
@@ -44,17 +50,6 @@
#else
#define CUDA_SAFE(cuda_call) \
cuda_call; \
{\
cudaError_t e = cudaPeekAtLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}
#ifndef __shared__
#define __shared__
#endif
...
...
src/util/cudify/cudify.hpp
View file @
c4610f53
...
...
@@ -3,6 +3,8 @@
#ifdef CUDIFY_USE_ALPAKA
#include "cudify_alpaka.hpp"
#elif defined(CUDIFY_USE_HIP)
#include "cudify_hip.hpp"
#else
#include "cudify_sequencial.hpp"
#endif
...
...
src/util/cudify/cudify_hip.hpp
0 → 100644
View file @
c4610f53
#ifndef CUDIFY_HIP_HPP_
#define CUDIFY_HIP_HPP_
#include "config.h"
#ifdef HIP_GPU
#include "cudify_hardware_common.hpp"
#define CUDIFY_ACTIVE
#ifdef __NVCC__
#undef __NVCC__
#include <hip/hip_runtime.h>
#define __NVCC__
#else
#include <hip/hip_runtime.h>
#endif
#include "util/cuda_util.hpp"
#include <vector>
#include <string.h>
#include "hipcub/hipcub.hpp"
#include "hipcub/block/block_scan.hpp"
constexpr
int
default_kernel_wg_threads_
=
256
;
typedef
hipError_t
cudaError_t
;
typedef
hipStream_t
cudaStream_t
;
typedef
hipDeviceProp_t
cudaDeviceProp_t
;
typedef
cudaDeviceProp_t
cudaDeviceProp
;
typedef
hipEvent_t
cudaEvent_t
;
typedef
hipFuncAttributes
cudaFuncAttributes
;
#define cudaSuccess hipSuccess
namespace
cub
{
template
<
typename
T
,
unsigned
int
bd
>
using
BlockScan
=
hipcub
::
BlockScan
<
T
,
bd
>
;
}
static
void
init_wrappers
()
{}
/**
* CUDA memory copy types
*/
enum
cudaMemcpyKind
{
cudaMemcpyHostToHost
=
0
,
/**< Host -> Host */
cudaMemcpyHostToDevice
=
1
,
/**< Host -> Device */
cudaMemcpyDeviceToHost
=
2
,
/**< Device -> Host */
cudaMemcpyDeviceToDevice
=
3
,
/**< Device -> Device */
cudaMemcpyDefault
=
4
/**< Direction of the transfer is inferred from the pointer values. Requires unified virtual addressing */
};
static
cudaError_t
cudaMemcpyToSymbol
(
unsigned
char
*
global_cuda_error_array
,
const
void
*
mem
,
size_t
sz
,
int
offset
,
cudaMemcpyKind
opt
)
{
hipMemcpyKind
opt_
;
switch
(
opt
)
{
case
cudaMemcpyHostToHost
:
opt_
=
hipMemcpyHostToHost
;
break
;
case
cudaMemcpyHostToDevice
:
opt_
=
hipMemcpyHostToDevice
;
break
;
case
cudaMemcpyDeviceToHost
:
opt_
=
hipMemcpyDeviceToHost
;
break
;
case
cudaMemcpyDeviceToDevice
:
opt_
=
hipMemcpyDeviceToDevice
;
break
;
default:
opt_
=
hipMemcpyDefault
;
break
;
}
return
hipMemcpyToSymbol
(
global_cuda_error_array
,
mem
,
sz
,
offset
,
opt_
);
}
static
cudaError_t
cudaDeviceSynchronize
()
{
return
hipDeviceSynchronize
();
}
static
cudaError_t
cudaMemcpyFromSymbol
(
void
*
dev_mem
,
const
unsigned
char
*
global_cuda_error_array
,
size_t
sz
)
{
return
hipMemcpyFromSymbol
(
dev_mem
,
global_cuda_error_array
,
sz
);
}
static
const
char
*
cudaGetErrorString
(
cudaError_t
error
)
{
return
hipGetErrorString
(
error
);
}
static
cudaError_t
cudaGetDevice
(
int
*
device
)
{
return
hipGetDevice
(
device
);
}
static
cudaError_t
cudaSetDevice
(
int
device
)
{
return
hipSetDevice
(
device
);
}
static
cudaError_t
cudaMemGetInfo
(
size_t
*
free
,
size_t
*
total
)
{
return
hipMemGetInfo
(
free
,
total
);
}
static
cudaError_t
cudaFuncGetAttributes
(
cudaFuncAttributes
*
attr
,
const
void
*
func
)
{
return
hipFuncGetAttributes
(
attr
,
func
);
}
static
cudaError_t
cudaGetDeviceProperties
(
cudaDeviceProp
*
prop
,
int
device
)
{
return
hipGetDeviceProperties
(
prop
,
device
);
}
static
cudaError_t
cudaEventCreate
(
cudaEvent_t
*
event
)
{
return
hipEventCreate
(
event
);
}
static
cudaError_t
cudaEventDestroy
(
cudaEvent_t
event
)
{
return
hipEventDestroy
(
event
);
}