Commit 0073fde6 authored by incardon's avatar incardon
Browse files

HIP conversion

parent 9ea5dfcc
......@@ -2,8 +2,10 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(openfpm_devices LANGUAGES C CXX)
set(BOOST_INCLUDE ${Boost_INCLUDE_DIR} CACHE PATH "Include directory for BOOST")
set(HIP_ENABLE CACHE BOOL "Enable HIP compiler")
set(AMD_ARCH_COMPILE "gfx900" CACHE STRING "AMD gpu architecture used to compile kernels")
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/ /opt/rocm/hip/cmake)
set (CMAKE_CXX_STANDARD 14)
set (CMAKE_CUDA_STANDARD 14)
......@@ -15,35 +17,49 @@ set(CUDA_ON_CPU CACHE BOOL "Make Cuda work on heap")
# Save Boost_LIBRARIES before alpaka fuck-up Boost
set(Boost_LIBRARIES_BCK ${Boost_LIBRARIES})
if(ENABLE_GPU)
enable_language(CUDA)
find_package(CUDA)
if (CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2)
message("CUDA is compatible 9.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda " PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible 10.1")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128 \" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 )
message("CUDA is compatible 10.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0 )
message("CUDA is compatible 11.0")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible 11.1")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 2 )
message("CUDA is compatible 11.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
if (HIP_ENABLE AND NOT HIP_FOUND)
find_package(HIP)
if(NOT HIP_FOUND)
message( FATAL_ERROR "HIP has not been found" )
else()
message(FATAL_ERROR "CUDA is incompatible, version 9.2 10.1 10.2 11.1 and 11.2 is only supported")
message("HIP has been found")
endif()
if (HIP_ENABLE)
set(CMAKE_CXX_EXTENSIONS OFF)
endif()
endif()
if(ENABLE_GPU)
if (NOT HIP_ENABLE)
enable_language(CUDA)
find_package(CUDA)
if (CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2)
message("CUDA is compatible 9.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda " PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible 10.1")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --display_error_number --diag_suppress=2931 --diag_suppress=2930 --diag_suppress=2929 --diag_suppress=2928 --diag_suppress=2915 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=128 \" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 )
message("CUDA is compatible 10.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0 )
message("CUDA is compatible 11.0")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=3056 --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible 11.1")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 2 )
message("CUDA is compatible 11.2")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda PARENT_SCOPE)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=20014 --diag_suppress=20013 --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=611 --diag_suppress=550 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda" PARENT_SCOPE)
else()
message(FATAL_ERROR "CUDA is incompatible, version 9.2 10.1 10.2 11.1 and 11.2 is only supported")
endif()
endif()
else()
find_package(alpaka)
......@@ -60,6 +76,11 @@ if(CUDA_ON_CPU)
set(DEFINE_CUDA_GPU "#define CUDA_GPU")
endif()
if(HIP_FOUND)
set(DEFINE_HIP_GPU "#define HIP_GPU")
set(DEFINE_CUDIFY_USE_HIP "#define CUDIFY_USE_HIP")
endif()
if (BOOST_FOUND)
set(DEFINE_HAVE_BOOST "#define HAVE_BOOST")
set(DEFINE_HAVE_BOOST_IOSTREAMS "#define HAVE_BOOST_IOSTREAMS")
......
if(CUDA_FOUND OR CUDA_ON_CPU)
if(CUDA_FOUND OR CUDA_ON_CPU OR HIP_FOUND)
set(CUDA_SOURCES memory/CudaMemory.cu)
else()
set(CUDA_SOURCES )
......@@ -13,10 +13,37 @@ if (CUDA_ON_CPU)
endif()
endif()
add_executable(mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp ${CUDA_SOURCES})
if ( HIP_ENABLE AND HIP_FOUND )
add_library(ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
add_library(ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX)
hip_add_library(ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
hip_add_library(ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
hip_add_executable(mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp ${CUDA_SOURCES})
set(CMAKE_SHARED_LIBRARY_CXX_FLAGS " ")
set_property(TARGET ofpmmemory PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET ofpmmemory_dl PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET ofpmmemory PROPERTY CMAKE_CXX_FLAGS "-Xcompiler -fPIC")
set_property(TARGET ofpmmemory_dl PROPERTY CMAKE_CXX_FLAGS "-Xcompiler -fPIC")
set_property(TARGET ofpmmemory PROPERTY NO_SONAME ON)
set_property(TARGET ofpmmemory_dl PROPERTY NO_SONAME ON)
else()
add_executable(mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp ${CUDA_SOURCES})
add_library(ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
add_library(ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
endif()
if (HIP_FOUND)
SET(CMAKE_EXE_LINKER_FLAGS "--amdgpu-target=${AMD_ARCH_COMPILE}")
SET(CMAKE_SHARED_LINKER_FLAGS "--amdgpu-target=${AMD_ARCH_COMPILE}")
endif()
if (CUDA_FOUND AND NOT CUDA_ON_CPU)
add_library(ofpmmemory_cuda_on_cpu STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
......
......@@ -4,6 +4,12 @@ ${DEFINE_COVERTY_SCAN}
/* GPU support */
${DEFINE_CUDA_GPU}
/* HIP GPU support */
${DEFINE_HIP_GPU}
/* HIP Cudify GPU support */
${DEFINE_CUDIFY_USE_HIP}
/* Debug */
${DEFINE_DEBUG} /**/
......
......@@ -7,6 +7,17 @@
#include <iostream>
#if defined(__HIP__)
#define CUDA_SAFE_CALL(call) {\
hipError_t err = call;\
if (hipSuccess != err) {\
std::cerr << "HIP error in file "<< __FILE__ << " in line " << __LINE__ << ": " << hipGetErrorString(err);\
}\
}
#elif defined(CUDA_GPU)
#define CUDA_SAFE_CALL(call) {\
cudaError_t err = call;\
if (cudaSuccess != err) {\
......@@ -14,4 +25,6 @@
}\
}
#endif
......@@ -17,7 +17,9 @@ bool CudaMemory::flush()
{
//! copy from host to device memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz,hipMemcpyHostToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz,cudaMemcpyHostToDevice));
#else
memcpy(dm,hm,sz);
......@@ -39,8 +41,10 @@ bool CudaMemory::allocate(size_t sz)
//! Allocate the device memory
if (dm == NULL)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMalloc(&dm,sz));
#ifdef __HIP__
CUDA_SAFE_CALL(hipMalloc(&dm,sz));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMalloc(&dm,sz));
#else
if (sz != 0)
{
......@@ -79,7 +83,9 @@ void CudaMemory::destroy()
if (dm != NULL)
{
//! Release the allocated memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipFree(dm));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaFree(dm));
#else
delete [] (unsigned char *)dm;
......@@ -90,7 +96,9 @@ void CudaMemory::destroy()
if (hm != NULL)
{
//! we invalidate hm
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipHostFree(hm));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaFreeHost(hm));
#else
delete [] (unsigned char *)hm;
......@@ -111,7 +119,9 @@ void CudaMemory::destroy()
*/
void CudaMemory::deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),hipMemcpyDeviceToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),cudaMemcpyDeviceToDevice));
#else
memcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start));
......@@ -127,7 +137,9 @@ void CudaMemory::allocate_host(size_t sz) const
{
if (hm == NULL)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipHostMalloc(&hm,sz,hipHostMallocMapped))
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz,cudaHostAllocMapped))
#else
hm = new unsigned char[sz];
......@@ -154,11 +166,13 @@ bool CudaMemory::copyFromPointer(const void * ptr)
// get the device pointer
void * dvp;
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipHostGetDevicePointer(&dvp,hm,0));
// memory copy
memcpy(dvp,ptr,sz);
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));
// memory copy
memcpy(dvp,ptr,sz);
#else
memcpy(hm,ptr,sz);
......@@ -186,7 +200,9 @@ bool CudaMemory::copyDeviceToDevice(const CudaMemory & m)
}
//! Copy the memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(dm,m.dm,m.sz,hipMemcpyDeviceToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(dm,m.dm,m.sz,cudaMemcpyDeviceToDevice));
#else
memcpy(dm,m.dm,m.sz);
......@@ -268,7 +284,9 @@ bool CudaMemory::resize(size_t sz)
{
if (this->sz < sz)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMalloc(&tdm,sz));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMalloc(&tdm,sz));
#else
tdm = new unsigned char [sz];
......@@ -278,14 +296,18 @@ bool CudaMemory::resize(size_t sz)
#endif
#ifdef GARBAGE_INJECTOR
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemset(tdm,-1,sz));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemset(tdm,-1,sz));
#endif
#endif
}
//! copy from the old buffer to the new one
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(tdm,dm,CudaMemory::size(),hipMemcpyDeviceToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(tdm,dm,CudaMemory::size(),cudaMemcpyDeviceToDevice));
#else
memcpy(tdm,dm,CudaMemory::size());
......@@ -296,7 +318,9 @@ bool CudaMemory::resize(size_t sz)
{
if (this->sz < sz)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipHostMalloc(&thm,sz,hipHostMallocMapped));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaHostAlloc(&thm,sz,cudaHostAllocMapped));
#else
thm = new unsigned char [sz];
......@@ -307,7 +331,9 @@ bool CudaMemory::resize(size_t sz)
}
//! copy from the old buffer to the new one
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(thm,hm,CudaMemory::size(),hipMemcpyHostToHost));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(thm,hm,CudaMemory::size(),cudaMemcpyHostToHost));
#else
memcpy(thm,hm,CudaMemory::size());
......@@ -356,7 +382,9 @@ void CudaMemory::deviceToHost()
allocate_host(sz);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(hm,dm,sz,hipMemcpyDeviceToHost));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(hm,dm,sz,cudaMemcpyDeviceToHost));
#else
memcpy(hm,dm,sz);
......@@ -378,7 +406,9 @@ void CudaMemory::deviceToHost(CudaMemory & mem)
{resize(mem.sz);}
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(mem.hm,dm,mem.sz,hipMemcpyDeviceToHost));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(mem.hm,dm,mem.sz,cudaMemcpyDeviceToHost));
#else
memcpy(mem.hm,dm,mem.sz);
......@@ -400,7 +430,9 @@ void CudaMemory::hostToDevice(CudaMemory & mem)
{resize(mem.sz);}
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(dm,mem.hm,mem.sz,hipMemcpyHostToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(dm,mem.hm,mem.sz,cudaMemcpyHostToDevice));
#else
memcpy(dm,mem.hm,mem.sz);
......@@ -414,7 +446,9 @@ void CudaMemory::hostToDevice(size_t start, size_t stop)
allocate_host(sz);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),hipMemcpyHostToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),cudaMemcpyHostToDevice));
#else
memcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start));
......@@ -433,7 +467,9 @@ void CudaMemory::deviceToHost(size_t start, size_t stop)
allocate_host(sz);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),hipMemcpyDeviceToHost));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),cudaMemcpyDeviceToHost));
#else
memcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start));
......@@ -463,7 +499,9 @@ const void * CudaMemory::getPointer() const
*/
void CudaMemory::fill(unsigned char c)
{
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemset(dm,c,size()));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemset(dm,c,size()));
#else
memset(dm,c,size());
......@@ -495,7 +533,9 @@ void CudaMemory::hostToDevice()
allocate_host(sz);
//! copy from device to host memory
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#ifdef __HIP__
CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz,hipMemcpyHostToDevice));
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz,cudaMemcpyHostToDevice));
#else
memcpy(dm,hm,sz);
......
......@@ -33,11 +33,7 @@
#include "memory.hpp"
#include <iostream>
#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
#include <cuda_runtime.h>
#else
#include "util/cuda_util.hpp"
#endif
extern size_t TotCudaMemoryAllocated;
......
......@@ -11,7 +11,12 @@
#include "config.h"
#include "cuda_kernel_error_checker.hpp"
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU) && !defined(__HIP__)
constexpr int default_kernel_wg_threads_ = 1024;
#include "cub/util_type.cuh"
#include "cub/block/block_scan.cuh"
#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
......
......@@ -9,7 +9,16 @@
#define OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_
#include "config.h"
#if defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#if defined(__HIP__)
// If hip fill NVCC think it is Nvidia compiler
#ifdef __NVCC__
#undef __NVCC__
#include <hip/hip_runtime.h>
#define __NVCC__
#else
#include <hip/hip_runtime.h>
#endif
#elif defined(CUDA_GPU) && !defined(CUDA_ON_CPU)
#include <cuda_runtime.h>
#endif
......@@ -33,9 +42,6 @@
#endif
#ifdef CUDA_ON_CPU
#define CUDA_SAFE(cuda_call) \
cuda_call;
#ifdef __shared__
#undef __shared__
......@@ -44,17 +50,6 @@
#else
#define CUDA_SAFE(cuda_call) \
cuda_call; \
{\
cudaError_t e = cudaPeekAtLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}
#ifndef __shared__
#define __shared__
#endif
......
......@@ -3,6 +3,8 @@
#ifdef CUDIFY_USE_ALPAKA
#include "cudify_alpaka.hpp"
#elif defined(CUDIFY_USE_HIP)
#include "cudify_hip.hpp"
#else
#include "cudify_sequencial.hpp"
#endif
......
......@@ -5,7 +5,7 @@
#include "util/cuda_launch.hpp"
#include "memory/CudaMemory.cuh"
#if defined(CUDA_ON_CPU) && defined(CUDIFY_ACTIVE)
#if defined(CUDIFY_ACTIVE)
BOOST_AUTO_TEST_SUITE( cudify_tests )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment