Commit 4bd92bbb authored by Your Name

Fixing HIP

parent 79b417d5
Pipeline #4353 passed
@@ -12,7 +12,9 @@ set (CMAKE_CUDA_STANDARD 14)
set (ENV{BOOST_ROOT} ${BOOST_ROOT})
set (Boost_NO_BOOST_CMAKE OFF)
find_package(Boost 1.72.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options OPTIONAL_COMPONENTS fiber context)
if (NOT CUDA_ON_BACKEND STREQUAL "HIP")
	find_package(OpenMP)
endif()
set(CUDA_ON_BACKEND CACHE STRING "Activate several backend for CUDA")
......
@@ -20,6 +20,13 @@ __global__ void kernel_launch_lambda(lambda_f f)
f(bid,tid);
}
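// _tls variant: the lambda is called with no arguments (unlike kernel_launch_lambda
// above, which forwards blockIdx/threadIdx as bid/tid)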
template<typename lambda_f>
__global__ void kernel_launch_lambda_tls(lambda_f f)
{
f();
}
#endif
static void init_wrappers()
@@ -218,6 +225,28 @@
#define CUDA_CHECK()
#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
{\
if (ite.wthr.x != 0)\
{kernel_launch_lambda_tls<<<ite.wthr,ite.thr>>>(lambda_f);}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
if (wthr__.x != 0)\
{kernel_launch_lambda<<<wthr__,thr__>>>(lambda_f);}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
if (wthr__.x != 0)\
{kernel_launch_lambda_tls<<<wthr__,thr__>>>(lambda_f);}\
}
#endif
#endif
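For reference, a hedged usage sketch of the new launch variants above (not part of the commit; the function, buffer and launch shape are illustrative, and a compiler with extended device lambdas is assumed). The _TLS forms take a zero-argument lambda, the _DIM3 forms take the grid and block sizes directly instead of an iterator object:

    // Illustrative only: fills the first n entries of a device buffer with 1.0f.
    void fill_ones(float * d_buf, unsigned int n)        // d_buf: device pointer (assumed valid)
    {
        auto fill = [=] __device__ ()                    // zero-argument lambda for the _TLS form
        {
            unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
            if (i < n) {d_buf[i] = 1.0f;}
        };

        // 2 blocks of 128 threads; the macro converts the two sizes to dim3 internally
        CUDA_LAUNCH_LAMBDA_DIM3_TLS(2, 128, fill);
    }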
@@ -205,6 +205,20 @@ static cudaError_t cudaMemcpy ( void* dst, const void* src, size_t count, cudaMe
#include "hipcub/hipcub.hpp"
#include "hipcub/block/block_scan.hpp"
template<typename lambda_f>
__global__ void kernel_launch_lambda(lambda_f f)
{
dim3 bid = blockIdx;
dim3 tid = threadIdx;
f(bid,tid);
}
template<typename lambda_f>
__global__ void kernel_launch_lambda_tls(lambda_f f)
{
f();
}
namespace cub
{
template<typename T, unsigned int bd>
@@ -312,6 +326,45 @@ bool has_work_gpu_cl_(const wthr_type & wthr, const thr_type & thr)
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}
#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...)\
{\
\
CHECK_SE_CLASS1_PRE\
\
if (has_work_gpu_cl_(ite.wthr,ite.thr) == true)\
{hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel_launch_lambda),ite.wthr,ite.thr, 0, 0, lambda_f);}\
\
CHECK_SE_CLASS1_POST("kernel_launch_lambda",__VA_ARGS__)\
}
#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
{\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel_launch_lambda_tls),ite.wthr,ite.thr,0,0,lambda_f);}\
CHECK_SE_CLASS1_POST("kernel_launch_lambda",__VA_ARGS__)\
}
#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
CHECK_SE_CLASS1_PRE\
if (wthr__.x != 0)\
{hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel_launch_lambda_tls),wthr__,thr__, 0, 0, lambda_f);}\
CHECK_SE_CLASS1_POST("kernel_launch_lambda_tls",__VA_ARGS__)\
}
#define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
CHECK_SE_CLASS1_PRE\
if (wthr__.x != 0)\
{hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel_launch_lambda),wthr__,thr__, 0, 0, lambda_f);}\
CHECK_SE_CLASS1_POST("kernel_launch_lambda",__VA_ARGS__)\
}
#define CUDA_CHECK()
#endif
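Similarly, a hedged sketch of the iterator-based CUDA_LAUNCH_LAMBDA form (also not part of the commit): the macro only needs an object exposing dim3 members named wthr and thr, so the launch_box struct, the zero_fill function and the sizes below are illustrative stand-ins for whatever iterator type the project actually passes:

    // Illustrative only: any object with dim3 members wthr and thr satisfies the macro.
    struct launch_box { dim3 wthr; dim3 thr; };

    void zero_fill(float * d_out)                        // d_out: device pointer, >= 256 floats (assumed)
    {
        launch_box ite;
        ite.wthr = dim3(4,1,1);                          // 4 blocks
        ite.thr  = dim3(64,1,1);                         // 64 threads per block

        auto zero = [=] __device__ (dim3 bid, dim3 tid)  // kernel_launch_lambda forwards block/thread ids
        {
            unsigned int i = bid.x * blockDim.x + tid.x; // 4 * 64 = 256 threads in total
            d_out[i] = 0.0f;
        };

        CUDA_LAUNCH_LAMBDA(ite, zero);
    }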
......
@@ -2,6 +2,10 @@
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include <type_traits>
std::is_trivially_copyable<int> b;
#include "util/cuda_launch.hpp"
#include "memory/CudaMemory.cuh"
......
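A side note on the test change above: the global std::is_trivially_copyable<int> b; only instantiates the trait. If the intent is a compile-time check, an equivalent but more explicit form would be a static_assert (a sketch, not what the commit does):

    static_assert(std::is_trivially_copyable<int>::value,
                  "int is expected to be trivially copyable");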