Skip to content
Snippets Groups Projects
cudify_cuda.hpp 7.31 KiB
Newer Older
#ifndef __CUDIFY_CUDA_HPP__
#define __CUDIFY_CUDA_HPP__

#define CUDA_ON_BACKEND CUDA_BACKEND_CUDA

constexpr int default_kernel_wg_threads_ = 1024;

#if CUDART_VERSION >= 11000 && defined(__NVCC__)
    #include "cub/util_type.cuh"
    #include "cub/block/block_scan.cuh"
Pietro Incardona's avatar
Pietro Incardona committed
#endif

#ifdef __NVCC__

template<typename lambda_f>
__global__ void kernel_launch_lambda(lambda_f f)
{
    dim3 bid = blockIdx;
    dim3 tid = threadIdx;
    f(bid,tid);
}

#endif

static void init_wrappers()
{}

#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)

#define CUDA_LAUNCH(cuda_call,ite, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        if (ite.wthr.x != 0)\
        {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
            CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
        }\
        }

#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
            CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
        }\
        }

Pietro Incardona's avatar
Pietro Incardona committed
#define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        }

#define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        if (ite.wthr.x != 0)\
        {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
Pietro Incardona's avatar
Pietro Incardona committed
            CHECK_SE_CLASS1_POST("lambda",0)\
        }\
        }

#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        if (ite.wthr.x != 0)\
        {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
            CHECK_SE_CLASS1_POST("lambda",0)\
        }\
        }

#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        if (ite.wthr.x != 0)\
        {kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
            CHECK_SE_CLASS1_POST("lambda",0)\
#define CUDA_CHECK() \
        {\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
        }\
        CHECK_SE_CLASS1_PRE\
        cudaDeviceSynchronize(); \
        {\
            cudaError_t e = cudaGetLastError();\
            if (e != cudaSuccess)\
            {\
                std::string error = cudaGetErrorString(e);\
                std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
            }\
            CHECK_SE_CLASS1_POST("no call","no args")\
        }\
        }

#else

#define CUDA_LAUNCH(cuda_call,ite, ...) \
        if (ite.wthr.x != 0)\
        {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}

#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
        cuda_call<<<wthr,thr>>>(__VA_ARGS__);

#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
        kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);

#define CUDA_CHECK()

#endif