Newer
Older
#ifndef __CUDIFY_CUDA_HPP__
#define __CUDIFY_CUDA_HPP__
#define CUDA_ON_BACKEND CUDA_BACKEND_CUDA
constexpr int default_kernel_wg_threads_ = 1024;
#if CUDART_VERSION >= 11000 && defined(__NVCC__)
#include "cub/util_type.cuh"
#include "cub/block/block_scan.cuh"
template<typename lambda_f>
__global__ void kernel_launch_lambda(lambda_f f)
{
dim3 bid = blockIdx;
dim3 tid = threadIdx;
f(bid,tid);
}
template<typename lambda_f>
__global__ void kernel_launch_lambda_tls(lambda_f f)
{
f();
}
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#endif
static void init_wrappers()
{}
#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
#define CUDA_LAUNCH(cuda_call,ite, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
}
#define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
CHECK_SE_CLASS1_POST("lambda",0)\
}\
}
#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("lambda",0)\
}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("lambda",0)\
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#define CUDA_CHECK() \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("no call","no args")\
}\
}
#else
#define CUDA_LAUNCH(cuda_call,ite, ...) \
if (ite.wthr.x != 0)\
{cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}
#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
cuda_call<<<wthr,thr>>>(__VA_ARGS__);
#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);
#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
{\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<wthr__,thr__>>>(lambda_f);}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
{\
dim3 wthr__(wthr_);\
dim3 thr__(thr_);\
if (ite.wthr.x != 0)\
{kernel_launch_lambda_tls<<<wthr__,thr__>>>(lambda_f);}\
}