Newer
Older
#ifndef __CUDIFY_CUDA_HPP__
#define __CUDIFY_CUDA_HPP__
#define CUDA_ON_BACKEND CUDA_BACKEND_CUDA
constexpr int default_kernel_wg_threads_ = 1024;
#if CUDART_VERSION >= 11000 && defined(__NVCC__)
#include "cub/util_type.cuh"
#include "cub/block/block_scan.cuh"
template<typename lambda_f>
__global__ void kernel_launch_lambda(lambda_f f)
{
dim3 bid = blockIdx;
dim3 tid = threadIdx;
f(bid,tid);
}
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#endif
static void init_wrappers()
{}
#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
#define CUDA_LAUNCH(cuda_call,ite, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
}
#define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
CHECK_SE_CLASS1_POST("lambda",0)\
}\
}
#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("lambda",0)\
}\
}
#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
if (ite.wthr.x != 0)\
{kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("lambda",0)\
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#define CUDA_CHECK() \
{\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
}\
CHECK_SE_CLASS1_PRE\
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST("no call","no args")\
}\
}
#else
#define CUDA_LAUNCH(cuda_call,ite, ...) \
if (ite.wthr.x != 0)\
{cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}
#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
cuda_call<<<wthr,thr>>>(__VA_ARGS__);
#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);