Commit fdbff4c0 authored by incardon

Fixing CUDA_LAUNCH for only one argument

parent ba4fe180
......@@ -246,7 +246,7 @@ public:
part_ids.resize(pl.size());
CUDA_LAUNCH((subindex<dim,T,cnt_type,ids_type>),ite_gpu.wthr,ite_gpu.thr,div_c,
CUDA_LAUNCH((subindex<dim,T,cnt_type,ids_type>),ite_gpu,div_c,
spacing_c,
off,
this->getTransform(),
......@@ -266,7 +266,7 @@ public:
cells.resize(pl.size());
auto itgg = part_ids.getGPUIterator();
CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg.wthr,itgg.thr,0,
CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
div_c,
off,
part_ids.size(),
......@@ -287,7 +287,7 @@ public:
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
decltype(pl.toKernel()),
decltype(sorted_to_not_sorted.toKernel()),
cnt_type,shift_ph<0,cnt_type>>),ite.wthr,ite.thr,pl.size(),
cnt_type,shift_ph<0,cnt_type>>),ite,pl.size(),
pl_prp.toKernel(),
pl_prp_out.toKernel(),
pl.toKernel(),
......@@ -301,7 +301,7 @@ public:
{
ite = sorted_domain_particles_ids.getGPUIterator();
CUDA_LAUNCH((mark_domain_particles),ite.wthr,ite.thr,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
// now we sort the particles
......
......@@ -104,7 +104,7 @@ namespace openfpm
auto ite = v.getGPUIterator();
CUDA_LAUNCH((merge_add_prp_device_impl<decltype(v.toKernel()),decltype(this_.toKernel()),args...>),ite.wthr,ite.thr,v.toKernel(),this_.toKernel(),(unsigned int)old_sz);
CUDA_LAUNCH((merge_add_prp_device_impl<decltype(v.toKernel()),decltype(this_.toKernel()),args...>),ite,v.toKernel(),this_.toKernel(),(unsigned int)old_sz);
#else
std::cout << __FILE__ << ":" << __LINE__ << " Error the function add_prp_device only work when map_vector is compiled with nvcc" << std::endl;
......@@ -136,7 +136,7 @@ namespace openfpm
auto ite = v.getGPUIterator();
CUDA_LAUNCH((merge_add_prp_device_impl<decltype(v.toKernel()),decltype(this_.toKernel()),args...>),ite.wthr,ite.thr,v.toKernel(),this_.toKernel(),(unsigned int)offset);
CUDA_LAUNCH((merge_add_prp_device_impl<decltype(v.toKernel()),decltype(this_.toKernel()),args...>),ite,v.toKernel(),this_.toKernel(),(unsigned int)offset);
#else
std::cout << __FILE__ << ":" << __LINE__ << " Error the function merge_prp_device only work when map_vector is compiled with nvcc" << std::endl;
......
......@@ -15,10 +15,10 @@
#include "cuda_kernel_error_checker.hpp"
#define CUDA_LAUNCH(cuda_call,grid_size,block_size, ...) \
#define CUDA_LAUNCH(cuda_call,ite, ...) \
{\
CHECK_SE_CLASS1_PRE\
cuda_call<<<(grid_size),(block_size)>>>(__VA_ARGS__); \
cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__); \
cudaDeviceSynchronize(); \
{\
cudaError_t e = cudaGetLastError();\
......@@ -30,10 +30,11 @@
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#else
#define CUDA_LAUNCH(cuda_call,grid_size,block_size, ...) \
cuda_call<<<(grid_size),(block_size)>>>(__VA_ARGS__);
#define CUDA_LAUNCH(cuda_call,ite, ...) \
cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment