diff --git a/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile index 4650f84db16ba41f38f28a6df7d7d3f036ce1176..3c12702c2e187ceec3b40cc44886d8df1ac000a4 100644 --- a/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile +++ b/example/SparseGrid/2_gray_scott_3d_sparse_gpu_opt/Makefile @@ -36,7 +36,7 @@ LDIR = OBJ = main.o %.o: %.cu - $(CUDA_CC) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + $(CUDA_CC) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) gray_scott_sparse_gpu: $(OBJ) $(CUDA_CC_LINK) -o $@ $^ $(LIBS_PATH) $(LIBS_SELECT) diff --git a/openfpm_devices b/openfpm_devices index b706933d26fabca7b931ec74f8fa9ecbe7824e72..f15ba7f565815f9c827d3e438f278aca877999b9 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit b706933d26fabca7b931ec74f8fa9ecbe7824e72 +Subproject commit f15ba7f565815f9c827d3e438f278aca877999b9 diff --git a/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh b/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh index b3a322a393767148197f6def300c7254dcaa417d..1991b5cc9aaffe11ce9a540a712d89eefc76c83b 100644 --- a/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh +++ b/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh @@ -28,9 +28,19 @@ template<> struct launch_call_impl<1> { template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type,typename ... argsT> - inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args) + inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args) { - CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(), itd, functor, args... ); + + auto g = loc_grid.toKernel(); + + auto lamb = [g,itd,f,args ...] __device__ () mutable + { + __shared__ bool is_empty_block; + + f(g,itd,is_empty_block,args...); + }; + + CUDA_LAUNCH_LAMBDA_TLS(ite,lamb); } };