From ee2cb6aeb1cffc0f0dfde4bd5ea78a63d9ba5724 Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Thu, 13 Dec 2018 02:28:57 +0100 Subject: [PATCH] Inducing a crash --- example/Vector/7_SPH_dlb_gpu_opt/Makefile | 4 ++-- example/Vector/7_SPH_dlb_gpu_opt/main.cu | 6 +++--- src/Vector/cuda/vector_dist_cuda_func_test.cu | 20 ++++++++++++++++++- src/Vector/cuda/vector_dist_cuda_funcs.cuh | 8 ++++---- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/example/Vector/7_SPH_dlb_gpu_opt/Makefile b/example/Vector/7_SPH_dlb_gpu_opt/Makefile index ce1336a5d..cca7077fd 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/Makefile +++ b/example/Vector/7_SPH_dlb_gpu_opt/Makefile @@ -12,10 +12,10 @@ sph_dlb_test: OPT += -DTEST_RUN sph_dlb_test: sph_dlb %.o: %.cu - nvcc -O3 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC) + nvcc -O0 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp - $(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + $(CC) -O0 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) sph_dlb: $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index b0177e461..ba92a1a6c 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -390,7 +390,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N vd.merge_sort<force,drho,red>(NN); - max_visc = reduce<red,_max_>(vd); + max_visc = reduce_local<red,_max_>(vd); } template<typename vector_type> @@ -412,8 +412,8 @@ void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_n max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel()); - max_acc = reduce<red,_max_>(vd); - max_vel = reduce<red2,_max_>(vd); + max_acc = reduce_local<red,_max_>(vd); + max_vel = reduce_local<red2,_max_>(vd); Vcluster<> & v_cl = create_vcluster(); v_cl.max(max_acc); diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index d78b4a294..63bf0b0aa 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -1094,10 +1094,28 @@ void vector_dist_remove_marked_type() // Boundary conditions size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; - vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(5000*v_cl.size(),domain,bc,g); + vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(50000*v_cl.size(),domain,bc,g); + + // Fill the position auto it = vd.getDomainIterator(); + while(it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (float)rand() / RAND_MAX; + vd.getPos(p)[1] = (float)rand() / RAND_MAX; + vd.getPos(p)[2] = (float)rand() / RAND_MAX; + + ++it; + } + + vd.map(); + vd.template ghost_get<>(); + + it = vd.getDomainIterator(); + float fc = 1.0; float dc = 1.0; int ic = 1; diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index 76a52e480..3e119645e 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -104,11 +104,11 @@ __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs } template<unsigned int prp_off, typename vector_type,typename vector_type_offs> -__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs) +__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m) { int p = threadIdx.x + blockIdx.x * blockDim.x; - if (p >= (int)vd.size() - 1) return; + if (p >= (int)g_m - 1) return; if (vd.template get<prp_off>(p) != vd.template get<prp_off>(p+1)) { @@ -367,12 +367,12 @@ void remove_marked(vector_type & vd) // mark point, particle that stay and to remove find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())><<<ite.wthr,ite.thr>>> - (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel()); + (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local()); mem.deviceToHost(); // we have no particles to remove - if (*(int *)mem.getPointer() == 0) + if (*(int *)mem.getPointer() != 1) {return;} // Get the mark point -- GitLab