From ee2cb6aeb1cffc0f0dfde4bd5ea78a63d9ba5724 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Thu, 13 Dec 2018 02:28:57 +0100
Subject: [PATCH] Inducing a crash

---
 example/Vector/7_SPH_dlb_gpu_opt/Makefile     |  4 ++--
 example/Vector/7_SPH_dlb_gpu_opt/main.cu      |  6 +++---
 src/Vector/cuda/vector_dist_cuda_func_test.cu | 20 ++++++++++++++++++-
 src/Vector/cuda/vector_dist_cuda_funcs.cuh    |  8 ++++----
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/example/Vector/7_SPH_dlb_gpu_opt/Makefile b/example/Vector/7_SPH_dlb_gpu_opt/Makefile
index ce1336a5d..cca7077fd 100644
--- a/example/Vector/7_SPH_dlb_gpu_opt/Makefile
+++ b/example/Vector/7_SPH_dlb_gpu_opt/Makefile
@@ -12,10 +12,10 @@ sph_dlb_test: OPT += -DTEST_RUN
 sph_dlb_test: sph_dlb
 
 %.o: %.cu
-	nvcc -O3 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
+	nvcc -O0 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
 
 %.o: %.cpp
-	$(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+	$(CC) -O0 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
 sph_dlb: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
index b0177e461..ba92a1a6c 100644
--- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
@@ -390,7 +390,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
 
 	vd.merge_sort<force,drho,red>(NN);
 
-	max_visc = reduce<red,_max_>(vd);
+	max_visc = reduce_local<red,_max_>(vd);
 }
 
 template<typename vector_type>
@@ -412,8 +412,8 @@ void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_n
 
 	max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel());
 
-	max_acc = reduce<red,_max_>(vd);
-	max_vel = reduce<red2,_max_>(vd);
+	max_acc = reduce_local<red,_max_>(vd);
+	max_vel = reduce_local<red2,_max_>(vd);
 
 	Vcluster<> & v_cl = create_vcluster();
 	v_cl.max(max_acc);
diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu
index d78b4a294..63bf0b0aa 100644
--- a/src/Vector/cuda/vector_dist_cuda_func_test.cu
+++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu
@@ -1094,10 +1094,28 @@ void vector_dist_remove_marked_type()
 	// Boundary conditions
 	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 
-	vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(5000*v_cl.size(),domain,bc,g);
+	vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(50000*v_cl.size(),domain,bc,g);
+
+	// Fill every particle position with random coordinates in [0,1]
 
 	auto it = vd.getDomainIterator();
 
+	while(it.isNext())
+	{
+		auto p = it.get();
+
+		vd.getPos(p)[0] = (float)rand() / RAND_MAX;
+		vd.getPos(p)[1] = (float)rand() / RAND_MAX;
+		vd.getPos(p)[2] = (float)rand() / RAND_MAX;
+
+		++it;
+	}
+
+	vd.map();
+	vd.template ghost_get<>();
+
+	it = vd.getDomainIterator();
+
 	float fc = 1.0;
 	float dc = 1.0;
 	int ic = 1;
diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh
index 76a52e480..3e119645e 100644
--- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh
+++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh
@@ -104,11 +104,11 @@ __global__  void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs
 }
 
 template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
-__global__  void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs)
+__global__  void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m)
 {
     int p = threadIdx.x + blockIdx.x * blockDim.x;
 
-    if (p >= (int)vd.size() - 1) return;
+    if (p >= (int)g_m - 1) return;
 
     if (vd.template get<prp_off>(p) != vd.template get<prp_off>(p+1))
 	{
@@ -367,12 +367,12 @@ void remove_marked(vector_type & vd)
 
 	// mark point, particle that stay and to remove
 	find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())><<<ite.wthr,ite.thr>>>
-			           (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel());
+			           (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local());
 
 	mem.deviceToHost();
 
 	// we have no particles to remove
-	if (*(int *)mem.getPointer() == 0)
+	if (*(int *)mem.getPointer() != 1)
 	{return;}
 
 	// Get the mark point
-- 
GitLab