diff --git a/example/Vector/7_SPH_dlb_gpu_opt/Makefile b/example/Vector/7_SPH_dlb_gpu_opt/Makefile index ce1336a5d73aa5906577a50b5c12e4a8e8f9a76d..cca7077fdc1c3c4229ec9ad16db9200ea740a8f1 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/Makefile +++ b/example/Vector/7_SPH_dlb_gpu_opt/Makefile @@ -12,10 +12,10 @@ sph_dlb_test: OPT += -DTEST_RUN sph_dlb_test: sph_dlb %.o: %.cu - nvcc -O3 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC) + nvcc -O0 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp - $(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + $(CC) -O0 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) sph_dlb: $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu index b0177e4612e5b1ee5f98fe17c8acbe0cfbd54ea6..ba92a1a6cead3018a089ffccf9f514c52ca809b3 100644 --- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu +++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu @@ -390,7 +390,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N vd.merge_sort<force,drho,red>(NN); - max_visc = reduce<red,_max_>(vd); + max_visc = reduce_local<red,_max_>(vd); } template<typename vector_type> @@ -412,8 +412,8 @@ void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_n max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel()); - max_acc = reduce<red,_max_>(vd); - max_vel = reduce<red2,_max_>(vd); + max_acc = reduce_local<red,_max_>(vd); + max_vel = reduce_local<red2,_max_>(vd); Vcluster<> & v_cl = create_vcluster(); v_cl.max(max_acc); diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu index d78b4a294d7d3f6b7f59421893a2e4db9ecd213d..63bf0b0aa7ffb5cf967e3483292586dac15d64a1 100644 --- a/src/Vector/cuda/vector_dist_cuda_func_test.cu +++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu @@ -1094,10 +1094,28 @@ void vector_dist_remove_marked_type() // Boundary conditions size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; - vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(5000*v_cl.size(),domain,bc,g); + vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(50000*v_cl.size(),domain,bc,g); + + // Fill the position auto it = vd.getDomainIterator(); + while(it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (float)rand() / RAND_MAX; + vd.getPos(p)[1] = (float)rand() / RAND_MAX; + vd.getPos(p)[2] = (float)rand() / RAND_MAX; + + ++it; + } + + vd.map(); + vd.template ghost_get<>(); + + it = vd.getDomainIterator(); + float fc = 1.0; float dc = 1.0; int ic = 1; diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index 76a52e4805e194cab4b6143ac262598397c278ae..3e119645e6e1aa4564cf0f8212e94c74b5e599d5 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -104,11 +104,11 @@ __global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs } template<unsigned int prp_off, typename vector_type,typename vector_type_offs> -__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs) +__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m) { int p = threadIdx.x + blockIdx.x * blockDim.x; - if (p >= (int)vd.size() - 1) return; + if (p >= (int)g_m - 1) return; if (vd.template get<prp_off>(p) != vd.template get<prp_off>(p+1)) { @@ -367,12 +367,12 @@ void remove_marked(vector_type & vd) // mark point, particle that stay and to remove find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())><<<ite.wthr,ite.thr>>> - (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel()); + (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local()); mem.deviceToHost(); // we have no particles to remove - if (*(int *)mem.getPointer() == 0) + if (*(int *)mem.getPointer() != 1) {return;} // Get the mark point