Commit 7eac8b1e authored by incardon

Latest modules

Update the openfpm_data submodule and migrate every CUDA_LAUNCH call site from the separate (ite.wthr, ite.thr) argument pair to passing the GPU iterator object directly; also build the gpu_interop example with $(CUDA_CC) throughout.

parent d6e370ae
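The change repeated throughout this diff is mechanical: every CUDA_LAUNCH call site stops passing the grid and block configuration as two separate arguments (ite.wthr, ite.thr) and instead hands over the iterator itself, leaving the macro to unpack the launch geometry. Below is a minimal, self-contained sketch of the two macro forms, assuming the iterator exposes dim3 members wthr and thr as the call sites suggest; the real macro lives in openfpm_data and may add error checking.

#include <cuda_runtime.h>

// Hypothetical stand-in for the iterator returned by getGPUIterator() /
// getDomainIteratorGPU(); only the two members used by the macro are modeled.
struct gpu_iterator { dim3 wthr; dim3 thr; };

// Old form: the caller unpacks the launch geometry at every call site.
#define CUDA_LAUNCH_OLD(kernel,wthr,thr,...) kernel<<<wthr,thr>>>(__VA_ARGS__)

// New form: the macro receives the iterator and unpacks wthr/thr itself.
#define CUDA_LAUNCH(kernel,ite,...) kernel<<<(ite).wthr,(ite).thr>>>(__VA_ARGS__)

__global__ void fill(int * v, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {v[i] = i;}
}

int main()
{
    int n = 256;
    int * v;
    cudaMalloc(&v,n*sizeof(int));

    gpu_iterator ite{dim3((n+63)/64),dim3(64)};
    CUDA_LAUNCH(fill,ite,v,n);          // equivalent to fill<<<4,64>>>(v,n)

    cudaDeviceSynchronize();
    cudaFree(v);
    return 0;
}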
@@ -201,7 +201,7 @@ inline void EqState(particles & vd)
 {
     auto it = vd.getDomainIteratorGPU();
-    CUDA_LAUNCH(EqState_gpu,it.wthr,it.thr,vd.toKernel(),B);
+    CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
 }
@@ -398,7 +398,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
     // Update the cell-list
     vd.updateCellList(NN);
-    CUDA_LAUNCH(calc_forces_gpu,part.wthr,part.thr,vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
+    CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
     vd.merge_sort<force,drho,red>(NN);
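In this hunk the kernel runs over the cell-list-sorted particle view (toKernel_sorted()) so that neighbor accesses are coalesced, and merge_sort<force,drho,red>(NN) then brings the computed properties back to the original particle order. A toy, self-contained illustration of that compute-on-permuted / scatter-back pattern using Thrust follows; none of these names are OpenFPM's.

#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/scatter.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <vector>

int main()
{
    // permutation standing in for the cell-list non-sorted -> sorted map
    std::vector<int> h_perm = {2,0,3,1};
    thrust::device_vector<int> perm(h_perm.begin(),h_perm.end());

    std::vector<float> h_rho = {1.f,2.f,3.f,4.f};
    thrust::device_vector<float> rho(h_rho.begin(),h_rho.end());

    // gather into sorted order and compute there (a trivial transform here)
    thrust::device_vector<float> rho_sorted(4), force_sorted(4), force(4);
    thrust::gather(perm.begin(),perm.end(),rho.begin(),rho_sorted.begin());
    thrust::transform(rho_sorted.begin(),rho_sorted.end(),
                      force_sorted.begin(),thrust::negate<float>());

    // "merge back": scatter the sorted results to the original order
    thrust::scatter(force_sorted.begin(),force_sorted.end(),
                    perm.begin(),force.begin());
    return 0;
}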
......
@@ -17,13 +17,13 @@ OBJ = main.o
 gpu_interop:

 %.o: %.cu
-	$(CUDA_CC) -O3 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
+	$(CUDA_CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)

 %.o: %.cpp
-	$(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+	$(CUDA_CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)

 gpu_interop: $(OBJ)
-	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+	$(CUDA_CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)

 all: gpu_interop
......
openfpm_data @ fdbff4c0
-Subproject commit ba4fe180729204c6b0b3c4a27897447795ca70dd
+Subproject commit fdbff4c0b9b762e3447da261812e777eba52dcd4
@@ -86,7 +86,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru
     // First we have to see how many entries each particle produces
     CUDA_LAUNCH((num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());
     // scan
@@ -104,7 +104,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru
     // we compute the processor id for each particle
     CUDA_LAUNCH((proc_label_id_ghost<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());
     // sort particles
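The two hunks above are stages of a classic count / exclusive-scan / fill pattern: num_proc_ghost_each_part counts how many entries each particle will emit, the scan turns those counts into write offsets (starts), and proc_label_id_ghost writes each entry at its offset. A self-contained toy version of the pattern, with all names illustrative rather than OpenFPM's:

#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <vector>

// stage 1: count how many output entries element i will produce
__global__ void count_entries(const int * key, int * counts, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {counts[i] = key[i];}              // toy rule: key = #entries
}

// stage 3: write each element's entries starting at its scanned offset
__global__ void fill_entries(const int * key, const int * starts, int * out, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}
    for (int k = 0 ; k < key[i] ; k++) {out[starts[i] + k] = i;}
}

int main()
{
    std::vector<int> h_key = {2,0,1,3};
    int n = (int)h_key.size();
    thrust::device_vector<int> key(h_key.begin(),h_key.end());
    thrust::device_vector<int> counts(n), starts(n);

    count_entries<<<1,64>>>(key.data().get(),counts.data().get(),n);

    // stage 2: exclusive scan converts per-element counts into offsets
    thrust::exclusive_scan(counts.begin(),counts.end(),starts.begin());

    int total = starts.back() + counts.back();    // 2+0+1+3 = 6
    thrust::device_vector<int> out(total);
    fill_entries<<<1,64>>>(key.data().get(),starts.data().get(),out.data().get(),n);
    cudaDeviceSynchronize();
    return 0;
}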
@@ -118,7 +118,7 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,tru
     // Find the buffer bases
     CUDA_LAUNCH((find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());
     // Transfer the number of offsets to the CPU
@@ -208,7 +208,7 @@ struct local_ghost_from_opart_impl<with_pos,dim,St,prop,Memory,layout_base,true>
     if (ite.wthr.x != 0)
     {
         CUDA_LAUNCH((process_ghost_particles_local<with_pos,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>),
-                ite.wthr,ite.thr,
+                ite,
                 o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),old);
     }
 #else
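The `ite.wthr.x != 0` guard in the hunk above matters because launching a kernel with a zero-sized grid fails with cudaErrorInvalidConfiguration; when a processor has no local ghost particles, the launch is simply skipped. A minimal sketch of the same guard, reusing the hypothetical gpu_iterator stand-in from the first example:

#include <cuda_runtime.h>

struct gpu_iterator { dim3 wthr; dim3 thr; };

__global__ void touch(int * v) { v[blockIdx.x * blockDim.x + threadIdx.x] = 1; }

int main()
{
    int n = 0;                                    // nothing local to process
    gpu_iterator ite{dim3((n + 63) / 64),dim3(64)};

    if (ite.wthr.x != 0)                          // guard: skip empty launches
    {
        int * v;
        cudaMalloc(&v,n*sizeof(int));
        touch<<<ite.wthr,ite.thr>>>(v);
        cudaFree(v);
    }
    cudaDeviceSynchronize();
    return 0;
}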
@@ -261,7 +261,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
     // label particle processor
     CUDA_LAUNCH((num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),g_m);
     starts.resize(o_part_loc.size());
@@ -284,7 +284,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
             decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
             decltype(starts.toKernel()),decltype(shifts.toKernel()),
             decltype(o_part_loc.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             box_f_dev.toKernel(),box_f_sv.toKernel(),
             v_pos.toKernel(),v_prp.toKernel(),
             starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old,g_m);
......
@@ -356,7 +356,7 @@ void remove_marked(vector_type & vd)
     auto ite = idx.getGPUIterator();
-    CUDA_LAUNCH(create_index,ite.wthr,ite.thr,idx.toKernel());
+    CUDA_LAUNCH(create_index,ite,idx.toKernel());
     // sort particles, so the particles to remove stay at the end
     mergesort((remove_type *)vd.getPropVector().template getDeviceBuffer<prp>(),(unsigned int *)idx.template getDeviceBuffer<0>(), idx.size(), mgpu::template less_t<remove_type>(), vd.getVC().getmgpuContext());
@@ -369,7 +369,7 @@ void remove_marked(vector_type & vd)
     mem.fill(0);
     // mark points: particles that stay and particles to remove
-    CUDA_LAUNCH((find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())>),ite.wthr,ite.thr,
+    CUDA_LAUNCH((find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())>),ite,
             vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local());
     mem.deviceToHost();
@@ -404,7 +404,7 @@ void remove_marked(vector_type & vd)
     ite = vd_pos_old.getGPUIterator();
-    CUDA_LAUNCH((copy_new_to_old<vector_type::dims>),ite.wthr,ite.thr,vd_pos_new.toKernel(),vd_prp_new.toKernel(),vd_pos_old.toKernel(),vd_prp_old.toKernel(),idx.toKernel());
+    CUDA_LAUNCH((copy_new_to_old<vector_type::dims>),ite,vd_pos_new.toKernel(),vd_prp_new.toKernel(),vd_pos_old.toKernel(),vd_prp_old.toKernel(),idx.toKernel());
     // and we swap
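Taken together, the three remove_marked hunks implement remove-by-sorting: create_index builds an identity index, mergesort orders it by the remove flag so marked particles sink to the end, find_buffer_offsets_no_prc locates the keep/remove boundary, and copy_new_to_old moves the survivors into fresh buffers that are then swapped in. A self-contained Thrust rendering of the same strategy, with mgpu's mergesort replaced by stable_sort_by_key and all names illustrative:

#include <thrust/device_vector.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <thrust/count.h>
#include <thrust/gather.h>
#include <vector>

int main()
{
    std::vector<int> h_flag = {0,1,0,0,1,0};      // 1 = marked for removal
    std::vector<float> h_pos = {10,11,12,13,14,15};

    thrust::device_vector<int> flag(h_flag.begin(),h_flag.end());
    thrust::device_vector<float> pos(h_pos.begin(),h_pos.end());

    // create_index: idx = 0,1,2,...
    thrust::device_vector<int> idx(flag.size());
    thrust::sequence(idx.begin(),idx.end());

    // sort the index by the flag; a stable sort keeps the survivors' order
    thrust::device_vector<int> key = flag;
    thrust::stable_sort_by_key(key.begin(),key.end(),idx.begin());

    // boundary between particles to keep and particles to remove
    int keep = thrust::count(flag.begin(),flag.end(),0);

    // gather the survivors into a fresh buffer, then swap it into place
    thrust::device_vector<float> pos_new(keep);
    thrust::gather(idx.begin(),idx.begin() + keep,pos.begin(),pos_new.begin());
    pos.swap(pos_new);
    return 0;
}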
......
@@ -1943,7 +1943,7 @@ public:
         auto ite = v_pos.getGPUIteratorTo(g_m,n_thr);
         CUDA_LAUNCH((merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel()),prp...>),
-                ite.wthr,ite.thr,
+                ite,
                 v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortToSort().toKernel());
 #endif
@@ -2026,7 +2026,7 @@ public:
         auto ite = v_pos.getGPUIteratorTo(g_m,n_thr);
         CUDA_LAUNCH((merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortedToSorted().toKernel()),prp...>),
-                ite.wthr,ite.thr,
+                ite,
                 v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortedToSorted().toKernel());
 #endif
......
@@ -597,7 +597,7 @@ class vector_dist_comm
         auto ite = g_pos_send.get(i).getGPUIterator();
         CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
-                ite.wthr,ite.thr,
+                ite,
                 g_opart_device.toKernel(), g_pos_send.get(i).toKernel(),
                 v_pos.toKernel(),shifts.toKernel(),offset);
@@ -824,7 +824,7 @@ class vector_dist_comm
         auto ite = g_send_prp.get(i).getGPUIterator();
         CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
-                ite.wthr,ite.thr,
+                ite,
                 g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
                 v_prp.toKernel(),offset);
@@ -950,7 +950,7 @@ class vector_dist_comm
         // fill v_pos_tmp and v_prp_tmp with the local particles
         CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
                 decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
-                ite.wthr,ite.thr,
+                ite,
                 m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
                 v_pos.toKernel(),v_prp.toKernel(),offset);
     }
@@ -968,7 +968,7 @@ class vector_dist_comm
         CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
                 decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
-                ite.wthr,ite.thr,
+                ite,
                 m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
                 v_pos.toKernel(),v_prp.toKernel(),offset);
@@ -1098,14 +1098,14 @@ class vector_dist_comm
         for (size_t i = 0 ; i < dim ; i++) {bc.bc[i] = dec.periodicity(i);}
-        CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),ite.wthr,ite.thr,dec.getDomain(),bc,v_pos.toKernel());
+        CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),ite,dec.getDomain(),bc,v_pos.toKernel());
         return;
     }
     // label particle processor
     CUDA_LAUNCH((process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>),
-            ite.wthr,ite.thr,
+            ite,
             dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),v_cl.rank());
@@ -1144,7 +1144,7 @@ class vector_dist_comm
     ite = lbl_p.getGPUIterator();
     // we order lbl_p
-    CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
+    CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite,lbl_p.toKernel(),starts.toKernel());
 #endif
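This last block of hunks is the map/labelling path: apply_bc_each_part wraps positions back into the periodic domain, process_id_proc_each_part labels each particle with the rank that owns it while counting per-rank sizes, and after a scan reorder_lbl packs the labels into send order. As a small illustration, here is a self-contained periodic-wrap kernel in the spirit of apply_bc_each_part; it is 1D, assumes at most one box length of overshoot, and is not OpenFPM's kernel.

#include <cuda_runtime.h>

__global__ void apply_bc_1d(float * x, int n, float L)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}
    // wrap back into [0,L); assumes a particle moved at most one box length
    if (x[i] >= L)       {x[i] -= L;}
    else if (x[i] < 0.f) {x[i] += L;}
}

int main()
{
    int n = 3;
    float h_x[3] = {-0.25f,0.5f,1.25f};
    float * x;
    cudaMalloc(&x,n*sizeof(float));
    cudaMemcpy(x,h_x,n*sizeof(float),cudaMemcpyHostToDevice);

    apply_bc_1d<<<1,64>>>(x,n,1.0f);              // domain length L = 1

    cudaMemcpy(h_x,x,n*sizeof(float),cudaMemcpyDeviceToHost);
    // h_x is now {0.75, 0.5, 0.25}
    cudaFree(x);
    return 0;
}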
......