Commit cc76cc7c authored by incardon

Moving to mgpu scan

parent b079452e
......@@ -11,11 +11,10 @@
#define SKIP_LABELLING 512
#define KEEP_PROPERTIES 512
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition, typename scan_type, bool is_ok_cuda>
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition, bool is_ok_cuda>
struct labelParticlesGhost_impl
{
static void run(CudaMemory & mem,
scan_type & sc,
Decomposition & dec,
openfpm::vector<aggregate<unsigned int,unsigned long int>,
CudaMemory,
......@@ -44,11 +43,10 @@ struct labelParticlesGhost_impl
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition, typename scan_type>
struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,scan_type,true>
template<unsigned int dim, typename St, typename prop, typename Memory, template<typename> class layout_base, typename Decomposition>
struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,true>
{
static void run(CudaMemory & mem,
scan_type & sc,
Decomposition & dec,
openfpm::vector<aggregate<unsigned int,unsigned long int>,
CudaMemory,
......@@ -92,8 +90,9 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());
// scan
sc.scan_(proc_id_out,starts);
//sc.scan_(proc_id_out,starts);
starts.resize(proc_id_out.size());
mgpu::scan((unsigned int *)proc_id_out.template getDeviceBuffer<0>(), proc_id_out.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());
starts.template deviceToHost<0>(starts.size()-1,starts.size()-1);
size_t sz = starts.template get<0>(starts.size()-1);
......
......@@ -16,7 +16,6 @@
#include "util/cuda/moderngpu/kernel_scan.hxx"
#endif
#include "util/cuda/scan_cuda.cuh"
#include "Vector/util/vector_dist_funcs.hpp"
#include "cuda/vector_dist_comm_util_funcs.cuh"
......@@ -87,9 +86,6 @@ class vector_dist_comm
//! It maps the processor id to the communication request in the map procedure
openfpm::vector<size_t> p_map_req;
//! scan functionality required for gpu
scan<unsigned int,unsigned int> sc;
//! For each near processor, outgoing particle id
//! \warning opart is assumed to be an ordered list
//! first id particle id
......@@ -1240,8 +1236,8 @@ class vector_dist_comm
if (opt & RUN_ON_DEVICE)
{
labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,
Decomposition,scan<unsigned int,unsigned int>,std::is_same<Memory,CudaMemory>::value>
::run(mem,sc,dec,g_opart_device,proc_id_out,starts,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,g_m,opt);
Decomposition,std::is_same<Memory,CudaMemory>::value>
::run(mem,dec,g_opart_device,proc_id_out,starts,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,g_m,opt);
}
else
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment