Commit c52f28ba authored by incardon

Fixing compilation without CUDA

parent b01dde18
......@@ -16,8 +16,8 @@ else
endif
ifeq ($(PROFILE),ON)
CUDA_CC=scorep --nocompiler --cuda --mpp=mpi nvcc
CUDA_CC_LINK=scorep --nocompiler --cuda --mpp=mpi nvcc
CUDA_CC=scorep --nocompiler --cuda --mpp=mpi nvcc -ccbin=mpic++
CUDA_CC_LINK=scorep --nocompiler --cuda --mpp=mpi nvcc -ccbin=mpic++
else
CUDA_CC:=$(CUDA_CC)
CUDA_CC_LINK:=$(CUDA_CC_LINK)
......
......@@ -44,6 +44,8 @@
#define OPENMPI
//#define SE_CLASS1
#define USE_LOW_REGISTER_ITERATOR
#include "Vector/vector_dist.hpp"
#include <math.h>
#include "Draw/DrawParticles.hpp"
......@@ -97,7 +99,7 @@ const real_number MassBound = 0.0000767656;
#ifdef TEST_RUN
const real_number t_end = 0.001;
#else
const real_number t_end = 0.10;
const real_number t_end = 1.5;
#endif
// Gravity acceleration
......@@ -341,8 +343,8 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
// Get the position xp of the particle
Point<3,real_number> xb = vd.getPos(b);
// if (p == q) skip this particle
if (a == b) {++Np; continue;};
// if (p == q) skip this particle; this condition should instead be handled by the r^2 == 0 case (see the sketch after this hunk)
//if (a == b) {++Np; continue;};
unsigned int typeb = vd.getProp<type>(b);
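A minimal sketch (not part of this commit) of how the self-interaction can instead be excluded through the squared distance, as the comment above suggests; xa, H and Np are assumed to be the enclosing kernel's variables:

    Point<3,real_number> dr = xa - xb;
    real_number r2 = norm2(dr);
    // skip the particle itself (r2 ~ 0) and anything outside the support radius 2*H
    if (r2 < 1e-16 || r2 >= 4.0*H*H)
    {++Np; continue;}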
......@@ -393,7 +395,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
{
auto part = vd.getDomainIteratorGPU(64);
auto part = vd.getDomainIteratorGPU(96);
// Update the cell-list
vd.updateCellList(NN);
......@@ -856,7 +858,8 @@ int main(int argc, char* argv[])
vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
auto NN = vd.getCellListGPU(2*H / 2.0);
auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0);
NN.setBoxNN(2);
timer tot_sim;
tot_sim.start();
......@@ -892,9 +895,9 @@ int main(int argc, char* argv[])
vd.map(RUN_ON_DEVICE);
// make sort
vd.make_sort(NN);
// it sorts the vector (does not seem to produce a noticeable advantage)
// note: the force calculation already runs on the sorted layout (see the sketch after this hunk)
vd.make_sort(NN);
// Calculate pressure from the density
EqState(vd);
......
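For orientation, a hedged sketch of the per-step sequence implied by the hunks above; the names are taken from the visible code, but the exact ordering outside these hunks is assumed:

    vd.map(RUN_ON_DEVICE);                                    // redistribute particles across processors
    vd.make_sort(NN);                                         // optional cell-order sort (see note above)
    EqState(vd);                                              // pressure from density
    vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);  // refresh the ghost layer
    calc_forces(vd,NN,max_visc,cnt);                          // force calculation on the sorted layout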
#define CHECKFOR_POSNAN
#define CHECKFOR_PROPNAN
//#define CHECKFOR_POSNAN
//#define CHECKFOR_PROPNAN
/*!
* \page Vector_8_DEM Vector 8 Discrete element method
......@@ -530,7 +530,7 @@ int main(int argc, char* argv[])
u_ij.get(1) = parts.getProp<cpd>(p)[cidx + 1];
u_ij.get(2) = parts.getProp<cpd>(p)[cidx + 2];
Point<3,double> F_nij = sqrt(delta_ij/2/R) * (k_n*delta_ij*n_ij - gamma_t*m_eff*v_nij);
Point<3,double> F_nij = sqrt(delta_ij/2/R) * (k_n*delta_ij*n_ij - gamma_n*m_eff*v_nij);
dF_n = dF_n + F_nij;
Point<3,double> F_tij = sqrt(delta_ij/2/R) * (-k_t*u_ij - gamma_t*m_eff*v_tij);
......
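For reference, the corrected contact forces written out (reconstructed from the two code lines above, not text from the commit): the normal force is now damped with $\gamma_n$, the tangential one with $\gamma_t$,

$F_{n,ij} = \sqrt{\delta_{ij}/(2R)}\,\bigl(k_n\,\delta_{ij}\,\mathbf{n}_{ij} - \gamma_n\, m_{\mathrm{eff}}\,\mathbf{v}_{n,ij}\bigr)$

$F_{t,ij} = \sqrt{\delta_{ij}/(2R)}\,\bigl(-k_t\,\mathbf{u}_{ij} - \gamma_t\, m_{\mathrm{eff}}\,\mathbf{v}_{t,ij}\bigr)$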
openfpm_data @ 813ae72b
Subproject commit 6cf5ca579440954750ebedf9ed71391bbffb9595
Subproject commit 813ae72b7e42b996dfeb47dffdc887194a2f2176
......@@ -4,12 +4,36 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
########################### Executables
if(CUDA_FOUND)
set(CUDA_SOURCES Vector/cuda/vector_dist_gpu_MP_tests.cu Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu ../openfpm_devices/src/memory/CudaMemory.cu)
set(CUDA_SOURCES Vector/cuda/vector_dist_gpu_MP_tests.cu
Vector/cuda/vector_dist_cuda_func_test.cu
Decomposition/cuda/decomposition_cuda_tests.cu
Vector/cuda/vector_dist_gpu_unit_tests.cu
../openfpm_devices/src/memory/CudaMemory.cu
Decomposition/cuda/Domain_icells_cart_unit_test.cu)
else()
set(CUDA_SOURCES)
endif()
add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp Debug/debug_test.cpp Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp)
add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp
Debug/debug_test.cpp
Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp
Grid/tests/grid_dist_id_unit_test.cpp
Grid/tests/staggered_grid_dist_unit_test.cpp
Vector/tests/vector_dist_cell_list_tests.cpp
Vector/tests/vector_dist_complex_prp_unit_test.cpp
Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp
Vector/tests/vector_dist_MP_unit_tests.cpp
Vector/tests/vector_dist_NN_tests.cpp
Vector/tests/vector_dist_unit_test.cpp
pdata_performance.cpp
Decomposition/tests/CartDecomposition_unit_test.cpp
Decomposition/tests/shift_vect_converter_tests.cpp
Vector/performance/vector_dist_performance_util.cpp
lib/pdata.cpp test_multiple_o.cpp
../openfpm_devices/src/memory/HeapMemory.cpp
../openfpm_devices/src/memory/PtrMemory.cpp
../openfpm_vcluster/src/VCluster/VCluster.cpp
../openfpm_devices/src/Memleak_check.cpp)
if ( CMAKE_COMPILER_IS_GNUCC )
target_compile_options(pdata PRIVATE "-Wno-deprecated-declarations")
......
......@@ -68,6 +68,20 @@ __global__ void merge_sort_part(vector_pos_type vd_pos, vector_prp_type vd_prp,
vd_prp.template set<prp ...>(p,vd_prp_ord,nss.template get<0>(p));
}
template<typename vector_pos_type, typename vector_prp_type, typename stns_type, unsigned int ... prp>
__global__ void merge_sort_all(vector_pos_type vd_pos, vector_prp_type vd_prp,
vector_pos_type v_pos_ord, vector_prp_type vd_prp_ord,
stns_type nss)
{
int p = threadIdx.x + blockIdx.x * blockDim.x;
if (p >= vd_pos.size()) return;
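// gather: copy into slot p of the destination vectors (vd_pos, vd_prp) the element
// stored at index nss(p) of the *_ord source vectors; unlike merge_sort_part above,
// all properties are copied, not only the prp... subset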
vd_pos.template set<0>(p,v_pos_ord,nss.template get<0>(p));
vd_prp.set(p,vd_prp_ord,nss.template get<0>(p));
}
template<unsigned int dim, typename St, typename cartdec_gpu, typename particles_type, typename vector_out, typename prc_sz_type>
__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output, prc_sz_type prc_sz , int rank)
{
......
......@@ -176,9 +176,9 @@ bool check_force(CellList_type & NN_cpu, vector_type & vd)
std::cout << "ERROR: " << vd.template getProp<1>(p)[1] << " " << vd.template getProp<2>(p)[1] << std::endl;
std::cout << "ERROR: " << vd.template getProp<1>(p)[2] << " " << vd.template getProp<2>(p)[2] << std::endl;
std::cout << "ERROR2: " << vd.template getProp<1>(p)[0] << " " << force.get(0) << std::endl;
std::cout << "ERROR2: " << vd.template getProp<1>(p)[1] << " " << force.get(1) << std::endl;
std::cout << "ERROR2: " << vd.template getProp<1>(p)[2] << " " << force.get(2) << std::endl;
std::cout << p.getKey() << " ERROR2: " << vd.template getProp<1>(p)[0] << " " << force.get(0) << std::endl;
std::cout << p.getKey() << " ERROR2: " << vd.template getProp<1>(p)[1] << " " << force.get(1) << std::endl;
std::cout << p.getKey() << " ERROR2: " << vd.template getProp<1>(p)[2] << " " << force.get(2) << std::endl;
break;
......@@ -420,12 +420,109 @@ void vector_dist_gpu_test_impl()
auto NN_cpu = vd.getCellList(0.1);
check_cell_list_cpu_and_gpu(vd,NN,NN_cpu);
auto NN_up = vd.getCellListGPU(0.1);
auto NN_up = vd.template getCellListGPU<CellList_type>(0.1);
NN_up.clear();
vd.updateCellList(NN_up);
check_cell_list_cpu_and_gpu(vd,NN_up,NN_cpu);
}
template<typename CellList_type>
void vector_dist_gpu_make_sort_test_impl()
{
auto & v_cl = create_vcluster();
if (v_cl.size() > 16)
{return;}
Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
Ghost<3,float> g(0.1);
// Boundary conditions
size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
vector_dist_gpu<3,float,aggregate<float,float[3],float[3]>> vd(10000,domain,bc,g);
srand(55067*create_vcluster().rank());
auto it = vd.getDomainIterator();
while (it.isNext())
{
auto p = it.get();
int x = rand();
int y = rand();
int z = rand();
vd.getPos(p)[0] = (float)x / RAND_MAX;
vd.getPos(p)[1] = (float)y / RAND_MAX;
vd.getPos(p)[2] = (float)z / RAND_MAX;
++it;
}
vd.hostToDevicePos();
// Ok we redistribute the particles
vd.map(RUN_ON_DEVICE);
auto it3 = vd.getDomainIteratorGPU();
initialize_props<<<it3.wthr,it3.thr>>>(vd.toKernel());
// Here we do a make_sort
auto NN = vd.template getCellListGPU<CellList_type>(0.1);
vd.make_sort(NN);
openfpm::vector_gpu<aggregate<float,float[3],float[3]>> tmp_prp = vd.getPropVector();
openfpm::vector_gpu<Point<3,float>> tmp_pos = vd.getPosVector();
vd.deviceToHostPos();
tmp_pos.template deviceToHost<0>();
// here we do a ghost_get
vd.ghost_get<0>(RUN_ON_DEVICE);
// Here we do a make_sort_from
NN = vd.template getCellListGPU<CellList_type>(0.1);
vd.make_sort_from(NN);
// Check
tmp_pos.deviceToHost<0>();
vd.deviceToHostPos();
bool match = true;
for (size_t i = 0 ; i < vd.size_local() ; i++)
{
Point<3,float> p1 = vd.getPos(i);
Point<3,float> p2 = tmp_pos.template get<0>(i);
// They must be in the same cell
auto c1 = NN.getCell(p1);
auto c2 = NN.getCell(p2);
match &= c1 == c2;
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort_sparse)
{
vector_dist_gpu_make_sort_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
}
BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort)
{
vector_dist_gpu_make_sort_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
}
BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
{
vector_dist_gpu_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
......@@ -792,6 +889,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_reduce)
BOOST_REQUIRE_EQUAL(reds2,vd.size_local());
}
template<typename CellList_type>
void vector_dist_dlb_on_cuda_impl(size_t k,double r_cut)
{
typedef vector_dist_gpu<3,double,aggregate<double,double[3],double[3]>> vector_type;
......@@ -908,7 +1006,7 @@ void vector_dist_dlb_on_cuda_impl(size_t k,double r_cut)
vd.template deviceToHostProp<0,1,2>();
// Check calc forces
auto NN_gpu = vd.getCellListGPU(r_cut);
auto NN_gpu = vd.template getCellListGPU<CellList_type>(r_cut);
auto NN_cpu = vd.getCellList(r_cut);
check_cell_list_cpu_and_gpu(vd,NN_gpu,NN_cpu);
......@@ -958,7 +1056,12 @@ void vector_dist_dlb_on_cuda_impl(size_t k,double r_cut)
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda)
{
vector_dist_dlb_on_cuda_impl(50000,0.01);
vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>>(50000,0.01);
}
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sparse)
{
vector_dist_dlb_on_cuda_impl<CELLLIST_GPU_SPARSE<3,double>>(50000,0.01);
}
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
......@@ -966,7 +1069,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
if (create_vcluster().size() <= 3)
{return;};
vector_dist_dlb_on_cuda_impl(1000000,0.01);
vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>>(1000000,0.01);
}
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
......@@ -974,7 +1077,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
if (create_vcluster().size() < 8)
{return;}
vector_dist_dlb_on_cuda_impl(15000000,0.005);
vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>>(15000000,0.005);
}
......
......@@ -40,7 +40,9 @@
#define DEC_GRAN(gr) ((size_t)gr << 32)
#ifdef CUDA_GPU
template<unsigned int dim,typename St> using CELLLIST_GPU_SPARSE = CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>,unsigned int,int,true>;
#endif
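// Usage sketch (following the new unit tests in this commit): the alias selects the
// sparse GPU cell-list implementation explicitly; vd and r_cut are placeholders:
//   auto NN = vd.template getCellListGPU<CELLLIST_GPU_SPARSE<3,float>>(r_cut);
//   vd.make_sort(NN);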
#define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error");
......@@ -86,7 +88,7 @@ struct gcl_standard_no_symmetric_impl<true>
template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
{
return vd.getCellListGPU(r_cut);
return vd.template getCellListGPU<CellL>(r_cut);
}
};
......@@ -1042,7 +1044,8 @@ public:
* \return the Cell list
*
*/
template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St>,internal_position_vector_type > > CellL getCellListSym(St r_cut)
template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St>,internal_position_vector_type > >
CellL getCellListSym(St r_cut)
{
#ifdef SE_CLASS1
if (!(opt & BIND_DEC_TO_GHOST))
......@@ -1323,6 +1326,7 @@ public:
CellL cli_tmp = gcl<dim,St,CellL,self,GCL_NON_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost());
cell_list.swap(cli_tmp);
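// re-apply the neighbourhood box setting (setBoxNN) on the rebuilt cell-list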
cell_list.re_setBoxNN();
}
}
......@@ -2206,7 +2210,39 @@ public:
se3.template ghost_get_pre<prp...>(opt);
#endif
this->template ghost_get_<prp...>(v_pos,v_prp,g_m,opt);
this->template ghost_get_<GHOST_SYNC,prp...>(v_pos,v_prp,g_m,opt);
#ifdef SE_CLASS3
this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES);
se3.template ghost_get_post<prp...>(opt);
#endif
}
/*! \brief It starts an asynchronous synchronization of the properties and position of the ghost particles (asynchronous counterpart of ghost_get)
*
* \tparam prp list of properties to synchronize
*
* \param opt options WITH_POSITION, it also sends the positional information of the particles
*
*/
template<int ... prp> inline void Ighost_get(size_t opt = WITH_POSITION)
{
#ifdef SE_CLASS1
if (getDecomposition().getProcessorBounds().isValid() == false && size_local() != 0)
{
std::cerr << __FILE__ << ":" << __LINE__ << " Error the processor " << v_cl.getProcessUnitID() << " has particles, but is supposed to be unloaded" << std::endl;
ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
}
#endif
#ifdef SE_CLASS3
se3.template ghost_get_pre<prp...>(opt);
#endif
this->template ghost_get_<GHOST_ASYNC,prp...>(v_pos,v_prp,g_m,opt);
#ifdef SE_CLASS3
......@@ -2216,6 +2252,7 @@ public:
#endif
}
/*! \brief It synchronizes the properties and position of the ghost particles
*
* \tparam op which kind of operation to apply
......@@ -2814,7 +2851,8 @@ public:
* \param NN Cell-list to use to reorder
*
*/
void make_sort(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>> & NN)
template<typename CellList_type>
void make_sort(CellList_type & NN)
{
deleteGhost();
......@@ -2827,6 +2865,30 @@ public:
v_prp.swap(v_prp_out);
}
/*! \brief this function sorts the vector
*
* \note this function does not kill the ghost and does not invalidate the Cell-list
*
* \param NN Cell-list to use for the reordering
*
*/
template<typename CellList_type>
void make_sort_from(CellList_type & cl)
{
#if defined(__NVCC__)
auto ite = v_pos.getGPUIteratorTo(g_m);
CUDA_LAUNCH((merge_sort_all<decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel())>),
ite,
v_pos_out.toKernel(),v_prp_out.toKernel(),v_pos.toKernel(),v_prp.toKernel(),cl.getNonSortToSort().toKernel());
v_pos.swap(v_pos_out);
v_prp.swap(v_prp_out);
#endif
}
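// Hedged usage sketch of the two sorting entry points, following the sequence of
// the new unit test in this commit (r_cut is a placeholder):
//
//   auto NN = vd.template getCellListGPU<CELLLIST_GPU_SPARSE<3,float>>(r_cut);
//   vd.make_sort(NN);        // deletes the ghost and reorders particles cell by cell
//
//   vd.ghost_get<0>(RUN_ON_DEVICE);
//
//   NN = vd.template getCellListGPU<CELLLIST_GPU_SPARSE<3,float>>(r_cut);
//   vd.make_sort_from(NN);   // reorders in place; ghost and cell-list remain valid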
/*! \brief This function compares whether the host and device position buffers match up to some tolerance
*
* \tparam prp property to check
......
......@@ -29,6 +29,9 @@ constexpr int BIND_DEC_TO_GHOST = 1;
constexpr int RUN_ON_DEVICE = 1024;
constexpr int MAP_LOCAL = 2;
constexpr int GHOST_SYNC = 0;
constexpr int GHOST_ASYNC = 1;
/*! \brief compute the communication options from the ghost_get/put options
*
*
......@@ -52,6 +55,168 @@ inline static size_t compute_options(size_t opt)
return opt_;
}
/*! \brief template selector for asynchronous or synchronous communication
*
* \tparam impl implementation
* \tparam prp properties
*
*/
template<unsigned int impl, template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl
{
template<typename Vcluster_type, typename vector_prop_type,
typename vector_pos_type, typename send_vector,
typename prc_recv_get_type, typename prc_g_opart_type,
typename recv_sz_get_type, typename recv_sz_get_byte_type,
typename g_opart_sz_type>
static inline void sendrecv_prp(Vcluster_type & v_cl,
openfpm::vector<send_vector> & g_send_prp,
vector_prop_type & v_prp,
vector_pos_type & v_pos,
prc_g_opart_type & prc_g_opart,
prc_recv_get_type & prc_recv_get,
recv_sz_get_type & recv_sz_get,
recv_sz_get_byte_type & recv_sz_get_byte,
g_opart_sz_type & g_opart_sz,
size_t g_m,
size_t opt)
{
// if there are no properties, skip
// SSendRecvP sends everything when no properties are given
if (sizeof...(prp) != 0)
{
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
if (opt & RUN_ON_DEVICE)
{
op_ssend_gg_recv_merge_run_device opm(g_m);
v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
else
{
op_ssend_gg_recv_merge opm(g_m);
v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
}
else
{v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
// fill g_opart_sz
g_opart_sz.resize(prc_g_opart.size());
for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
g_opart_sz.get(i) = g_send_prp.get(i).size();
}
}
template<typename Vcluster_type, typename vector_prop_type,
typename vector_pos_type, typename send_pos_vector,
typename prc_recv_get_type, typename prc_g_opart_type,
typename recv_sz_get_type>
static inline void sendrecv_pos(Vcluster_type & v_cl,
openfpm::vector<send_pos_vector> & g_pos_send,
vector_prop_type & v_prp,
vector_pos_type & v_pos,
prc_recv_get_type & prc_recv_get,
recv_sz_get_type & recv_sz_get,
prc_g_opart_type & prc_g_opart,
size_t opt)
{
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
else
{
prc_recv_get.clear();
recv_sz_get.clear();
v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
}
};
template<template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl<GHOST_ASYNC,layout_base, prp ... >
{
template<typename Vcluster_type, typename vector_prop_type,
typename vector_pos_type, typename send_vector,
typename prc_recv_get_type, typename prc_g_opart_type,
typename recv_sz_get_type, typename recv_sz_get_byte_type,
typename g_opart_sz_type>
static inline void sendrecv_prp(Vcluster_type & v_cl,
openfpm::vector<send_vector> & g_send_prp,
vector_prop_type & v_prp,
vector_pos_type & v_pos,
prc_g_opart_type & prc_g_opart,
prc_recv_get_type & prc_recv_get,
recv_sz_get_type & recv_sz_get,
recv_sz_get_byte_type & recv_sz_get_byte,
g_opart_sz_type & g_opart_sz,
size_t g_m,
size_t opt)
{
// if there are no properties, skip
// SSendRecvP sends everything when no properties are given
if (sizeof...(prp) != 0)
{
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
if (opt & RUN_ON_DEVICE)
{
op_ssend_gg_recv_merge_run_device opm(g_m);
v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
else
{
op_ssend_gg_recv_merge opm(g_m);
v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
}
else
{v_cl.template SSendRecvPAsync<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
// fill g_opart_sz
g_opart_sz.resize(prc_g_opart.size());
for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
g_opart_sz.get(i) = g_send_prp.get(i).size();
}
}
template<typename Vcluster_type, typename vector_prop_type,
typename vector_pos_type, typename send_pos_vector,
typename prc_recv_get_type, typename prc_g_opart_type,
typename recv_sz_get_type>
static inline void sendrecv_pos(Vcluster_type & v_cl,
openfpm::vector<send_pos_vector> & g_pos_send,
vector_prop_type & v_prp,
vector_pos_type & v_pos,
prc_recv_get_type & prc_recv_get,
recv_sz_get_type & recv_sz_get,
prc_g_opart_type & prc_g_opart,
size_t opt)
{
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
else
{
prc_recv_get.clear();
recv_sz_get.clear();
v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
}
};
/*! \brief This class is a helper for the communication of vector_dist
*
* \tparam dim Dimensionality of the space where the elements lives
......@@ -1451,7 +1616,7 @@ public:
* \param g_m marker between real and ghost particles
*
*/
template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
template<unsigned int impl, int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
size_t & g_m,
size_t opt = WITH_POSITION)
......@@ -1490,31 +1655,9 @@ public:
// if there are no properties skip
// SSendRecvP send everything when we do not give properties
if (sizeof...(prp) != 0)
{
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
if (opt & RUN_ON_DEVICE)
{
op_ssend_gg_recv_merge_run_device opm(g_m);
v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
else
{
op_ssend_gg_recv_merge opm(g_m);
v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
}
}
else
{v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
// fill g_opart_sz
g_opart_sz.resize(prc_g_opart.size());
for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
g_opart_sz.get(i) = g_send_prp.get(i).size();
}
ghost_exchange_comm_impl<impl,layout_base,prp ...>::template
sendrecv_prp(v_cl,g_send_prp,v_prp,v_pos,prc_g_opart,
prc_recv_get,recv_sz_get,recv_sz_get_byte,g_opart_sz,g_m,opt);
}
if (!(opt & NO_POSITION))
......@@ -1528,17 +1671,9 @@ public:
cudaDeviceSynchronize();
#endif
size_t opt_ = compute_options(opt);
if (opt & SKIP_LABELLING)
{
v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
else
{
prc_recv_get.clear();
recv_sz_get.clear();
v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
}
ghost_exchange_comm_impl<impl,layout_base,prp ...>::template
sendrecv_pos(v_cl,g_pos_send,v_prp,v_pos,prc_recv_get,recv_sz_get,prc_g_opart,opt);
// fill g_opart_sz
g_opart_sz.resize(prc_g_opart.size());
......