diff --git a/src/NN/CellList/CellList_util.hpp b/src/NN/CellList/CellList_util.hpp index d16d6105a4206b3f5c079a87d58c66a16ed5c024..62dfa94c45cfb2f41a1bd97031888f4d37ea5aee 100644 --- a/src/NN/CellList/CellList_util.hpp +++ b/src/NN/CellList/CellList_util.hpp @@ -49,8 +49,8 @@ struct populate_cell_list_no_sym_impl template class layout_base , typename CellList> static void populate(openfpm::vector,Memory,typename layout_base>::type,layout_base > & pos, openfpm::vector,Memory,typename layout_base>::type,layout_base > & v_pos_out, - openfpm::vector::type,layout_base > & v_prp_out, openfpm::vector::type,layout_base > & v_prp, + openfpm::vector::type,layout_base > & v_prp_out, CellList & cli, size_t g_m) { @@ -69,8 +69,8 @@ struct populate_cell_list_no_sym_impl template class layout_base , typename CellList> static void populate(openfpm::vector,Memory,typename layout_base>::type,layout_base > & pos, openfpm::vector,Memory,typename layout_base>::type,layout_base > & v_pos_out, - openfpm::vector::type,layout_base > & v_prp_out, openfpm::vector::type,layout_base > & v_prp, + openfpm::vector::type,layout_base > & v_prp_out, CellList & cli, size_t g_m) { @@ -130,12 +130,12 @@ struct populate_cell_list_sym_impl template class layout_base , typename CellList> void populate_cell_list_no_sym(openfpm::vector,Memory,typename layout_base>::type,layout_base > & pos, openfpm::vector,Memory,typename layout_base>::type,layout_base > & v_pos_out, - openfpm::vector::type,layout_base > & v_prp_out, openfpm::vector::type,layout_base > & v_prp, + openfpm::vector::type,layout_base > & v_prp_out, CellList & cli, size_t g_m) { - populate_cell_list_no_sym_impl::value>::populate(pos,v_pos_out,v_prp_out,v_prp,cli,g_m); + populate_cell_list_no_sym_impl::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,g_m); } /*! 
\brief populate the Cell-list with particles symmetric case @@ -172,14 +172,14 @@ void populate_cell_list_sym(openfpm::vector,Memory,typename layout_ template class layout_base, typename CellList> void populate_cell_list(openfpm::vector,Memory,typename layout_base>::type,layout_base> & pos, openfpm::vector,Memory,typename layout_base>::type,layout_base > & v_pos_out, - openfpm::vector::type,layout_base > & v_prp_out, openfpm::vector::type,layout_base > & v_prp, + openfpm::vector::type,layout_base > & v_prp_out, CellList & cli, size_t g_m, size_t opt) { if (opt == CL_NON_SYMMETRIC) - {populate_cell_list_no_sym(pos,v_pos_out,v_prp_out,v_prp,cli,g_m);} + {populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,g_m);} else {populate_cell_list_sym(pos,cli,g_m);} } diff --git a/src/NN/CellList/cuda/CellList_gpu.hpp b/src/NN/CellList/cuda/CellList_gpu.hpp index 18699470af450a9ec2baa7f596ee3ebb82cdf2e8..e1047560fa96b7f7b57dbc5b79c1fe0b87f2df22 100644 --- a/src/NN/CellList/cuda/CellList_gpu.hpp +++ b/src/NN/CellList/cuda/CellList_gpu.hpp @@ -81,9 +81,20 @@ public: */ CellList_gpu(const CellList_gpu & clg) { + cl_n = clg.cl_n; + cells = clg.cells; + starts = clg.starts; + part_ids = clg.part_ids; + sorted_to_not_sorted = clg.sorted_to_not_sorted; + + spacing_c = clg.spacing_c; + div_c = clg.div_c; + off = clg.off; + g_m = clg.g_m; + n_dec = clg.n_dec; } - /*! \brief Copy constructor + /*! 
\brief Copy constructor from temporary * * * @@ -99,6 +110,8 @@ public: spacing_c = clg.spacing_c; div_c = clg.div_c; off = clg.off; + g_m = clg.g_m; + n_dec = clg.n_dec; } CellList_gpu(const Box & box, const size_t (&div)[dim], const size_t pad = 1) @@ -210,6 +223,7 @@ public: sorted_to_not_sorted.toKernel(), static_cast(cells.template getDeviceBuffer<0>())); + #else std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl; diff --git a/src/util/copy_compare/copy_fusion_vector.hpp b/src/util/copy_compare/copy_fusion_vector.hpp index ccf34a6749f37638a47dd19963f38d0e8f273859..05c5ca4d396903fe9943ce7391eeb666145db9d9 100644 --- a/src/util/copy_compare/copy_fusion_vector.hpp +++ b/src/util/copy_compare/copy_fusion_vector.hpp @@ -30,7 +30,7 @@ struct copy_fusion_vector * \param dst destination fusion vector * */ - __device__ inline copy_fusion_vector(const bfv & src, bfv & dst) + __device__ __host__ inline copy_fusion_vector(const bfv & src, bfv & dst) :src(src),dst(dst){}; #ifdef SE_CLASS1 @@ -47,7 +47,7 @@ struct copy_fusion_vector //! 
It call the copy function for each property template - __device__ inline void operator()(T& t) + __device__ __host__ inline void operator()(T& t) { // This is the type of the object we have to copy typedef typename boost::fusion::result_of::at_c::type copy_type; diff --git a/src/util/cuda/moderngpu/intrinsics.hxx b/src/util/cuda/moderngpu/intrinsics.hxx index d9fd30628899f852dc37d0ef0d754dd5b6372355..c0aa101916b075137713abc0fc91528e6cb95810 100644 --- a/src/util/cuda/moderngpu/intrinsics.hxx +++ b/src/util/cuda/moderngpu/intrinsics.hxx @@ -241,8 +241,8 @@ MGPU_DEVICE inline c_type shfl_##dir##_op(c_type x, int offset, \ ".reg .u32 hi;" \ ".reg .pred p;" \ "mov.b64 {lo, hi}, %1;" \ - "shfl."#dir".b32 lo|p, lo, %2, %3;" \ - "shfl."#dir".b32 hi , hi, %2, %3;" \ + "shfl.sync."#dir".b32 lo|p, lo, %2, %3,0xFFFFFFFF;" \ + "shfl.sync."#dir".b32 hi , hi, %2, %3,0xFFFFFFFF;" \ "mov.b64 r0, {lo, hi};" \ "@p "#ptx_op"."#ptx_type" r0, r0, %4;" \ "mov."#ptx_type" %0, r0; }" \