Commit 42183947 authored by incardon

Fixing shfl warning (switch warp shuffles to the *_sync variants)

parent c46dc01e
...@@ -167,7 +167,7 @@ MGPU_DEVICE type_t shfl_down(type_t x, int offset, int width = warp_size) { ...@@ -167,7 +167,7 @@ MGPU_DEVICE type_t shfl_down(type_t x, int offset, int width = warp_size) {
u.t = x; u.t = x;
iterate<num_words>([&](int i) { iterate<num_words>([&](int i) {
u.x[i] = __shfl_down(u.x[i], offset, width); u.x[i] = __shfl_down_sync(0xFFFFFFFF,u.x[i], offset, width);
}); });
return u.t; return u.t;
} }
...@@ -176,7 +176,7 @@ template<typename type_t, typename op_t> ...@@ -176,7 +176,7 @@ template<typename type_t, typename op_t>
MGPU_DEVICE type_t shfl_up_op(type_t x, int offset, op_t op, MGPU_DEVICE type_t shfl_up_op(type_t x, int offset, op_t op,
int width = warp_size) { int width = warp_size) {
type_t y = shfl_up(x, offset, width); type_t y = shfl_up_sync(0xFFFFFFFF,x, offset, width);
int lane = (width - 1) & threadIdx.x; int lane = (width - 1) & threadIdx.x;
if(lane >= offset) x = op(x, y); if(lane >= offset) x = op(x, y);
return x; return x;
...@@ -186,7 +186,7 @@ template<typename type_t, typename op_t> ...@@ -186,7 +186,7 @@ template<typename type_t, typename op_t>
MGPU_DEVICE type_t shfl_down_op(type_t x, int offset, op_t op, MGPU_DEVICE type_t shfl_down_op(type_t x, int offset, op_t op,
int width = warp_size) { int width = warp_size) {
type_t y = shfl_down(x, offset, width); type_t y = shfl_down_sync(0xFFFFFFFF,x, offset, width);
int lane = (width - 1) & threadIdx.x; int lane = (width - 1) & threadIdx.x;
if(lane < width - offset) x = op(x, y); if(lane < width - offset) x = op(x, y);
return x; return x;
...@@ -200,7 +200,7 @@ MGPU_DEVICE inline c_type shfl_##dir##_op(c_type x, int offset, \ ...@@ -200,7 +200,7 @@ MGPU_DEVICE inline c_type shfl_##dir##_op(c_type x, int offset, \
asm( \ asm( \
"{.reg ."#ptx_type" r0;" \ "{.reg ."#ptx_type" r0;" \
".reg .pred p;" \ ".reg .pred p;" \
"shfl."#dir".b32 r0|p, %1, %2, %3;" \ "shfl.sync."#dir".b32 r0|p, %1, %2, %3,0xFFFFFFFF;" \
"@p "#ptx_op"."#ptx_type" r0, r0, %4;" \ "@p "#ptx_op"."#ptx_type" r0, r0, %4;" \
"mov."#ptx_type" %0, r0; }" \ "mov."#ptx_type" %0, r0; }" \
: "="#r(result) : #r(x), "r"(offset), "r"(mask), #r(x)); \ : "="#r(result) : #r(x), "r"(offset), "r"(mask), #r(x)); \
......
...@@ -41,16 +41,15 @@ struct object_si_d_e ...@@ -41,16 +41,15 @@ struct object_si_d_e
* \param dst destination object * \param dst destination object
* *
*/ */
object_si_d_e(const v_src & src, v_dst & dst) __device__ __host__ object_si_d_e(const v_src & src, v_dst & dst)
:src(src),dst(dst) :src(src),dst(dst)
{ {
}; };
//! It call the functor for each member //! It call the functor for each member
template<typename T> template<typename T>
void operator()(T& t) __device__ __host__ void operator()(T& t)
{ {
// typedef typename boost::mpl::at<typename v_dst::type,typename boost::mpl::int_<T::value>>::type dtype;
typedef decltype(src.template get<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>()) stype; typedef decltype(src.template get<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>()) stype;
// In case of layout switch use this // In case of layout switch use this
...@@ -167,13 +166,13 @@ struct object_si_d<v_src,v_dst,OBJ_NORMAL,prp...> ...@@ -167,13 +166,13 @@ struct object_si_d<v_src,v_dst,OBJ_NORMAL,prp...>
template<typename v_src, typename v_dst, int... prp> template<typename v_src, typename v_dst, int... prp>
struct object_si_d<v_src,v_dst,OBJ_ENCAP,prp...> struct object_si_d<v_src,v_dst,OBJ_ENCAP,prp...>
{ {
inline object_si_d(const v_src && vs, v_dst && vd) __device__ __host__ inline object_si_d(const v_src && vs, v_dst && vd)
{ {
object_si_d_e<v_src,v_dst,prp...> obj(vs,vd); object_si_d_e<v_src,v_dst,prp...> obj(vs,vd);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,v_dst::max_prop> >(obj); boost::mpl::for_each_ref< boost::mpl::range_c<int,0,v_dst::max_prop> >(obj);
} }
inline object_si_d(const v_src & vs, v_dst & vd) __device__ __host__ inline object_si_d(const v_src & vs, v_dst & vd)
{ {
object_si_d_e<v_src,v_dst,prp...> obj(vs,vd); object_si_d_e<v_src,v_dst,prp...> obj(vs,vd);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,v_dst::max_prop> >(obj); boost::mpl::for_each_ref< boost::mpl::range_c<int,0,v_dst::max_prop> >(obj);
...@@ -183,11 +182,11 @@ struct object_si_d<v_src,v_dst,OBJ_ENCAP,prp...> ...@@ -183,11 +182,11 @@ struct object_si_d<v_src,v_dst,OBJ_ENCAP,prp...>
template<typename v_src, typename v_dst> template<typename v_src, typename v_dst>
struct object_si_d<v_src,v_dst,OBJ_ENCAP> struct object_si_d<v_src,v_dst,OBJ_ENCAP>
{ {
inline object_si_d(const v_src && vs, v_dst && vd) __device__ __host__ inline object_si_d(const v_src && vs, v_dst && vd)
{ {
} }
inline object_si_d(const v_src & vs, v_dst & vd) __device__ __host__ inline object_si_d(const v_src & vs, v_dst & vd)
{ {
} }
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment