Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
O
openfpm_data
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
openfpm
openfpm_data
Commits
42183947
Commit
42183947
authored
Sep 12, 2018
by
incardon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixing Warning shfl
parent
c46dc01e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
11 deletions
+10
-11
src/util/cuda/moderngpu/intrinsics.hxx
src/util/cuda/moderngpu/intrinsics.hxx
+4
-4
src/util/object_si_d.hpp
src/util/object_si_d.hpp
+6
-7
No files found.
src/util/cuda/moderngpu/intrinsics.hxx
View file @
42183947
...
...
@@ -167,7 +167,7 @@ MGPU_DEVICE type_t shfl_down(type_t x, int offset, int width = warp_size) {
u
.
t
=
x
;
iterate
<
num_words
>
([
&
](
int
i
)
{
u
.
x
[
i
]
=
__shfl_down
(
u
.
x
[
i
],
offset
,
width
);
u
.
x
[
i
]
=
__shfl_down
_sync
(
0xFFFFFFFF
,
u
.
x
[
i
],
offset
,
width
);
});
return
u
.
t
;
}
...
...
@@ -176,7 +176,7 @@ template<typename type_t, typename op_t>
MGPU_DEVICE
type_t
shfl_up_op
(
type_t
x
,
int
offset
,
op_t
op
,
int
width
=
warp_size
)
{
type_t
y
=
shfl_up
(
x
,
offset
,
width
);
type_t
y
=
shfl_up
_sync
(
0xFFFFFFFF
,
x
,
offset
,
width
);
int
lane
=
(
width
-
1
)
&
threadIdx
.
x
;
if
(
lane
>=
offset
)
x
=
op
(
x
,
y
);
return
x
;
...
...
@@ -186,7 +186,7 @@ template<typename type_t, typename op_t>
MGPU_DEVICE
type_t
shfl_down_op
(
type_t
x
,
int
offset
,
op_t
op
,
int
width
=
warp_size
)
{
type_t
y
=
shfl_down
(
x
,
offset
,
width
);
type_t
y
=
shfl_down
_sync
(
0xFFFFFFFF
,
x
,
offset
,
width
);
int
lane
=
(
width
-
1
)
&
threadIdx
.
x
;
if
(
lane
<
width
-
offset
)
x
=
op
(
x
,
y
);
return
x
;
...
...
@@ -200,7 +200,7 @@ MGPU_DEVICE inline c_type shfl_##dir##_op(c_type x, int offset, \
asm( \
"{.reg ."#ptx_type" r0;" \
".reg .pred p;" \
"shfl."#dir".b32 r0|p, %1, %2, %3;" \
"shfl.sync."#dir".b32 r0|p, %1, %2, %3,0xFFFFFFFF;" \
"@p "#ptx_op"."#ptx_type" r0, r0, %4;" \
"mov."#ptx_type" %0, r0; }" \
: "="#r(result) : #r(x), "r"(offset), "r"(mask), #r(x)); \
...
...
src/util/object_si_d.hpp
View file @
42183947
...
...
@@ -41,16 +41,15 @@ struct object_si_d_e
* \param dst destination object
*
*/
object_si_d_e
(
const
v_src
&
src
,
v_dst
&
dst
)
__device__
__host__
object_si_d_e
(
const
v_src
&
src
,
v_dst
&
dst
)
:
src
(
src
),
dst
(
dst
)
{
};
//! It calls the functor for each member
template
<
typename
T
>
void
operator
()(
T
&
t
)
__device__
__host__
void
operator
()(
T
&
t
)
{
// typedef typename boost::mpl::at<typename v_dst::type,typename boost::mpl::int_<T::value>>::type dtype;
typedef
decltype
(
src
.
template
get
<
boost
::
mpl
::
at
<
v_prp
,
boost
::
mpl
::
int_
<
T
::
value
>
>::
type
::
value
>
())
stype
;
// In case of layout switch use this
...
...
@@ -167,13 +166,13 @@ struct object_si_d<v_src,v_dst,OBJ_NORMAL,prp...>
template
<
typename
v_src
,
typename
v_dst
,
int
...
prp
>
struct
object_si_d
<
v_src
,
v_dst
,
OBJ_ENCAP
,
prp
...
>
{
inline
object_si_d
(
const
v_src
&&
vs
,
v_dst
&&
vd
)
__device__
__host__
inline
object_si_d
(
const
v_src
&&
vs
,
v_dst
&&
vd
)
{
object_si_d_e
<
v_src
,
v_dst
,
prp
...
>
obj
(
vs
,
vd
);
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
v_dst
::
max_prop
>
>
(
obj
);
}
inline
object_si_d
(
const
v_src
&
vs
,
v_dst
&
vd
)
__device__
__host__
inline
object_si_d
(
const
v_src
&
vs
,
v_dst
&
vd
)
{
object_si_d_e
<
v_src
,
v_dst
,
prp
...
>
obj
(
vs
,
vd
);
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
v_dst
::
max_prop
>
>
(
obj
);
...
...
@@ -183,11 +182,11 @@ struct object_si_d<v_src,v_dst,OBJ_ENCAP,prp...>
template
<
typename
v_src
,
typename
v_dst
>
struct
object_si_d
<
v_src
,
v_dst
,
OBJ_ENCAP
>
{
inline
object_si_d
(
const
v_src
&&
vs
,
v_dst
&&
vd
)
__device__
__host__
inline
object_si_d
(
const
v_src
&&
vs
,
v_dst
&&
vd
)
{
}
inline
object_si_d
(
const
v_src
&
vs
,
v_dst
&
vd
)
__device__
__host__
inline
object_si_d
(
const
v_src
&
vs
,
v_dst
&
vd
)
{
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment