argupta / openfpm_pdata · Commits

Commit b079452e, authored Jan 04, 2019 by incardon
Latest modules
Parent: 560035f5
Showing 5 changed files with 49 additions and 48 deletions (+49, -48):
openfpm_data  (+1, -1)
src/Vector/cuda/vector_dist_comm_util_funcs.cuh  (+19, -18)
src/Vector/cuda/vector_dist_cuda_funcs.cuh  (+4, -4)
src/Vector/vector_dist.hpp  (+6, -6)
src/Vector/vector_dist_comm.hpp  (+19, -19)
openfpm_data @ 8d576118

-Subproject commit 77fc4055f2dc0ddf5e759277fd6d60c57a3af791
+Subproject commit 8d5761180cd6f396c5e819df13b83ac62592754e
src/Vector/cuda/vector_dist_comm_util_funcs.cuh
@@ -87,9 +87,9 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
     {return;}

     // First we have to see how many entry each particle produce
-    num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>
-    <<<ite.wthr,ite.thr>>>
-    (dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());
+    CUDA_LAUNCH((num_proc_ghost_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(proc_id_out.toKernel())>),
+    ite.wthr,ite.thr,
+    dec.toKernel(),v_pos.toKernel(),proc_id_out.toKernel());

     // scan
     sc.scan_(proc_id_out,starts);
@@ -104,9 +104,9 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
     ite = v_pos.getGPUIterator();

     // we compute processor id for each particle
-    proc_label_id_ghost<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>
-    <<<ite.wthr,ite.thr>>>
-    (dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());
+    CUDA_LAUNCH((proc_label_id_ghost<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(starts.toKernel()),decltype(g_opart_device.toKernel())>),
+    ite.wthr,ite.thr,
+    dec.toKernel(),v_pos.toKernel(),starts.toKernel(),g_opart_device.toKernel());

     // sort particles
     mergesort((int *)g_opart_device.template getDeviceBuffer<0>(),(long unsigned int *)g_opart_device.template getDeviceBuffer<1>(),g_opart_device.size(),mgpu::template less_t<int>(),v_cl.getmgpuContext());
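The mergesort in the context above is moderngpu's key-value sort: the keys are the destination processor ids in buffer 0 of g_opart_device, the values the particle indices in buffer 1, so after the sort all ghost entries bound for the same processor are contiguous. A standalone sketch of the same call pattern (hypothetical buffers and helper name, assuming moderngpu 2.x as used here):

#include <moderngpu/kernel_mergesort.hxx>

// Hypothetical helper showing the key-value sort pattern used above:
// proc_ids is sorted ascending and the same permutation is applied to ids,
// grouping entries by destination processor.
void sort_by_processor(int * proc_ids, long unsigned int * ids, int n, mgpu::context_t & ctx)
{
    mgpu::mergesort(proc_ids, ids, n, mgpu::less_t<int>(), ctx);
}

In the diff the context argument is v_cl.getmgpuContext(), the moderngpu context owned by the communicator.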
@@ -118,8 +118,9 @@ struct labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,Decomposition,sca
     ite = g_opart_device.getGPUIterator();

     // Find the buffer bases
-    find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())><<<ite.wthr,ite.thr>>>
-    (g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());
+    CUDA_LAUNCH((find_buffer_offsets<0,decltype(g_opart_device.toKernel()),decltype(prc_offset.toKernel())>),
+    ite.wthr,ite.thr,
+    g_opart_device.toKernel(),(int *)mem.getDevicePointer(),prc_offset.toKernel());

     // Trasfer the number of offsets on CPU
     mem.deviceToHost();
@@ -207,9 +208,9 @@ struct local_ghost_from_opart_impl<with_pos,dim,St,prop,Memory,layout_base,true>
     if (ite.wthr.x != 0)
     {
-        process_ghost_particles_local<with_pos,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>
-        <<<ite.wthr,ite.thr>>>
-        (o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),old);
+        CUDA_LAUNCH((process_ghost_particles_local<with_pos,dim,decltype(o_part_loc.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(shifts.toKernel())>),
+        ite.wthr,ite.thr,
+        o_part_loc.toKernel(),v_pos.toKernel(),v_prp.toKernel(),shifts.toKernel(),old);
     }
 #else
     std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
@@ -260,9 +261,9 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
     auto ite = v_pos.getGPUIteratorTo(g_m);

     // label particle processor
-    num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
-    <<<ite.wthr,ite.thr>>>
-    (box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),g_m);
+    CUDA_LAUNCH((num_shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>),
+    ite.wthr,ite.thr,
+    box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),g_m);

     starts.resize(o_part_loc.size());
     mgpu::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(),o_part_loc.size(),(unsigned int *)starts.template getDeviceBuffer<0>(),v_cl.getmgpuContext());
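As the buffer names suggest, o_part_loc holds a per-particle count of ghost replicas, and mgpu::scan, an exclusive prefix sum by default in moderngpu, turns those counts into write offsets in starts: counts of [2, 0, 1, 3], for example, scan to [0, 2, 2, 3], and the last offset plus the last count gives the total used to resize o_part_loc in the next hunk.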
@@ -280,12 +281,12 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
     // resize o_part_loc
     o_part_loc.resize(total);

-    shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
+    CUDA_LAUNCH((shift_ghost_each_part<dim,St,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
     decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
     decltype(starts.toKernel()),decltype(shifts.toKernel()),
-    decltype(o_part_loc.toKernel())><<<ite.wthr,ite.thr>>>
-    (box_f_dev.toKernel(),box_f_sv.toKernel(),
+    decltype(o_part_loc.toKernel())>),
+    ite.wthr,ite.thr,
+    box_f_dev.toKernel(),box_f_sv.toKernel(),
     v_pos.toKernel(),v_prp.toKernel(),starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old,g_m);
src/Vector/cuda/vector_dist_cuda_funcs.cuh
@@ -353,7 +353,7 @@ void remove_marked(vector_type & vd)
     auto ite = idx.getGPUIterator();

-    create_index<<<ite.wthr,ite.thr>>>(idx.toKernel());
+    CUDA_LAUNCH(create_index,ite.wthr,ite.thr,idx.toKernel());

     // sort particles, so the particles to remove stay at the end
     mergesort((remove_type *)vd.getPropVector().template getDeviceBuffer<prp>(),(unsigned int *)idx.template getDeviceBuffer<0>(),idx.size(),mgpu::template less_t<remove_type>(),vd.getVC().getmgpuContext());
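Note that create_index, a plain (non-template) kernel, is passed to CUDA_LAUNCH without the extra parentheses used at every other call site in this commit. Those parentheses are comma protection: the preprocessor splits macro arguments on top-level commas, so a template-id such as kernel<A,B> must be wrapped to remain a single macro argument. A hypothetical illustration:

// Hypothetical kernel names, illustrating the comma-protection rule only:
CUDA_LAUNCH(plain_kernel, ite.wthr, ite.thr, buf.toKernel());             // fine: no comma in the name
CUDA_LAUNCH((tmpl_kernel<int,float>), ite.wthr, ite.thr, buf.toKernel()); // <int,float> would otherwise be
                                                                          // split into two macro arguments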
@@ -366,8 +366,8 @@ void remove_marked(vector_type & vd)
     mem.fill(0);

     // mark point, particle that stay and to remove
-    find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())><<<ite.wthr,ite.thr>>>
-    (vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local());
+    CUDA_LAUNCH((find_buffer_offsets_no_prc<prp,decltype(vd.getPropVector().toKernel()),decltype(mark.toKernel())>),
+    ite.wthr,ite.thr,vd.getPropVector().toKernel(),(int *)mem.getDevicePointer(),mark.toKernel(),vd.size_local());

     mem.deviceToHost();
@@ -401,7 +401,7 @@ void remove_marked(vector_type & vd)
     ite = vd_pos_old.getGPUIterator();

-    copy_new_to_old<vector_type::dims><<<ite.wthr,ite.thr>>>
-    (vd_pos_new.toKernel(),vd_prp_new.toKernel(),vd_pos_old.toKernel(),vd_prp_old.toKernel(),idx.toKernel());
+    CUDA_LAUNCH((copy_new_to_old<vector_type::dims>),ite.wthr,ite.thr,
+    vd_pos_new.toKernel(),vd_prp_new.toKernel(),vd_pos_old.toKernel(),vd_prp_old.toKernel(),idx.toKernel());

     // and we swap
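Taken together, the three hunks in this file trace the structure of remove_marked, as its comments indicate: build an index per particle (create_index), key-value sort the marker property together with the index so the particles to remove end up at the end, locate the boundary between kept and removed particles (find_buffer_offsets_no_prc, read back with mem.deviceToHost()), copy the survivors between the old and new buffers through the sorted index (copy_new_to_old), and swap the buffers in.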
src/Vector/vector_dist.hpp
@@ -1907,9 +1907,9 @@ public:
     auto ite = v_pos.getGPUIteratorTo(g_m,n_thr);

-    merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel()),prp...>
-    <<<ite.wthr,ite.thr>>>
-    (v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortToSort().toKernel());
+    CUDA_LAUNCH((merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel()),prp...>),
+    ite.wthr,ite.thr,
+    v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortToSort().toKernel());
 #endif
 }
@@ -1990,9 +1990,9 @@ public:
     auto ite = v_pos.getGPUIteratorTo(g_m,n_thr);

-    merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortedToSorted().toKernel()),prp...>
-    <<<ite.wthr,ite.thr>>>
-    (v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortedToSorted().toKernel());
+    CUDA_LAUNCH((merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortedToSorted().toKernel()),prp...>),
+    ite.wthr,ite.thr,
+    v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortedToSorted().toKernel());
 #endif
 }
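The two merge_sort_part call sites in this file differ only in the leading boolean template flag and in which permutation the cell list supplies (getNonSortToSort above, getNonSortedToSorted here); both copy positions and properties from the unsorted buffers (v_pos, v_prp) into the sorted ones (v_pos_out, v_prp_out). The kernel itself is defined elsewhere; a stripped-down, hypothetical sketch of such a map-driven reorder, assuming the map gives each particle its slot in the sorted ordering:

// Hypothetical sketch of a map-driven reorder in the spirit of merge_sort_part:
// each thread moves one element to the slot the permutation map assigns it.
template<typename T>
__global__ void reorder_by_map(const T * in, T * out, const unsigned int * map, int n)
{
    int p = blockIdx.x * blockDim.x + threadIdx.x;
    if (p >= n) {return;}

    out[map[p]] = in[p];
}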
src/Vector/vector_dist_comm.hpp
@@ -597,9 +597,9 @@ class vector_dist_comm
     {
         auto ite = g_pos_send.get(i).getGPUIterator();

-        process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>
-        <<<ite.wthr,ite.thr>>>
-        (g_opart_device.toKernel(),g_pos_send.get(i).toKernel(),
+        CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
+        ite.wthr,ite.thr,
+        g_opart_device.toKernel(),g_pos_send.get(i).toKernel(),
         v_pos.toKernel(),shifts.toKernel(),offset);

         offset += prc_sz.get(i);
@@ -824,9 +824,9 @@ class vector_dist_comm
     {
         auto ite = g_send_prp.get(i).getGPUIterator();

-        process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>
-        <<<ite.wthr,ite.thr>>>
-        (g_opart_device.toKernel(),g_send_prp.get(i).toKernel(),
+        CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
+        ite.wthr,ite.thr,
+        g_opart_device.toKernel(),g_send_prp.get(i).toKernel(),
         v_prp.toKernel(),offset);

         offset += prc_sz.get(i);
@@ -949,10 +949,10 @@ class vector_dist_comm
     if (ite.wthr.x != 0)
     {
         // fill v_pos_tmp and v_prp_tmp with local particles
-        process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
-        <<<ite.wthr,ite.thr>>>
-        (m_opart.toKernel(),v_pos_tmp.toKernel(),v_prp_tmp.toKernel(),
+        CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
+        ite.wthr,ite.thr,
+        m_opart.toKernel(),v_pos_tmp.toKernel(),v_prp_tmp.toKernel(),
         v_pos.toKernel(),v_prp.toKernel(),offset);
     }
@@ -967,10 +967,10 @@ class vector_dist_comm
     if (ite.wthr.x != 0)
     {
-        process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
-        <<<ite.wthr,ite.thr>>>
-        (m_opart.toKernel(),m_pos.get(i).toKernel(),m_prp.get(i).toKernel(),
+        CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
+        ite.wthr,ite.thr,
+        m_opart.toKernel(),m_pos.get(i).toKernel(),m_prp.get(i).toKernel(),
         v_pos.toKernel(),v_prp.toKernel(),offset);
     }
@@ -1099,15 +1099,15 @@ class vector_dist_comm
         for (size_t i = 0 ; i < dim ; i++)    {bc.bc[i] = dec.periodicity(i);}

-        apply_bc_each_part<dim,St,decltype(v_pos.toKernel())><<<ite.wthr,ite.thr>>>
-        (dec.getDomain(),bc,v_pos.toKernel());
+        CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),
+        ite.wthr,ite.thr,
+        dec.getDomain(),bc,v_pos.toKernel());

         return;
     }

     // label particle processor
-    process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>
-    <<<ite.wthr,ite.thr>>>
-    (dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),v_cl.rank());
+    CUDA_LAUNCH((process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>),
+    ite.wthr,ite.thr,
+    dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),v_cl.rank());

 #ifndef TEST1
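In the first part of this hunk, apply_bc_each_part re-applies the domain boundary conditions to every position before the particles are labeled with a destination processor; the bc array filled in the loop above it carries the per-dimension periodicity flags from the decomposition. The kernel is defined elsewhere, but for a periodic dimension of extent L = hi - lo, the standard per-coordinate wrap such a kernel would apply is x <- x - floor((x - lo) / L) * L.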
@@ -1145,7 +1145,7 @@ class vector_dist_comm
     ite = lbl_p.getGPUIterator();

     // we order lbl_p
-    reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())><<<ite.wthr,ite.thr>>>
-    (lbl_p.toKernel(),starts.toKernel());
+    CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),
+    ite.wthr,ite.thr,
+    lbl_p.toKernel(),starts.toKernel());
 #endif