Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
O
openfpm_pdata
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
1
Issues
1
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
openfpm
openfpm_pdata
Commits
c52f28ba
Commit
c52f28ba
authored
May 11, 2019
by
incardon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixing compilation without CUDA
parent
b01dde18
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
406 additions
and
65 deletions
+406
-65
example/Vector/7_SPH_dlb_gpu_opt/Makefile
example/Vector/7_SPH_dlb_gpu_opt/Makefile
+2
-2
example/Vector/7_SPH_dlb_gpu_opt/main.cu
example/Vector/7_SPH_dlb_gpu_opt/main.cu
+11
-8
example/Vector/8_DEM/main.cpp
example/Vector/8_DEM/main.cpp
+3
-3
openfpm_data
openfpm_data
+1
-1
src/CMakeLists.txt
src/CMakeLists.txt
+26
-2
src/Vector/cuda/vector_dist_cuda_funcs.cuh
src/Vector/cuda/vector_dist_cuda_funcs.cuh
+14
-0
src/Vector/cuda/vector_dist_gpu_unit_tests.cu
src/Vector/cuda/vector_dist_gpu_unit_tests.cu
+111
-8
src/Vector/vector_dist.hpp
src/Vector/vector_dist.hpp
+66
-4
src/Vector/vector_dist_comm.hpp
src/Vector/vector_dist_comm.hpp
+172
-37
No files found.
example/Vector/7_SPH_dlb_gpu_opt/Makefile
View file @
c52f28ba
...
...
@@ -16,8 +16,8 @@ else
endif
ifeq
($(PROFILE),ON)
CUDA_CC
=
scorep
--nocompiler
--cuda
--mpp
=
mpi nvcc
CUDA_CC_LINK
=
scorep
--nocompiler
--cuda
--mpp
=
mpi nvcc
CUDA_CC
=
scorep
--nocompiler
--cuda
--mpp
=
mpi nvcc
-ccbin
=
mpic++
CUDA_CC_LINK
=
scorep
--nocompiler
--cuda
--mpp
=
mpi nvcc
-ccbin
=
mpic++
else
CUDA_CC
:=
$(CUDA_CC)
CUDA_CC_LINK
:=
$(CUDA_CC_LINK)
...
...
example/Vector/7_SPH_dlb_gpu_opt/main.cu
View file @
c52f28ba
...
...
@@ -44,6 +44,8 @@
#define OPENMPI
//#define SE_CLASS1
#define USE_LOW_REGISTER_ITERATOR
#include "Vector/vector_dist.hpp"
#include <math.h>
#include "Draw/DrawParticles.hpp"
...
...
@@ -97,7 +99,7 @@ const real_number MassBound = 0.0000767656;
#ifdef TEST_RUN
const
real_number
t_end
=
0.001
;
#else
const
real_number
t_end
=
0.10
;
const
real_number
t_end
=
1.5
;
#endif
// Gravity acceleration
...
...
@@ -341,8 +343,8 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
// Get the position xp of the particle
Point
<
3
,
real_number
>
xb
=
vd
.
getPos
(
b
);
// if (p == q) skip this particle
if
(
a
==
b
)
{
++
Np
;
continue
;};
// if (p == q) skip this particle
this condition should be done in the r^2 = 0
//
if (a == b) {++Np; continue;};
unsigned
int
typeb
=
vd
.
getProp
<
type
>
(
b
);
...
...
@@ -393,7 +395,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
template
<
typename
CellList
>
inline
void
calc_forces
(
particles
&
vd
,
CellList
&
NN
,
real_number
&
max_visc
,
size_t
cnt
)
{
auto
part
=
vd
.
getDomainIteratorGPU
(
64
);
auto
part
=
vd
.
getDomainIteratorGPU
(
96
);
// Update the cell-list
vd
.
updateCellList
(
NN
);
...
...
@@ -856,7 +858,8 @@ int main(int argc, char* argv[])
vd
.
ghost_get
<
type
,
rho
,
Pressure
,
velocity
>
(
RUN_ON_DEVICE
);
auto
NN
=
vd
.
getCellListGPU
(
2
*
H
/
2.0
);
auto
NN
=
vd
.
getCellListGPU
/*<CELLLIST_GPU_SPARSE<3,float>>*/
(
2
*
H
/
2.0
);
NN
.
setBoxNN
(
2
);
timer
tot_sim
;
tot_sim
.
start
();
...
...
@@ -892,9 +895,9 @@ int main(int argc, char* argv[])
vd
.
map
(
RUN_ON_DEVICE
);
// make sort
vd
.
make_sort
(
NN
);
// it sort the vector (doesn not seem to produce some advantage)
// note force calculation is anyway sorted calculation
vd
.
make_sort
(
NN
);
// Calculate pressure from the density
EqState
(
vd
);
...
...
example/Vector/8_DEM/main.cpp
View file @
c52f28ba
#define CHECKFOR_POSNAN
#define CHECKFOR_PROPNAN
//
#define CHECKFOR_POSNAN
//
#define CHECKFOR_PROPNAN
/*!
* \page Vector_8_DEM Vector 8 Discrete element method
...
...
@@ -530,7 +530,7 @@ int main(int argc, char* argv[])
u_ij
.
get
(
1
)
=
parts
.
getProp
<
cpd
>
(
p
)[
cidx
+
1
];
u_ij
.
get
(
2
)
=
parts
.
getProp
<
cpd
>
(
p
)[
cidx
+
2
];
Point
<
3
,
double
>
F_nij
=
sqrt
(
delta_ij
/
2
/
R
)
*
(
k_n
*
delta_ij
*
n_ij
-
gamma_
t
*
m_eff
*
v_nij
);
Point
<
3
,
double
>
F_nij
=
sqrt
(
delta_ij
/
2
/
R
)
*
(
k_n
*
delta_ij
*
n_ij
-
gamma_
n
*
m_eff
*
v_nij
);
dF_n
=
dF_n
+
F_nij
;
Point
<
3
,
double
>
F_tij
=
sqrt
(
delta_ij
/
2
/
R
)
*
(
-
k_t
*
u_ij
-
gamma_t
*
m_eff
*
v_tij
);
...
...
openfpm_data
@
813ae72b
Subproject commit
6cf5ca579440954750ebedf9ed71391bbffb9595
Subproject commit
813ae72b7e42b996dfeb47dffdc887194a2f2176
src/CMakeLists.txt
View file @
c52f28ba
...
...
@@ -4,12 +4,36 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
########################### Executables
if
(
CUDA_FOUND
)
set
(
CUDA_SOURCES Vector/cuda/vector_dist_gpu_MP_tests.cu Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu ../openfpm_devices/src/memory/CudaMemory.cu
)
set
(
CUDA_SOURCES Vector/cuda/vector_dist_gpu_MP_tests.cu
Vector/cuda/vector_dist_cuda_func_test.cu
Decomposition/cuda/decomposition_cuda_tests.cu
Vector/cuda/vector_dist_gpu_unit_tests.cu
../openfpm_devices/src/memory/CudaMemory.cu
Decomposition/cuda/Domain_icells_cart_unit_test.cu
)
else
()
set
(
CUDA_SOURCES
)
endif
()
add_executable
(
pdata
${
OPENFPM_INIT_FILE
}
${
CUDA_SOURCES
}
main.cpp Debug/debug_test.cpp Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
)
add_executable
(
pdata
${
OPENFPM_INIT_FILE
}
${
CUDA_SOURCES
}
main.cpp
Debug/debug_test.cpp
Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp
Grid/tests/grid_dist_id_unit_test.cpp
Grid/tests/staggered_grid_dist_unit_test.cpp
Vector/tests/vector_dist_cell_list_tests.cpp
Vector/tests/vector_dist_complex_prp_unit_test.cpp
Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp
Vector/tests/vector_dist_MP_unit_tests.cpp
Vector/tests/vector_dist_NN_tests.cpp
Vector/tests/vector_dist_unit_test.cpp
pdata_performance.cpp
Decomposition/tests/CartDecomposition_unit_test.cpp
Decomposition/tests/shift_vect_converter_tests.cpp
Vector/performance/vector_dist_performance_util.cpp
lib/pdata.cpp test_multiple_o.cpp
../openfpm_devices/src/memory/HeapMemory.cpp
../openfpm_devices/src/memory/PtrMemory.cpp
../openfpm_vcluster/src/VCluster/VCluster.cpp
../openfpm_devices/src/Memleak_check.cpp
)
if
(
CMAKE_COMPILER_IS_GNUCC
)
target_compile_options
(
pdata PRIVATE
"-Wno-deprecated-declarations"
)
...
...
src/Vector/cuda/vector_dist_cuda_funcs.cuh
View file @
c52f28ba
...
...
@@ -68,6 +68,20 @@ __global__ void merge_sort_part(vector_pos_type vd_pos, vector_prp_type vd_prp,
vd_prp
.
template
set
<
prp
...>(
p
,
vd_prp_ord
,
nss
.
template
get
<
0
>(
p
));
}
template
<
typename
vector_pos_type
,
typename
vector_prp_type
,
typename
stns_type
,
unsigned
int
...
prp
>
__global__
void
merge_sort_all
(
vector_pos_type
vd_pos
,
vector_prp_type
vd_prp
,
vector_pos_type
v_pos_ord
,
vector_prp_type
vd_prp_ord
,
stns_type
nss
)
{
int
p
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
if
(
p
>=
vd_pos
.
size
())
return
;
vd_pos
.
template
set
<
0
>(
p
,
v_pos_ord
,
nss
.
template
get
<
0
>(
p
));
vd_prp
.
set
(
p
,
vd_prp_ord
,
nss
.
template
get
<
0
>(
p
));
}
template
<
unsigned
int
dim
,
typename
St
,
typename
cartdec_gpu
,
typename
particles_type
,
typename
vector_out
,
typename
prc_sz_type
>
__global__
void
process_id_proc_each_part
(
cartdec_gpu
cdg
,
particles_type
parts
,
vector_out
output
,
prc_sz_type
prc_sz
,
int
rank
)
{
...
...
src/Vector/cuda/vector_dist_gpu_unit_tests.cu
View file @
c52f28ba
...
...
@@ -176,9 +176,9 @@ bool check_force(CellList_type & NN_cpu, vector_type & vd)
std
::
cout
<<
"ERROR: "
<<
vd
.
template
getProp
<
1
>(
p
)[
1
]
<<
" "
<<
vd
.
template
getProp
<
2
>(
p
)[
1
]
<<
std
::
endl
;
std
::
cout
<<
"ERROR: "
<<
vd
.
template
getProp
<
1
>(
p
)[
2
]
<<
" "
<<
vd
.
template
getProp
<
2
>(
p
)[
2
]
<<
std
::
endl
;
std
::
cout
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
0
]
<<
" "
<<
force
.
get
(
0
)
<<
std
::
endl
;
std
::
cout
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
1
]
<<
" "
<<
force
.
get
(
1
)
<<
std
::
endl
;
std
::
cout
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
2
]
<<
" "
<<
force
.
get
(
2
)
<<
std
::
endl
;
std
::
cout
<<
p
.
getKey
()
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
0
]
<<
" "
<<
force
.
get
(
0
)
<<
std
::
endl
;
std
::
cout
<<
p
.
getKey
()
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
1
]
<<
" "
<<
force
.
get
(
1
)
<<
std
::
endl
;
std
::
cout
<<
p
.
getKey
()
<<
"
ERROR2: "
<<
vd
.
template
getProp
<
1
>(
p
)[
2
]
<<
" "
<<
force
.
get
(
2
)
<<
std
::
endl
;
break
;
...
...
@@ -420,12 +420,109 @@ void vector_dist_gpu_test_impl()
auto
NN_cpu
=
vd
.
getCellList
(
0.1
);
check_cell_list_cpu_and_gpu
(
vd
,
NN
,
NN_cpu
);
auto
NN_up
=
vd
.
getCellListGPU
(
0.1
);
auto
NN_up
=
vd
.
template
getCellListGPU
<
CellList_type
>(
0.1
);
NN_up
.
clear
();
vd
.
updateCellList
(
NN_up
);
check_cell_list_cpu_and_gpu
(
vd
,
NN_up
,
NN_cpu
);
}
template
<
typename
CellList_type
>
void
vector_dist_gpu_make_sort_test_impl
()
{
auto
&
v_cl
=
create_vcluster
();
if
(
v_cl
.
size
()
>
16
)
{
return
;}
Box
<
3
,
float
>
domain
({
0.0
,
0.0
,
0.0
},{
1.0
,
1.0
,
1.0
});
// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
Ghost
<
3
,
float
>
g
(
0.1
);
// Boundary conditions
size_t
bc
[
3
]
=
{
NON_PERIODIC
,
NON_PERIODIC
,
NON_PERIODIC
};
vector_dist_gpu
<
3
,
float
,
aggregate
<
float
,
float
[
3
],
float
[
3
]
>>
vd
(
10000
,
domain
,
bc
,
g
);
srand
(
55067
*
create_vcluster
().
rank
());
auto
it
=
vd
.
getDomainIterator
();
while
(
it
.
isNext
())
{
auto
p
=
it
.
get
();
int
x
=
rand
();
int
y
=
rand
();
int
z
=
rand
();
vd
.
getPos
(
p
)[
0
]
=
(
float
)
x
/
RAND_MAX
;
vd
.
getPos
(
p
)[
1
]
=
(
float
)
y
/
RAND_MAX
;
vd
.
getPos
(
p
)[
2
]
=
(
float
)
z
/
RAND_MAX
;
++
it
;
}
vd
.
hostToDevicePos
();
// Ok we redistribute the particles
vd
.
map
(
RUN_ON_DEVICE
);
auto
it3
=
vd
.
getDomainIteratorGPU
();
initialize_props
<<<
it3
.
wthr
,
it3
.
thr
>>>
(
vd
.
toKernel
());
// Here we get do a make sort
auto
NN
=
vd
.
template
getCellListGPU
<
CellList_type
>(
0.1
);
vd
.
make_sort
(
NN
);
openfpm
::
vector_gpu
<
aggregate
<
float
,
float
[
3
],
float
[
3
]
>>
tmp_prp
=
vd
.
getPropVector
();
openfpm
::
vector_gpu
<
Point
<
3
,
float
>>
tmp_pos
=
vd
.
getPosVector
();
vd
.
deviceToHostPos
();
tmp_pos
.
template
deviceToHost
<
0
>();
// here we do a ghost_get
vd
.
ghost_get
<
0
>
(
RUN_ON_DEVICE
);
// Here we get do a make sort
NN
=
vd
.
template
getCellListGPU
<
CellList_type
>(
0.1
);
vd
.
make_sort_from
(
NN
);
// Check
tmp_pos
.
deviceToHost
<
0
>
();
vd
.
deviceToHostPos
();
bool
match
=
true
;
for
(
size_t
i
=
0
;
i
<
vd
.
size_local
()
;
i
++
)
{
Point
<
3
,
float
>
p1
=
vd
.
getPos
(
i
)[
0
];
Point
<
3
,
float
>
p2
=
tmp_pos
.
template
get
<
0
>(
i
);
// They must be in the same cell
auto
c1
=
NN
.
getCell
(
p1
);
auto
c2
=
NN
.
getCell
(
p1
);
match
&=
c1
==
c2
;
}
BOOST_REQUIRE_EQUAL
(
match
,
true
);
}
BOOST_AUTO_TEST_CASE
(
vector_dist_gpu_make_sort_sparse
)
{
vector_dist_gpu_make_sort_test_impl
<
CELLLIST_GPU_SPARSE
<
3
,
float
>>
();
}
BOOST_AUTO_TEST_CASE
(
vector_dist_gpu_make_sort
)
{
vector_dist_gpu_make_sort_test_impl
<
CellList_gpu
<
3
,
float
,
CudaMemory
,
shift_only
<
3
,
float
>>>
();
}
BOOST_AUTO_TEST_CASE
(
vector_dist_gpu_test
)
{
vector_dist_gpu_test_impl
<
CellList_gpu
<
3
,
float
,
CudaMemory
,
shift_only
<
3
,
float
>>>
();
...
...
@@ -792,6 +889,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_reduce)
BOOST_REQUIRE_EQUAL
(
reds2
,
vd
.
size_local
());
}
template
<
typename
CellList_type
>
void
vector_dist_dlb_on_cuda_impl
(
size_t
k
,
double
r_cut
)
{
typedef
vector_dist_gpu
<
3
,
double
,
aggregate
<
double
,
double
[
3
],
double
[
3
]
>>
vector_type
;
...
...
@@ -908,7 +1006,7 @@ void vector_dist_dlb_on_cuda_impl(size_t k,double r_cut)
vd
.
template
deviceToHostProp
<
0
,
1
,
2
>();
// Check calc forces
auto
NN_gpu
=
vd
.
getCellListGPU
(
r_cut
);
auto
NN_gpu
=
vd
.
template
getCellListGPU
<
CellList_type
>
(
r_cut
);
auto
NN_cpu
=
vd
.
getCellList
(
r_cut
);
check_cell_list_cpu_and_gpu
(
vd
,
NN_gpu
,
NN_cpu
);
...
...
@@ -958,7 +1056,12 @@ void vector_dist_dlb_on_cuda_impl(size_t k,double r_cut)
BOOST_AUTO_TEST_CASE
(
vector_dist_dlb_on_cuda
)
{
vector_dist_dlb_on_cuda_impl
(
50000
,
0.01
);
vector_dist_dlb_on_cuda_impl
<
CellList_gpu
<
3
,
double
,
CudaMemory
,
shift_only
<
3
,
double
>
,
unsigned
int
,
int
,
false
>>
(
50000
,
0.01
);
}
BOOST_AUTO_TEST_CASE
(
vector_dist_dlb_on_cuda_sparse
)
{
vector_dist_dlb_on_cuda_impl
<
CELLLIST_GPU_SPARSE
<
3
,
double
>>
(
50000
,
0.01
);
}
BOOST_AUTO_TEST_CASE
(
vector_dist_dlb_on_cuda2
)
...
...
@@ -966,7 +1069,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
if
(
create_vcluster
().
size
()
<=
3
)
{
return
;};
vector_dist_dlb_on_cuda_impl
(
1000000
,
0.01
);
vector_dist_dlb_on_cuda_impl
<
CellList_gpu
<
3
,
double
,
CudaMemory
,
shift_only
<
3
,
double
>
,
unsigned
int
,
int
,
false
>>
(
1000000
,
0.01
);
}
BOOST_AUTO_TEST_CASE
(
vector_dist_dlb_on_cuda3
)
...
...
@@ -974,7 +1077,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
if
(
create_vcluster
().
size
()
<
8
)
{
return
;}
vector_dist_dlb_on_cuda_impl
(
15000000
,
0.005
);
vector_dist_dlb_on_cuda_impl
<
CellList_gpu
<
3
,
double
,
CudaMemory
,
shift_only
<
3
,
double
>
,
unsigned
int
,
int
,
false
>>
(
15000000
,
0.005
);
}
...
...
src/Vector/vector_dist.hpp
View file @
c52f28ba
...
...
@@ -40,7 +40,9 @@
#define DEC_GRAN(gr) ((size_t)gr << 32)
#ifdef CUDA_GPU
template
<
unsigned
int
dim
,
typename
St
>
using
CELLLIST_GPU_SPARSE
=
CellList_gpu
<
dim
,
St
,
CudaMemory
,
shift_only
<
dim
,
St
>
,
unsigned
int
,
int
,
true
>
;
#endif
#define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error");
...
...
@@ -86,7 +88,7 @@ struct gcl_standard_no_symmetric_impl<true>
template
<
unsigned
int
dim
,
typename
St
,
typename
CellL
,
typename
Vector
,
unsigned
int
impl
>
static
inline
CellL
get
(
Vector
&
vd
,
const
St
&
r_cut
,
const
Ghost
<
dim
,
St
>
&
g
)
{
return
vd
.
getCellListGPU
(
r_cut
);
return
vd
.
template
getCellListGPU
<
CellL
>
(
r_cut
);
}
};
...
...
@@ -1042,7 +1044,8 @@ public:
* \return the Cell list
*
*/
template
<
typename
CellL
=
CellList
<
dim
,
St
,
Mem_fast
<
>,
shift
<
dim
,
St
>
,
internal_position_vector_type
>
>
CellL
getCellListSym
(
St
r_cut
)
template
<
typename
CellL
=
CellList
<
dim
,
St
,
Mem_fast
<
>,
shift
<
dim
,
St
>
,
internal_position_vector_type
>
>
CellL
getCellListSym
(
St
r_cut
)
{
#ifdef SE_CLASS1
if
(
!
(
opt
&
BIND_DEC_TO_GHOST
))
...
...
@@ -1323,6 +1326,7 @@ public:
CellL
cli_tmp
=
gcl
<
dim
,
St
,
CellL
,
self
,
GCL_NON_SYMMETRIC
>::
get
(
*
this
,
r_cut
,
getDecomposition
().
getGhost
());
cell_list
.
swap
(
cli_tmp
);
cell_list
.
re_setBoxNN
();
}
}
...
...
@@ -2206,7 +2210,39 @@ public:
se3
.
template
ghost_get_pre
<
prp
...>(
opt
);
#endif
this
->
template
ghost_get_
<
prp
...>(
v_pos
,
v_prp
,
g_m
,
opt
);
this
->
template
ghost_get_
<
GHOST_SYNC
,
prp
...>(
v_pos
,
v_prp
,
g_m
,
opt
);
#ifdef SE_CLASS3
this
->
template
ghost_get_
<
prop
::
max_prop_real
>(
v_pos
,
v_prp
,
g_m
,
opt
|
KEEP_PROPERTIES
);
se3
.
template
ghost_get_post
<
prp
...>(
opt
);
#endif
}
/*! \brief It synchronize the properties and position of the ghost particles
*
* \tparam prp list of properties to get synchronize
*
* \param opt options WITH_POSITION, it send also the positional information of the particles
*
*/
template
<
int
...
prp
>
inline
void
Ighost_get
(
size_t
opt
=
WITH_POSITION
)
{
#ifdef SE_CLASS1
if
(
getDecomposition
().
getProcessorBounds
().
isValid
()
==
false
&&
size_local
()
!=
0
)
{
std
::
cerr
<<
__FILE__
<<
":"
<<
__LINE__
<<
" Error the processor "
<<
v_cl
.
getProcessUnitID
()
<<
" has particles, but is supposed to be unloaded"
<<
std
::
endl
;
ACTION_ON_ERROR
(
VECTOR_DIST_ERROR_OBJECT
);
}
#endif
#ifdef SE_CLASS3
se3
.
template
ghost_get_pre
<
prp
...>(
opt
);
#endif
this
->
template
ghost_get_
<
GHOST_ASYNC
,
prp
...>(
v_pos
,
v_prp
,
g_m
,
opt
);
#ifdef SE_CLASS3
...
...
@@ -2216,6 +2252,7 @@ public:
#endif
}
/*! \brief It synchronize the properties and position of the ghost particles
*
* \tparam op which kind of operation to apply
...
...
@@ -2814,7 +2851,8 @@ public:
* \param NN Cell-list to use to reorder
*
*/
void
make_sort
(
CellList_gpu
<
dim
,
St
,
CudaMemory
,
shift_only
<
dim
,
St
>>
&
NN
)
template
<
typename
CellList_type
>
void
make_sort
(
CellList_type
&
NN
)
{
deleteGhost
();
...
...
@@ -2827,6 +2865,30 @@ public:
v_prp
.
swap
(
v_prp_out
);
}
/*! \brief this function sort the vector
*
* \note this function does not kill the ghost and does not invalidate the Cell-list)
*
* \param NN Cell-list to use to reorder
*
*/
template
<
typename
CellList_type
>
void
make_sort_from
(
CellList_type
&
cl
)
{
#if defined(__NVCC__)
auto
ite
=
v_pos
.
getGPUIteratorTo
(
g_m
);
CUDA_LAUNCH
((
merge_sort_all
<
decltype
(
v_pos
.
toKernel
()),
decltype
(
v_prp
.
toKernel
()),
decltype
(
cl
.
getNonSortToSort
().
toKernel
())
>
),
ite
,
v_pos_out
.
toKernel
(),
v_prp_out
.
toKernel
(),
v_pos
.
toKernel
(),
v_prp
.
toKernel
(),
cl
.
getNonSortToSort
().
toKernel
());
v_pos
.
swap
(
v_pos_out
);
v_prp
.
swap
(
v_prp_out
);
#endif
}
/*! \brief This function compare if the host and device buffer position match up to some tolerance
*
* \tparam prp property to check
...
...
src/Vector/vector_dist_comm.hpp
View file @
c52f28ba
...
...
@@ -29,6 +29,9 @@ constexpr int BIND_DEC_TO_GHOST = 1;
constexpr
int
RUN_ON_DEVICE
=
1024
;
constexpr
int
MAP_LOCAL
=
2
;
constexpr
int
GHOST_SYNC
=
0
;
constexpr
int
GHOST_ASYNC
=
1
;
/*! \brief compute the communication options from the ghost_get/put options
*
*
...
...
@@ -52,6 +55,168 @@ inline static size_t compute_options(size_t opt)
return
opt_
;
}
/*! \brief template selector for asynchronous or not asynchronous
*
* \tparam impl implementation
* \tparam prp properties
*
*/
template
<
unsigned
int
impl
,
template
<
typename
>
class
layout_base
,
unsigned
int
...
prp
>
struct
ghost_exchange_comm_impl
{
template
<
typename
Vcluster_type
,
typename
vector_prop_type
,
typename
vector_pos_type
,
typename
send_vector
,
typename
prc_recv_get_type
,
typename
prc_g_opart_type
,
typename
recv_sz_get_type
,
typename
recv_sz_get_byte_type
,
typename
g_opart_sz_type
>
static
inline
void
sendrecv_prp
(
Vcluster_type
&
v_cl
,
openfpm
::
vector
<
send_vector
>
&
g_send_prp
,
vector_prop_type
&
v_prp
,
vector_pos_type
&
v_pos
,
prc_g_opart_type
&
prc_g_opart
,
prc_recv_get_type
&
prc_recv_get
,
recv_sz_get_type
&
recv_sz_get
,
recv_sz_get_byte_type
&
recv_sz_get_byte
,
g_opart_sz_type
&
g_opart_sz
,
size_t
g_m
,
size_t
opt
)
{
// if there are no properties skip
// SSendRecvP send everything when we do not give properties
if
(
sizeof
...(
prp
)
!=
0
)
{
size_t
opt_
=
compute_options
(
opt
);
if
(
opt
&
SKIP_LABELLING
)
{
if
(
opt
&
RUN_ON_DEVICE
)
{
op_ssend_gg_recv_merge_run_device
opm
(
g_m
);
v_cl
.
template
SSendRecvP_op
<
op_ssend_gg_recv_merge_run_device
,
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
opm
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
else
{
op_ssend_gg_recv_merge
opm
(
g_m
);
v_cl
.
template
SSendRecvP_op
<
op_ssend_gg_recv_merge
,
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
opm
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
}
else
{
v_cl
.
template
SSendRecvP
<
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
recv_sz_get_byte
,
opt_
);}
// fill g_opart_sz
g_opart_sz
.
resize
(
prc_g_opart
.
size
());
for
(
size_t
i
=
0
;
i
<
prc_g_opart
.
size
()
;
i
++
)
g_opart_sz
.
get
(
i
)
=
g_send_prp
.
get
(
i
).
size
();
}
}
template
<
typename
Vcluster_type
,
typename
vector_prop_type
,
typename
vector_pos_type
,
typename
send_pos_vector
,
typename
prc_recv_get_type
,
typename
prc_g_opart_type
,
typename
recv_sz_get_type
>
static
inline
void
sendrecv_pos
(
Vcluster_type
&
v_cl
,
openfpm
::
vector
<
send_pos_vector
>
&
g_pos_send
,
vector_prop_type
&
v_prp
,
vector_pos_type
&
v_pos
,
prc_recv_get_type
&
prc_recv_get
,
recv_sz_get_type
&
recv_sz_get
,
prc_g_opart_type
&
prc_g_opart
,
size_t
opt
)
{
size_t
opt_
=
compute_options
(
opt
);
if
(
opt
&
SKIP_LABELLING
)
{
v_cl
.
template
SSendRecv
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
else
{
prc_recv_get
.
clear
();
recv_sz_get
.
clear
();
v_cl
.
template
SSendRecv
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
}
};
template
<
template
<
typename
>
class
layout_base
,
unsigned
int
...
prp
>
struct
ghost_exchange_comm_impl
<
GHOST_ASYNC
,
layout_base
,
prp
...
>
{
template
<
typename
Vcluster_type
,
typename
vector_prop_type
,
typename
vector_pos_type
,
typename
send_vector
,
typename
prc_recv_get_type
,
typename
prc_g_opart_type
,
typename
recv_sz_get_type
,
typename
recv_sz_get_byte_type
,
typename
g_opart_sz_type
>
static
inline
void
sendrecv_prp
(
Vcluster_type
&
v_cl
,
openfpm
::
vector
<
send_vector
>
&
g_send_prp
,
vector_prop_type
&
v_prp
,
vector_pos_type
&
v_pos
,
prc_g_opart_type
&
prc_g_opart
,
prc_recv_get_type
&
prc_recv_get
,
recv_sz_get_type
&
recv_sz_get
,
recv_sz_get_byte_type
&
recv_sz_get_byte
,
g_opart_sz_type
&
g_opart_sz
,
size_t
g_m
,
size_t
opt
)
{
// if there are no properties skip
// SSendRecvP send everything when we do not give properties
if
(
sizeof
...(
prp
)
!=
0
)
{
size_t
opt_
=
compute_options
(
opt
);
if
(
opt
&
SKIP_LABELLING
)
{
if
(
opt
&
RUN_ON_DEVICE
)
{
op_ssend_gg_recv_merge_run_device
opm
(
g_m
);
v_cl
.
template
SSendRecvP_opAsync
<
op_ssend_gg_recv_merge_run_device
,
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
opm
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
else
{
op_ssend_gg_recv_merge
opm
(
g_m
);
v_cl
.
template
SSendRecvP_opAsync
<
op_ssend_gg_recv_merge
,
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
opm
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
}
else
{
v_cl
.
template
SSendRecvPAsync
<
send_vector
,
decltype
(
v_prp
),
layout_base
,
prp
...>(
g_send_prp
,
v_prp
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
recv_sz_get_byte
,
opt_
);}
// fill g_opart_sz
g_opart_sz
.
resize
(
prc_g_opart
.
size
());
for
(
size_t
i
=
0
;
i
<
prc_g_opart
.
size
()
;
i
++
)
g_opart_sz
.
get
(
i
)
=
g_send_prp
.
get
(
i
).
size
();
}
}
template
<
typename
Vcluster_type
,
typename
vector_prop_type
,
typename
vector_pos_type
,
typename
send_pos_vector
,
typename
prc_recv_get_type
,
typename
prc_g_opart_type
,
typename
recv_sz_get_type
>
static
inline
void
sendrecv_pos
(
Vcluster_type
&
v_cl
,
openfpm
::
vector
<
send_pos_vector
>
&
g_pos_send
,
vector_prop_type
&
v_prp
,
vector_pos_type
&
v_pos
,
prc_recv_get_type
&
prc_recv_get
,
recv_sz_get_type
&
recv_sz_get
,
prc_g_opart_type
&
prc_g_opart
,
size_t
opt
)
{
size_t
opt_
=
compute_options
(
opt
);
if
(
opt
&
SKIP_LABELLING
)
{
v_cl
.
template
SSendRecvAsync
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
else
{
prc_recv_get
.
clear
();
recv_sz_get
.
clear
();
v_cl
.
template
SSendRecvAsync
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
}
};
/*! \brief This class is an helper for the communication of vector_dist
*
* \tparam dim Dimensionality of the space where the elements lives
...
...
@@ -1451,7 +1616,7 @@ public:
* \param g_m marker between real and ghost particles