Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
openfpm_pdata
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Sbalzarini Lab
Software
Parallel Computing
OpenFPM
openfpm_pdata
Commits
34e4e85b
Commit
34e4e85b
authored
6 years ago
by
Pietro Incardona
Browse files
Options
Downloads
Patches
Plain Diff
map and ghost_get working with switched layout
parent
d518b0dc
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
openfpm_vcluster
+1
-1
1 addition, 1 deletion
openfpm_vcluster
src/Vector/vector_dist_comm.hpp
+119
-28
119 additions, 28 deletions
src/Vector/vector_dist_comm.hpp
src/Vector/vector_dist_gpu_unit_tests.cu
+188
-37
188 additions, 37 deletions
src/Vector/vector_dist_gpu_unit_tests.cu
with
308 additions
and
66 deletions
openfpm_vcluster
@
7c68ec7f
Subproject commit
faa1d114c2d13e562d200c92e98c1ed7be306eeb
Subproject commit
7c68ec7f6572fbb003101e0dc950404574d6e693
This diff is collapsed.
Click to expand it.
src/Vector/vector_dist_comm.hpp
+
119
−
28
View file @
34e4e85b
...
...
@@ -54,7 +54,7 @@ class vector_dist_comm
size_t
v_sub_unit_factor
=
64
;
//! definition of the send vector for position
typedef
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
>
send_pos_vector
;
typedef
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
send_pos_vector
;
//! VCluster
Vcluster
&
v_cl
;
...
...
@@ -111,7 +111,7 @@ class vector_dist_comm
size_t
lg_m
;
//! Sending buffer
openfpm
::
vector
<
Heap
Memory
>
hsmem
;
openfpm
::
vector
<
Memory
>
hsmem
;
//! Receiving buffer
openfpm
::
vector
<
HeapMemory
>
hrmem
;
...
...
@@ -267,7 +267,8 @@ class vector_dist_comm
* \param v_prp vector of particles properties
*
*/
void
local_ghost_from_opart
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
prop
>
&
v_prp
)
void
local_ghost_from_opart
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
)
{
// get the shift vectors
const
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
shifts
=
dec
.
getShiftVectors
();
...
...
@@ -294,7 +295,9 @@ class vector_dist_comm
* \param g_m ghost marker
*
*/
void
local_ghost_from_dec
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
prop
>
&
v_prp
,
size_t
g_m
)
void
local_ghost_from_dec
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
,
size_t
g_m
)
{
o_part_loc
.
clear
();
...
...
@@ -395,7 +398,10 @@ class vector_dist_comm
* \param opt options
*
*/
void
add_loc_particles_bc
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
prop
>
&
v_prp
,
size_t
&
g_m
,
size_t
opt
)
void
add_loc_particles_bc
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
,
size_t
&
g_m
,
size_t
opt
)
{
// Create the shift boxes
createShiftBox
();
...
...
@@ -408,9 +414,9 @@ class vector_dist_comm
else
{
if
(
opt
&
SKIP_LABELLING
)
local_ghost_from_opart
(
v_pos
,
v_prp
);
{
local_ghost_from_opart
(
v_pos
,
v_prp
);
}
else
local_ghost_from_dec
(
v_pos
,
v_prp
,
g_m
);
{
local_ghost_from_dec
(
v_pos
,
v_prp
,
g_m
);
}
}
}
...
...
@@ -420,7 +426,8 @@ class vector_dist_comm
* \param g_pos_send Send buffer to fill
*
*/
void
fill_send_ghost_pos_buf
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
send_pos_vector
>
&
g_pos_send
)
void
fill_send_ghost_pos_buf
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
send_pos_vector
>
&
g_pos_send
)
{
// get the shift vectors
const
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
shifts
=
dec
.
getShiftVectors
();
...
...
@@ -516,7 +523,7 @@ class vector_dist_comm
*
*
*/
void
resize_retained_buffer
(
openfpm
::
vector
<
Heap
Memory
>
&
rt_buf
,
size_t
nbf
)
void
resize_retained_buffer
(
openfpm
::
vector
<
Memory
>
&
rt_buf
,
size_t
nbf
)
{
// Release all the buffer that are going to be deleted
for
(
size_t
i
=
nbf
;
i
<
rt_buf
.
size
()
;
i
++
)
...
...
@@ -527,6 +534,76 @@ class vector_dist_comm
hsmem
.
resize
(
nbf
);
}
/*! \brief Set the buffer for each property
*
*
*/
template
<
typename
send_vector
,
typename
v_mpl
>
struct
set_mem_retained_buffers_inte
{
openfpm
::
vector
<
send_vector
>
&
g_send_prp
;
size_t
i
;
openfpm
::
vector
<
Memory
>
&
hsmem
;
size_t
j
;
set_mem_retained_buffers_inte
(
openfpm
::
vector
<
send_vector
>
&
g_send_prp
,
size_t
i
,
openfpm
::
vector
<
Memory
>
&
hsmem
,
size_t
j
)
:
g_send_prp
(
g_send_prp
),
i
(
i
),
hsmem
(
hsmem
),
j
(
j
){}
//! It call the setMemory function for each property
template
<
typename
T
>
inline
void
operator
()(
T
&
t
)
{
typedef
typename
boost
::
mpl
::
at
<
v_mpl
,
T
>::
type
prp_ms
;
g_send_prp
.
get
(
i
).
template
setMemory
<
prp_ms
::
value
>(
hsmem
.
get
(
j
));
j
++
;
}
};
template
<
bool
inte_or_lin
,
typename
send_vector
,
typename
v_mpl
>
struct
set_mem_retained_buffers
{
static
inline
size_t
set_mem_retained_buffers_
(
openfpm
::
vector
<
send_vector
>
&
g_send_prp
,
openfpm
::
vector
<
openfpm
::
vector
<
aggregate
<
size_t
,
size_t
>>>
&
g_opart
,
size_t
i
,
openfpm
::
vector
<
Memory
>
&
hsmem
,
size_t
j
)
{
// Set the memory for retain the send buffer
g_send_prp
.
get
(
i
).
setMemory
(
hsmem
.
get
(
j
));
// resize the sending vector (No allocation is produced)
g_send_prp
.
get
(
i
).
resize
(
g_opart
.
get
(
i
).
size
());
return
j
+
1
;
}
};
template
<
typename
send_vector
,
typename
v_mpl
>
struct
set_mem_retained_buffers
<
true
,
send_vector
,
v_mpl
>
{
static
inline
size_t
set_mem_retained_buffers_
(
openfpm
::
vector
<
send_vector
>
&
g_send_prp
,
openfpm
::
vector
<
openfpm
::
vector
<
aggregate
<
size_t
,
size_t
>>>
&
g_opart
,
size_t
i
,
openfpm
::
vector
<
Memory
>
&
hsmem
,
size_t
j
)
{
set_mem_retained_buffers_inte
<
send_vector
,
v_mpl
>
smrbi
(
g_send_prp
,
i
,
hsmem
,
j
);
boost
::
mpl
::
for_each_ref
<
v_mpl
>
(
smrbi
);
// resize the sending vector (No allocation is produced)
g_send_prp
.
get
(
i
).
resize
(
g_opart
.
get
(
i
).
size
());
return
smrbi
.
j
;
}
};
/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
*
* \tparam send_vector type used to send data
...
...
@@ -537,25 +614,33 @@ class vector_dist_comm
* \param g_send_prp Send buffer to fill
*
*/
template
<
typename
send_vector
,
typename
prp_object
,
int
...
prp
>
void
fill_send_ghost_prp_buf
(
openfpm
::
vector
<
prop
>
&
v_prp
,
openfpm
::
vector
<
send_vector
>
&
g_send_prp
)
template
<
typename
send_vector
,
typename
prp_object
,
int
...
prp
>
void
fill_send_ghost_prp_buf
(
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
,
openfpm
::
vector
<
send_vector
>
&
g_send_prp
)
{
size_t
factor
=
1
;
typedef
typename
to_boost_vmpl
<
prp
...
>::
type
v_mpl
;
if
(
is_layout_inte
<
layout_base
<
prop
>>::
value
==
true
)
{
factor
*=
sizeof
...(
prp
);}
// create a number of send buffers equal to the near processors
g_send_prp
.
resize
(
g_opart
.
size
());
resize_retained_buffer
(
hsmem
,
g_send_prp
.
size
());
resize_retained_buffer
(
hsmem
,
g_send_prp
.
size
()
*
factor
);
for
(
size_t
i
=
0
;
i
<
g_send_prp
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
hsmem
.
size
();
i
++
)
{
// Buffer must retained and survive the destruction of the
// vector
if
(
hsmem
.
get
(
i
).
ref
()
==
0
)
hsmem
.
get
(
i
).
incRef
();
// Set the memory for retain the send buffer
g_send_prp
.
get
(
i
).
setMemory
(
hsmem
.
get
(
i
));
{
hsmem
.
get
(
i
).
incRef
();}
}
// resize the sending vector (No allocation is produced)
g_send_prp
.
get
(
i
).
resize
(
g_opart
.
get
(
i
).
size
());
size_t
j
=
0
;
for
(
size_t
i
=
0
;
i
<
g_send_prp
.
size
();
i
++
)
{
j
=
set_mem_retained_buffers
<
is_layout_inte
<
layout_base
<
prop
>>::
value
,
send_vector
,
v_mpl
>::
set_mem_retained_buffers_
(
g_send_prp
,
g_opart
,
i
,
hsmem
,
j
);
}
// Fill the send buffer
...
...
@@ -564,9 +649,9 @@ class vector_dist_comm
for
(
size_t
j
=
0
;
j
<
g_opart
.
get
(
i
).
size
();
j
++
)
{
// source object type
typedef
encapc
<
1
,
prop
,
typename
openfpm
::
vector
<
prop
>::
layout_type
>
encap_src
;
typedef
decltype
(
v_prp
.
get
(
g_opart
.
get
(
i
).
template
get
<
0
>(
j
)))
encap_src
;
// destination object type
typedef
encapc
<
1
,
prp_object
,
typename
openfpm
::
vector
<
prp_object
>::
layout_type
>
encap_dst
;
typedef
decltype
(
g_send_prp
.
get
(
i
).
get
(
j
))
encap_dst
;
// Copy only the selected properties
object_si_d
<
encap_src
,
encap_dst
,
OBJ_ENCAP
,
prp
...
>
(
v_prp
.
get
(
g_opart
.
get
(
i
).
template
get
<
0
>(
j
)),
g_send_prp
.
get
(
i
).
get
(
j
));
...
...
@@ -731,7 +816,10 @@ class vector_dist_comm
* \param g_m ghost marker
*
*/
void
labelParticlesGhost
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
prop
>
&
v_prp
,
openfpm
::
vector
<
size_t
>
&
prc
,
size_t
&
g_m
)
void
labelParticlesGhost
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
,
openfpm
::
vector
<
size_t
>
&
prc
,
size_t
&
g_m
)
{
// Buffer that contain for each processor the id of the particle to send
g_opart
.
clear
();
...
...
@@ -946,25 +1034,28 @@ public:
* \param g_m marker between real and ghost particles
*
*/
template
<
int
...
prp
>
inline
void
ghost_get_
(
openfpm
::
vector
<
Point
<
dim
,
St
>>
&
v_pos
,
openfpm
::
vector
<
prop
>
&
v_prp
,
size_t
&
g_m
,
size_t
opt
=
WITH_POSITION
)
template
<
int
...
prp
>
inline
void
ghost_get_
(
openfpm
::
vector
<
Point
<
dim
,
St
>
,
Memory
,
typename
layout_base
<
Point
<
dim
,
St
>>::
type
,
layout_base
>
&
v_pos
,
openfpm
::
vector
<
prop
,
Memory
,
typename
layout_base
<
prop
>::
type
,
layout_base
>
&
v_prp
,
size_t
&
g_m
,
size_t
opt
=
WITH_POSITION
)
{
// Sending property object
typedef
object
<
typename
object_creator
<
typename
prop
::
type
,
prp
...
>::
type
>
prp_object
;
// send vector for each processor
typedef
openfpm
::
vector
<
prp_object
>
send_vector
;
typedef
openfpm
::
vector
<
prp_object
,
Memory
,
typename
layout_base
<
prp_object
>::
type
,
layout_base
>
send_vector
;
if
(
!
(
opt
&
NO_POSITION
))
v_pos
.
resize
(
g_m
);
{
v_pos
.
resize
(
g_m
);
}
// reset the ghost part
if
(
!
(
opt
&
SKIP_LABELLING
))
v_prp
.
resize
(
g_m
);
{
v_prp
.
resize
(
g_m
);
}
// Label all the particles
if
((
opt
&
SKIP_LABELLING
)
==
false
)
labelParticlesGhost
(
v_pos
,
v_prp
,
prc_g_opart
,
g_m
);
{
labelParticlesGhost
(
v_pos
,
v_prp
,
prc_g_opart
,
g_m
);
}
// Send and receive ghost particle information
{
...
...
@@ -1003,13 +1094,13 @@ public:
if
(
opt
&
SKIP_LABELLING
)
{
size_t
opt_
=
compute_options
(
opt
);
v_cl
.
SSendRecv
(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
v_cl
.
SSendRecv
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>
(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
,
opt_
);
}
else
{
prc_recv_get
.
clear
();
recv_sz_get
.
clear
();
v_cl
.
SSendRecv
(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
);
v_cl
.
SSendRecv
<
send_pos_vector
,
decltype
(
v_pos
),
layout_base
>
(
g_pos_send
,
v_pos
,
prc_g_opart
,
prc_recv_get
,
recv_sz_get
);
}
// fill g_opart_sz
...
...
This diff is collapsed.
Click to expand it.
src/Vector/vector_dist_gpu_unit_tests.cu
+
188
−
37
View file @
34e4e85b
...
...
@@ -38,10 +38,6 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
auto
cell
=
cl
.
getCell
(
xp
);
int
s1
=
cell
.
get
(
0
);
int
s2
=
cell
.
get
(
1
);
int
s3
=
cell
.
get
(
2
);
Point
<
3
,
float
>
force1
({
0.0
,
0.0
,
0.0
});
Point
<
3
,
float
>
force2
({
0.0
,
0.0
,
0.0
});
...
...
@@ -78,6 +74,172 @@ __global__ void calculate_force(vector_dist_ker<3, float, aggregate<float, floa
vd
.
template
getProp
<
2
>(
p
)[
2
]
=
force2
.
get
(
2
);
}
template
<
typename
CellList_type
>
__global__
void
calculate_force_full_sort
(
vector_dist_ker
<
3
,
float
,
aggregate
<
float
,
float
[
3
],
float
[
3
]
>>
vd
,
CellList_type
cl
)
{
auto
p
=
GET_PARTICLE
(
vd
);
Point
<
3
,
float
>
xp
=
vd
.
getPos
(
p
);
auto
it
=
cl
.
getNNIterator
(
cl
.
getCell
(
xp
));
auto
cell
=
cl
.
getCell
(
xp
);
Point
<
3
,
float
>
force1
({
0.0
,
0.0
,
0.0
});
while
(
it
.
isNext
())
{
auto
q1
=
it
.
get
();
if
(
q1
==
p
)
{
++
it
;
continue
;}
Point
<
3
,
float
>
xq_1
=
vd
.
getPos
(
q1
);
Point
<
3
,
float
>
r1
=
xq_1
-
xp
;
// Normalize
r1
/=
r1
.
norm
();
force1
+=
vd
.
template
getProp
<
0
>(
q1
)
*
r1
;
++
it
;
}
vd
.
template
getProp
<
1
>(
p
)[
0
]
=
force1
.
get
(
0
);
vd
.
template
getProp
<
1
>(
p
)[
1
]
=
force1
.
get
(
1
);
vd
.
template
getProp
<
1
>(
p
)[
2
]
=
force1
.
get
(
2
);
}
template
<
typename
CellList_type
,
typename
vector_type
>
bool
check_force
(
CellList_type
&
NN_cpu
,
vector_type
&
vd
)
{
auto
it6
=
vd
.
getDomainIterator
();
bool
match
=
true
;
while
(
it6
.
isNext
())
{
auto
p
=
it6
.
get
();
Point
<
3
,
float
>
xp
=
vd
.
getPos
(
p
);
// Calculate on CPU
Point
<
3
,
float
>
force
({
0.0
,
0.0
,
0.0
});
auto
NNc
=
NN_cpu
.
getNNIterator
(
NN_cpu
.
getCell
(
xp
));
while
(
NNc
.
isNext
())
{
auto
q
=
NNc
.
get
();
if
(
q
==
p
.
getKey
())
{
++
NNc
;
continue
;}
Point
<
3
,
float
>
xq_2
=
vd
.
getPos
(
q
);
Point
<
3
,
float
>
r2
=
xq_2
-
xp
;
// Normalize
r2
/=
r2
.
norm
();
force
+=
vd
.
template
getProp
<
0
>(
q
)
*
r2
;
++
NNc
;
}
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
0
]
-
vd
.
template
getProp
<
2
>(
p
)[
0
])
<
0.0001
;
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
1
]
-
vd
.
template
getProp
<
2
>(
p
)[
1
])
<
0.0001
;
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
2
]
-
vd
.
template
getProp
<
2
>(
p
)[
2
])
<
0.0001
;
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
0
]
-
force
.
get
(
0
))
<
0.0001
;
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
1
]
-
force
.
get
(
1
))
<
0.0001
;
match
&=
fabs
(
vd
.
template
getProp
<
1
>(
p
)[
2
]
-
force
.
get
(
2
))
<
0.0001
;
++
it6
;
}
return
match
;
}
BOOST_AUTO_TEST_CASE
(
vector_dist_gpu_ghost_get
)
{
auto
&
v_cl
=
create_vcluster
();
if
(
v_cl
.
size
()
>
16
)
{
return
;}
Box
<
3
,
float
>
domain
({
0.0
,
0.0
,
0.0
},{
1.0
,
1.0
,
1.0
});
// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
Ghost
<
3
,
float
>
g
(
0.1
);
// Boundary conditions
size_t
bc
[
3
]
=
{
PERIODIC
,
PERIODIC
,
PERIODIC
};
vector_dist_gpu
<
3
,
float
,
aggregate
<
float
,
float
[
3
],
float
[
3
]
>>
vd
(
1000
,
domain
,
bc
,
g
);
auto
it
=
vd
.
getDomainIterator
();
while
(
it
.
isNext
())
{
auto
p
=
it
.
get
();
vd
.
getPos
(
p
)[
0
]
=
(
float
)
rand
()
/
RAND_MAX
;
vd
.
getPos
(
p
)[
1
]
=
(
float
)
rand
()
/
RAND_MAX
;
vd
.
getPos
(
p
)[
2
]
=
(
float
)
rand
()
/
RAND_MAX
;
vd
.
template
getProp
<
0
>(
p
)
=
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
1
]
+
vd
.
getPos
(
p
)[
2
];
vd
.
template
getProp
<
1
>(
p
)[
0
]
=
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
1
];
vd
.
template
getProp
<
1
>(
p
)[
1
]
=
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
2
];
vd
.
template
getProp
<
1
>(
p
)[
2
]
=
vd
.
getPos
(
p
)[
1
]
+
vd
.
getPos
(
p
)[
2
];
vd
.
template
getProp
<
2
>(
p
)[
0
]
=
vd
.
getPos
(
p
)[
0
]
+
3.0
*
vd
.
getPos
(
p
)[
1
];
vd
.
template
getProp
<
2
>(
p
)[
1
]
=
vd
.
getPos
(
p
)[
0
]
+
3.0
*
vd
.
getPos
(
p
)[
2
];
vd
.
template
getProp
<
2
>(
p
)[
2
]
=
vd
.
getPos
(
p
)[
1
]
+
3.0
*
vd
.
getPos
(
p
)[
2
];
++
it
;
}
// Ok we redistribute the particles (CPU based)
vd
.
map
();
vd
.
template
ghost_get
<
0
,
1
,
2
>();
// Now we check the the ghost contain the correct information
bool
check
=
true
;
auto
itg
=
vd
.
getDomainAndGhostIterator
();
while
(
itg
.
isNext
())
{
auto
p
=
itg
.
get
();
check
&=
(
vd
.
template
getProp
<
0
>(
p
)
==
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
1
]
+
vd
.
getPos
(
p
)[
2
]);
check
&=
(
vd
.
template
getProp
<
1
>(
p
)[
0
]
==
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
1
]);
check
&=
(
vd
.
template
getProp
<
1
>(
p
)[
1
]
==
vd
.
getPos
(
p
)[
0
]
+
vd
.
getPos
(
p
)[
2
]);
check
&=
(
vd
.
template
getProp
<
1
>(
p
)[
2
]
==
vd
.
getPos
(
p
)[
1
]
+
vd
.
getPos
(
p
)[
2
]);
check
&=
(
vd
.
template
getProp
<
2
>(
p
)[
0
]
==
vd
.
getPos
(
p
)[
0
]
+
3.0
*
vd
.
getPos
(
p
)[
1
]);
check
&=
(
vd
.
template
getProp
<
2
>(
p
)[
1
]
==
vd
.
getPos
(
p
)[
0
]
+
3.0
*
vd
.
getPos
(
p
)[
2
]);
check
&=
(
vd
.
template
getProp
<
2
>(
p
)[
2
]
==
vd
.
getPos
(
p
)[
1
]
+
3.0
*
vd
.
getPos
(
p
)[
2
]);
++
itg
;
}
size_t
tot_s
=
vd
.
size_local_with_ghost
();
v_cl
.
sum
(
tot_s
);
v_cl
.
execute
();
// We check that we check something
BOOST_REQUIRE
(
tot_s
>
1000
);
}
BOOST_AUTO_TEST_CASE
(
vector_dist_gpu_test
)
{
auto
&
v_cl
=
create_vcluster
();
...
...
@@ -108,7 +270,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++
it
;
}
// Ok we redistribute the particles
// Ok we redistribute the particles
(CPU based)
vd
.
map
();
size_t
size_l
=
vd
.
size_local
();
...
...
@@ -145,6 +307,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
auto
it3
=
vd
.
getDomainIteratorGPU
();
// offload to device
vd
.
hostToDevicePos
();
initialize_props
<<<
it3
.
wthr
,
it3
.
thr
>>>
(
vd
.
toKernel
());
...
...
@@ -170,6 +333,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
++
it4
;
}
// here we do a ghost_get
vd
.
ghost_get
<
0
>
();
// we re-offload what we received
vd
.
hostToDevicePos
();
vd
.
template
hostToDeviceProp
<
0
>();
auto
NN
=
vd
.
getCellListGPU
(
0.1
);
auto
NN_cpu
=
vd
.
getCellList
(
0.1
);
...
...
@@ -179,50 +349,31 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
vd
.
template
deviceToHostProp
<
1
,
2
>();
auto
it6
=
vd
.
getDomainIterator
();
bool
match
=
true
;
while
(
it6
.
isNext
())
{
auto
p
=
it6
.
get
();
Point
<
3
,
float
>
xp
=
vd
.
getPos
(
p
);
bool
test
=
check_force
(
NN_cpu
,
vd
);
BOOST_REQUIRE_EQUAL
(
test
,
true
);
//
Calculate on CPU
//
We do exactly the same test as before, but now we completely use the sorted version
Point
<
3
,
float
>
force
({
0.0
,
0.0
,
0.0
}
);
calculate_force_full_sort
<
decltype
(
NN
.
toKernel
())
><<<
it5
.
wthr
,
it5
.
thr
>>>
(
vd
.
toKernel_sorted
(),
NN
.
toKernel
()
);
auto
NNc
=
NN_cpu
.
getNNIterator
(
NN_cpu
.
getCell
(
xp
)
);
vd
.
template
deviceToHostProp
<
1
>(
);
while
(
NNc
.
isNext
())
{
auto
q
=
NNc
.
get
();
test
=
check_force
(
NN_cpu
,
vd
);
BOOST_REQUIRE_EQUAL
(
test
,
true
);
if
(
q
==
p
.
getKey
())
{
++
NNc
;
continue
;}
// check
Point
<
3
,
float
>
xq_2
=
vd
.
getPos
(
q
);
Point
<
3
,
float
>
r2
=
xq_2
-
xp
;
// Now we do a ghost_get from CPU
// Normalize
// Than we offload on GPU
r2
/=
r2
.
norm
();
force
+=
vd
.
template
getProp
<
0
>(
q
)
*
r2
;
// We construct a Cell-list
++
NNc
;
}
// We calculate force on CPU and GPU to check if they match
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
0
],
vd
.
template
getProp
<
2
>(
p
)[
0
],
0.001
);
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
1
],
vd
.
template
getProp
<
2
>(
p
)[
1
],
0.001
);
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
2
],
vd
.
template
getProp
<
2
>(
p
)[
2
],
0.001
);
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
0
],
force
.
get
(
0
),
0.01
);
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
1
],
force
.
get
(
1
),
0.01
);
BOOST_REQUIRE_CLOSE
(
vd
.
template
getProp
<
1
>(
p
)[
2
],
force
.
get
(
2
),
0.01
);
++
it6
;
}
}
BOOST_AUTO_TEST_SUITE_END
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment