openfpm / openfpm_pdata, commit 5dbbc3f5
Authored Sep 13, 2019 by incardon
Fixing CUDA compilation + AMR distributed GPU working
Parent: 95bc0f71
Showing 10 changed files with 2128 additions and 92 deletions
openfpm_data                                     +1    -1
src/Amr/grid_dist_amr.hpp                        +15   -1
src/Amr/tests/amr_base_gpu_unit_tests.cu         +99   -13
src/Grid/Iterators/grid_dist_id_iterator.hpp     +3    -21
src/Grid/cuda/grid_dist_id_kernels.cuh           +47   -2
src/Grid/grid_dist_id.hpp                        +53   -4
src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu   +77   -50
test_data/sgrid_gpu_output_1_0.vtk               +911  -0
test_data/sgrid_gpu_output_2_0.vtk               +499  -0
test_data/sgrid_gpu_output_2_1.vtk               +423  -0
openfpm_data @ b7d41928 (submodule)
-Subproject commit 5056c72513114e2491a8fc1b4ddd4899ecd3e242
+Subproject commit b7d4192813d4aaccd703df1472b9bc9da6c9a7b6
src/Amr/grid_dist_amr.hpp

@@ -466,6 +466,20 @@ public:
 		return gd_array.get(lvl).getGridIterator();
 	}

+#ifdef __NVCC__
+
+	/*! \brief Get an iterator to the grid
+	 *
+	 * \return an iterator to the grid
+	 *
+	 */
+	auto getGridIteratorGPU(size_t lvl) -> decltype(gd_array.get(lvl).getGridIteratorGPU())
+	{
+		return gd_array.get(lvl).getGridIteratorGPU();
+	}
+
+#endif

 	/*! \brief Get an iterator to the grid
 	 *
 	 * \return an iterator to the grid
@@ -912,7 +926,7 @@ using sgrid_dist_amr = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition

 #ifdef __NVCC__

 template<unsigned int dim, typename St, typename T, unsigned int blockEdgeSize = 8>
-using sgrid_dist_amr_gpu = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition<dim,St,CudaMemory,memory_traits_inte>,CudaMemory,SparseGridGpu<dim,T,blockEdgeSize>>;
+using sgrid_dist_amr_gpu = grid_dist_amr<dim,St,T,AMR_IMPL_TRIVIAL,CartDecomposition<dim,St,CudaMemory,memory_traits_inte>,CudaMemory,SparseGridGpu<dim,T,blockEdgeSize,IntPow<blockEdgeSize,dim>::value>>;

 #endif
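A minimal sketch of how the new sgrid_dist_amr_gpu alias is meant to be used, following the GPU AMR test added below; the include path is an assumption and amr_launch_sparse is the device functor defined in that test:

    #include "Amr/grid_dist_amr.hpp"   // assumed include for the AMR grid

    int main(int argc, char* argv[])
    {
        openfpm_init(&argc,&argv);
        {
            Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
            Ghost<3,long int> g(1);

            // 3D sparse AMR grid with one float property, CUDA memory backend
            sgrid_dist_amr_gpu<3,float,aggregate<float>> amr_g(domain,g);

            size_t g_sz[3] = {4,4,4};
            amr_g.initLevels(6,g_sz);              // 6 levels, coarsest grid 4x4x4

            // GPU grid iterator over level 0, then launch a __device__ functor on it
            auto it = amr_g.getGridIteratorGPU(0);
            it.setGPUInsertBuffer(1);
            it.launch(amr_launch_sparse(),it.getSpacing(0),Point<3,float>({0.5,0.5,0.5}));

            amr_g.getDistGrid(0).template flush<smax_<0>>(FLUSH_ON_DEVICE);
            amr_g.getDistGrid(0).template deviceToHost<0>();
        }
        openfpm_finalize();
    }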
src/Amr/tests/amr_base_gpu_unit_tests.cu

@@ -18,6 +18,58 @@
 #include "Point_test.hpp"
 #include "Grid/tests/grid_dist_id_util_tests.hpp"

+struct amr_launch_sparse
+{
+	template<typename grid_type, typename ite_type>
+	__device__ void operator()(grid_type & grid, ite_type itg, float spacing, Point<3,float> center)
+	{
+		GRID_ID_3_GLOBAL(itg);
+
+		__shared__ bool is_block_empty;
+
+		if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
+		{is_block_empty = true;}
+
+		grid.init();
+
+		int offset = 0;
+		grid_key_dx<3,int> blk;
+		bool out = grid.getInsertBlockOffset(itg,key,blk,offset);
+
+		auto blockId = grid.getBlockLinId(blk);
+
+		const float x = keyg.get(0)*spacing - center.get(0);
+		const float y = keyg.get(1)*spacing - center.get(1);
+		const float z = keyg.get(2)*spacing - center.get(2);
+
+		float radius = sqrt((float)(x*x + y*y + z*z));
+
+		bool is_active = radius < 0.4 && radius > 0.3;
+
+		if (is_active == true)
+		{is_block_empty = false;}
+
+		__syncthreads();
+
+		if (is_block_empty == false)
+		{
+			auto ec = grid.insertBlock(blockId);
+
+			if (is_active == true)
+			{
+				ec.template get<0>()[offset] = x + y + z;
+				ec.template get<grid_type::pMask>()[offset] = 1;
+			}
+		}
+
+		__syncthreads();
+
+		grid.flush_block_insert();
+	}
+};

 BOOST_AUTO_TEST_SUITE( amr_grid_dist_id_test )
@@ -27,39 +79,73 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_amr_gpu )

 	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});

-	Ghost<3,float> g(0.05);
+	Ghost<3,long int> g(1);

 	sgrid_dist_amr_gpu<3,float,aggregate<float>> amr_g(domain3,g);

 	size_t g_sz[3] = {4,4,4};

-	size_t n_lvl = 10;
+	size_t n_lvl = 6;

-//	amr_g.initLevels(n_lvl,g_sz);
+	amr_g.initLevels(n_lvl,g_sz);

-/*	for (size_t i = 0 ; i < amr_g.getNLvl() ; i++)
+	for (size_t i = 0 ; i < amr_g.getNLvl() ; i++)
+	{
 		// Fill the AMR with something

+		size_t count = 0;

-	auto it = amr_g.getGridIterator(i);
+		auto it = amr_g.getGridIteratorGPU(i);
+		it.setGPUInsertBuffer(1);

-	while (it.isNext())
-	{
-		auto key = it.get_dist();
-		auto akey = amr_g.getAMRKey(i,key);
+		Point<3,float> center({0.5,0.5,0.5});
+		it.launch(amr_launch_sparse(),it.getSpacing(0),center);

+		amr_g.getDistGrid(i).template flush<smax_<0>>(FLUSH_ON_DEVICE);
+		amr_g.getDistGrid(i).template deviceToHost<0>();

-		amr_g.template insert<0>(akey) = 3.0;
+		auto it2 = amr_g.getDistGrid(i).getDomainIterator();

+		while (it2.isNext())
+		{
+			auto key = it2.get();
+			auto keyg = it2.getGKey(key);

+			count++;

-		++it;
+			++it2;
+		}

+		auto & v_cl = create_vcluster();
+		v_cl.sum(count);
+		v_cl.execute();

+		switch (i)
+		{
+		case 0:
+			BOOST_REQUIRE_EQUAL(count,0);
+			break;
+		case 1:
+			BOOST_REQUIRE_EQUAL(count,30);
+			break;
+		case 2:
+			BOOST_REQUIRE_EQUAL(count,282);
+			break;
+		case 3:
+			BOOST_REQUIRE_EQUAL(count,2192);
+			break;
+		case 4:
+			BOOST_REQUIRE_EQUAL(count,16890);
+			break;
+		case 5:
+			BOOST_REQUIRE_EQUAL(count,136992);
+			break;
+		}
+	}

 	// Iterate across all the levels initialized

-	auto it = amr_g.getDomainIterator();
+/*	auto it = amr_g.getDomainIterator();

 	size_t count = 0;
src/Grid/Iterators/grid_dist_id_iterator.hpp

@@ -93,24 +93,6 @@ class grid_dist_iterator
 		// get the next grid iterator
 		if (g_c < gList.size())
 		{
 			// Sub iterator are used

-/*			if (impl == FREE)
-			{
-				if (gdb_ext.get(g_c).Dbox.isValid() == false)
-				{g_c++;}
-				else
-				{
-					a_it.reinitialize(gList.get(g_c).getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
-					if (a_it.isNext() == false) {g_c++;}
-				}
-			}
-			else
-			{
-				// Full iterator (no subset)
-				a_it.reinitialize(gList.get(g_c).getIterator());
-				if (a_it.isNext() == false) {g_c++;}
-			}*/

 			selvg<impl == FREE>::call(a_it,gdb_ext,gList,g_c);
 		}
 	}
 	while (g_c < gList.size() && a_it.isNext() == false);

@@ -276,12 +258,12 @@ class grid_dist_iterator
 		// Get the sub-domain id
 		size_t sub_id = k.getSub();

-		grid_key_dx<dim> k_glob = k.getKey();
+		auto k_glob = k.getKey();

 		// shift
-		k_glob = k_glob + gdb_ext.get(sub_id).origin;
+		auto k_glob2 = k_glob + gdb_ext.get(sub_id).origin;

-		return k_glob;
+		return k_glob2;
 	}

 	/*! \brief Return the stencil point offset
src/Grid/cuda/grid_dist_id_kernels.cuh

@@ -8,9 +8,54 @@
 #ifndef GRID_DIST_ID_KERNELS_CUH_
 #define GRID_DIST_ID_KERNELS_CUH_

 #ifdef CUDA_GPU

-template<typename grid_type, typename func_t, typename ... args_t>
-__global__ void grid_apply_functor(grid_type g, ite_gpu<grid_type::d> ite, func_t f, args_t ... args)
+template<unsigned int dim>
+struct ite_gpu_dist
+{
+	dim3 thr;
+	dim3 wthr;
+
+	grid_key_dx<dim,int> start;
+	grid_key_dx<dim,int> stop;
+	grid_key_dx<dim,int> start_base;
+	grid_key_dx<dim,int> origin;
+
+	ite_gpu_dist(ite_gpu<dim> & ite)
+	{
+		thr = ite.thr;
+		wthr = ite.wthr;
+
+		start = ite.start;
+		stop = ite.stop;
+	}
+
+	size_t nblocks()
+	{
+		return wthr.x * wthr.y * wthr.z;
+	}
+};
+
+#define GRID_ID_3_GLOBAL(ite_gpu) grid_key_dx<3,int> key;\
+		grid_key_dx<3,int> keyg;\
+		key.set_d(0,threadIdx.x + blockIdx.x * blockDim.x + ite_gpu.start.get(0));\
+		key.set_d(1,threadIdx.y + blockIdx.y * blockDim.y + ite_gpu.start.get(1));\
+		key.set_d(2,threadIdx.z + blockIdx.z * blockDim.z + ite_gpu.start.get(2));\
+		\
+		keyg.set_d(0,key.get(0) + ite_gpu.origin.get(0));\
+		keyg.set_d(1,key.get(1) + ite_gpu.origin.get(1));\
+		keyg.set_d(2,key.get(2) + ite_gpu.origin.get(2));\
+		\
+		if (key.get(0) > ite_gpu.stop.get(0) || key.get(1) > ite_gpu.stop.get(1) || key.get(2) > ite_gpu.stop.get(2))\
+		{return;}

 #endif

+template<typename grid_type, typename ite_gpu_type, typename func_t, typename ... args_t>
+__global__ void grid_apply_functor(grid_type g, ite_gpu_type ite, func_t f, args_t ... args)
 {
 	f(g,ite,args...);
 }
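For reference, a minimal sketch of a user functor driven by this kernel (not part of this commit; it follows the pattern of amr_launch_sparse in the tests above). GRID_ID_3_GLOBAL defines key, the local index inside the iteration box with an early return past stop, and keyg, the global index shifted by ite_gpu_dist::origin. The get<0> write assumes a dense device grid view as produced by toKernel(); for sparse grids the insert path of amr_launch_sparse applies instead.

    // Sketch: a functor passed to grid_apply_functor<<<ite.wthr,ite.thr>>>(g.toKernel(),itd,f,args...).
    // 'value' stands for any extra argument forwarded through args_t...
    struct fill_with_global_sum
    {
        template<typename grid_type, typename ite_type>
        __device__ void operator()(grid_type & g, ite_type itg, float value)
        {
            GRID_ID_3_GLOBAL(itg);   // defines key (local) and keyg (global), returns if past itg.stop

            g.template get<0>(key) = value + keyg.get(0) + keyg.get(1) + keyg.get(2);
        }
    };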
src/Grid/grid_dist_id.hpp

@@ -26,6 +26,7 @@
 #ifdef __NVCC__
 #include "SparseGridGpu/SparseGridGpu.hpp"
 #include "cuda/grid_dist_id_kernels.cuh"
+#include "Grid/cuda/grid_dist_id_iterator_gpu.cuh"
 #endif
@@ -1134,8 +1135,8 @@ public:
 	 */
 	void setBackgroundValue(T & bv)
 	{
-		for (size_t i = 0 ; i < loc_grid.size() ; i++)
-		{meta_copy<T>::meta_copy_(bv,loc_grid.get(i).getBackgroundValue());}
+		setBackground_impl<T,decltype(loc_grid)> func(bv,loc_grid);
+		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(func);
 	}

 	/*! \brief set the background value
@@ -1715,7 +1716,7 @@ public:
 	/*! /brief Get a grid Iterator
 	 *
 	 * In case of dense grid getGridIterator is equivalent to getDomainIterator
-	 * in case if sparse distributed grid getDomainIterator go across all the
+	 * in case of sparse grid getDomainIterator go across all the
 	 * inserted point get grid iterator run across all grid points independently
 	 * that the point has been insert or not
 	 *
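In short, on a sparse distributed grid the two iterators differ as follows (sketch; gdist, box and the property access are placeholders):

    auto dom_it  = gdist.getDomainIterator();                          // visits only points that were inserted
    auto grid_it = gdist.getGridIterator(box.getKP1(),box.getKP2());   // visits every point of the box, inserted or not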
@@ -1728,6 +1729,47 @@ public:
 		return it_dec;
 	}

+#ifdef __NVCC__
+
+	/*! /brief Get a grid Iterator in GPU
+	 *
+	 * In case of dense grid getGridIterator is equivalent to getDomainIteratorGPU
+	 * in case of sparse distributed grid getDomainIterator go across all the
+	 * inserted point getGridIteratorGPU run across all grid points independently
+	 * that the point has been insert or not
+	 *
+	 * \param start point
+	 * \param stop point
+	 *
+	 * \return a Grid iterator
+	 *
+	 */
+	inline grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> getGridIteratorGPU(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop)
+	{
+		grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> it_dec(loc_grid,getDecomposition(),g_sz,start,stop);
+		return it_dec;
+	}
+
+	/*! /brief Get a grid Iterator in GPU
+	 *
+	 * In case of dense grid getGridIterator is equivalent to getDomainIteratorGPU
+	 * in case of sparse distributed grid getDomainIterator go across all the
+	 * inserted point getGridIteratorGPU run across all grid points independently
+	 * that the point has been insert or not
+	 *
+	 * \return a Grid iterator
+	 *
+	 */
+	inline grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> getGridIteratorGPU()
+	{
+		grid_dist_id_iterator_gpu<Decomposition,openfpm::vector<device_grid>> it_dec(loc_grid,getDecomposition(),g_sz);
+		return it_dec;
+	}
+
+#endif

 	/*! /brief Get a grid Iterator running also on ghost area
 	 *
 	 * In case of dense grid getGridIterator is equivalent to getDomainIterator
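A usage sketch for the new overloads (not part of this commit), following the AMR test in this commit: the returned grid_dist_id_iterator_gpu carries the launch configuration, gets an insert buffer, and launches a __device__ functor over the selected box.

    // my_device_functor is a placeholder for a functor such as amr_launch_sparse in the tests above
    grid_key_dx<3> start;
    grid_key_dx<3> stop;
    for (size_t i = 0 ; i < 3 ; i++)
    {start.set_d(i,0); stop.set_d(i,15);}

    auto it = dist_grid.getGridIteratorGPU(start,stop);   // dist_grid: a grid_dist_id instance
    it.setGPUInsertBuffer(1);                             // insert slots per block, as in the AMR test
    it.launch(my_device_functor(),it.getSpacing(0));      // extra arguments are forwarded to the functor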
@@ -2641,6 +2683,8 @@ public:
 #ifdef __NVCC__

 	/*! \brief Set the size of the gpu insert buffer pool
 	 *
+	 * Indicate the maximum number of inserts each GPU block can do
+	 *
 	 * \param size of the insert pool
 	 *
@@ -2664,7 +2708,12 @@ public:
 			loc_grid.get(i).setGPUInsertBuffer(ite.nblocks(),gpu_insert_pool_size);
 			loc_grid.get(i).initializeGPUInsertBuffer();

-			grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),ite,func_t(),args...);
+			ite_gpu_dist<dim> itd = ite;
+
+			for (int j = 0 ; j < dim ; j++)
+			{itd.origin.set_d(j,gdb_ext.get(i).origin.get(j));}
+
+			grid_apply_functor<<<ite.wthr,ite.thr>>>(loc_grid.get(i).toKernel(),itd,func_t(),args...);

 			it.nextGrid();
 		}
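End-to-end, the GPU insert path that this hunk completes looks as follows (sketch mirroring the sgrid_gpu_test_output test added below):

    size_t sz[2] = {17,17};
    periodicity<2> bc = {PERIODIC,PERIODIC};
    Ghost<2,long int> g(1);
    Box<2,float> domain({0.0,0.0},{1.0,1.0});

    sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
    gdist.template setBackgroundValue<0>(666);

    Box<2,size_t> box({1,1},{15,15});
    auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());

    gdist.setInsertBuffer(128);                                   // size of the per-block GPU insert pool
    gdist.template iterateGridGPU<insert_kernel<0>>(it,5.0f);     // run the __device__ insert functor
    gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);  // consolidate the inserts on the device
    gdist.template deviceToHost<0>();                             // copy property 0 back to the host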
src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu

@@ -8,8 +8,8 @@ BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite )
 template<unsigned int p>
 struct insert_kernel
 {
-	template<typename SparseGridGpu_type>
-	__device__ void operator()(SparseGridGpu_type & sg, ite_gpu<SparseGridGpu_type::d> & ite, float c)
+	template<typename SparseGridGpu_type, typename ite_type>
+	__device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
 	{
 		sg.init();
@@ -30,22 +30,34 @@ struct insert_kernel
 		{return;}

 		if (SparseGridGpu_type::d >= 2 && y + ite.start.get(1) > ite.stop.get(1))
 		{return;}

-		if (SparseGridGpu_type::d >= 3 && z + ite.start.get(1) > ite.stop.get(2))
+		if (SparseGridGpu_type::d >= 3 && z + ite.start.get(2) > ite.stop.get(2))
 		{return;}

-		grid_key_dx<SparseGridGpu_type::d,size_t> coord({x + ite.start.get(0), y + ite.start.get(1), z + ite.start.get(2)});
-
-//		size_t pos = sg.getLinId(coord);
-//		printf("insertValues: bDim=(%d,%d), bId=(%d,%d), tId=(%d,%d) : "
-//				"pos=%ld, coord={%d,%d}, value=%d\n",
-//				bDimX, bDimY,
-//				bIdX, bIdY,
-//				tIdX, tIdY,
-//				pos,
-//				x, y,
-//				x); //debug
-
-		sg.template insert<p>(coord) = c;
+		grid_key_dx<SparseGridGpu_type::d,size_t> coord;
+		grid_key_dx<SparseGridGpu_type::d,size_t> coord_glob;
+
+		if (SparseGridGpu_type::d >= 2)
+		{
+			coord.set_d(0,x + ite.start.get(0));
+			coord_glob.set_d(0,x + ite.start.get(0) + ite.origin.get(0));
+			coord.set_d(1,y + ite.start.get(1));
+			coord_glob.set_d(1,y + ite.start.get(1) + ite.origin.get(1));
+		}
+		else if (SparseGridGpu_type::d >= 3)
+		{
+			coord.set_d(0,x + ite.start.get(0));
+			coord_glob.set_d(0,x + ite.start.get(0) + ite.origin.get(0));
+			coord.set_d(1,y + ite.start.get(1));
+			coord_glob.set_d(1,y + ite.start.get(1) + ite.origin.get(1));
+			coord.set_d(2,z + ite.start.get(2));
+			coord_glob.set_d(2,z + ite.start.get(2) + ite.origin.get(2));
+		}
+
+		if (SparseGridGpu_type::d >= 2)
+		{sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1);}
+		else
+		{sg.template insert<p>(coord) = c + coord_glob.get(0) + coord_glob.get(1) + coord_glob.get(2);}

 		__syncthreads();
@@ -80,35 +92,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 	gdist.template setBackgroundValue<0>(666);

-	/////// CPU insert
-
-/*	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
-
-	while (it.isNext())
-	{
-		auto p = it.get_dist();
-
-		gdist.template insert<0>(p) = 1.0;
-
-		++it;
-	}
-
-	gdist.template flush<>();
-
-	Box<2,size_t> box2({0,0},{15,15});
-
-	auto it2 = gdist.getGridIterator(box2.getKP1(),box2.getKP2());
-
-	while (it2.isNext())
-	{
-		auto p = it2.get_dist();
-
-		std::cout << gdist.template get<0>(p) << std::endl;
-
-		++it2;
-	}*/
-
-	/////// host to device

 	/////// GPU insert + flush

 	Box<2,size_t> box({1,1},{1,1});
@@ -116,7 +99,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 	/////// GPU Run kernel

-	gdist.setInsertBuffer(128);
+	gdist.setInsertBuffer(1);

 	float c = 5.0;
@@ -137,10 +120,16 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 		if (p2.get(0) == box.getLow(0) && p2.get(1) == box.getLow(1))
 		{
-			BOOST_REQUIRE_EQUAL(gdist.template get<0>(p),5.0);
+			BOOST_REQUIRE_EQUAL(gdist.template get<0>(p),7.0);
 		}
+		else
+		{
+			if (gdist.template get<0>(p) != 666.0)
+			{
+				float f = gdist.template get<0>(p);
+				std::cout << "ERROR: " << gdist.template get<0>(p) << std::endl;
+			}
+
+			BOOST_REQUIRE_EQUAL(gdist.template get<0>(p),666.0);
+		}

@@ -148,6 +137,8 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 		}
 	}

+	return;
+//
 	c = 3.0;
@@ -189,14 +180,50 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
 			++it;
 		}
 	}
 }

 ////////////////////////////////////

+BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output )
+{
+	auto & v_cl = create_vcluster();
+
+	if (v_cl.size() > 3){return;}
+
+	size_t sz[2] = {17,17};
+	periodicity<2> bc = {PERIODIC,PERIODIC};
+	Ghost<2,long int> g(1);
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
+
+	gdist.template setBackgroundValue<0>(666);
+
+	/////// GPU insert + flush
+
+	Box<2,size_t> box({1,1},{15,15});
+	auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
+
+	/////// GPU Run kernel
+
+	gdist.setInsertBuffer(128);
-	gdist.template iterateGPU<stencil_kernel<0>>();
+	float c = 5.0;
-}
+	gdist.template iterateGridGPU<insert_kernel<0>>(it,c);
+	gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
+
+	gdist.template deviceToHost<0>();
+
+	gdist.write("sgrid_gpu_output");
+
+	std::string file_test("sgrid_gpu_output_" + std::to_string(v_cl.size()) + "_" + std::to_string(v_cl.rank()) + ".vtk");
+	std::string file("sgrid_gpu_output_" + std::to_string(v_cl.rank()) + ".vtk");
+
+	bool test = compare(file,"test_data/" + file_test);
+	BOOST_REQUIRE_EQUAL(true,test);
+}

 BOOST_AUTO_TEST_SUITE_END()
test_data/sgrid_gpu_output_1_0.vtk
new file (0 → 100644), diff collapsed
test_data/sgrid_gpu_output_2_0.vtk
new file (0 → 100644)
# vtk DataFile Version 3.0
grids
ASCII
DATASET POLYDATA
POINTS 122 float
0.058824 0.058824 0.0
0.117647 0.058824 0.0
0.176471 0.058824 0.0
0.235294 0.058824 0.0
0.294118 0.058824 0.0
0.352941 0.058824 0.0
0.058824 0.117647 0.0
0.117647 0.117647 0.0
0.176471 0.117647 0.0
0.235294 0.117647 0.0
0.294118 0.117647 0.0
0.352941 0.117647 0.0
0.058824 0.176471 0.0
0.117647 0.176471 0.0
0.176471 0.176471 0.0
0.235294 0.176471 0.0
0.294118 0.176471 0.0
0.352941 0.176471 0.0
0.058824 0.235294 0.0
0.117647 0.235294 0.0
0.176471 0.235294 0.0
0.235294 0.235294 0.0
0.294118 0.235294 0.0
0.352941 0.235294 0.0
0.058824 0.294118 0.0
0.117647 0.294118 0.0
0.176471 0.294118 0.0
0.235294 0.294118 0.0
0.294118 0.294118 0.0
0.352941 0.294118 0.0
0.058824 0.352941 0.0
0.117647 0.352941 0.0
0.176471 0.352941 0.0
0.235294 0.352941 0.0
0.294118 0.352941 0.0
0.352941 0.352941 0.0
0.411765 0.058824 0.0
0.470588 0.058824 0.0
0.529412 0.058824 0.0
0.588235 0.058824 0.0
0.647059 0.058824 0.0
0.705882 0.058824 0.0
0.411765 0.117647 0.0
0.470588 0.117647 0.0
0.529412 0.117647 0.0
0.588235 0.117647 0.0
0.647059 0.117647 0.0
0.705882 0.117647 0.0
0.411765 0.176471 0.0
0.470588 0.176471 0.0
0.529412 0.176471 0.0
0.588235 0.176471 0.0
0.647059 0.176471 0.0
0.705882 0.176471 0.0
0.411765 0.235294 0.0
0.470588 0.235294 0.0
0.529412 0.235294 0.0
0.588235 0.235294 0.0
0.647059 0.235294 0.0
0.705882 0.235294 0.0
0.411765 0.294118 0.0
0.470588 0.294118 0.0
0.529412 0.294118 0.0
0.588235 0.294118 0.0
0.647059 0.294118 0.0
0.705882 0.294118 0.0
0.411765 0.352941 0.0
0.470588 0.352941 0.0
0.529412 0.352941 0.0
0.588235 0.352941 0.0
0