Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sbalzarini Lab
S
Software
P
Parallel Computing
OpenFPM
openfpm_data
Commits
c3bedcd4
Commit
c3bedcd4
authored
Jun 23, 2018
by
incardon
Browse files
Moving forward gpu + eliminatig warning messages
parent
aeb35be2
Changes
28
Hide whitespace changes
Inline
Side-by-side
src/Grid/gpu_test/cuda_gpu_func.cpp
View file @
c3bedcd4
...
...
@@ -42,7 +42,7 @@ BOOST_AUTO_TEST_CASE (gpu_computation_func)
auto
gcf2
=
c3
.
getGPUIterator
(
k3
,
k4
);
BOOST_REQUIRE_EQUAL
(
gcf2
.
thr
.
x
,
1
6
ul
);
BOOST_REQUIRE_EQUAL
(
gcf2
.
thr
.
x
,
1
3
ul
);
BOOST_REQUIRE_EQUAL
(
gcf2
.
thr
.
y
,
8ul
);
BOOST_REQUIRE_EQUAL
(
gcf2
.
thr
.
z
,
8ul
);
...
...
src/Grid/gpu_test/cuda_grid_unit_tests.cu
View file @
c3bedcd4
...
...
@@ -3,14 +3,25 @@
#include "Point_test.hpp"
#include <stdio.h>
/*__global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>> g1, ite_gpu ite_gpu)
__global__
void
grid_gradient_vector
(
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g1
,
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g2
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
g2
.
template
get
<
4
>(
key
)[
0
]
=
(
g1
.
template
get
<
0
>(
key
.
move
(
0
,
1
))
-
g1
.
template
get
<
0
>(
key
.
move
(
0
,
-
1
)))
/
2.0
;
g2
.
template
get
<
4
>(
key
)[
1
]
=
(
g1
.
template
get
<
0
>(
key
.
move
(
1
,
1
))
-
g1
.
template
get
<
0
>(
key
.
move
(
1
,
-
1
)))
/
2.0
;
g2
.
template
get
<
4
>(
key
)[
2
]
=
(
g1
.
template
get
<
0
>(
key
.
move
(
2
,
1
))
-
g1
.
template
get
<
0
>(
key
.
move
(
2
,
-
1
)))
/
2.0
;
}
__global__
void
grid_fill_vector
(
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g1
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
g1
.
template
get
<
4
>(
key
)[
0
]
=
1.0
;
}*/
g1
.
template
get
<
4
>(
key
)[
1
]
=
2.0
;
g1
.
template
get
<
4
>(
key
)[
2
]
=
3.0
;
}
__global__
void
compute_stencil_grid
(
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g1
,
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g2
,
ite_gpu
ite_gpu
)
__global__
void
compute_stencil_grid
(
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g1
,
grid_gpu_ker
<
3
,
Point_test
<
float
>>
g2
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
...
...
@@ -100,10 +111,17 @@ void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_test<float>> & g1, grid_g
compute_stencil_grid
<<<
gpu_it
.
thr
,
gpu_it
.
wthr
>>>
(
g1k
,
g2k
,
gpu_it
);
}
/*
void gpu_grid_fill_vector(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
void
gpu_grid_fill_vector
(
grid_gpu
<
3
,
Point_test
<
float
>>
&
g1
,
grid_key_dx
<
3
>
&
start
,
grid_key_dx
<
3
>
&
stop
)
{
auto
gpu_it
=
g1
.
getGPUIterator
(
start
,
stop
);
grid_fill_vector
<<<
gpu_it
.
thr
,
gpu_it
.
wthr
>>>
(
g1
.
toGPU
(),
gpu_it
);
}*/
}
void
gpu_grid_gradient_vector
(
grid_gpu
<
3
,
Point_test
<
float
>>
&
g1
,
grid_gpu
<
3
,
Point_test
<
float
>>
&
g2
,
grid_key_dx
<
3
>
&
start
,
grid_key_dx
<
3
>
&
stop
)
{
auto
gpu_it
=
g1
.
getGPUIterator
(
start
,
stop
);
grid_gradient_vector
<<<
gpu_it
.
thr
,
gpu_it
.
wthr
>>>
(
g1
.
toGPU
(),
g2
.
toGPU
(),
gpu_it
);
}
src/Grid/gpu_test/cuda_grid_unit_tests.cuh
View file @
c3bedcd4
...
...
@@ -16,6 +16,9 @@ void gpu_grid_3D_one(grid_gpu<3,Point_test<float>> & g);
void
gpu_grid_3D_compute_grid_stencil
(
grid_gpu
<
3
,
Point_test
<
float
>>
&
g1
,
grid_gpu
<
3
,
Point_test
<
float
>>
&
g2
,
grid_key_dx
<
3
>
&
start
,
grid_key_dx
<
3
>
&
stop
);
//void gpu_grid_fill_vector(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void
gpu_grid_fill_vector
(
grid_gpu
<
3
,
Point_test
<
float
>>
&
g1
,
grid_key_dx
<
3
>
&
start
,
grid_key_dx
<
3
>
&
stop
);
void
gpu_grid_gradient_vector
(
grid_gpu
<
3
,
Point_test
<
float
>>
&
g1
,
grid_gpu
<
3
,
Point_test
<
float
>>
&
g2
,
grid_key_dx
<
3
>
&
start
,
grid_key_dx
<
3
>
&
stop
);
#endif
/* OPENFPM_DATA_SRC_GRID_CUDA_GPU_COMPUTE_CUH_ */
src/Grid/grid_base_impl_layout.hpp
View file @
c3bedcd4
...
...
@@ -100,6 +100,8 @@ struct mem_get<p,layout,data_type,g1_type,key_type,1>
{
__host__
__device__
static
inline
auto
get
(
data_type
&
data_
,
const
g1_type
&
g1
,
const
key_type
&
v1
)
->
decltype
(
boost
::
fusion
::
at_c
<
p
>
(
data_
).
mem_r
.
operator
[](
g1
.
LinId
(
v1
)))
{
auto
diocane2
=
g1
.
LinId
(
v1
);
boost
::
fusion
::
at_c
<
p
>
(
data_
).
mem_r
.
operator
[](
diocane2
);
return
boost
::
fusion
::
at_c
<
p
>
(
data_
).
mem_r
.
operator
[](
g1
.
LinId
(
v1
));
}
...
...
src/Grid/grid_base_implementation.hpp
View file @
c3bedcd4
...
...
@@ -41,13 +41,14 @@
if (key.get(0) > ite_gpu.stop.get(0) || key.get(1) > ite_gpu.stop.get(1) || key.get(2) > ite_gpu.stop.get(2))\
{return;}
template
<
unsigned
int
dim
>
struct
ite_gpu
{
dim3
thr
;
dim3
wthr
;
grid_key_dx
<
3
>
start
;
grid_key_dx
<
3
>
stop
;
grid_key_dx
<
dim
>
start
;
grid_key_dx
<
dim
>
stop
;
};
#else
#define __host__
...
...
@@ -367,12 +368,23 @@ public:
}
#ifdef CUDA_GPU
struct
ite_gpu
getGPUIterator
(
grid_key_dx
<
dim
>
&
key1
,
grid_key_dx
<
dim
>
&
key2
,
size_t
n_thr
=
1024
)
/*! \brief Get an iterator for the GPU
*
* \param start starting point
* \param stop end point
*
*/
struct
ite_gpu
<
dim
>
getGPUIterator
(
grid_key_dx
<
dim
>
&
key1
,
grid_key_dx
<
dim
>
&
key2
,
size_t
n_thr
=
1024
)
{
size_t
n
=
n_thr
;
size_t
tot_work
=
1
;
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
tot_work
*=
key2
.
get
(
i
)
-
key1
.
get
(
i
)
+
1
;}
size_t
n
=
(
tot_work
<=
n_thr
)
?
openfpm
::
math
::
round_big_2
(
tot_work
)
:
n_thr
;
// Work to do
ite_gpu
ig
;
ite_gpu
<
dim
>
ig
;
ig
.
thr
.
x
=
1
;
ig
.
thr
.
y
=
1
;
...
...
@@ -392,11 +404,37 @@ public:
n
=
n
>>
1
;
dir
++
;
dir
%=
dim
;
}
if
(
dim
>=
1
)
{
ig
.
wthr
.
x
=
(
key2
.
get
(
0
)
-
key1
.
get
(
0
)
+
1
)
/
ig
.
thr
.
x
+
(((
key2
.
get
(
0
)
-
key1
.
get
(
0
)
+
1
)
%
ig
.
thr
.
x
!=
0
)
?
1
:
0
);}
if
(
dim
>=
2
)
{
ig
.
wthr
.
y
=
(
key2
.
get
(
1
)
-
key1
.
get
(
1
)
+
1
)
/
ig
.
thr
.
y
+
(((
key2
.
get
(
1
)
-
key1
.
get
(
1
)
+
1
)
%
ig
.
thr
.
y
!=
0
)
?
1
:
0
);}
else
{
ig
.
wthr
.
y
=
1
;}
if
(
dim
>=
3
)
{
// Roll the other dimensions on z
ig
.
wthr
.
z
=
1
;
for
(
size_t
i
=
2
;
i
<
dim
;
i
++
)
{
ig
.
wthr
.
z
*=
(
key2
.
get
(
i
)
-
key1
.
get
(
i
)
+
1
)
/
ig
.
thr
.
z
+
(((
key2
.
get
(
i
)
-
key1
.
get
(
i
)
+
1
)
%
ig
.
thr
.
z
!=
0
)
?
1
:
0
);}
}
else
{
ig
.
wthr
.
z
=
1
;}
// crop if wthr == 1
if
(
dim
>=
1
&&
ig
.
wthr
.
x
==
1
)
{
ig
.
thr
.
x
=
(
key2
.
get
(
0
)
-
key1
.
get
(
0
)
+
1
);}
if
(
dim
>=
2
&&
ig
.
wthr
.
y
==
1
)
{
ig
.
wthr
.
y
=
key2
.
get
(
1
)
-
key1
.
get
(
1
)
+
1
;}
ig
.
wthr
.
x
=
(
key2
.
get
(
0
)
-
key1
.
get
(
0
)
+
1
)
/
ig
.
thr
.
x
+
(((
key2
.
get
(
0
)
-
key1
.
get
(
0
)
+
1
)
%
ig
.
thr
.
x
!=
0
)
?
1
:
0
);
ig
.
wthr
.
y
=
(
key2
.
get
(
1
)
-
key1
.
get
(
1
)
+
1
)
/
ig
.
thr
.
y
+
(((
key2
.
get
(
1
)
-
key1
.
get
(
1
)
+
1
)
%
ig
.
thr
.
y
!=
0
)
?
1
:
0
);
ig
.
wthr
.
z
=
(
key2
.
get
(
2
)
-
key1
.
get
(
2
)
+
1
)
/
ig
.
thr
.
z
+
(((
key2
.
get
(
2
)
-
key1
.
get
(
2
)
+
1
)
%
ig
.
thr
.
z
!=
0
)
?
1
:
0
);
if
(
dim
==
3
&&
ig
.
wthr
.
z
==
1
)
{
ig
.
wthr
.
z
=
key2
.
get
(
2
)
-
key1
.
get
(
2
)
+
1
;}
ig
.
start
=
key1
;
ig
.
stop
=
key2
;
...
...
src/Grid/grid_sm.hpp
View file @
c3bedcd4
...
...
@@ -450,7 +450,7 @@ public:
}
//! Linearize a set of index
template
<
typename
a
>
inline
mem_id
Lin
(
a
v
)
const
template
<
typename
a
>
__device__
__host__
inline
mem_id
Lin
(
a
v
)
const
{
return
v
;
}
...
...
src/Grid/grid_unit_tests.hpp
View file @
c3bedcd4
...
...
@@ -947,7 +947,78 @@ BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
// We also try to fill a vectorial quantity
// gpu_grid_fill_vector(c3,zero,keyl);
gpu_grid_fill_vector
(
c3
,
zero
,
keyl
);
}
#endif
}
BOOST_AUTO_TEST_CASE
(
gpu_computation_grid_stencil_vector
)
{
#ifdef CUDA_GPU
{
size_t
sz
[
3
]
=
{
64
,
64
,
64
};
grid_gpu
<
3
,
Point_test
<
float
>
>
c3
(
sz
);
grid_gpu
<
3
,
Point_test
<
float
>
>
c2
(
sz
);
grid_key_dx
<
3
>
key1
({
1
,
1
,
1
});
grid_key_dx
<
3
>
zero
({
0
,
0
,
0
});
grid_key_dx
<
3
>
key2
({
62
,
62
,
62
});
grid_key_dx
<
3
>
keyl
({
63
,
63
,
63
});
c3
.
setMemory
();
c2
.
setMemory
();
gpu_grid_fill_vector
(
c3
,
zero
,
keyl
);
// Check property 1 is 1.0
c3
.
deviceToHost
<
4
>
();
{
auto
it
=
c3
.
getIterator
(
key1
,
key2
);
bool
good
=
true
;
while
(
it
.
isNext
())
{
auto
key
=
it
.
get
();
good
&=
c3
.
get
<
4
>
(
key
)[
0
]
==
1.0
;
good
&=
c3
.
get
<
4
>
(
key
)[
1
]
==
2.0
;
good
&=
c3
.
get
<
4
>
(
key
)[
2
]
==
3.0
;
++
it
;
}
BOOST_REQUIRE_EQUAL
(
good
,
true
);
}
// Fill c3
gpu_grid_3D_compute
(
c3
);
gpu_grid_gradient_vector
(
c3
,
c2
,
key1
,
key2
);
// Check property 1 is 1.0
c2
.
deviceToHost
<
4
>
();
{
auto
it
=
c2
.
getIterator
(
key1
,
key2
);
bool
good
=
true
;
while
(
it
.
isNext
())
{
auto
key
=
it
.
get
();
good
&=
c2
.
get
<
4
>
(
key
)[
0
]
==
1.0
;
good
&=
c2
.
get
<
4
>
(
key
)[
1
]
==
64.0
;
good
&=
c2
.
get
<
4
>
(
key
)[
2
]
==
4096.0
;
++
it
;
}
BOOST_REQUIRE_EQUAL
(
good
,
true
);
}
}
...
...
src/Grid/map_grid.hpp
View file @
c3bedcd4
...
...
@@ -193,7 +193,7 @@ public:
*/
template
<
typename
T_type
>
struct
copy_switch_memory_c
struct
copy_switch_memory_c
_no_cpy
{
//! encapsulated source object
const
typename
memory_traits_inte
<
T_type
>::
type
&
src
;
...
...
@@ -207,7 +207,7 @@ struct copy_switch_memory_c
* \param dst source encapsulated object
*
*/
inline
copy_switch_memory_c
(
const
typename
memory_traits_inte
<
T_type
>::
type
&
src
,
inline
copy_switch_memory_c
_no_cpy
(
const
typename
memory_traits_inte
<
T_type
>::
type
&
src
,
typename
memory_traits_inte
<
T_type
>::
type
&
dst
)
:
src
(
src
),
dst
(
dst
)
{
...
...
@@ -222,7 +222,47 @@ struct copy_switch_memory_c
// Increment the reference of mem
boost
::
fusion
::
at_c
<
T
::
value
>
(
dst
).
mem
->
incRef
();
boost
::
fusion
::
at_c
<
T
::
value
>
(
dst
).
mem_r
.
bind_ref
(
boost
::
fusion
::
at_c
<
T
::
value
>
(
src
).
mem_r
);
boost
::
fusion
::
at_c
<
T
::
value
>
(
dst
).
switchToDevicePtr
();
boost
::
fusion
::
at_c
<
T
::
value
>
(
dst
).
switchToDevicePtrNoCopy
();
}
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* Is mainly used to copy one encap into another encap object
*
* \tparam encap source
* \tparam encap dst
*
*/
template
<
typename
T_type
,
unsigned
int
...
prp
>
struct
copy_host_to_device
{
//! encapsulated destination object
typename
memory_traits_inte
<
T_type
>::
type
&
dst
;
//! Convert the packed properties into an MPL vector
typedef
typename
to_boost_vmpl
<
prp
...
>::
type
v_prp
;
/*! \brief constructor
*
* \param src source encapsulated object
* \param dst source encapsulated object
*
*/
inline
copy_host_to_device
(
typename
memory_traits_inte
<
T_type
>::
type
&
dst
)
:
dst
(
dst
)
{
};
//! It call the copy function for each property
template
<
typename
T
>
inline
void
operator
()(
T
&
t
)
const
{
boost
::
fusion
::
at_c
<
boost
::
mpl
::
at
<
v_prp
,
boost
::
mpl
::
int_
<
T
::
value
>>::
type
::
value
>
(
dst
).
switchToDevicePtr
();
}
};
...
...
@@ -254,7 +294,7 @@ struct grid_gpu_ker
grid_gpu_ker
(
const
grid_gpu_ker
&
cpy
)
:
g1
(
cpy
.
g1
)
{
copy_switch_memory_c
<
T
>
bp_mc
(
cpy
.
data_
,
this
->
data_
);
copy_switch_memory_c
_no_cpy
<
T
>
bp_mc
(
cpy
.
data_
,
this
->
data_
);
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
T
::
max_prop
>
>
(
bp_mc
);
}
...
...
@@ -436,13 +476,16 @@ public:
* The object created can be considered like a reference of the original
*
*/
grid_gpu_ker
<
dim
,
T
>
toGPU
()
template
<
unsigned
int
...
prp
>
grid_gpu_ker
<
dim
,
T
>
toGPU
()
{
grid_gpu_ker
<
dim
,
T
>
g
(
this
->
g1
);
copy_switch_memory_c
<
T
>
cp_mc
(
this
->
data_
,
g
.
data_
);
copy_switch_memory_c
_no_cpy
<
T
>
cp_mc
(
this
->
data_
,
g
.
data_
);
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
T
::
max_prop
>
>
(
cp_mc
);
copy_host_to_device
<
T
,
prp
...
>
cpod
(
g
.
data_
);
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
sizeof
...(
prp
)
>
>
(
cpod
);
return
g
;
}
...
...
src/Makefile.am
View file @
c3bedcd4
...
...
@@ -10,7 +10,7 @@ else
endif
noinst_PROGRAMS
=
mem_map
mem_map_SOURCES
=
../../openfpm_devices/src/Memleak_check.cpp main.cpp Grid/gpu_test/cuda_gpu_func.cpp
$(CUDA_SOURCES)
../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_SOURCES
=
../../openfpm_devices/src/Memleak_check.cpp main.cpp
NN/CellList/CellList_gpu_test.cu
Grid/gpu_test/cuda_gpu_func.cpp
$(CUDA_SOURCES)
../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_CXXFLAGS
=
$(AM_CXXFLAGS)
$(LIBHILBERT_INCLUDE)
$(CUDA_CFLAGS)
$(INCLUDES_PATH)
$(BOOST_CPPFLAGS)
-I
/usr/local/include
-I
/usr/local/libhilbert/include
mem_map_CFLAGS
=
$(CUDA_CFLAGS)
mem_map_LDADD
=
$(LINKLIBS)
...
...
src/NN/CellList/CellList.hpp
View file @
c3bedcd4
...
...
@@ -420,7 +420,7 @@ private:
size_t
bc
[
dim
];
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
bc
[
i
]
=
NON_PERIODIC
;
{
bc
[
i
]
=
NON_PERIODIC
;
}
Box
<
dim
,
long
int
>
bx
=
cd_sm
.
convertDomainSpaceIntoCellUnits
(
dom_box
,
bc
);
...
...
src/NN/CellList/CellList_gpu.hpp
0 → 100644
View file @
c3bedcd4
/*
* CellList_gpu.hpp
*
* Created on: Jun 11, 2018
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#include "config.h"
#ifdef CUDA_GPU
#include <cuda_runtime_api.h>
#include "CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "cuda/Cuda_cell_list_util_func.hpp"
constexpr
int
count
=
0
;
constexpr
int
start
=
1
;
template
<
unsigned
int
dim
,
typename
T
,
typename
Memory
,
typename
cnt_type
=
int
,
typename
ids_type
=
short
int
,
typename
transform
=
no_transform
<
dim
,
T
>
>
class
CellList_gpu
:
public
CellDecomposer_sm
<
dim
,
T
,
transform
>
{
//! \brief Cell information
openfpm
::
vector
<
aggregate
<
cnt_type
,
cnt_type
>
,
Memory
,
typename
memory_traits_inte
<
aggregate
<
cnt_type
,
cnt_type
>>::
type
,
memory_traits_inte
>
cl_n
;
//! \brief particle information
openfpm
::
vector
<
aggregate
<
ids_type
[
dim
+
1
]
>
,
Memory
,
typename
memory_traits_inte
<
aggregate
<
ids_type
[
dim
+
1
]
>>::
type
,
memory_traits_inte
>
part_ids
;
//! Spacing
CudaMemory
spacing
;
//! \brief number of sub-divisions in each direction
CudaMemory
div
;
//! Initialize the structures of the data structure
void
InitializeStructures
(
const
size_t
(
&
div
)[
dim
],
size_t
tot_n_cell
)
{
spacing
.
allocate
(
sizeof
(
T
)
*
dim
);
this
->
div
.
allocate
(
dim
*
sizeof
(
ids_type
));
T
(
&
div_p
)[
dim
]
=
*
static_cast
<
T
(
*
)[
dim
]
>
(
this
->
div
.
getPointer
());
T
(
&
spacing_p
)[
dim
]
=
*
static_cast
<
T
(
*
)[
dim
]
>
(
this
->
spacing
.
getPointer
());
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
div_p
[
i
]
=
div
[
i
];
spacing_p
[
i
]
=
this
->
getCellBox
().
getP2
().
get
(
i
);
}
// Force to copy into device
this
->
spacing
.
getDevicePointer
();
this
->
div
.
getDevicePointer
();
cl_n
.
resize
(
tot_n_cell
);
}
public:
CellList_gpu
(
const
Box
<
dim
,
T
>
&
box
,
const
size_t
(
&
div
)[
dim
],
const
size_t
pad
=
1
)
{
Initialize
(
box
,
div
,
pad
);
}
/*! Initialize the cell list
*
* \param box Domain where this cell list is living
* \param div grid size on each dimension
* \param pad padding cell
* \param slot maximum number of slot
*
*/
void
Initialize
(
const
Box
<
dim
,
T
>
&
box
,
const
size_t
(
&
div
)[
dim
],
const
size_t
pad
=
1
)
{
SpaceBox
<
dim
,
T
>
sbox
(
box
);
// Initialize point transformation
Initialize
(
sbox
,
div
,
pad
);
}
/*! Initialize the cell list constructor
*
* \param box Domain where this cell list is living
* \param div grid size on each dimension
* \param pad padding cell
* \param slot maximum number of slot
*
*/
void
Initialize
(
const
SpaceBox
<
dim
,
T
>
&
box
,
const
size_t
(
&
div
)[
dim
],
const
size_t
pad
=
1
)
{
Matrix
<
dim
,
T
>
mat
;
CellDecomposer_sm
<
dim
,
T
,
transform
>::
setDimensions
(
box
,
div
,
mat
,
pad
);
// create the array that store the number of particle on each cell and se it to 0
InitializeStructures
(
this
->
gr_cell
.
getSize
(),
this
->
gr_cell
.
size
());
}
/*! \brief construct from a list of particles
*
* \param pl Particles list
*
*/
template
<
typename
vector
>
void
construct
(
vector
&
pl
)
{
// First we set the count memory to zero
CUDA_SAFE
(
cudaMemset
(
cl_n
.
template
getDeviceBuffer
<
0
>(),
0
,
cl_n
.
size
()
*
sizeof
(
cnt_type
)));
part_ids
.
resize
(
pl
.
size
());
// Than we construct the ids
auto
ite_gpu
=
pl
.
getGPUIterator
();
subindex
<
dim
,
T
,
cnt_type
,
ids_type
><<<
ite_gpu
.
wthr
,
ite_gpu
.
thr
>>>
(
*
static_cast
<
ids_type
(
*
)[
dim
]
>
(
div
.
getDevicePointer
()),
*
static_cast
<
T
(
*
)[
dim
]
>
(
spacing
.
getDevicePointer
()),
pl
.
capacity
(),
pl
.
size
(),
static_cast
<
T
*>
(
pl
.
template
getDeviceBuffer
<
0
>()),
static_cast
<
cnt_type
*>
(
cl_n
.
template
getDeviceBuffer
<
0
>()),
static_cast
<
ids_type
*>
(
part_ids
.
template
getDeviceBuffer
<
0
>()));
}
};
#endif
#endif
/* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */
src/NN/CellList/CellList_gpu_test.cu
0 → 100644
View file @
c3bedcd4
/*
* CellList_gpu_test.cpp
*
* Created on: Jun 13, 2018
* Author: i-bird
*/
#define BOOST_GPU_ENABLED __host__ __device__
#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "util/cuda_util.hpp"
#include "CellList_gpu.hpp"
BOOST_AUTO_TEST_SUITE
(
CellList_gpu_test
)
template
<
unsigned
int
dim
,
typename
T
,
typename
cnt_type
,
typename
ids_type
>
void
test_sub_index
()
{
openfpm
::
vector
<
aggregate
<
cnt_type
,
cnt_type
>
,
CudaMemory
,
typename
memory_traits_inte
<
aggregate
<
cnt_type
,
cnt_type
>>::
type
,
memory_traits_inte
>
cl_n
;
openfpm
::
vector
<
aggregate
<
ids_type
[
dim
+
1
]
>
,
CudaMemory
,
typename
memory_traits_inte
<
aggregate
<
ids_type
[
dim
+
1
]
>>::
type
,
memory_traits_inte
>
part_ids
;
// fill with some particles
openfpm
::
vector
<
Point
<
dim
,
T
>
,
CudaMemory
,
typename
memory_traits_inte
<
Point
<
dim
,
T
>>::
type
,
memory_traits_inte
>
pl
;
// create 3 particles
Point
<
dim
,
T
>
p1
({
0.2
,
0.2
,
0.2
});
Point
<
dim
,
T
>
p2
({
0.9
,
0.2
,
0.2
});
Point
<
dim
,
T
>
p3
({
0.2
,
0.9
,
0.2
});
Point
<
dim
,
T
>
p4
({
0.2
,
0.2
,
0.9
});
Point
<
dim
,
T
>
p5
({
0.9
,
0.9
,
0.2
});
Point
<
dim
,
T
>
p6
({
0.9
,
0.2
,
0.9
});
Point
<
dim
,
T
>
p7
({
0.2
,
0.9
,
0.9
});
Point
<
dim
,
T
>
p8
({
0.9
,
0.9
,
0.9
});
Point
<
dim
,
T
>
p9
({
0.0
,
0.0
,
0.0
});
Point
<
dim
,
T
>
p10
({
0.205
,
0.205
,
0.205
});
pl
.
add
(
p1
);
pl
.
add
(
p2
);
pl
.
add
(
p3
);
pl
.
add
(
p4
);
pl
.
add
(
p5
);
pl
.
add
(
p6
);
pl
.
add
(
p7
);
pl
.
add
(
p8
);
pl
.
add
(
p9
);
pl
.
add
(
p10
);
CudaMemory
spacing
;
CudaMemory
div
;
spacing
.
allocate
(
sizeof
(
T
)
*
dim
);
div
.
allocate
(
dim
*
sizeof
(
ids_type
));
ids_type
(
&
div_p
)[
dim
]
=
*
static_cast
<
ids_type
(
*
)[
dim
]
>
(
div
.
getPointer
());
T
(
&
spacing_p
)[
dim
]
=
*
static_cast
<
T
(
*
)[
dim
]
>
(
spacing
.
getPointer
());
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
div_p
[
i
]
=
17
;
spacing_p
[
i
]
=
0.1
;
}
cl_n
.
resize
(
17
*
17
*
17
);
CUDA_SAFE
(
cudaMemset
(
cl_n
.
template
getDeviceBuffer
<
0
>(),
0
,
cl_n
.
size
()
*
sizeof
(
cnt_type
)));
part_ids
.
resize
(
9
);
size_t
sz
[
3
]
=
{
17
,
17
,
17
};
grid_sm
<
3
,
void
>
gr
(
sz
);
// Force to copy into device
spacing
.
getDevicePointer
();
div
.
getDevicePointer
();
auto
ite
=
pl
.
getGPUIterator
();
subindex
<
dim
,
T
,
cnt_type
,
ids_type
><<<
ite
.
wthr
,
ite
.
thr
>>>
(
*
static_cast
<
ids_type
(
*
)[
dim
]
>
(
div
.
getDevicePointer
()),
*
static_cast
<
T
(
*
)[
dim
]
>
(
spacing
.
getDevicePointer
()),
pl
.
capacity
(),
pl
.
size
(),
static_cast
<
T
*>
(
pl
.
template
getDeviceBuffer
<
0
>()),