Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
O
openfpm_data
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
openfpm
openfpm_data
Commits
a9632484
Commit
a9632484
authored
Jan 19, 2019
by
incardon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Adding function to cell-list for expanded point add
parent
9f134a74
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
179 additions
and
23 deletions
+179
-23
src/NN/CellList/CellDecomposer.hpp
src/NN/CellList/CellDecomposer.hpp
+105
-1
src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp
src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp
+18
-18
src/Vector/cuda/map_vector_cuda_ker.cuh
src/Vector/cuda/map_vector_cuda_ker.cuh
+3
-0
src/util/cuda/cuda_kernel_error_checker.hpp
src/util/cuda/cuda_kernel_error_checker.hpp
+9
-3
src/util/cuda/cuda_launch.hpp
src/util/cuda/cuda_launch.hpp
+1
-1
src/util/cuda/modern_gpu_tests.cu
src/util/cuda/modern_gpu_tests.cu
+43
-0
No files found.
src/NN/CellList/CellDecomposer.hpp
View file @
a9632484
...
...
@@ -233,7 +233,7 @@ public:
* \return the shift vector
*
*/
inline
const
Point
<
dim
,
T
>
&
getOrig
()
const
__device__
__host__
inline
const
Point
<
dim
,
T
>
&
getOrig
()
const
{
return
sh
;
}
...
...
@@ -587,6 +587,34 @@ class CellDecomposer_sm
return
id
;
}
/*! \brief Convert the coordinates into id with negative machine precision expansion
*
* \param x point
* \param s dimension
*
*/
inline
size_t
ConvertToID_me
(
const
Point
<
dim
,
T
>
&
x
,
size_t
s
,
size_t
sc
=
0
)
const
{
T
cc
=
t
.
transform
(
x
,
s
)
/
box_unit
.
getHigh
(
s
)
-
0.015625
;
size_t
id
=
openfpm
::
math
::
size_t_floor
(
cc
)
+
off
[
s
];
id
=
(
id
>=
gr_cell
.
size
(
s
))
?
(
gr_cell
.
size
(
s
)
-
1
-
cell_shift
.
get
(
s
))
:
id
-
cell_shift
.
get
(
s
);
return
id
;
}
/*! \brief Convert the coordinates into id with positive machine precision expansion
*
* \param x point
* \param s dimension
*
*/
inline
size_t
ConvertToID_pe
(
const
Point
<
dim
,
T
>
&
x
,
size_t
s
,
size_t
sc
=
0
)
const
{
T
cc
=
t
.
transform
(
x
,
s
)
/
box_unit
.
getHigh
(
s
)
+
0.015625
;
size_t
id
=
openfpm
::
math
::
size_t_floor
(
cc
)
+
off
[
s
];
id
=
(
id
>=
gr_cell
.
size
(
s
))
?
(
gr_cell
.
size
(
s
)
-
1
-
cell_shift
.
get
(
s
))
:
id
-
cell_shift
.
get
(
s
);
return
id
;
}
/*! \brief Convert the coordinates into id without apply shift
*
* \param x coordinate
...
...
@@ -932,6 +960,82 @@ public:
return
cell_id
;
}
/*! \brief Get the cell-ids with negative machine precision expansion
*
* Convert the point coordinates into the cell ids (Careful it include padding)
*
* \param pos Point position
*
* \return the cell-ids ad a grid_key_dx<dim>
*
*/
inline
grid_key_dx
<
dim
>
getCellGrid_me
(
const
Point
<
dim
,
T
>
&
pos
)
const
{
#ifdef SE_CLASS1
if
(
tot_n_cell
==
0
)
{
std
::
cerr
<<
"Error: "
<<
__FILE__
<<
":"
<<
__LINE__
<<
" using an uninitialized CellDecomposer"
<<
std
::
endl
;
ACTION_ON_ERROR
(
CELL_DECOMPOSER
);
}
#endif
grid_key_dx
<
dim
>
key
;
key
.
set_d
(
0
,
ConvertToID_me
(
pos
,
0
));
for
(
size_t
s
=
1
;
s
<
dim
;
s
++
)
{
#ifdef SE_CLASS1
if
((
size_t
)(
t
.
transform
(
pos
,
s
)
/
box_unit
.
getHigh
(
s
))
+
off
[
s
]
<
0
)
{
std
::
cerr
<<
"Error: "
<<
__FILE__
<<
":"
<<
__LINE__
<<
" point is not inside the cell space"
<<
std
::
endl
;
ACTION_ON_ERROR
(
CELL_DECOMPOSER
);
}
#endif
/* coverity[dead_error_line] */
key
.
set_d
(
s
,
ConvertToID_me
(
pos
,
s
));
}
return
key
;
}
/*! \brief Get the cell-ids with positive machine precision expansion
*
* Convert the point coordinates into the cell ids (Careful it include padding)
*
* \param pos Point position
*
* \return the cell-ids ad a grid_key_dx<dim>
*
*/
inline
grid_key_dx
<
dim
>
getCellGrid_pe
(
const
Point
<
dim
,
T
>
&
pos
)
const
{
#ifdef SE_CLASS1
if
(
tot_n_cell
==
0
)
{
std
::
cerr
<<
"Error: "
<<
__FILE__
<<
":"
<<
__LINE__
<<
" using an uninitialized CellDecomposer"
<<
std
::
endl
;
ACTION_ON_ERROR
(
CELL_DECOMPOSER
);
}
#endif
grid_key_dx
<
dim
>
key
;
key
.
set_d
(
0
,
ConvertToID_pe
(
pos
,
0
));
for
(
size_t
s
=
1
;
s
<
dim
;
s
++
)
{
#ifdef SE_CLASS1
if
((
size_t
)(
t
.
transform
(
pos
,
s
)
/
box_unit
.
getHigh
(
s
))
+
off
[
s
]
<
0
)
{
std
::
cerr
<<
"Error: "
<<
__FILE__
<<
":"
<<
__LINE__
<<
" point is not inside the cell space"
<<
std
::
endl
;
ACTION_ON_ERROR
(
CELL_DECOMPOSER
);
}
#endif
/* coverity[dead_error_line] */
key
.
set_d
(
s
,
ConvertToID_pe
(
pos
,
s
));
}
return
key
;
}
/*! \brief Get the cell-ids
*
* Convert the point coordinates into the cell ids (Careful it include padding)
...
...
src/NN/CellList/cuda/Cuda_cell_list_util_func.hpp
View file @
a9632484
...
...
@@ -13,7 +13,7 @@
template
<
unsigned
int
dim
,
typename
cnt_type
,
typename
ids_type
,
typename
transform
>
struct
cid_
{
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
{
cnt_type
id
=
e
[
dim
-
1
];
...
...
@@ -24,7 +24,7 @@ struct cid_
return
id
;
}
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
dim
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
1
,
cnt_type
>
&
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
dim
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
1
,
cnt_type
>
&
e
)
{
cnt_type
id
=
e
.
get
(
dim
-
1
);
...
...
@@ -35,7 +35,7 @@ struct cid_
return
id
;
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
dim
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
dim
,
cnt_type
>
&
div_c
,
openfpm
::
array
<
T
,
dim
,
cnt_type
>
&
spacing
,
const
transform
&
t
,
const
Point
<
dim
,
T
>
&
p
)
...
...
@@ -53,17 +53,17 @@ struct cid_
template
<
typename
cnt_type
,
typename
ids_type
,
typename
transform
>
struct
cid_
<
1
,
cnt_type
,
ids_type
,
transform
>
{
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
{
return
e
[
0
];
}
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
1
,
cnt_type
>
&
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
1
,
cnt_type
>
&
e
)
{
return
e
.
get
(
0
);
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
1
,
cnt_type
>
&
div_c
,
openfpm
::
array
<
T
,
1
,
cnt_type
>
&
spacing
,
const
transform
&
t
,
const
Point
<
1
,
T
>
&
p
)
...
...
@@ -75,17 +75,17 @@ struct cid_<1,cnt_type,ids_type, transform>
template
<
typename
cnt_type
,
typename
ids_type
,
typename
transform
>
struct
cid_
<
2
,
cnt_type
,
ids_type
,
transform
>
{
static
inline
__device__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
ids_type
*
e
)
{
return
e
[
0
]
+
div_c
[
0
]
*
e
[
1
];
}
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
2
,
cnt_type
>
&
e
)
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
2
,
cnt_type
>
&
e
)
{
return
e
.
get
(
0
)
+
div_c
[
0
]
*
e
.
get
(
1
);
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
const
openfpm
::
array
<
T
,
2
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
off
,
const
transform
&
t
,
...
...
@@ -96,7 +96,7 @@ struct cid_<2,cnt_type,ids_type,transform>
(
openfpm
::
math
::
uint_floor
(
t
.
transform
(
p
,
1
)
/
spacing
[
1
])
+
off
[
1
])
*
div_c
[
0
];
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
const
openfpm
::
array
<
T
,
2
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
off
,
const
transform
&
t
,
...
...
@@ -109,7 +109,7 @@ struct cid_<2,cnt_type,ids_type,transform>
return
e
[
0
]
+
e
[
1
]
*
div_c
[
0
];
}
template
<
typename
T
>
static
inline
__device__
grid_key_dx
<
2
,
ids_type
>
get_cid_key
(
const
openfpm
::
array
<
T
,
2
,
cnt_type
>
&
spacing
,
template
<
typename
T
>
static
inline
__device__
__host__
grid_key_dx
<
2
,
ids_type
>
get_cid_key
(
const
openfpm
::
array
<
T
,
2
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
off
,
const
transform
&
t
,
const
Point
<
2
,
T
>
&
p
)
...
...
@@ -123,7 +123,7 @@ struct cid_<2,cnt_type,ids_type,transform>
}
template
<
typename
U
=
cnt_type
,
typename
sfinae
=
typename
std
::
enable_if
<
std
::
is_same
<
ids_type
,
U
>
::
value
>::
type
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
2
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
2
,
cnt_type
>
&
e
)
{
return
e
.
get
(
0
)
+
e
.
get
(
1
)
*
div_c
[
0
];
...
...
@@ -135,19 +135,19 @@ template<typename cnt_type, typename ids_type,typename transform>
struct
cid_
<
3
,
cnt_type
,
ids_type
,
transform
>
{
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
const
ids_type
*
e
)
{
return
e
[
0
]
+
(
e
[
1
]
+
e
[
2
]
*
div_c
[
1
])
*
div_c
[
0
];
}
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
3
,
ids_type
>
&
e
)
{
return
e
.
get
(
0
)
+
(
e
.
get
(
1
)
+
e
.
get
(
2
)
*
div_c
[
1
])
*
div_c
[
0
];
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
const
openfpm
::
array
<
T
,
3
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
off
,
const
transform
&
t
,
...
...
@@ -159,7 +159,7 @@ struct cid_<3,cnt_type,ids_type,transform>
(
openfpm
::
math
::
uint_floor
(
t
.
transform
(
p
,
2
)
/
spacing
[
2
])
+
off
[
2
])
*
div_c
[
1
])
*
div_c
[
0
];
}
template
<
typename
T
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
template
<
typename
T
>
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
const
openfpm
::
array
<
T
,
3
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
off
,
const
transform
&
t
,
...
...
@@ -173,7 +173,7 @@ struct cid_<3,cnt_type,ids_type,transform>
return
e
[
0
]
+
(
e
[
1
]
+
e
[
2
]
*
div_c
[
1
])
*
div_c
[
0
];
}
template
<
typename
T
>
static
inline
__device__
grid_key_dx
<
3
,
ids_type
>
get_cid_key
(
const
openfpm
::
array
<
T
,
3
,
cnt_type
>
&
spacing
,
template
<
typename
T
>
static
inline
__device__
__host__
grid_key_dx
<
3
,
ids_type
>
get_cid_key
(
const
openfpm
::
array
<
T
,
3
,
cnt_type
>
&
spacing
,
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
off
,
const
transform
&
t
,
const
Point
<
3
,
T
>
&
p
)
...
...
@@ -188,7 +188,7 @@ struct cid_<3,cnt_type,ids_type,transform>
}
template
<
typename
U
=
cnt_type
,
typename
sfinae
=
typename
std
::
enable_if
<
std
::
is_same
<
ids_type
,
U
>
::
value
>::
type
>
static
inline
__device__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
static
inline
__device__
__host__
cnt_type
get_cid
(
const
openfpm
::
array
<
ids_type
,
3
,
cnt_type
>
&
div_c
,
const
grid_key_dx
<
3
,
cnt_type
>
&
e
)
{
return
e
.
get
(
0
)
+
(
e
.
get
(
1
)
+
e
.
get
(
2
)
*
div_c
[
1
])
*
div_c
[
0
];
...
...
src/Vector/cuda/map_vector_cuda_ker.cuh
View file @
a9632484
...
...
@@ -15,6 +15,9 @@ __global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type
{
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
if
(
i
>=
v_src
.
size
())
{
return
;}
// write the object in the last element
object_s_di
<
decltype
(
v_src
.
get
(
i
)),
decltype
(
v_dst
.
get
(
old_sz
+
i
)),
OBJ_ENCAP
,
args
...
>
(
v_src
.
get
(
i
),
v_dst
.
get
(
old_sz
+
i
));
}
...
...
src/util/cuda/cuda_kernel_error_checker.hpp
View file @
a9632484
...
...
@@ -116,8 +116,8 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
#include <boost/algorithm/string.hpp>
#ifdef SE_CLASS1
#define C
HECK_SE_CLASS1_PRE int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0}
;
//#define CHECK_SE_CLASS1_POST(...) cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem))
;
#define C
UDA_LAUNCH_ERROR_OBJECT std::runtime_error("Runtime vector error")
;
#define CHECK_SE_CLASS1_PRE int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
;
#define CHECK_SE_CLASS1_POST(kernel_call,...) cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem)); \
if (dev_mem[0] != 0)\
{\
...
...
@@ -127,7 +127,12 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
std::string args_s( #__VA_ARGS__ );\
std::vector<std::string> results;\
boost::split(results, args_s, [](char c){return c == ',';});\
std::cout << __FILE__ << ":" << __LINE__ << " Overflow detected in Kernel: " << kernel_call << " from the structure " << results[ea] << " property: " << prp_err << " index:(" ;\
std::string data_s;\
if (ea >= results.size())\
{data_s = "Internal";}\
else\
{data_s = results[ea];}\
std::cout << __FILE__ << ":" << __LINE__ << " Overflow detected in Kernel: " << kernel_call << " from the structure: " << data_s << " property: " << prp_err << " index:(" ;\
int i = 0; \
for ( ; i < dev_mem[4]-1 ; i++)\
{\
...
...
@@ -136,6 +141,7 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
std::cout << dev_mem[5+i];\
std::cout << ")";\
std::cout << " thread: " << "(" << dev_mem[6+i] << "," << dev_mem[7+i] << "," << dev_mem[8+i] << ")*(" << dev_mem[9+i] << "," << dev_mem[10+i] << "," << dev_mem[11+i] << ")+(" << dev_mem[12+i] << "," << dev_mem[13+i] << "," << dev_mem[14+i] << ")" << std::endl;\
ACTION_ON_ERROR(CUDA_LAUNCH_ERROR_OBJECT);\
}
#else
#define CHECK_SE_CLASS1_PRE
...
...
src/util/cuda/cuda_launch.hpp
View file @
a9632484
...
...
@@ -21,13 +21,13 @@
cuda_call<<<(grid_size),(block_size)>>>(__VA_ARGS__); \
cudaDeviceSynchronize(); \
{\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#else
...
...
src/util/cuda/modern_gpu_tests.cu
View file @
a9632484
...
...
@@ -8,6 +8,8 @@
#include "Vector/map_vector.hpp"
#include "util/cuda/moderngpu/kernel_load_balance.hxx"
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#include "util/cuda/moderngpu/kernel_reduce.hxx"
BOOST_AUTO_TEST_SUITE
(
modern_gpu_tests
)
...
...
@@ -90,5 +92,46 @@ BOOST_AUTO_TEST_CASE( modern_gpu_sort )
// Test the cell list
}
BOOST_AUTO_TEST_CASE
(
modern_gpu_reduce
)
{
std
::
cout
<<
"Test modern gpu reduce"
<<
"
\n
"
;
mgpu
::
standard_context_t
context
(
false
);
int
count
=
200030
;
openfpm
::
vector_gpu
<
aggregate
<
int
>>
vgpu
;
vgpu
.
resize
(
count
);
for
(
size_t
i
=
0
;
i
<
count
;
i
++
)
{
vgpu
.
template
get
<
0
>(
i
)
=
((
float
)
rand
()
/
RAND_MAX
)
*
17
;
}
vgpu
.
hostToDevice
<
0
>
();
CudaMemory
mem
;
mem
.
allocate
(
sizeof
(
int
));
mgpu
::
reduce
((
int
*
)
vgpu
.
template
getDeviceBuffer
<
0
>(),
count
,
(
int
*
)
mem
.
getDevicePointer
(),
mgpu
::
plus_t
<
int
>
(),
context
);
mem
.
deviceToHost
();
int
red_p
=
*
(
int
*
)
mem
.
getPointer
();
// print
int
red
=
0
;
for
(
int
i
=
0
;
i
<
count
;
i
++
)
{
red
+=
vgpu
.
template
get
<
0
>(
i
);
}
BOOST_REQUIRE_EQUAL
(
red
,
red_p
);
std
::
cout
<<
"End test modern gpu test reduce"
<<
"
\n
"
;
// Test the cell list
}
BOOST_AUTO_TEST_SUITE_END
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment