Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
O
openfpm_data
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
openfpm
openfpm_data
Commits
5d5e36df
Commit
5d5e36df
authored
Jan 10, 2021
by
i-bird
Browse files
Options
Browse Files
Download
Plain Diff
Fixing compilation
parents
7d268068
9c823a00
Pipeline
#2650
passed with stages
in 14 minutes and 26 seconds
Changes
16
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
375 additions
and
30 deletions
+375
-30
CMakeLists.txt
CMakeLists.txt
+0
-1
src/Grid/cuda/cuda_grid_gpu_int.cu
src/Grid/cuda/cuda_grid_gpu_int.cu
+2
-3
src/Grid/grid_base_implementation.hpp
src/Grid/grid_base_implementation.hpp
+8
-0
src/Grid/map_grid.hpp
src/Grid/map_grid.hpp
+26
-0
src/Packer_Unpacker/Unpacker.hpp
src/Packer_Unpacker/Unpacker.hpp
+15
-0
src/Space/Shape/Point.hpp
src/Space/Shape/Point.hpp
+1
-1
src/SparseGrid/SparseGrid.hpp
src/SparseGrid/SparseGrid.hpp
+28
-0
src/SparseGridGpu/Geometry/grid_zmb.hpp
src/SparseGridGpu/Geometry/grid_zmb.hpp
+10
-10
src/SparseGridGpu/SparseGridGpu.hpp
src/SparseGridGpu/SparseGridGpu.hpp
+112
-6
src/SparseGridGpu/SparseGridGpu_ker.cuh
src/SparseGridGpu/SparseGridGpu_ker.cuh
+47
-0
src/SparseGridGpu/SparseGridGpu_ker_util.hpp
src/SparseGridGpu/SparseGridGpu_ker_util.hpp
+64
-2
src/SparseGridGpu/SparseGridGpu_kernels.cuh
src/SparseGridGpu/SparseGridGpu_kernels.cuh
+38
-2
src/SparseGridGpu/tests/SparseGridGpu_tests.cu
src/SparseGridGpu/tests/SparseGridGpu_tests.cu
+3
-3
src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh
src/SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh
+3
-2
src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh
src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh
+10
-0
src/timer.hpp
src/timer.hpp
+8
-0
No files found.
CMakeLists.txt
View file @
5d5e36df
...
...
@@ -25,7 +25,6 @@ endif()
set
(
CMAKE_CXX_STANDARD 14
)
set
(
CMAKE_CUDA_STANDARD 14
)
set
(
Vc_DIR
"
${
Vc_ROOT
}
/lib/cmake/Vc/"
)
message
(
"Searching Vc in
${
Vc_DIR
}
"
)
...
...
src/Grid/cuda/cuda_grid_gpu_int.cu
View file @
5d5e36df
...
...
@@ -70,7 +70,7 @@ __global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy,
constexpr
int
NN_num
=
4
;
template
<
typename
celllist_type
>
/*
template<typename celllist_type>
__global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu)
...
...
@@ -98,7 +98,7 @@ __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
}
printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]);
}
}
*/
BOOST_AUTO_TEST_SUITE
(
grid_gpu_func_interp
)
...
...
@@ -107,7 +107,6 @@ BOOST_AUTO_TEST_CASE (gpu_p2m)
openfpm
::
vector_gpu
<
Point
<
3
,
float
>>
pos
;
openfpm
::
vector_gpu
<
aggregate
<
float
,
float
[
3
]
>>
prop
;
pos
.
resize
(
100
);
prop
.
resize
(
100
);
...
...
src/Grid/grid_base_implementation.hpp
View file @
5d5e36df
...
...
@@ -1256,6 +1256,14 @@ public:
}
}
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static
bool
is_unpack_header_supported
()
{
return
false
;}
/*! \brief Resize the grid
*
* Resize the grid to the old information is retained on the new grid,
...
...
src/Grid/map_grid.hpp
View file @
5d5e36df
...
...
@@ -163,6 +163,26 @@ public:
{
}
/*! \brief Stub does not do anything
*
*/
template
<
typename
pointers_type
,
typename
headers_type
,
typename
result_type
,
unsigned
int
...
prp
>
static
void
unpack_headers
(
pointers_type
&
pointers
,
headers_type
&
headers
,
result_type
&
result
,
int
n_slot
)
{}
template
<
unsigned
int
...
prp
,
typename
S2
,
typename
header_type
,
typename
ite_type
,
typename
context_type
>
void
unpack_with_headers
(
ExtPreAlloc
<
S2
>
&
mem
,
ite_type
&
sub_it
,
header_type
&
headers
,
int
ih
,
Unpack_stat
&
ps
,
context_type
&
context
,
rem_copy_opt
opt
=
rem_copy_opt
::
NONE_OPT
)
{}
/*! \brief It copy a grid
*
* \param g grid to copy
...
...
@@ -743,6 +763,12 @@ public:
{
}
/*! \brief Stub does not do anything
*
*/
static
void
unpack_headers
()
{}
/*! \brief Fill the memory with a byte
*
*/
...
...
src/Packer_Unpacker/Unpacker.hpp
View file @
5d5e36df
...
...
@@ -266,6 +266,21 @@ public:
{
obj
.
template
unpack
<
prp
...>(
mem
,
sub_it
,
ps
,
context
,
opt
);
}
template
<
typename
grid_sub_it_type
,
typename
header_type
,
typename
context_type
,
unsigned
int
...
prp
>
static
void
unpack_with_header
(
ExtPreAlloc
<
Mem
>
&
mem
,
grid_sub_it_type
&
sub_it
,
T
&
obj
,
header_type
&
header
,
int
ih
,
Unpack_stat
&
ps
,
context_type
&
context
,
rem_copy_opt
opt
)
{
obj
.
template
unpack_with_headers
<
prp
...>(
mem
,
sub_it
,
header
,
ih
,
ps
,
context
,
opt
);
}
};
...
...
src/Space/Shape/Point.hpp
View file @
5d5e36df
...
...
@@ -100,7 +100,7 @@ template<unsigned int dim ,typename T> class Point
* \param p Point
*
*/
template
<
typename
S
>
inline
Point
(
const
Point
<
dim
,
S
>
&
p
)
template
<
typename
S
>
__device__
__host__
inline
Point
(
const
Point
<
dim
,
S
>
&
p
)
{
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
get
(
i
)
=
static_cast
<
S
>
(
p
.
get
(
i
));}
...
...
src/SparseGrid/SparseGrid.hpp
View file @
5d5e36df
...
...
@@ -987,6 +987,26 @@ public:
return
sparse_grid_bck_value
<
typename
std
::
remove_reference
<
decltype
(
chunks
.
get
(
0
))
>::
type
>
(
chunks
.
get
(
0
));
}
/*! \brief Stub does not do anything
*
*/
template
<
typename
pointers_type
,
typename
headers_type
,
typename
result_type
,
unsigned
int
...
prp
>
static
void
unpack_headers
(
pointers_type
&
pointers
,
headers_type
&
headers
,
result_type
&
result
,
int
n_slot
)
{}
template
<
unsigned
int
...
prp
,
typename
S2
,
typename
header_type
,
typename
ite_type
,
typename
context_type
>
void
unpack_with_headers
(
ExtPreAlloc
<
S2
>
&
mem
,
ite_type
&
sub_it
,
header_type
&
headers
,
int
ih
,
Unpack_stat
&
ps
,
context_type
&
context
,
rem_copy_opt
opt
=
rem_copy_opt
::
NONE_OPT
)
{}
/*! \brief Get the background value
*
* \return background value
...
...
@@ -1093,6 +1113,14 @@ public:
return
get_selector
<
typename
boost
::
mpl
::
at
<
typename
T
::
type
,
boost
::
mpl
::
int_
<
p
>>::
type
>::
template
get_const
<
p
>(
chunks
,
active_cnk
,
sub_id
);
}
/*! \brief Indicate that unpacking the header is supported
*
* \return false
*
*/
static
bool
is_unpack_header_supported
()
{
return
false
;}
/*! \brief Get the reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
...
...
src/SparseGridGpu/Geometry/grid_zmb.hpp
View file @
5d5e36df
...
...
@@ -22,7 +22,7 @@
* tuned for blocked data.
*/
template
<
unsigned
int
dim
,
unsigned
int
blockEdgeSize
,
typename
indexT
>
class
grid_zmb
:
private
grid_smb
<
dim
,
blockEdgeSize
>
class
grid_zmb
:
private
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
{
public:
...
...
@@ -30,34 +30,34 @@ public:
grid_zmb
()
{}
__host__
__device__
grid_zmb
(
const
size_t
(
&
sz
)[
dim
])
:
grid_smb
<
dim
,
blockEdgeSize
>
(
sz
)
:
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
(
sz
)
{}
__host__
__device__
grid_zmb
(
const
size_t
domainBlockEdgeSize
)
:
grid_smb
<
dim
,
blockEdgeSize
>
(
domainBlockEdgeSize
)
:
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
(
domainBlockEdgeSize
)
{}
template
<
typename
T
>
__host__
__device__
grid_zmb
(
const
grid_sm
<
dim
,
T
>
blockGrid
)
:
grid_smb
<
dim
,
blockEdgeSize
>
(
blockGrid
)
:
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
(
blockGrid
)
{}
#ifdef __NVCC__
//Constructors from dim3 and uint3 objects
__host__
__device__
grid_zmb
(
const
dim3
blockDimensions
)
:
grid_smb
<
dim
,
blockEdgeSize
>
(
blockDimensions
)
:
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
(
blockDimensions
)
{}
#endif // __NVCC__
__host__
__device__
grid_zmb
(
const
grid_zmb
<
dim
,
blockEdgeSize
,
indexT
>
&
other
)
:
grid_smb
<
dim
,
blockEdgeSize
>
(
other
)
:
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
(
other
)
{}
__host__
__device__
grid_zmb
&
operator
=
(
const
grid_zmb
<
dim
,
blockEdgeSize
,
indexT
>
&
other
)
{
((
grid_smb
<
dim
,
blockEdgeSize
>
*
)
this
)
->
operator
=
(
other
);
((
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>
*
)
this
)
->
operator
=
(
other
);
return
*
this
;
}
...
...
@@ -115,7 +115,7 @@ public:
__host__
__device__
const
indexT
(
&
getSize
()
const
)[
dim
]
{
return
grid_smb
<
dim
,
blockEdgeSize
>::
getSize
();
return
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>::
getSize
();
}
...
...
@@ -123,12 +123,12 @@ public:
template
<
typename
indexT_
>
inline
__host__
__device__
grid_key_dx
<
dim
,
indexT
>
getGlobalCoord
(
const
grid_key_dx
<
dim
,
indexT_
>
&
blockCoord
,
unsigned
int
offset
)
const
{
return
grid_smb
<
dim
,
blockEdgeSize
>::
getGlobalCoord
(
blockCoord
,
offset
);
return
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>::
getGlobalCoord
(
blockCoord
,
offset
);
}
inline
indexT
getBlockSize
()
const
{
return
grid_smb
<
dim
,
blockEdgeSize
>::
getBlockSize
();
return
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>::
getBlockSize
();
}
};
...
...
src/SparseGridGpu/SparseGridGpu.hpp
View file @
5d5e36df
...
...
@@ -602,6 +602,9 @@ private:
//! Memory to remove copy finalize
ExtPreAlloc
<
CudaMemory
>
*
prAlloc_prp
;
//! shifts for chunk conversion
openfpm
::
vector_gpu
<
aggregate
<
int
[
dim
]
>>
shifts
;
bool
findNN
=
false
;
inline
void
swap_internal_remote
()
...
...
@@ -1099,7 +1102,7 @@ private:
auto
&
o_map
=
this
->
getSegmentToOutMap
();
auto
&
segments_data
=
this
->
getSegmentToMergeIndexMap
();
new_map
.
resize
(
a_map
.
size
());
new_map
.
resize
(
a_map
.
size
()
,
0
);
// construct new to old map
...
...
@@ -1386,7 +1389,7 @@ private:
if
(
n_cnk
!=
0
)
{
openfpm
::
vector_gpu
<
aggregate
<
int
[
dim
]
>>
shifts
;
shifts
.
clear
()
;
int
n_shift
=
1
;
shifts
.
add
();
...
...
@@ -3152,6 +3155,99 @@ public:
grid_src
.
copySect
.
add
(
sgs
);
}
/*! \brief Stub does not do anything
*
*/
template
<
typename
pointers_type
,
typename
headers_type
,
typename
result_type
,
unsigned
int
...
prp
>
static
void
unpack_headers
(
pointers_type
&
pointers
,
headers_type
&
headers
,
result_type
&
result
,
int
n_slot
)
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request
<
AggregateT
,
prp
...
>
spq
;
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
sizeof
...(
prp
)
>>
(
spq
);
result
.
allocate
(
sizeof
(
int
));
CUDA_LAUNCH_DIM3
((
SparseGridGpuKernels
::
unpack_headers
<
decltype
(
std
::
declval
<
self
>
().
toKernel
())
>
),
1
,
pointers
.
size
(),
pointers
.
toKernel
(),
headers
.
toKernel
(),
(
int
*
)
result
.
getDevicePointer
(),
spq
.
point_size
,
n_slot
)
}
/*! \brief unpack the sub-grid object
*
* \tparam prp properties to unpack
*
* \param mem preallocated memory from where to unpack the object
* \param sub sub-grid iterator
* \param obj object where to unpack
*
*/
template
<
unsigned
int
...
prp
,
typename
S2
,
typename
header_type
>
void
unpack_with_headers
(
ExtPreAlloc
<
S2
>
&
mem
,
SparseGridGpu_iterator_sub
<
dim
,
self
>
&
sub_it
,
header_type
&
headers
,
int
ih
,
Unpack_stat
&
ps
,
mgpu
::
ofp_context_t
&
context
,
rem_copy_opt
opt
=
rem_copy_opt
::
NONE_OPT
)
{
////////////////////////////////////////////////////////////
if
((
opt
&
rem_copy_opt
::
KEEP_GEOMETRY
)
==
false
)
{
this
->
template
addAndConvertPackedChunkToTmp
<
prp
...>(
mem
,
sub_it
,
ps
,
context
);
// readjust mem
}
else
{
// we have to increment ps by the right amount
sparsegridgpu_pack_request
<
AggregateT
,
prp
...
>
spq
;
boost
::
mpl
::
for_each_ref
<
boost
::
mpl
::
range_c
<
int
,
0
,
sizeof
...(
prp
)
>>
(
spq
);
// First get the number of chunks
size_t
n_cnk
=
headers
.
template
get
<
1
>(
ih
);
ps
.
addOffset
(
sizeof
(
size_t
));
ps
.
addOffset
(
2
*
dim
*
sizeof
(
unsigned
int
));
size_t
actual_offset
=
n_cnk
*
sizeof
(
indexT
);
unsigned
int
*
scan
=
(
unsigned
int
*
)((
unsigned
char
*
)
mem
.
getDevicePointer
()
+
ps
.
getOffset
()
+
n_cnk
*
sizeof
(
indexT
));
// Unpack number of points
// calculate the number of total points
size_t
n_pnt
=
headers
.
template
get
<
2
>(
ih
);
actual_offset
+=
align_number
(
sizeof
(
indexT
),(
n_cnk
+
1
)
*
sizeof
(
unsigned
int
));
void
*
data_base_ptr
=
(
void
*
)((
unsigned
char
*
)
mem
.
getDevicePointer
()
+
ps
.
getOffset
()
+
actual_offset
);
actual_offset
+=
align_number
(
sizeof
(
indexT
),
n_pnt
*
(
spq
.
point_size
));
short
int
*
offsets
=
(
short
int
*
)((
unsigned
char
*
)
mem
.
getDevicePointer
()
+
ps
.
getOffset
()
+
actual_offset
);
actual_offset
+=
align_number
(
sizeof
(
indexT
),
n_pnt
*
sizeof
(
short
));
actual_offset
+=
align_number
(
sizeof
(
indexT
),
n_pnt
*
sizeof
(
unsigned
char
));
scan_ptrs_cp
.
add
(
scan
);
offset_ptrs_cp
.
add
(
offsets
);
data_base_ptr_cp
.
add
(
data_base_ptr
);
ps
.
addOffset
(
actual_offset
);
}
}
/*! \brief Indicate that unpacking the header is supported
*
* \return true
*
*/
static
bool
is_unpack_header_supported
()
{
return
true
;}
/*! \brief unpack the sub-grid object
*
* \tparam prp properties to unpack
...
...
@@ -3191,7 +3287,7 @@ public:
Unpacker
<
size_t
,
S2
>::
unpack
(
mem
,
n_cnk
,
ps
);
// Unpack origin of the chunk indexing
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
/*
for (int i = 0 ; i < dim ; i++)
{
int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps);
...
...
@@ -3201,7 +3297,9 @@ public:
{
int tmp;
Unpacker<int,S2>::unpack(mem,tmp,ps);
}
}*/
ps
.
addOffset
(
2
*
dim
*
sizeof
(
unsigned
int
));
size_t
actual_offset
=
n_cnk
*
sizeof
(
indexT
);
unsigned
int
*
scan
=
(
unsigned
int
*
)((
unsigned
char
*
)
mem
.
getDevicePointer
()
+
ps
.
getOffset
()
+
n_cnk
*
sizeof
(
indexT
));
...
...
@@ -3465,7 +3563,7 @@ public:
template
<
unsigned
int
dim
,
typename
AggregateT
,
unsigned
int
blockEdgeSize
=
default_edge
<
dim
>
::
type
::
value
,
unsigned
int
threadBlockSize
=
128
,
unsigned
int
threadBlockSize
=
default_edge
<
dim
>::
tb
::
value
,
typename
indexT
=
long
int
,
template
<
typename
>
class
layout_base
=
memory_traits_inte
,
typename
linearizer
=
grid_zmb
<
dim
,
blockEdgeSize
,
indexT
>>
...
...
@@ -3474,11 +3572,19 @@ using SparseGridGpu_z = SparseGridGpu<dim,AggregateT,blockEdgeSize,threadBlockSi
template
<
unsigned
int
dim
,
typename
AggregateT
,
unsigned
int
blockEdgeSize
=
default_edge
<
dim
>
::
type
::
value
,
unsigned
int
threadBlockSize
=
128
,
unsigned
int
threadBlockSize
=
default_edge
<
dim
>::
tb
::
value
,
typename
indexT
=
int
,
template
<
typename
>
class
layout_base
=
memory_traits_inte
,
typename
linearizer
=
grid_zmb
<
dim
,
blockEdgeSize
,
indexT
>>
using
SparseGridGpu_zi
=
SparseGridGpu
<
dim
,
AggregateT
,
blockEdgeSize
,
threadBlockSize
,
indexT
,
layout_base
,
linearizer
>
;
template
<
unsigned
int
dim
,
typename
AggregateT
,
unsigned
int
blockEdgeSize
=
default_edge
<
dim
>
::
type
::
value
,
unsigned
int
threadBlockSize
=
default_edge
<
dim
>::
tb
::
value
,
typename
indexT
=
int
,
template
<
typename
>
class
layout_base
=
memory_traits_inte
,
typename
linearizer
=
grid_smb
<
dim
,
blockEdgeSize
,
indexT
>>
using
SparseGridGpu_i
=
SparseGridGpu
<
dim
,
AggregateT
,
blockEdgeSize
,
threadBlockSize
,
indexT
,
layout_base
,
linearizer
>
;
#endif //OPENFPM_PDATA_SPARSEGRIDGPU_HPP
src/SparseGridGpu/SparseGridGpu_ker.cuh
View file @
5d5e36df
...
...
@@ -81,6 +81,44 @@ public:
background
(
bck
)
{}
/*! \brief
*
* \param
*
*/
template
<
typename
headers_type
>
__device__
static
int
unpack_headers
(
headers_type
&
headers
,
unsigned
char
*
data
,
int
ih
,
int
sz_pack
)
{
size_t
n_cnk
;
if
(
sizeof
(
indexT
)
==
8
)
{
n_cnk
=
((
size_t
*
)
data
)[
0
];}
else
{
unsigned
int
dp1
=
((
unsigned
int
*
)
data
)[
0
];
unsigned
int
dp2
=
((
unsigned
int
*
)
&
data
[
4
])[
0
];
n_cnk
=
(
size_t
)
dp1
+
((
size_t
)
dp2
<<
32
);
}
headers
.
template
get
<
1
>(
ih
)
=
n_cnk
;
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<3>(ih)[i] = data[2*sizeof(size_t) + i*sizeof(int)];}
// for (int i = 0 ; i < dim ; i++)
// {headers.template get<4>(ih)[i] = data[2*sizeof(size_t) + dim*sizeof(int) + i*sizeof(int)];}
size_t
actual_offset
=
n_cnk
*
sizeof
(
indexT
);
unsigned
int
n_pnt
=
*
(
unsigned
int
*
)
&
(
data
[
sizeof
(
size_t
)
+
2
*
dim
*
sizeof
(
int
)
+
actual_offset
+
n_cnk
*
sizeof
(
unsigned
int
)]);
headers
.
template
get
<
2
>(
ih
)
=
n_pnt
;
return
sizeof
(
size_t
)
+
// byte required to pack the number of chunk packed
2
*
dim
*
sizeof
(
int
)
+
// starting point + size of the indexing packing
sizeof
(
indexT
)
*
n_cnk
+
// byte required to pack the chunk indexes
align_number_device
(
sizeof
(
indexT
),(
n_cnk
+
1
)
*
sizeof
(
unsigned
int
))
+
// byte required to pack the scan of the chunk point
align_number_device
(
sizeof
(
indexT
),
n_pnt
*
sz_pack
)
+
// byte required to pack data
align_number_device
(
sizeof
(
indexT
),
n_pnt
*
sizeof
(
short
int
))
+
// byte required to pack offsets
align_number_device
(
sizeof
(
indexT
),
n_pnt
*
sizeof
(
unsigned
char
));
// byte required to pack masks;
}
/*! \brief Get the coordinate of the block and the offset id inside the block it give the global coordinate
*
* \param blockCoord block coordinate
...
...
@@ -299,6 +337,8 @@ public:
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
pos
,
stencilSupportRadius
,
coord
);
const
unsigned
int
linId
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
return
linId
;
}
...
...
@@ -316,6 +356,8 @@ public:
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
offset
,
stencilSupportRadius
,
coord
);
return
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
}
template
<
typename
Coordtype
>
...
...
@@ -714,6 +756,9 @@ private:
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
pos
,
stencilSupportRadius
,
coord
);
const
unsigned
int
linId
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually load the data into the shared region
//ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
...
...
@@ -846,6 +891,8 @@ private:
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
pos
,
stencilSupportRadius
,
coord
);
const
unsigned
int
linId
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);
// Actually store the data from the shared region
ScalarT
*
basePtr
=
(
ScalarT
*
)
sharedRegionPtr
;
...
...
src/SparseGridGpu/SparseGridGpu_ker_util.hpp
View file @
5d5e36df
...
...
@@ -35,6 +35,54 @@ struct cross_stencil
T
xp
[
dim
];
};
/*template<unsigned int dim, unsigned int block_edge_size>
struct shift_position
{
__device__ static inline int shift(int pos, int stencilRadius)
{
int accu = 1;
int pos_s = 0;
for (int i = 0 ; i < dim ; i++)
{
pos_s += (pos % block_edge_size + stencilRadius)*accu;
accu *= (block_edge_size + 2*stencilRadius);
pos /= block_edge_size;
}
return pos_s;
}
};
template<unsigned int block_edge_size>
struct shift_position<2,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size);
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz;
}
};
template<unsigned int block_edge_size>
struct shift_position<3,block_edge_size>
{
__device__ static inline int shift(int pos, int stencilRadius)
{
unsigned int x = pos % block_edge_size;
unsigned int y = (pos / block_edge_size) % block_edge_size;
unsigned int z = (pos / (block_edge_size*block_edge_size));
unsigned int g_sz = block_edge_size + 2*stencilRadius;
return (x+stencilRadius) + (y+stencilRadius)*g_sz + (z+stencilRadius)*g_sz*g_sz;
}
};*/
template
<
unsigned
int
dim
>
struct
NNStar
{
...
...
@@ -53,7 +101,12 @@ struct NNStar
unsigned
int
d
=
offset
/
2
;
int
dPos
=
blockCoord
.
get
(
d
)
+
(
offset
%
2
)
*
2
-
1
;
blockCoord
.
set_d
(
d
,
dPos
);
neighbourPos
=
blockMap
.
get_sparse
(
sparseGrid
.
getBlockLinId
(
blockCoord
)).
id
;
int
bl
=
sparseGrid
.
getBlockLinId
(
blockCoord
);
bl
=
(
dPos
<
0
)
?-
1
:
bl
;
neighbourPos
=
blockMap
.
get_sparse
(
bl
).
id
;
}
return
neighbourPos
;
}
...
...
@@ -479,7 +532,7 @@ struct loadGhostBlock_impl<1,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
// Convert pos into a linear id accounting for the ghost offsets
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
threadIdx
.
x
,
stencilSupportRadius
,
coord
);
const
int
linId2
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
const
unsigned
int
linId2
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
unsigned
int
nnb
=
nn_blocks
.
template
get
<
0
>(
blockIdPos
*
ct_params
::
nNN
+
(
threadIdx
.
x
%
ct_params
::
nNN
));
...
...
@@ -569,6 +622,9 @@ struct loadGhostBlock_impl<2,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
threadIdx
.
x
,
stencilSupportRadius
,
coord
);
const
int
linId_b
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned
int
nnb
=
nn_blocks
.
template
get
<
0
>(
blockIdPos
*
ct_params
::
nNN
+
(
threadIdx
.
x
%
ct_params
::
nNN
));
...
...
@@ -675,6 +731,9 @@ struct loadGhostBlock_impl<3,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
threadIdx
.
x
,
stencilSupportRadius
,
coord
);
const
int
linId_b
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned
int
nnb
=
nn_blocks
.
template
get
<
0
>(
blockIdPos
*
ct_params
::
nNN
+
(
threadIdx
.
x
%
ct_params
::
nNN
));
...
...
@@ -822,6 +881,9 @@ struct loadGhostBlock_impl<7,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize
unsigned
int
coord
[
dim
];
linToCoordWithOffset
<
blockEdgeSize
>
(
threadIdx
.
x
,
stencilSupportRadius
,
coord
);
const
int
linId_b
=
coordToLin
<
blockEdgeSize
>
(
coord
,
stencilSupportRadius
);
// const unsigned int linId_b = shift_position<dim,blockEdgeSize>::shift(threadIdx.x,stencilSupportRadius);
// printf("AAA %d %d \n",linId_b,linId_b_test);
unsigned
int
nnb
=
nn_blocks
.
template
get
<
0
>(
blockIdPos
*
ct_params
::
nNN
+
(
threadIdx
.
x
%
ct_params
::
nNN
));
...
...
src/SparseGridGpu/SparseGridGpu_kernels.cuh
View file @
5d5e36df
...
...
@@ -29,6 +29,42 @@ enum mask_sparse
// Kernels for SparseGridGpu
namespace
SparseGridGpuKernels
{
template
<
typename
SparseGridGpuType
,
typename
pointers_type
,
typename
headers_type
>
__global__
void
unpack_headers
(
pointers_type
pointers
,
headers_type
headers
,
int
*
result
,
unsigned
int
sz_pack
,
int
n_slot
)
{
int
t
=
threadIdx
.
x
;
if
(
t
>
pointers
.
size
())
{
return
;}
unsigned
char
*
data_pack
=
(
unsigned
char
*
)
pointers
.
template
get
<
0
>(
t
);
while
(
data_pack
<
pointers
.
template
get
<
1
>(
t
)
)
{
int
ih
=
pointers
.
template
get
<
2
>(
t
);
if
(
n_slot
>
ih
)
{
if
(
sizeof
(
typename
SparseGridGpuType
::
indexT_
)
==
8
)
{
headers
.
template
get
<
0
>(
t
*
n_slot
+
ih
)
=
*
(
size_t
*
)
data_pack
;}
else
{
unsigned
int
dp1
=
*
(
unsigned
int
*
)
data_pack
;
unsigned
int
dp2
=
*
(
unsigned
int
*
)
&
(
data_pack
[
4
]);
headers
.
template
get
<
0
>(
t
*
n_slot
+
ih
)
=
(
size_t
)
dp1
+
(((
size_t
)
dp2
)
<<
32
);
}
data_pack
+=
sizeof
(
size_t
);