Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
O
openfpm_pdata
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Package Registry
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
argupta
openfpm_pdata
Commits
6c6ae5b8
Commit
6c6ae5b8
authored
Jan 19, 2019
by
incardon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixing fine_s for CPU construction and GPU usage
parent
4672f6d5
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
30 additions
and
17 deletions
+30
-17
example/Vector/7_SPH_dlb_gpu_opt/Makefile
example/Vector/7_SPH_dlb_gpu_opt/Makefile
+1
-1
example/Vector/7_SPH_dlb_gpu_opt/main.cu
example/Vector/7_SPH_dlb_gpu_opt/main.cu
+12
-6
openfpm_data
openfpm_data
+1
-1
src/Decomposition/CartDecomposition.hpp
src/Decomposition/CartDecomposition.hpp
+2
-2
src/Decomposition/cuda/CartDecomposition_gpu.cuh
src/Decomposition/cuda/CartDecomposition_gpu.cuh
+14
-7
No files found.
example/Vector/7_SPH_dlb_gpu_opt/Makefile
View file @
6c6ae5b8
...
...
@@ -31,7 +31,7 @@ sph_dlb_test: OPT += -DTEST_RUN
sph_dlb_test
:
sph_dlb
%.o
:
%.cu
$(CUDA_CC)
-O
3
$(OPT)
-use_fast_math
-arch
=
sm_61
-lineinfo
-g
-c
-isystem
=
/home/i-bird/MPI/include
--std
=
c++11
-o
$@
$<
$(INCLUDE_PATH_NVCC)
$(CUDA_CC)
-O
0
-g
$(OPT)
-use_fast_math
-arch
=
sm_61
-lineinfo
-g
-c
-isystem
=
/home/i-bird/MPI/include
--std
=
c++11
-o
$@
$<
$(INCLUDE_PATH_NVCC)
%.o
:
%.cpp
$(CC)
-O3
$(OPT)
-g
-c
--std
=
c++11
-o
$@
$<
$(INCLUDE_PATH)
...
...
example/Vector/7_SPH_dlb_gpu_opt/main.cu
View file @
6c6ae5b8
...
...
@@ -39,6 +39,8 @@
#ifdef __NVCC__
#define PRINT_STACKTRACE
#define STOP_ON_ERROR
#define OPENMPI
#include "Vector/vector_dist.hpp"
#include <math.h>
...
...
@@ -89,9 +91,9 @@ const real_number MassBound = 0.0000767656 / 8;
// End simulation time
#ifdef TEST_RUN
const
real_number
t_end
=
0.00
05
;
const
real_number
t_end
=
0.00
1
;
#else
const
real_number
t_end
=
0.00
05
;
const
real_number
t_end
=
0.00
1
;
#endif
// Gravity acceleration
...
...
@@ -195,7 +197,7 @@ inline void EqState(particles & vd)
{
auto
it
=
vd
.
getDomainIteratorGPU
();
EqState_gpu
<<<
it
.
wthr
,
it
.
thr
>>>
(
vd
.
toKernel
(),
B
);
CUDA_LAUNCH
(
EqState_gpu
,
it
.
wthr
,
it
.
thr
,
vd
.
toKernel
(),
B
);
}
...
...
@@ -294,6 +296,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
{
// ... a
unsigned
int
a
;
GET_PARTICLE_SORT
(
a
,
NN
);
real_number
max_visc
=
0.0
f
;
...
...
@@ -391,7 +394,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
// Update the cell-list
vd
.
updateCellList
(
NN
);
calc_forces_gpu
<<<
part
.
wthr
,
part
.
thr
>>>
(
vd
.
toKernel_sorted
(),
NN
.
toKernel
(),
W_dap
,
cbar
);
CUDA_LAUNCH
(
calc_forces_gpu
,
part
.
wthr
,
part
.
thr
,
vd
.
toKernel_sorted
(),
NN
.
toKernel
(),
W_dap
,
cbar
);
vd
.
merge_sort
<
force
,
drho
,
red
>
(
NN
);
...
...
@@ -865,6 +868,7 @@ int main(int argc, char* argv[])
Vcluster
<>
&
v_cl
=
create_vcluster
();
timer
it_time
;
////// Do rebalancing every 200 timesteps
it_reb
++
;
if
(
it_reb
==
300
)
...
...
@@ -886,6 +890,7 @@ int main(int argc, char* argv[])
vd
.
map
(
RUN_ON_DEVICE
);
// make sort
vd
.
make_sort
(
NN
);
...
...
@@ -896,6 +901,7 @@ int main(int argc, char* argv[])
vd
.
ghost_get
<
type
,
rho
,
Pressure
,
velocity
>
(
RUN_ON_DEVICE
);
// Calc forces
calc_forces
(
vd
,
NN
,
max_visc
,
cnt
);
...
...
@@ -922,7 +928,7 @@ int main(int argc, char* argv[])
{
// Sensor pressure require update ghost, so we ensure that particles are distributed correctly
// and ghost are updated
/*
vd.map(RUN_ON_DEVICE);
vd
.
map
(
RUN_ON_DEVICE
);
vd
.
ghost_get
<
type
,
rho
,
Pressure
,
velocity
>
(
RUN_ON_DEVICE
);
vd
.
updateCellList
(
NN
);
...
...
@@ -960,7 +966,7 @@ int main(int argc, char* argv[])
++
ito
;
}
vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
*/
vd_out
.
write_frame
(
"Particles"
,
write
,
VTK_WRITER
|
FORMAT_BINARY
);
write
++
;
if
(
v_cl
.
getProcessUnitID
()
==
0
)
...
...
openfpm_data
@
a9632484
Subproject commit
9f134a74e1ad2dd797c0a9fa2e40754c24da4dd7
Subproject commit
a9632484c3123f103cacd2479d02c86c21b835e0
src/Decomposition/CartDecomposition.hpp
View file @
6c6ae5b8
...
...
@@ -326,8 +326,8 @@ public:
{
// get the cells this box span
const
grid_key_dx
<
dim
>
p1
=
fine_s
.
getCellGrid
(
sub_domains_global
.
template
get
<
0
>(
i
).
getP1
());
const
grid_key_dx
<
dim
>
p2
=
fine_s
.
getCellGrid
(
sub_domains_global
.
template
get
<
0
>(
i
).
getP2
());
const
grid_key_dx
<
dim
>
p1
=
fine_s
.
getCellGrid
_me
(
sub_domains_global
.
template
get
<
0
>(
i
).
getP1
());
const
grid_key_dx
<
dim
>
p2
=
fine_s
.
getCellGrid
_pe
(
sub_domains_global
.
template
get
<
0
>(
i
).
getP2
());
// Get the grid and the sub-iterator
auto
&
gi
=
fine_s
.
getGrid
();
...
...
src/Decomposition/cuda/CartDecomposition_gpu.cuh
View file @
6c6ae5b8
...
...
@@ -19,7 +19,8 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
int
cl
=
fine_s
.
getCell
(
p
);
int
n_ele
=
fine_s
.
getNelements
(
cl
);
for
(
int
i
=
0
;
i
<
n_ele
;
i
++
)
int
i
=
0
;
for
(
;
i
<
n_ele
;
i
++
)
{
e
=
fine_s
.
get
(
cl
,
i
);
...
...
@@ -29,11 +30,17 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
}
}
#if defined(SE_CLASS1)
&& !defined(__NVCC__)
#if defined(SE_CLASS1)
if
(
n_ele
==
0
)
{
std
::
cout
<<
__FILE__
<<
":"
<<
__LINE__
<<
" I cannot detect in which processor this particle go"
<<
std
::
endl
;
printf
(
"CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go"
);
return
-
1
;
}
if
(
i
==
n_ele
)
{
printf
(
"CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go because of round-off inconsistencies"
);
return
-
1
;
}
...
...
@@ -82,7 +89,7 @@ class CartDecomposition_gpu : public ie_ghost_gpu<dim,T,Memory,layout_base>
* the explanation before)
*
*/
__device__
void
applyPointBC
(
Point
<
dim
,
T
>
&
pt
)
const
__device__
__host__
void
applyPointBC
(
Point
<
dim
,
T
>
&
pt
)
const
{
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
...
...
@@ -120,7 +127,7 @@ public:
* \return processorID
*
*/
__device__
int
inline
processorIDBC
(
const
Point
<
dim
,
T
>
&
p
)
__device__
__host__
int
inline
processorIDBC
(
const
Point
<
dim
,
T
>
&
p
)
{
Point
<
dim
,
T
>
pt
=
p
;
this
->
applyPointBC
(
pt
);
...
...
@@ -137,7 +144,7 @@ public:
* the explanation before)
*
*/
template
<
typename
Mem
>
__device__
void
applyPointBC
(
encapc
<
1
,
Point
<
dim
,
T
>
,
Mem
>
&&
pt
)
const
template
<
typename
Mem
>
__device__
__host__
void
applyPointBC
(
encapc
<
1
,
Point
<
dim
,
T
>
,
Mem
>
&&
pt
)
const
{
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
...
...
@@ -154,7 +161,7 @@ public:
* \return processorID
*
*/
__device__
int
inline
processorID
(
const
Point
<
dim
,
T
>
&
pt
)
__device__
__host__
int
inline
processorID
(
const
Point
<
dim
,
T
>
&
pt
)
{
return
processorID_impl
(
pt
,
clk
,
sub_domains_global
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment