openfpm_pdata, commit 852762d0
Authored Nov 19, 2015 by Pietro Incardona

Start refactorization

Parent: ed109c36
Changes: 2 files
src/Decomposition/BasicDecomposition.hpp
...
...
@@ -30,6 +30,7 @@
#include "ie_ghost.hpp"
#include "nn_processor.hpp"
#include "GraphMLWriter.hpp"
#include "ParMetisDistribution.hpp"
#define BASICDEC_ERROR 2000lu
...
...
@@ -82,7 +83,7 @@
 *
 */
-template<unsigned int dim, typename T, typename Memory = HeapMemory, template<unsigned int, typename> class Domain = Box>
+template<unsigned int dim, typename T, typename Distribution = ParMetisDistribution<dim, T>, typename Memory = HeapMemory, template<unsigned int, typename> class Domain = Box>
class BasicDecomposition : public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>
{
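A minimal usage sketch of the refactored signature (not part of this commit; `v_cl` stands for the OpenFPM virtual-cluster object that the constructor documented below expects):

    // Sketch: dim = 3, T = float; Distribution now defaults to ParMetisDistribution<3, float>.
    BasicDecomposition<3, float> dec(v_cl);
    // Equivalent, with the new default spelled out:
    BasicDecomposition<3, float, ParMetisDistribution<3, float>> dec2(v_cl);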
...
...
@@ -131,54 +132,6 @@ private:
//! Cell-list that stores the geometrical information of the local internal ghost boxes
CellList<dim, T, FAST> lgeo_cell;

//! Convert the graph to parmetis format
Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;

//! Processor sub-sub-domain graph
Graph_CSR<nm_v, nm_e> sub_g;

//! Global sub-sub-domain graph
Graph_CSR<nm_v, nm_e> gp;

//! Init vtxdist needed for Parmetis
openfpm::vector<idx_t> vtxdist;

//! partitions
openfpm::vector<openfpm::vector<idx_t>> partitions;

//! Init data structure to keep track of the new vertex distribution across processors (needed to update the main graph)
openfpm::vector<openfpm::vector<size_t>> v_per_proc;

//! Number of moved vertices in all iterations
size_t g_moved = 0;

//! Max number of moved vertices in all iterations
size_t m_moved = 0;

//! Wn for the SAR heuristic
float w_n = 0;

//! Computation cost for the SAR heuristic
float c_c = 5;

//! Number of time-steps since the previous DLB
size_t n_ts = 1;

//! Idle time accumulated so far, needed for the SAR heuristic
openfpm::vector<float> i_times;

// Vector to collect all timings
openfpm::vector<long> times;

static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
{
    openfpm::vector<openfpm::vector<idx_t>> * v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);

    v->get(i).resize(msg_i / sizeof(idx_t));

    return &(v->get(i).get(0));
}
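The callback above implements the receive-buffer contract of `sendrecvMultipleMessagesNBX` (used in `refine()` below): for each incoming message the communicator asks where to place `msg_i` bytes from sender `i`. A standalone sketch of that contract, with hypothetical sizes:

    // Hypothetical illustration of the callback contract.
    openfpm::vector<openfpm::vector<idx_t>> bufs;
    bufs.resize(1);                                  // one sender in this sketch
    void * dst = message_receive(8 * sizeof(idx_t),  // msg_i: payload of 8 idx_t
                                 8 * sizeof(idx_t),  // total_msg
                                 1,                  // total_p: one sending processor
                                 0,                  // i: sender index
                                 0,                  // ri
                                 &bufs);
    // dst now points at bufs.get(0).get(0), resized to hold the 8 incoming idx_t.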
/*! \brief Constructor: it decomposes and distributes the sub-domains across the processors
 *
 * \param v_cl Virtual cluster, used internally for communications
...
...
@@ -382,6 +335,45 @@ private:
*/
}
/*! \brief Calculate communication and migration costs
 *
 * \param gh_s ghost thickness
 * \param ts how many time-steps have passed since the last calculation, used to approximate the cost
 */
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
    size_t p_id = v_cl.getProcessUnitID();

    float migration;

    SpaceBox<dim, T> cellBox = cd.getCellBox();
    float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));

    // compute the ghost volume for the 2-dimensional case
    float gh_v = (gh_s * b_s);

    // multiply by the sub-sub-domain side for each further dimension
    for (int i = 2; i < dim; i++)
        gh_v *= b_s;

    size_t norm = (size_t) (1.0 / gh_v);

    migration = pow(b_s, dim);

    size_t prev = 0;

    for (size_t i = 0; i < gp.getNVertex(); i++)
    {
        gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();

        for (size_t s = 0; s < gp.getNChilds(i); s++)
        {
            gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
        }

        prev += gp.getNChilds(i);
    }
}
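Spelled out, the cost model assigned above reads (a sketch recovered from the code, with \(b_s\) the sub-sub-domain side and \(g_s\) the ghost thickness):

\[
\mathrm{norm} = \Big\lfloor \frac{1}{g_s\, b_s^{\,\mathrm{dim}-1}} \Big\rfloor, \qquad
\mathrm{migration}(v) = \mathrm{norm}\cdot b_s^{\,\mathrm{dim}}\cdot \mathrm{computation}(v), \qquad
\mathrm{communication}(e) = \mathrm{computation}(v)\cdot ts
\]

for every edge \(e\) leaving vertex \(v\).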
// Save the ghost boundaries
Ghost<dim, T> ghost;
...
...
@@ -420,195 +412,7 @@ private:
}
}
/* \brief Fill the graph of the processor with the first decomposition (linear)
 * Put vertices into the processor graph (different for each processor)
 *
 * \param sub_g sub graph to fill
 * \param gp main graph, source for the vertices
 * \param vtxdist array with the distribution of vertices across processors
 * \param proc_id rank of the processor
 * \param Np total number of processors
 */
void fillSubGraph()
{
    int Np = v_cl.getProcessingUnits();
    int p_id = v_cl.getProcessUnitID();

    for (size_t j = vtxdist.get(p_id), local_j = 0; j < vtxdist.get(p_id + 1); j++, local_j++)
    {
        // Add vertex
        nm_v pv = gp.vertexById(j);
        sub_g.addVertex(pv);

        // Add the edges of the vertex
        for (size_t s = 0; s < gp.getNChilds(j); s++)
        {
            nm_e pe = gp.edge(j + s);
            sub_g.template addEdge<NoCheck>(local_j, gp.getChild(j, s), pe);
        }
    }

    // Just for output purposes
    if (p_id == 0)
    {
        for (int i = 0; i < Np; i++)
        {
            for (size_t j = vtxdist.get(i); j < vtxdist.get(i + 1); j++)
            {
                gp.vertexById(j).template get<nm_v::proc_id>() = i;
            }
        }
    }
}
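`fillSubGraph` relies on the standard ParMetis vertex-distribution convention: processor `p` owns the contiguous global range `[vtxdist.get(p), vtxdist.get(p + 1))`. A tiny sketch with hypothetical numbers:

    #include <cstdio>   // for the sketch only

    // Hypothetical: Np = 4 processors, 100 vertices, uniform split.
    int vtxdist_ex[5] = {0, 25, 50, 75, 100};
    for (int p = 0; p < 4; p++)
        std::printf("proc %d owns vertices [%d, %d)\n", p, vtxdist_ex[p], vtxdist_ex[p + 1]);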
/* \brief Update the main graph and the subgraph with the partitioning stored in the partitions parameter, and renumber the graphs
 *
 * \param partitions array storing all the partitions
 * \param gp main graph
 * \param sub_g sub graph
 * \param v_per_proc array needed to reconstruct the main graph
 * \param vtxdist array with the distribution of vertices across processors
 * \param statuses array of status objects
 * \param proc_id current processor's rank
 * \param Np total number of processors
 */
void updateGraphs()
{
    int Np = v_cl.getProcessingUnits();
    int p_id = v_cl.getProcessUnitID();

    // stats info
    size_t moved = 0;

    // reset the sub graph and the local subgraph index
    int local_j = 0;
    sub_g.clear();

    // Init n_vtxdist to gather information about the new decomposition
    openfpm::vector<idx_t> n_vtxdist(Np + 1);
    for (int i = 0; i <= Np; i++)
        n_vtxdist.get(i) = 0;

    // Update the main graph with the partitions made by Parmetis on the other processors and with the local partition
    for (int i = 0; i < Np; i++)
    {
        int ndata = partitions.get(i).size();

        // Update the main graph with the received information
        for (int k = 0, l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, l++)
        {
            // Create the new n_vtxdist (1) (just count each processor's vertices)
            n_vtxdist.get(partitions.get(i).get(k) + 1)++;

            if (gp.vertexById(l).template get<nm_v::proc_id>() != partitions.get(i).get(k))
                moved++;

            // Update the proc id in the vertex
            gp.vertexById(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);

            gp.vertex(l).template get<nm_v::global_id>() = l;

            // Add the vertex to the temporary distribution structure (needed to update the main graph)
            v_per_proc.get(partitions.get(i).get(k)).add(gp.getVertexOldId(l));

            // Add the vertices belonging to this processor to the sub graph
            if (partitions.get(i).get(k) == p_id)
            {
                nm_v pv = gp.vertexById(l);
                sub_g.addVertex(pv);

                // Add the edges of the vertex
                for (size_t s = 0; s < gp.getNChildsByVertexId(l); s++)
                {
                    nm_e pe = gp.edge(l + s);
                    sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s), pe);
                }

                local_j++;
            }
        }
    }

    // Create the new n_vtxdist (2) (write the boundaries)
    for (int i = 2; i <= Np; i++)
    {
        n_vtxdist.get(i) += n_vtxdist.get(i - 1);
    }

    // Copy the new decomposition into the main vtxdist
    for (int i = 0; i <= Np; i++)
    {
        vtxdist.get(i) = n_vtxdist.get(i);
    }

    // Renumber the subgraph
    sub_g.reset_map_ids();
    for (size_t j = vtxdist.get(p_id), i = 0; j < vtxdist.get(p_id + 1); j++, i++)
    {
        sub_g.set_map_ids(j, sub_g.vertex(i).template get<nm_v::global_id>());
        sub_g.vertex(i).template get<nm_v::id>() = j;
    }

    // Renumber the main graph
    for (size_t p = 0; p < Np; p++)
    {
        for (size_t j = vtxdist.get(p), i = 0; j < vtxdist.get(p + 1); j++, i++)
        {
            gp.set_map_ids(j, v_per_proc.get(p).get(i));
            gp.vertex(v_per_proc.get(p).get(i)).template get<nm_v::id>() = j;
        }
    }

    g_moved += moved;

    if (moved > m_moved)
        m_moved = moved;
}
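A worked example of the two-pass `n_vtxdist` construction above (hypothetical counts): pass (1) writes each processor's vertex count into slot `proc + 1`, and pass (2) turns the counts into ownership boundaries by a prefix sum that can start at index 2, since slots 0 and 1 are already final:

    // Hypothetical counts: Np = 3 processors contributed {3, 5, 2} vertices,
    // written into slots 1..3 by pass (1).
    int n_vtxdist_ex[4] = {0, 3, 5, 2};
    for (int i = 2; i <= 3; i++)       // same loop shape as pass (2) above
        n_vtxdist_ex[i] += n_vtxdist_ex[i - 1];
    // n_vtxdist_ex is now {0, 3, 8, 10}: processor p owns [n_vtxdist_ex[p], n_vtxdist_ex[p + 1])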
/*! \brief Calculate communication and migration costs
 *
 * \param gh_s ghost thickness
 * \param ts how many time-steps have passed since the last calculation, used to approximate the cost
 */
void computeCommunicationAndMigrationCosts(float gh_s, size_t ts)
{
    size_t p_id = v_cl.getProcessUnitID();

    float migration;

    SpaceBox<dim, T> cellBox = cd.getCellBox();
    float b_s = (cellBox.getHigh(0) - cellBox.getLow(0));

    // compute the ghost volume for the 2-dimensional case
    float gh_v = (gh_s * b_s);

    // multiply by the sub-sub-domain side for each further dimension
    for (int i = 2; i < dim; i++)
        gh_v *= b_s;

    size_t norm = (size_t) (1.0 / gh_v);

    migration = pow(b_s, dim);

    size_t prev = 0;

    for (size_t i = 0; i < gp.getNVertex(); i++)
    {
        gp.vertex(i).template get<nm_v::migration>() = norm * migration * gp.vertex(i).template get<nm_v::computation>();

        for (size_t s = 0; s < gp.getNChilds(i); s++)
        {
            gp.edge(prev + s).template get<nm_e::communication>() = 1 * gp.vertex(i).template get<nm_v::computation>() * ts;
        }

        prev += gp.getNChilds(i);
    }
}
// Heap memory receiver
HeapMemory hp_recv;
...
...
@@ -922,99 +726,6 @@ public:
CreateDecomposition(v_cl);
}
/*! \brief Refine the current decomposition
 *
 * It refines the current decomposition using the Parmetis function RefineKWay,
 * and afterwards remaps the graph
 *
 */
void refine()
{
    size_t Np = v_cl.getProcessingUnits();
    size_t p_id = v_cl.getProcessUnitID();

    // 0.01 and 1 must be given TODO
    computeCommunicationAndMigrationCosts(0.01, n_ts);

    // Reset the parmetis graph and reconstruct it
    parmetis_graph.reset(gp, sub_g);

    // Refine
    parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);

    // Get the resulting partition for this processor
    idx_t * partition = parmetis_graph.getPartition();

    partitions.get(p_id).resize(sub_g.getNVertex());
    std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));

    // Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
    for (int i = 0; i < Np; ++i)
    {
        v_per_proc.get(i).clear();
    }

    openfpm::vector<size_t> prc;
    openfpm::vector<size_t> sz;
    openfpm::vector<void *> ptr;

    for (size_t i = 0; i < Np; i++)
    {
        if (i != v_cl.getProcessUnitID())
        {
            partitions.get(i).clear();
            prc.add(i);
            sz.add(sub_g.getNVertex() * sizeof(idx_t));
            ptr.add(partitions.get(p_id).getPointer());
        }
    }

    // Exchange information across processors
    v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions, NONE);

    // Update the graphs with the new distributions
    updateGraphs();
}
/*! \brief Gather timing information and decide whether a rebalance is needed,
 * using the SAR heuristic
 *
 */
bool balanceNeeded(long t)
{
    float t_max = 0, t_avg = 0;

    // Exchange timing information across processors
    v_cl.allGather(t, times);
    v_cl.execute();

    t_max = *(std::max_element(std::begin(times), std::end(times)));
    // if(v_cl.getProcessUnitID())
    // std::cout << "tmax: " << t_max << "\n";

    t_avg = std::accumulate(times.begin(), times.end(), 0) / v_cl.getProcessingUnits();
    // std::cout << "tavg: " << t_avg << "\n";

    // add the idle time to the vector
    i_times.add(t_max - t_avg);

    // Compute Wn
    double it_sum = *(std::max_element(std::begin(i_times), std::end(i_times)));
    float nw_n = (it_sum + c_c) / n_ts;

    if (nw_n > w_n)
    {
        i_times.clear();
        n_ts = 1;
        w_n = nw_n;
        return true;
    }
    else
    {
        ++n_ts;
        w_n = nw_n;
        return false;
    }
}
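For context, the Stop-At-Rise (SAR) criterion that `balanceNeeded` follows can be sketched as

\[
W(n) = \frac{I(n) + C}{n},
\]

where \(n\) is `n_ts` (time-steps since the last rebalance), \(C\) is the assumed rebalance cost `c_c`, and \(I(n)\) is the accumulated idle-time term (the code above plugs in the maximum element of `i_times` as `it_sum`). A rebalance is triggered as soon as \(W(n)\) rises above its previous value \(W(n-1)\), stored in `w_n`.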
/*! \brief Get the number of local sub-domains
*
* \return the number of sub-domains
...
...
@@ -1157,61 +868,6 @@ public:
return true;
}
/*! \brief Return the position of a vertex in space
 *
 * \param id vertex id
 * \param pos vector that will contain x, y, z
 *
 */
void getVertexPosition(size_t id, openfpm::vector<real_t> & pos)
{
    pos.get(0) = gp.vertex(id).template get<nm_v::x>();
    pos.get(1) = gp.vertex(id).template get<nm_v::y>();

    if (dim == 3)
        pos.get(2) = gp.vertex(id).template get<nm_v::z>();
}
/*! \brief Set the weight of a vertex
 *
 * \param id vertex id
 * \param weight computational weight to assign to the vertex
 *
 */
void setVertexWeight(size_t id, size_t weight)
{
    gp.vertex(id).template get<nm_v::computation>() = weight;
}
/*! \brief Return the number of vertices moved over all iterations so far
 *
 * \return the total number of moved vertices
 *
 */
size_t getTotalMovedV()
{
    return g_moved;
}
/*! \brief Return the maximum number of vertices moved in a single iteration so far
 *
 * \return the maximum number of moved vertices
 *
 */
size_t getMaxMovedV()
{
    return m_moved;
}
void debugPrint()
{
    std::cout << "Subdomains\n";
...
...
vtk/Makefile.am
...
...
@@ -2,17 +2,17 @@
LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS)

noinst_PROGRAMS = cart_dec metis_dec dom_box

-cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
cart_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
cart_dec_CFLAGS = $(CUDA_CFLAGS)
cart_dec_LDADD = $(LINKLIBS) -lmetis

-metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
metis_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
metis_dec_CFLAGS = $(CUDA_CFLAGS)
metis_dec_LDADD = $(LINKLIBS) -lmetis

-dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_data/src/Memleak_check.cpp
+dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
dom_box_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
dom_box_CFLAGS = $(CUDA_CFLAGS)
dom_box_LDADD = $(LINKLIBS)
...
...