From aad5c2dd9ef562149d2cca46502748dc77627d02 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <i-bird@localhost.localdomain>
Date: Thu, 19 Nov 2015 11:07:35 -0500
Subject: [PATCH] Start refactoring

---
 src/Decomposition/MetisDistribution.hpp    |  15 +
 src/Decomposition/ParMetisDistribution.hpp | 384 +++++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 src/Decomposition/MetisDistribution.hpp
 create mode 100644 src/Decomposition/ParMetisDistribution.hpp

diff --git a/src/Decomposition/MetisDistribution.hpp b/src/Decomposition/MetisDistribution.hpp
new file mode 100644
index 000000000..783a8e1bb
--- /dev/null
+++ b/src/Decomposition/MetisDistribution.hpp
@@ -0,0 +1,15 @@
+/*
+ * MetisDistribution.hpp
+ *
+ *  Created on: Nov 19, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
+#define SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
+
+
+
+
+
+#endif /* SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_ */
diff --git a/src/Decomposition/ParMetisDistribution.hpp b/src/Decomposition/ParMetisDistribution.hpp
new file mode 100644
index 000000000..b97ef34ef
--- /dev/null
+++ b/src/Decomposition/ParMetisDistribution.hpp
@@ -0,0 +1,384 @@
+/*
+ * ParMetisDistribution.hpp
+ *
+ *  Created on: Nov 19, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
+#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
+
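+/*! \brief Distribution of sub-sub-domains based on ParMetis
+ *
+ * Distributes the vertices of a decomposition graph (sub-sub-domains)
+ * across processors with ParMetis and uses the SAR heuristic to decide
+ * when a dynamic load rebalance pays off.
+ *
+ * A minimal usage sketch (it assumes an initialized Vcluster and a
+ * global graph filled by code outside this patch):
+ *
+ * \code
+ * ParMetisDistribution<3, float> dist(v_cl);
+ *
+ * // at the end of each time-step, t_step being its measured duration
+ * if (dist.balanceNeeded(t_step))
+ *     dist.refine();
+ * \endcode
+ *
+ */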
+template<unsigned int dim, typename T>
+class ParMetisDistribution
+{
+	//! Vcluster
+	Vcluster & v_cl;
+
+	//! Wrapper object that converts the graph to the ParMetis format
+	Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
+
+	//! Processor sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> sub_g;
+
+	//! Global sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> gp;
+
+	//! vtxdist array (distribution of vertices among processors) needed by ParMetis
+	openfpm::vector<idx_t> vtxdist;
+
+	//! partitions
+	openfpm::vector<openfpm::vector<idx_t>> partitions;
+
+	//! Data structure to keep track of the new vertex distribution among processors (needed to update the main graph)
+	openfpm::vector<openfpm::vector<size_t>> v_per_proc;
+
+	//! Total number of vertices moved across all iterations
+	size_t g_moved = 0;
+
+	//! Maximum number of vertices moved in a single iteration
+	size_t m_moved = 0;
+
+	//! W_n of the SAR heuristic
+	float w_n = 0;
+
+	//! Cost C of a rebalance, used by the SAR heuristic
+	float c_c = 5;
+
+	//! Number of time-steps since the previous DLB
+	size_t n_ts = 1;
+
+	//! Idle time accumulated so far, needed for SAR heuristic
+	openfpm::vector<float> i_times;
+
+	//! Vector to collect the timings of all processors
+	openfpm::vector<long> times;
+
+	/*! \brief Fill the processor graph with the first (linear) decomposition
+	 *
+	 * Puts the vertices assigned to this processor (according to vtxdist)
+	 * into the processor sub graph sub_g; different for each processor.
+	 */
+	void fillSubGraph()
+	{
+
+		int Np = v_cl.getProcessingUnits();
+		int p_id = v_cl.getProcessUnitID();
+
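+		// vtxdist.get(p_id) .. vtxdist.get(p_id + 1) is the contiguous range
+		// of global vertex ids assigned to this processor by the initial
+		// linear decomposition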
+		for (size_t j = vtxdist.get(p_id), local_j = 0; j < vtxdist.get(p_id + 1); j++, local_j++)
+		{
+			// Add vertex
+			nm_v pv = gp.vertexById(j);
+			sub_g.addVertex(pv);
+
+			// Add edges of vertex
+			for (size_t s = 0; s < gp.getNChilds(j); s++)
+			{
+				nm_e pe = gp.edge(j + s);
+				sub_g.template addEdge<NoCheck>(local_j, gp.getChild(j, s), pe);
+			}
+		}
+
+		// Just for output purposes: tag each vertex with its processor id
+		if (p_id == 0)
+		{
+			for (int i = 0; i < Np; i++)
+			{
+				for (size_t j = vtxdist.get(i); j < vtxdist.get(i + 1); j++)
+				{
+					gp.vertexById(j).template get<nm_v::proc_id>() = i;
+				}
+			}
+		}
+	}
+
+	/*! \brief Update the main graph and the sub graph with the partitions
+	 *         computed by ParMetis, and renumber both graphs
+	 */
+	void updateGraphs()
+	{
+
+		int Np = v_cl.getProcessingUnits();
+		int p_id = v_cl.getProcessUnitID();
+
+		// statistics: number of vertices moved by this repartitioning
+		size_t moved = 0;
+
+		// reset the sub graph and the local sub graph index
+		int local_j = 0;
+		sub_g.clear();
+
+		// Init n_vtxdist to gather information about the new decomposition
+		openfpm::vector < idx_t > n_vtxdist(Np + 1);
+		for (int i = 0; i <= Np; i++)
+			n_vtxdist.get(i) = 0;
+
+		// Update main graph with other partitions made by Parmetis in other processors and the local partition
+		for (int i = 0; i < Np; i++)
+		{
+
+			int ndata = partitions.get(i).size();
+
+			// Update the main graph with the received information
+			for (int k = 0, l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, l++)
+			{
+
+				// Create new n_vtxdist (1): count the vertices assigned to each processor
+				n_vtxdist.get(partitions.get(i).get(k) + 1)++;
+
+				if (gp.vertexById(l).template get<nm_v::proc_id>() != partitions.get(i).get(k))
+					moved++;
+
+				// Update proc id in the vertex
+				gp.vertexById(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);
+				gp.vertex(l).template get<nm_v::global_id>() = l;
+
+				// Add vertex to temporary structure of distribution (needed to update main graph)
+				v_per_proc.get(partitions.get(i).get(k)).add(gp.getVertexOldId(l));
+
+				// Add vertices belonging to this processor in sub graph
+				if (partitions.get(i).get(k) == p_id)
+				{
+
+					nm_v pv = gp.vertexById(l);
+					sub_g.addVertex(pv);
+
+					// Add edges of vertex
+					for (size_t s = 0; s < gp.getNChildsByVertexId(l); s++)
+					{
+						nm_e pe = gp.edge(l + s);
+						sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s), pe);
+					}
+
+					local_j++;
+				}
+			}
+		}
+
+		// Create new n_vtxdist (2): prefix sum turns the counts into range boundaries
+		for (int i = 2; i <= Np; i++)
+		{
+			n_vtxdist.get(i) += n_vtxdist.get(i - 1);
+		}
+
+		// Copy the new decomposition in the main vtxdist
+		for (int i = 0; i <= Np; i++)
+		{
+			vtxdist.get(i) = n_vtxdist.get(i);
+		}
+
+		// Renumbering subgraph
+		sub_g.reset_map_ids();
+		for (size_t j = vtxdist.get(p_id), i = 0; j < vtxdist.get(p_id + 1); j++, i++)
+		{
+			sub_g.set_map_ids(j, sub_g.vertex(i).template get<nm_v::global_id>());
+			sub_g.vertex(i).template get<nm_v::id>() = j;
+		}
+
+		// Renumbering main graph
+		for (size_t p = 0; p < Np; p++)
+		{
+			for (size_t j = vtxdist.get(p), i = 0; j < vtxdist.get(p + 1); j++, i++)
+			{
+				gp.set_map_ids(j, v_per_proc.get(p).get(i));
+				gp.vertex(v_per_proc.get(p).get(i)).template get<nm_v::id>() = j;
+			}
+		}
+
+		g_moved += moved;
+
+		if (moved > m_moved)
+			m_moved = moved;
+
+	}
+
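+	/*! \brief Callback for sendrecvMultipleMessagesNBX
+	 *
+	 * Resizes the receive buffer of processor i to the size of the incoming
+	 * message and returns the address where the message has to be stored.
+	 */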
+	static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
+	{
+		openfpm::vector < openfpm::vector < idx_t >> *v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);
+
+		v->get(i).resize(msg_i / sizeof(idx_t));
+
+		return &(v->get(i).get(0));
+	}
+
+public:
+
+	//! constructor
+	ParMetisDistribution(Vcluster & v_cl)
+	:v_cl(v_cl)
+	{}
+
+	/*! \brief Refine the current decomposition
+	 *
+	 * It refines the current decomposition with the ParMetis function
+	 * RefineKWay and then remaps the graphs accordingly.
+	 *
+	 */
+	void refine()
+	{
+		size_t Np = v_cl.getProcessingUnits();
+		size_t p_id = v_cl.getProcessUnitID();
+
+		// TODO: 0.01 and 1 must be given as parameters
+		computeCommunicationAndMigrationCosts(0.01, n_ts);
+
+		// Reset parmetis graph and reconstruct it
+		parmetis_graph.reset(gp, sub_g);
+
+		// Refine
+		parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);
+
+		// Get result partition for this processor
+		idx_t * partition = parmetis_graph.getPartition();
+
+		partitions.get(p_id).resize(sub_g.getNVertex());
+		std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));
+
+		// Reset the structure tracking the new vertex distribution among processors
+		for (size_t i = 0; i < Np; ++i)
+		{
+			v_per_proc.get(i).clear();
+		}
+
+		openfpm::vector < size_t > prc;
+		openfpm::vector < size_t > sz;
+		openfpm::vector<void *> ptr;
+
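+		// Prepare one message for each other processor: every processor sends
+		// its local partition vector to all the others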
+		for (size_t i = 0; i < Np; i++)
+		{
+			if (i != v_cl.getProcessUnitID())
+			{
+				partitions.get(i).clear();
+				prc.add(i);
+				sz.add(sub_g.getNVertex() * sizeof(idx_t));
+				ptr.add(partitions.get(p_id).getPointer());
+			}
+		}
+
+		// Exchange the partitions between processors
+		v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
+				NONE);
+
+		// Update graphs with the new distributions
+		updateGraphs();
+	}
+
+	/*! \brief Gather the timings of all processors and decide, using the SAR
+	 *         heuristic, whether a rebalance is needed
+	 *
+	 * \param t time spent in the last time-step
+	 *
+	 * \return true if a rebalance is needed
+	 */
+	bool balanceNeeded(long t)
+	{
+		float t_max = 0, t_avg = 0;
+
+		// Exchange timing information between processors
+		v_cl.allGather(t, times);
+		v_cl.execute();
+
+		t_max = *(std::max_element(std::begin(times), std::end(times)));
+
+		t_avg = std::accumulate(std::begin(times), std::end(times), 0L) / (float) v_cl.getProcessingUnits();
+
+		// add idle time to vector
+		i_times.add(t_max - t_avg);
+
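+		// SAR (Stop At Rise) heuristic: W_n = (sum of idle times + C) / n,
+		// where C (c_c) is the cost of a rebalance and n (n_ts) the number of
+		// time-steps since the last one; rebalance as soon as W_n rises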
+		// Compute W_n from the accumulated idle times
+		float it_sum = std::accumulate(std::begin(i_times), std::end(i_times), 0.0f);
+		float nw_n = (it_sum + c_c) / n_ts;
+
+		if (nw_n > w_n)
+		{
+			i_times.clear();
+			n_ts = 1;
+			w_n = nw_n;
+			return true;
+		}
+		else
+		{
+			++n_ts;
+			w_n = nw_n;
+			return false;
+		}
+	}
+
+	/*! \brief Return the position of the vertex in space
+	 *
+	 * \param id vertex id
+	 * \param pos vector (of size dim) that will be filled with x, y, z
+	 *
+	 */
+	void getVertexPosition(size_t id, openfpm::vector<real_t> &pos)
+	{
+		pos.get(0) = gp.vertex(id).template get<nm_v::x>();
+		pos.get(1) = gp.vertex(id).template get<nm_v::y>();
+
+		if (dim == 3)
+			pos.get(2) = gp.vertex(id).template get<nm_v::z>();
+	}
+
+	/*! \brief Set the computation weight of the vertex
+	 *
+	 * \param id vertex id
+	 * \param weight computation weight to assign
+	 *
+	 */
+	void setVertexWeight(size_t id, size_t weight)
+	{
+		gp.vertex(id).template get<nm_v::computation>() = weight;
+	}
+
+	/*! \brief Return the total number of vertices moved across all iterations so far
+	 *
+	 * \return total number of moved vertices
+	 *
+	 */
+	size_t getTotalMovedV()
+	{
+		return g_moved;
+	}
+
+	/*! \brief Return the maximum number of vertices moved in a single iteration
+	 *
+	 * \return maximum number of moved vertices
+	 *
+	 */
+	size_t getMaxMovedV()
+	{
+		return m_moved;
+	}
+};
+
+#endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
-- 
GitLab