CartDecomposition.hpp 40.6 KB
Newer Older
incardon's avatar
incardon committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * CartDecomposition.hpp
 *
 *  Created on: Aug 15, 2014
 *      Author: Pietro Incardona
 */

#ifndef CARTDECOMPOSITION_HPP
#define CARTDECOMPOSITION_HPP

#include "config.h"
#include "Decomposition.hpp"
incardon's avatar
incardon committed
13
#include "Vector/map_vector.hpp"
incardon's avatar
incardon committed
14 15 16 17 18 19 20
#include <vector>
#include "global_const.hpp"
#include <initializer_list>
#include "SubdomainGraphNodes.hpp"
#include "metis_util.hpp"
#include "dec_optimizer.hpp"
#include "Space/Shape/Box.hpp"
incardon's avatar
incardon committed
21
#include "Space/Shape/Point.hpp"
incardon's avatar
incardon committed
22
#include "NN/CellList/CellDecomposer.hpp"
incardon's avatar
incardon committed
23 24
#include <unordered_map>
#include "NN/CellList/CellList.hpp"
incardon's avatar
incardon committed
25
#include "Space/Ghost.hpp"
incardon's avatar
incardon committed
26 27
#include "common.hpp"
#include "ie_loc_ghost.hpp"
28 29
#include "ie_ghost.hpp"
#include "nn_processor.hpp"
incardon's avatar
incardon committed
30 31 32 33 34 35 36 37 38 39

/**
 * \brief This class decomposes a space into subspaces
 *
 * \tparam dim is the dimensionality of the physical domain we are going to decompose.
 * \tparam T type of the space we decompose, Real, Integer, Complex ...
 * \tparam layout to use
 * \tparam Memory Memory factory used to allocate memory
 * \tparam Domain Structure that contain the information of your physical domain
 *
40 41
 * Given an N-dimensional space, this class decompose the space into a Cartesian grid of small
 * sub-sub-domain. At each sub-sub-domain is assigned  an id that identify which processor is
incardon's avatar
incardon committed
42
 * going to take care of that part of space (in general the space assigned to a processor is
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
 * simply connected), a second step merge several sub-sub-domain with same id into bigger region
 *  sub-domain with the id. Each sub-domain has an extended space called ghost part
 *
 * Assuming that VCluster.getProcessUnitID(), equivalent to the MPI processor rank, return the processor local
 * processor id, we define
 *
 * * local sub-domain: all the sub-domain with id == local processor
 * * external ghost box: (or ghost box) are the boxes that compose the ghost space of the processor, or the
 *   boxes produced expanding every local sub-domain by the ghost extension and intersecting with the sub-domain
 *   of the other processors
 * * Near processors are the processors adjacent to the local processor, where with adjacent we mean all the processor
 *   that has a non-zero intersection with the ghost part of the local processor, or all the processors that
 *   produce non-zero external boxes with the local processor, or all the processor that should communicate
 *   in case of ghost data synchronization
 * * internal ghost box: is the part of ghost of the near processor that intersect the space of the
 *       processor, or the boxes produced expanding the sub-domain of the near processors with the local sub-domain
 * * Near processor sub-domain: is a sub-domain that lives in a near (or contiguous) processor
 * * Near processor list: the list of all the near processor of the local processor (each processor has a list
 *                        of the near processor)
incardon's avatar
incardon committed
62
 * * Local ghosts, internal or external, are all the ghosts that do not involve inter-processor communication
63 64
 *
 * \see calculateGhostBoxes() for a visualization of internal and external ghost boxes
incardon's avatar
incardon committed
65
 *
incardon's avatar
incardon committed
66 67
 */

incardon's avatar
incardon committed
68
template<unsigned int dim, typename T, template<typename> class device_l=openfpm::device_cpu, typename Memory=HeapMemory, template<unsigned int, typename> class Domain=Box>
69
class CartDecomposition : public ie_loc_ghost<dim,T>, public nn_prcs<dim,T>
incardon's avatar
incardon committed
70
{
incardon's avatar
incardon committed
71

incardon's avatar
incardon committed
72
public:
incardon's avatar
incardon committed
73

incardon's avatar
incardon committed
74 75 76 77 78 79 80 81
	//! Type of the domain we are going to decompose
	typedef T domain_type;

	//! It simplify to access the SpaceBox element
	typedef SpaceBox<dim,T> Box;

private:

incardon's avatar
incardon committed
82
	//! This is the key type to access  data_s, for example in the case of vector
incardon's avatar
incardon committed
83
	//! acc_key is size_t
incardon's avatar
incardon committed
84
	typedef typename openfpm::vector<SpaceBox<dim,T>,device_l<SpaceBox<dim,T>>,Memory,openfpm::vector_grow_policy_default,openfpm::vect_isel<SpaceBox<dim,T>>::value >::access_key acc_key;
incardon's avatar
incardon committed
85 86

	//! the set of all local sub-domain as vector
incardon's avatar
incardon committed
87
	openfpm::vector<SpaceBox<dim,T>> sub_domains;
incardon's avatar
incardon committed
88

89
	//! for each sub-domain (first vector), contain the list (nested vector) of the neighborhood processors
incardon's avatar
incardon committed
90
	//! and for each processor contain the boxes calculated from the intersection
91 92
	//! of the sub-domains + ghost with the near-by processor sub-domain () and the other way around
	//! \see calculateGhostBoxes
93
	openfpm::vector< openfpm::vector< Box_proc<dim,T> > > box_nn_processor_int;
incardon's avatar
incardon committed
94

95
	//! It store the same information of box_nn_processor_int organized by processor id
96
	openfpm::vector< Box_dom<dim,T> > proc_int_box;
97

incardon's avatar
incardon committed
98
	//! for each sub-domain, contain the list of the neighborhood processors
incardon's avatar
incardon committed
99 100
	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;

incardon's avatar
incardon committed
101
	//! Structure that contain for each sub-sub-domain box the processor id
incardon's avatar
Add ORB  
incardon committed
102
	//! exist for efficient global communication
incardon's avatar
incardon committed
103 104
	openfpm::vector<size_t> fine_s;

incardon's avatar
incardon committed
105 106
	//! Structure that store the cartesian grid information
	grid_sm<dim,void> gr;
incardon's avatar
incardon committed
107

incardon's avatar
incardon committed
108 109 110
	//! Structure that decompose your structure into cell without creating them
	//! useful to convert positions to CellId or sub-domain id in this case
	CellDecomposer_sm<dim,T> cd;
incardon's avatar
incardon committed
111 112 113 114 115 116 117 118 119 120

	//! rectangular domain to decompose
	Domain<dim,T> domain;

	//! Box Spacing
	T spacing[dim];

	//! Runtime virtual cluster machine
	Vcluster & v_cl;

incardon's avatar
incardon committed
121
	//! Cell-list that store the geometrical information of the internal ghost boxes
incardon's avatar
incardon committed
122 123
	CellList<dim,T,FAST> geo_cell;

incardon's avatar
incardon committed
124 125 126
	//! Cell-list that store the geometrical information of the local internal ghost boxes
	CellList<dim,T,FAST> lgeo_cell;

incardon's avatar
incardon committed
127

incardon's avatar
incardon committed
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
	/*! \brief Create internally the decomposition
	 *
     * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	void CreateDecomposition(Vcluster & v_cl)
	{
		// Calculate the total number of box and and the spacing
		// on each direction
		// Get the box containing the domain
		SpaceBox<dim,T> bs = domain.getBox();

		for (unsigned int i = 0; i < dim ; i++)
		{
			// Calculate the spacing
incardon's avatar
incardon committed
143
			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
incardon's avatar
incardon committed
144 145 146 147
		}

		// Here we use METIS
		// Create a cartesian grid graph
incardon's avatar
incardon committed
148
		CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
incardon's avatar
incardon committed
149 150

		// Processor graph
incardon's avatar
incardon committed
151
		Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(gr.getSize(),domain);
incardon's avatar
incardon committed
152 153 154 155 156 157 158 159

		// Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		// Get the processor id
		long int p_id = v_cl.getProcessUnitID();

		// Convert the graph to metis
incardon's avatar
incardon committed
160
		Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);
incardon's avatar
incardon committed
161 162

		// decompose
incardon's avatar
incardon committed
163
		met.decompose<nm_part_v::id>();
incardon's avatar
incardon committed
164

incardon's avatar
Add ORB  
incardon committed
165
		// fill the structure that store the processor id for each sub-domain
incardon's avatar
incardon committed
166
		fine_s.resize(gr.size());
incardon's avatar
Add ORB  
incardon committed
167

incardon's avatar
incardon committed
168 169
		// Optimize the decomposition creating bigger spaces
		// And reducing Ghost over-stress
incardon's avatar
incardon committed
170
		dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize());
incardon's avatar
incardon committed
171 172 173 174

		// set of Boxes produced by the decomposition optimizer
		openfpm::vector<::Box<dim,size_t>> loc_box;

incardon's avatar
incardon committed
175
		// optimize the decomposition
incardon's avatar
incardon committed
176 177
		d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor);

incardon's avatar
incardon committed
178 179 180
		// Initialize ss_box and bbox
		if (loc_box.size() >= 0)
		{
181 182
			SpaceBox<dim,size_t> sub_dc = loc_box.get(0);
			SpaceBox<dim,T> sub_d(sub_dc);
incardon's avatar
incardon committed
183 184 185
			sub_d.mul(spacing);
			sub_d.expand(spacing);

186 187 188 189 190 191 192 193 194 195 196 197 198
			// Fixing sub-domains to cover all the domain

			// Fixing sub_d
			// if (loc_box) is a the boundary we have to ensure that the box span the full
			// domain (avoiding rounding off error)
			for (size_t i = 0 ; i < dim ; i++)
			{
				if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
				{
					sub_d.setHigh(i,domain.getHigh(i));
				}
			}

incardon's avatar
incardon committed
199 200 201 202
			// add the sub-domain
			sub_domains.add(sub_d);

			ss_box = sub_d;
incardon's avatar
incardon committed
203
			ss_box -= ss_box.getP1();
incardon's avatar
incardon committed
204 205 206
			bbox = sub_d;
		}

incardon's avatar
incardon committed
207
		// convert into sub-domain
incardon's avatar
incardon committed
208
		for (size_t s = 1 ; s < loc_box.size() ; s++)
incardon's avatar
incardon committed
209
		{
210 211
			SpaceBox<dim,size_t> sub_dc = loc_box.get(s);
			SpaceBox<dim,T> sub_d(sub_dc);
incardon's avatar
incardon committed
212

incardon's avatar
incardon committed
213 214 215
			// re-scale and add spacing (the end is the starting point of the next domain + spacing)
			sub_d.mul(spacing);
			sub_d.expand(spacing);
incardon's avatar
incardon committed
216

217 218 219 220 221 222 223 224 225 226 227 228 229
			// Fixing sub-domains to cover all the domain

			// Fixing sub_d
			// if (loc_box) is a the boundary we have to ensure that the box span the full
			// domain (avoiding rounding off error)
			for (size_t i = 0 ; i < dim ; i++)
			{
				if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
				{
					sub_d.setHigh(i,domain.getHigh(i));
				}
			}

incardon's avatar
incardon committed
230 231
			// add the sub-domain
			sub_domains.add(sub_d);
incardon's avatar
incardon committed
232 233 234 235 236 237

			// Calculate the bound box
			bbox.enclose(sub_d);

			// Create the smallest box contained in all sub-domain
			ss_box.contained(sub_d);
incardon's avatar
incardon committed
238
		}
incardon's avatar
incardon committed
239

240 241
		nn_prcs<dim,T>::create(box_nn_processor, sub_domains);

incardon's avatar
incardon committed
242
		// fill fine_s structure
incardon's avatar
incardon committed
243 244 245
		// fine_s structure contain the processor id for each sub-sub-domain
		// with sub-sub-domain we mean the sub-domain decomposition before
		// running dec_optimizer (before merging sub-domains)
incardon's avatar
incardon committed
246 247 248 249 250 251 252 253 254 255 256
		auto it = gp.getVertexIterator();

		while (it.isNext())
		{
			size_t key = it.get();

			// fill with the fine decomposition
			fine_s.get(key) = gp.template vertex_p<nm_part_v::id>(key);

			++it;
		}
257 258 259 260 261 262

		// Get the smallest sub-division on each direction
		::Box<dim,T> unit = getSmallestSubdivision();
		// Get the processor bounding Box
		::Box<dim,T> bound = getProcessorBounds();

incardon's avatar
incardon committed
263
		// calculate the sub-divisions
264 265
		size_t div[dim];
		for (size_t i = 0 ; i < dim ; i++)
incardon's avatar
incardon committed
266
			div[i] = (size_t)((bound.getHigh(i) - bound.getLow(i)) / unit.getHigh(i));
267 268 269 270 271 272 273 274 275 276

		// Create shift
		Point<dim,T> orig;

		// p1 point of the Processor bound box is the shift
		for (size_t i = 0 ; i < dim ; i++)
			orig.get(i) = bound.getLow(i);

		// Initialize the geo_cell structure
		geo_cell.Initialize(domain,div,orig);
incardon's avatar
incardon committed
277
		lgeo_cell.Initialize(domain,div,orig);
incardon's avatar
incardon committed
278 279
	}

incardon's avatar
incardon committed
280 281 282
	// Save the ghost boundaries
	Ghost<dim,T> ghost;

incardon's avatar
incardon committed
283

incardon's avatar
incardon committed
284 285 286 287 288 289 290 291 292
	/*! \brief Create the subspaces that decompose your domain
	 *
	 * Create the subspaces that decompose your domain
	 *
	 */

	void CreateSubspaces()
	{
		// Create a grid where each point is a space
293
		grid_sm<dim,void> g(div);
incardon's avatar
incardon committed
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321

		// create a grid_key_dx iterator
		grid_key_dx_iterator<dim> gk_it(g);

		// Divide the space into subspaces
		while (gk_it.isNext())
		{
			//! iterate through all subspaces
			grid_key_dx<dim> key = gk_it.get();

			//! Create a new subspace
			SpaceBox<dim,T> tmp;

			//! fill with the Margin of the box
			for (int i = 0 ; i < dim ; i++)
			{
				tmp.setHigh(i,(key.get(i)+1)*spacing[i]);
				tmp.setLow(i,key.get(i)*spacing[i]);
			}

			//! add the space box
			sub_domains.add(tmp);

			// add the iterator
			++gk_it;
		}
	}

322 323 324 325 326 327 328 329 330 331 332 333 334 335
	/*! \brief Create the box_nn_processor_int (bx part)  structure
	 *
	 * For each local sub-domain enlarged by the ghost, it stores the boxes that come from
	 * the intersection with the sub-domains of the near processors (External ghost box).
	 * Every box found is stored in vb_ext, in the bx part of box_nn_processor_int and in
	 * the ebx part of proc_int_box
	 *
	 * \param ghost margins
	 *
	 * \note Are the G8_0 G9_0 G9_1 G5_0 boxes in calculateGhostBoxes
	 * \see calculateGhostBoxes
	 *
	 */
	void create_box_nn_processor_ext(Ghost<dim,T> & ghost)
	{
		box_nn_processor_int.resize(sub_domains.size());
		proc_int_box.resize(nn_prcs<dim,T>::getNNProcessors());

		// Loop over the local sub-domains
		for (size_t sub = 0 ; sub < sub_domains.size() ; ++sub)
		{
			// local sub-domain enlarged by the ghost
			SpaceBox<dim,T> grown = sub_domains.get(sub);
			grown.enlarge(ghost);

			// one entry for each processor adjacent to this sub-domain
			box_nn_processor_int.get(sub).resize(box_nn_processor.get(sub).size());

			// Loop over the processors adjacent to this sub-domain
			for (size_t n = 0 ; n < box_nn_processor.get(sub).size() ; ++n)
			{
				// adjacent processor
				size_t p_id = box_nn_processor.get(sub).get(n);

				// boxes of proc_int_box related to the processor p_id
				Box_dom<dim,T> & by_proc = proc_int_box.get(nn_prcs<dim,T>::ProctoID(p_id));

				// sub-domains of the adjacent processor p_id
				const openfpm::vector< ::Box<dim,T> > & remote_subs = nn_prcs<dim,T>::getAdjacentSubdomain(p_id);

				// where the intersections for (sub, n) are collected
				openfpm::vector< ::Box<dim,T> > & ext_boxes = box_nn_processor_int.get(sub).get(n).bx;

				// Intersect each remote sub-domain with the enlarged local sub-domain
				for (size_t b = 0 ; b < remote_subs.size() ; ++b)
				{
					::Box<dim,T> bi;

					const bool hit = grown.Intersect(::Box<dim,T>(remote_subs.get(b)),bi);

					// nothing to store without intersection
					if (hit != true)
						continue;

					p_box pb;

					pb.box = bi;
					pb.proc = p_id;
					pb.lc_proc = nn_prcs<dim,T>::ProctoID(p_id);

					//
					// vb_ext, box_nn_processor_int and proc_int_box all store the same
					// information organized in different ways, read the description of
					// each for more information
					//
					vb_ext.add(pb);
					ext_boxes.add(bi);
					by_proc.ebx.add();
					by_proc.ebx.last() = bi;
					by_proc.ebx.last().sub = sub;

					// Search the position of this sub-domain in the internal adjacent
					// sub-domain list of the processor
					size_t p_idp = nn_prcs<dim,T>::ProctoID(p_id);
					size_t k = 0;

					while (k < nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).size() &&
					       !(nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).get(k) == sub))
					{
						++k;
					}

					if (k == nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).size())
						std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " sub-domain not found\n";

					// id of the box, built from the position found, the remote
					// sub-domain index and the processor id
					by_proc.ebx.last().id = (k * remote_subs.size() + b) * v_cl.getProcessingUnits() + p_id;
				}
			}
		}
	}

	/*! \brief Create the box_nn_processor_int (nbx part) structure, the geo_cell list and proc_int_box
	 *
	 * For each local sub-domain it stores the boxes that come from the intersection with the
	 * sub-domains of the near processors enlarged by the ghost size (Internal ghost box).
	 * Every box found is stored in vb_int, in the nbx part of box_nn_processor_int, in the
	 * ibx part of proc_int_box and its index in vb_int is added to the geo_cell cells it spans
	 *
	 * \param ghost margins
	 *
	 * \note Are the B8_0 B9_0 B9_1 B5_0 boxes in calculateGhostBoxes
	 * \see calculateGhostBoxes
	 *
	 */
	void create_box_nn_processor_int(Ghost<dim,T> & ghost)
	{
		box_nn_processor_int.resize(sub_domains.size());
		proc_int_box.resize(nn_prcs<dim,T>::getNNProcessors());

		// Loop over the local sub-domains
		for (size_t sub = 0 ; sub < sub_domains.size() ; ++sub)
		{
			// Loop over the processors contiguous to this sub-domain
			for (size_t n = 0 ; n < box_nn_processor.get(sub).size() ; ++n)
			{
				// contiguous processor
				size_t p_id = box_nn_processor.get(sub).get(n);

				// sub-domains of the contiguous processor p_id
				const openfpm::vector< ::Box<dim,T> > & remote_subs = nn_prcs<dim,T>::getAdjacentSubdomain(p_id);

				// local processor id
				size_t lc_proc = nn_prcs<dim,T>::getAdjacentProcessor(p_id);

				// Enlarge each remote sub-domain and intersect it with the local one
				for (size_t k = 0 ; k < remote_subs.size() ; ++k)
				{
					// near-processor sub-domain
					::Box<dim,T> remote = remote_subs.get(k);

					// local sub-domain
					::SpaceBox<dim,T> local = sub_domains.get(sub);

					// margin of ghost size around the near processor sub-domain
					remote.enlarge(ghost);

					// intersection with the local sub-domain
					p_box found;
					const bool hit = remote.Intersect(local,found.box);

					// nothing to store without intersection
					if (hit != true)
						continue;

					// processor id and local processor id of the box
					found.proc = p_id;
					found.lc_proc = lc_proc;

					//
					// vb_int, box_nn_processor_int and proc_int_box all store the same
					// information organized in different ways, read the description of
					// each for more information
					//

					// near processor sub-domain intersections
					openfpm::vector< ::Box<dim,T> > & int_boxes = box_nn_processor_int.get(sub).get(n).nbx;
					int_boxes.add(found.box);
					vb_int.add(found);

					// box for proc_int_box, it remembers the sub-domain it comes from
					Box_dom<dim,T> & by_proc = proc_int_box.get(nn_prcs<dim,T>::ProctoID(p_id));
					Box_sub<dim,T> sb;
					sb = found.box;
					sb.sub = sub;

					// Search the position of this sub-domain in the internal adjacent
					// sub-domain list of the processor
					size_t p_idp = nn_prcs<dim,T>::ProctoID(p_id);
					size_t s = 0;

					while (s < nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).size() &&
					       !(nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).get(s) == sub))
					{
						++s;
					}

					if (s == nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).size())
						std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " sub-domain not found\n";

					// id of the box, built from the remote sub-domain index, the
					// position found and the id of this processor
					sb.id = (k * nn_prcs<dim,T>::getInternalAdjSubdomain(p_idp).size() + s) * v_cl.getProcessingUnits() + v_cl.getProcessUnitID();

					by_proc.ibx.add(sb);

					// update the geo_cell list

					// cells spanned by the box
					const grid_key_dx<dim> p1 = geo_cell.getCellGrid(found.box.getP1());
					const grid_key_dx<dim> p2 = geo_cell.getCellGrid(found.box.getP2());

					// grid of the cell list and sub-iterator on the spanned cells
					auto & gi = geo_cell.getGrid();
					grid_key_dx_iterator_sub<dim> g_sub(gi,p1,p2);

					// add the index of the box (last element of vb_int) to each cell
					for ( ; g_sub.isNext() ; ++g_sub)
					{
						auto key = g_sub.get();
						geo_cell.addCell(gi.LinId(key),vb_int.size()-1);
					}
				}
			}
		}
	}

incardon's avatar
incardon committed
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546
	// Heap memory receiver
	HeapMemory hp_recv;

	// vector v_proc
	openfpm::vector<size_t> v_proc;

	// Receive counter
	size_t recv_cnt;

	/*! \brief Message allocation
	 *
	 * \param message size required to receive from i
	 * \param total message size to receive from all the processors
	 * \param the total number of processor want to communicate with you
	 * \param i processor id
incardon's avatar
incardon committed
547 548
	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
	 *           every time message_alloc is called)
incardon's avatar
incardon committed
549 550 551 552 553
	 * \param ptr a pointer to the vector_dist structure
	 *
	 * \return the pointer where to store the message
	 *
	 */
incardon's avatar
incardon committed
554
	static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
incardon's avatar
incardon committed
555 556
	{
		// cast the pointer
incardon's avatar
incardon committed
557
		CartDecomposition<dim,T,device_l,Memory,Domain> * cd = static_cast< CartDecomposition<dim,T,device_l,Memory,Domain> *>(ptr);
incardon's avatar
incardon committed
558 559

		// Resize the memory
560
		cd->nn_processor_subdomains[i].bx.resize(msg_i / sizeof(::Box<dim,T>) );
incardon's avatar
incardon committed
561 562

		// Return the receive pointer
incardon's avatar
incardon committed
563
		return cd->nn_processor_subdomains[i].bx.getPointer();
incardon's avatar
incardon committed
564 565
	}

incardon's avatar
incardon committed
566 567 568 569 570 571 572 573
public:

	/*! \brief Cartesian decomposition constructor
	 *
	 * It forwards the virtual cluster to nn_prcs, keeps a reference to it and
	 * zeroes the processor bounding box
	 *
	 * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	CartDecomposition(Vcluster & v_cl)
	: nn_prcs<dim,T>(v_cl),
	  v_cl(v_cl)
	{
		// the bounding box starts zeroed
		this->bbox.zero();
	}
incardon's avatar
incardon committed
579 580 581 582 583

	//! Cartesian decomposition destructor, nothing to release explicitly
	~CartDecomposition() = default;

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
	//! Element of the linear arrays (vb_ext and vb_int) that store the ghost boxes
	//! related to the near processors
	struct p_box
	{
		//! Box that identify the intersection of the ghost of the near processor with the
		//! processor sub-domain
		::Box<dim,T> box;
		//! local processor id
		size_t lc_proc;
		//! processor id
		size_t proc;

		/*! \brief Check if two p_box are the same
		 *
		 * Only the local processor id (lc_proc) is compared, box and proc are ignored.
		 * The operator is const, so it can be used also on constant p_box
		 *
		 * \param pb box to check
		 *
		 * \return true if the two p_box have the same lc_proc
		 *
		 */
		bool operator==(const p_box & pb) const
		{
			return pb.lc_proc == lc_proc;
		}
	};

incardon's avatar
incardon committed
606 607
	openfpm::vector<size_t> ids;

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
	/*! \brief Selector for ghost_processorID: the returned id is the box id
	 *
	 */
	class box_id
	{
	public:
		/*! \brief Return the box id
		 *
		 * \param p structure containing the id informations (not used)
		 * \param b_id box_id
		 *
		 * \return box id
		 *
		 */
		static inline size_t id(p_box & p, size_t b_id)
		{
			// the box structure is not needed to select the box id
			(void)p;

			return b_id;
		}
	};

	/*! \brief Selector for ghost_processorID: the returned id is the processor id
	 *
	 */
	class processor_id
	{
	public:
		/*! \brief Return the processor id
		 *
		 * \param p structure containing the id informations
		 * \param b_id box_id (not used)
		 *
		 * \return processor id
		 *
		 */
		static inline size_t id(p_box & p, size_t b_id)
		{
			// the box id is not needed to select the processor id
			(void)b_id;

			return p.proc;
		}
	};

	/*! \brief Selector for ghost_processorID: the returned id is the local processor id
	 *
	 */
	class lc_processor_id
	{
	public:
		/*! \brief Return the near processor id
		 *
		 * \param p structure containing the id informations
		 * \param b_id box_id (not used)
		 *
		 * \return local processor id
		 *
		 */
		static inline size_t id(p_box & p, size_t b_id)
		{
			// the box id is not needed to select the local processor id
			(void)b_id;

			return p.lc_proc;
		}
	};

668 669 670 671 672 673 674 675 676 677 678
	/*! \brief Given a point it return the set of boxes in which the point fall
	 *
	 * The boxes are the ones stored in the geo_cell cell where the point fall
	 *
	 * \param p Point to check
	 * \return An iterator with the id's of the internal boxes in which the point fall
	 *
	 */
	auto getInternalIDBoxes(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
	{
		// cell of geo_cell that contain the point
		auto cell = geo_cell.getCell(p);

		// iterator on the elements stored in that cell
		return geo_cell.getIterator(cell);
	}

679 680 681 682

#define UNIQUE 1
#define MULTIPLE 2

incardon's avatar
incardon committed
683
	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
incardon's avatar
incardon committed
684
	 * (Internal ghost)
incardon's avatar
incardon committed
685
	 *
686
	 * \tparam id type of if to get box_id processor_id lc_processor_id
incardon's avatar
incardon committed
687
	 * \param p Particle position
688 689 690
	 * \param opt intersection boxes of the same processor can overlap, so in general the function
	 *        can produce more entry with the same processor, the UNIQUE option eliminate double entries
	 *        (UNIQUE) is for particle data (MULTIPLE) is for grid data [default MULTIPLE]
incardon's avatar
incardon committed
691 692 693 694
	 *
	 * \param return the processor ids
	 *
	 */
695
	template <typename id> inline const openfpm::vector<size_t> ghost_processorID(Point<dim,T> & p, const int opt = MULTIPLE)
incardon's avatar
incardon committed
696 697 698
	{
		ids.clear();

699
		// Check with geo-cell if a particle is inside one Cell containing boxes
incardon's avatar
incardon committed
700

701
		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
incardon's avatar
incardon committed
702 703

		// For each element in the cell, check if the point is inside the box
704
		// if it is, store the processor id
incardon's avatar
incardon committed
705 706 707 708 709 710
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
711
				ids.add(id::id(vb_int.get(bid),bid));
incardon's avatar
incardon committed
712
			}
713 714

			++cell_it;
incardon's avatar
incardon committed
715 716
		}

717 718 719 720
		// Make the id unique
		if (opt == UNIQUE)
			ids.unique();

incardon's avatar
incardon committed
721 722 723 724
		return ids;
	}

	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
incardon's avatar
incardon committed
725
	 * (Internal ghost)
incardon's avatar
incardon committed
726
	 *
727
	 * \tparam id type of if to get box_id processor_id lc_processor_id
incardon's avatar
incardon committed
728 729 730 731 732
	 * \param p Particle position
	 *
	 * \param return the processor ids
	 *
	 */
733
	template<typename id, typename Mem> inline const openfpm::vector<size_t> ghost_processorID(const encapc<1,Point<dim,T>,Mem> & p, const int opt = MULTIPLE)
incardon's avatar
incardon committed
734 735 736 737 738
	{
		ids.clear();

		// Check with geo-cell if a particle is inside one Cell containing boxes

incardon's avatar
incardon committed
739
		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
incardon's avatar
incardon committed
740 741 742 743 744 745 746 747 748

		// For each element in the cell, check if the point is inside the box
		// if it is, store the processor id
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
749
				ids.add(id::id(vb_int.get(bid),bid));
incardon's avatar
incardon committed
750
			}
751 752

			++cell_it;
incardon's avatar
incardon committed
753 754
		}

755 756 757 758
		// Make the id unique
		if (opt == UNIQUE)
			ids.unique();

incardon's avatar
incardon committed
759 760 761
		return ids;
	}

762 763
	// External ghost boxes for this processor, indicated with G8_0 G9_0 ...
	openfpm::vector<p_box> vb_ext;
incardon's avatar
incardon committed
764

765
	// Internal ghost boxes for this processor domain, indicated with B8_0 B9_0 ..... in the figure
incardon's avatar
incardon committed
766
	// below as a linear vector
incardon's avatar
incardon committed
767
	openfpm::vector<p_box> vb_int;
incardon's avatar
incardon committed
768

incardon's avatar
incardon committed
769
	/*! It calculate the internal ghost boxes
incardon's avatar
incardon committed
770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
	 *
	 * Example: Processor 10 calculate
	 * B8_0 B9_0 B9_1 and B5_0
	 *
	 *
+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|                                                    |                                   |
|                                                    |                                   |
++--------------+---+---------------------------+----+        Processor 9                |
 |              |   |     B8_0                  |    |        Subdomain 0                |
 |              +------------------------------------+                                   |
 |              |   |                           |    |                                   |
 |              |   |  XXXXXXXXXXXXX XX         |B9_0|                                   |
 |              | B |  X Processor 10 X         |    |                                   |
 | Processor 5  | 5 |  X Sub-domain 0 X         |    |                                   |
 | Subdomain 0  | _ |  X              X         +----------------------------------------+
 |              | 0 |  XXXXXXXXXXXXXXXX         |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |        Processor 9                |
 |              |   |                           |B9_1|        Subdomain 1                |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 +--------------+---+---------------------------+----+                                   |
                                                     |                                   |
                                                     +-----------------------------------+

       and also
incardon's avatar
incardon committed
801
       G8_0 G9_0 G9_1 G5_0 (External ghost boxes)
incardon's avatar
incardon committed
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849

+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|           +---------------------------------------------+                              |
|           |         G8_0                           |    |                              |
++--------------+------------------------------------+    |   Processor 9                |
 |          |   |                                    |    |   Subdomain 0                |
 |          |   |                                    |G9_0|                              |
 |          |   |                                    |    |                              |
 |          |   |      XXXXXXXXXXXXX XX              |    |                              |
 |          |   |      X Processor 10 X              |    |                              |
 | Processor|5  |      X Sub-domain 0 X              |    |                              |
 | Subdomain|0  |      X              X              +-----------------------------------+
 |          |   |      XXXXXXXXXXXXXXXX              |    |                              |
 |          | G |                                    |    |                              |
 |          | 5 |                                    |    |   Processor 9                |
 |          | | |                                    |    |   Subdomain 1                |
 |          | 0 |                                    |G9_1|                              |
 |          |   |                                    |    |                              |
 |          |   |                                    |    |                              |
 +--------------+------------------------------------+    |                              |
            |                                        |    |                              |
            +----------------------------------------+----+------------------------------+


	 *
	 *
	 *
	 * The function takes no argument: the ghost margins are read from the member ghost,
	 * which stores one margin for each dimension (p1 negative part) (p2 positive part)
	 *
                ^ p2[1]
                |
                |
           +----+----+
           |         |
           |         |
p1[0]<-----+         +----> p2[0]
           |         |
           |         |
           +----+----+
                |
                v  p1[1]

	 *
	 *
	 */
incardon's avatar
incardon committed
850
	void calculateGhostBoxes()
incardon's avatar
incardon committed
851 852 853 854 855 856 857
	{
#ifdef DEBUG
		// the ghost margins are assumed to be smaller
		// than one sub-domain

		for (size_t i = 0 ; i < dim ; i++)
		{
incardon's avatar
incardon committed
858
			if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i)  >= domain.template getHigh(i) / gr.size(i))
incardon's avatar
incardon committed
859
			{
860
				std::cerr << "Error " << __FILE__ << ":" << __LINE__  << " : Ghost are bigger than one domain" << "\n";
incardon's avatar
incardon committed
861 862 863 864 865 866
			}
		}
#endif

		// Intersect all the local sub-domains with the sub-domains of the contiguous processors

incardon's avatar
incardon committed
867
		// create the internal structures that store ghost information
868 869
		create_box_nn_processor_ext(ghost);
		create_box_nn_processor_int(ghost);
incardon's avatar
incardon committed
870

incardon's avatar
incardon committed
871
		// ebox must come after ibox (in this case)
incardon's avatar
incardon committed
872 873 874

		ie_loc_ghost<dim,T>::create_loc_ghost_ibox(ghost,sub_domains);
		ie_loc_ghost<dim,T>::create_loc_ghost_ebox(ghost,sub_domains);
incardon's avatar
incardon committed
875 876 877 878 879 880 881 882 883

		// get the smallest sub-domain dimension on each direction
		for (size_t i = 0 ; i < dim ; i++)
		{
			if (ghost.template getLow(i) >= ss_box.getHigh(i) || ghost.template getHigh(i)  >= domain.template getHigh(i) / gr.size(i))
			{
				std::cerr << "Error " << __FILE__ << ":" << __LINE__  << " : Ghost are bigger than one domain" << "\n";
			}
		}
incardon's avatar
incardon committed
884 885
	}

incardon's avatar
incardon committed
886 887 888 889 890 891
	/*! \brief processorID return in which processor the particle should go
	 *
	 * \return processorID
	 *
	 */

incardon's avatar
incardon committed
892
	template<typename Mem> size_t inline processorID(encapc<1, Point<dim,T>, Mem> p)
incardon's avatar
incardon committed
893
	{
incardon's avatar
incardon committed
894
		return fine_s.get(cd.getCell(p));
incardon's avatar
incardon committed
895 896
	}

incardon's avatar
incardon committed
897 898 899 900 901 902 903 904 905 906 907 908 909
	// Smallest subdivision on each direction. Its high bounds are compared against the
	// ghost margins in calculateGhostBoxes(); it is exposed by getSmallestSubdivision()
	::Box<dim,T> ss_box;

	/*! \brief Smallest subdivision of the domain on each direction
	 *
	 * \return a read-only reference to the member ss_box (a box where p1 is set to zero)
	 *
	 */
	const ::Box<dim,T> & getSmallestSubdivision()
	{
		const ::Box<dim,T> & smallest = ss_box;

		return smallest;
	}

incardon's avatar
incardon committed
910 911 912 913 914 915
	/*! \brief processorID return in which processor the particle should go
	 *
	 * \return processorID
	 *
	 */

916
	size_t inline processorID(const T (&p)[dim]) const
incardon's avatar
incardon committed
917
	{
incardon's avatar
incardon committed
918
		return fine_s.get(cd.getCell(p));
incardon's avatar
incardon committed
919 920
	}

incardon's avatar
incardon committed
921 922
	/*! \brief Set the parameter of the decomposition
	 *
incardon's avatar
incardon committed
923
     * \param div_ storing into how many domain to decompose on each dimension
incardon's avatar
incardon committed
924 925 926
     * \param domain_ domain to decompose
	 *
	 */
incardon's avatar
incardon committed
927
	void setParameters(const size_t (& div_)[dim], Domain<dim,T> domain_, Ghost<dim,T> ghost = Ghost<dim,T>())
incardon's avatar
incardon committed
928
	{
incardon's avatar
incardon committed
929 930
		// set the ghost
		this->ghost = ghost;
incardon's avatar
incardon committed
931 932
		// Set the decomposition parameters

incardon's avatar
incardon committed
933
		gr.setDimensions(div_);
incardon's avatar
incardon committed
934
		domain = domain_;
incardon's avatar
incardon committed
935
		cd.setDimensions(domain,div_,0);
incardon's avatar
incardon committed
936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981

		//! Create the decomposition

		CreateDecomposition(v_cl);
	}

	/*! \brief Number of local hyper-cubes (sub-domains)
	 *
	 * \return the number of elements stored in sub_domains
	 *
	 */
	size_t getNLocalHyperCube()
	{
		const size_t n_local = sub_domains.size();

		return n_local;
	}

	/*! \brief Number of hyper-cubes enclosing one particular subspace
	 *
	 * In general more than one hyper-cube can enclose a subspace (to increase
	 * occupancy); in case of Cartesian decomposition each subspace has exactly
	 * one hyper-cube, so the answer is always 1
	 *
	 * \param id of the subspace (not used)
	 * \return the number of hyper-cube enclosing your space, always 1
	 *
	 */
	size_t getNHyperCube(size_t id)
	{
		// the answer does not depend on the subspace
		static_cast<void>(id);

		const size_t n_cubes = 1;

		return n_cubes;
	}

	/*! \brief Get the hyper-cube margins of one subspace
	 *
	 * Each subspace has one hyper-cube, for this reason id_c has to be 0.
	 * In DEBUG builds an out of range id or a non-zero id_c is reported on
	 * std::cerr, the access is performed anyway
	 *
	 * \param id of the subspace
	 * \param id_c hyper-cube of the subspace, has to be 0
	 * \return The specified hyper-cube space
	 *
	 */
	SpaceBox<dim,T> & getHyperCubeMargins(size_t id, size_t id_c)
	{
#ifdef DEBUG
		// Check if this subspace exist
		const bool id_overflow = (id >= gr.size());

		// Each subspace is an hyper-cube so id_c > 0 is an error
		const bool id_c_invalid = (id_c > 0);

		if (id_overflow)
		{
			std::cerr << "Error CartDecomposition: id > N_tot";
		}
		else if (id_c_invalid)
		{
			std::cerr << "Error CartDecomposition: id_c > 0";
		}
#endif

		// NOTE(review): the other accessors of this class read sub_domains with
		// get(i) or .template get<...>(i); confirm that this form is the intended one
		return sub_domains.get<Object>(id);
	}

incardon's avatar
incardon committed
996
	/*! \brief Get the total number of sub-domain for the local processor
incardon's avatar
incardon committed
997
	 *
incardon's avatar
incardon committed
998
	 * \return The total number of sub-domains
incardon's avatar
incardon committed
999 1000 1001 1002 1003
	 *
	 */

	size_t getNHyperCube()
	{
incardon's avatar
incardon committed
1004
		return gr.size();
incardon's avatar
incardon committed
1005 1006
	}

incardon's avatar
incardon committed
1007
	/*! \brief Get one local sub-domain
	 *
	 * \param lc id of the local sub-domain (each local processor can have more than one sub-domain)
	 * \return a copy of the sub-domain
	 *
	 */
	SpaceBox<dim,T> getLocalHyperCube(size_t lc)
	{
		// Space box to fill
		SpaceBox<dim,T> sp;

		// copy the low (p1) and the high (p2) bound, one direction at a time
		for (size_t d = 0 ; d < dim ; ++d)
		{
			sp.setLow(d,sub_domains.template get<Box::p1>(lc)[d]);
			sp.setHigh(d,sub_domains.template get<Box::p2>(lc)[d]);
		}

		return sp;
	}

incardon's avatar
incardon committed
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
	/*! \brief Get one local sub-domain extended by the ghost
	 *
	 * \param lc id of the local sub-domain (each local processor can have more than one sub-domain)
	 * \return a copy of the sub-domain enlarged with the member ghost
	 *
	 */

	SpaceBox<dim,T> getSubDomainWithGhost(size_t lc)
	{
		// Start from a copy of the requested sub-domain
		SpaceBox<dim,T> extended = sub_domains.get(lc);

		// and grow it by the ghost margins
		extended.enlarge(ghost);

		return extended;
	}

incardon's avatar
incardon committed
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
	/*! \brief Return the structure that store the physical domain
	 *
	 * The structure returned is the member domain, the one assigned
	 * by setParameters()
	 *
	 * \return The physical domain, as a non-const reference to the member
	 *
	 */

	Domain<dim,T> & getDomain()
	{
		return domain;
	}

incardon's avatar
incardon committed
1061 1062 1063
	/*! \brief Check if the particle is local
	 *
	 * \param p object position
incardon's avatar
incardon committed
1064
	 *
incardon's avatar
incardon committed
1065
	 * \return true if it is local
incardon's avatar
incardon committed
1066 1067
	 *
	 */
1068
	template<typename Mem> bool isLocal(const encapc<1, Point<dim,T>, Mem> p) const
incardon's avatar
incardon committed
1069
	{
1070
		return processorID<Mem>(p) == v_cl.getProcessUnitID();
incardon's avatar
incardon committed
1071
	}
incardon's avatar
incardon committed
1072

incardon's avatar
incardon committed
1073
	/*! \brief Check if the particle is local
incardon's avatar
incardon committed
1074
	 *
incardon's avatar
incardon committed
1075
	 * \param p object position
incardon's avatar
incardon committed
1076
	 *
incardon's avatar
incardon committed
1077
	 * \return true if it is local
incardon's avatar
incardon committed
1078 1079
	 *
	 */
1080
	bool isLocal(const T (&pos)[dim]) const
incardon's avatar
incardon committed
1081
	{
incardon's avatar
incardon committed
1082 1083
		return processorID(pos) == v_cl.getProcessUnitID();
	}
incardon's avatar
incardon committed
1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095

	// Bounding box returned by getProcessorBounds()
	::Box<dim,T> bbox;

	/*! \brief Return the bounding box containing the processor box + smallest subdomain spacing
	 *
	 * The box is not computed here, the function only gives access to the member bbox
	 *
	 * \return The bounding box, as a non-const reference to the member bbox
	 *
	 */
	::Box<dim,T> & getProcessorBounds()
	{
		return bbox;
	}
incardon's avatar
incardon committed
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108

	/*! \brief Get the candidates to label a point
	 *
	 * If the point fall into the ghost of some near processor, the elements that
	 * identify those processors are stored in the geo_cell cell containing the
	 * point. The function only returns the iterator on that cell, no test
	 * on the point is performed here
	 *
	 * \param p Point
	 * \return iterator on the elements of the geo_cell cell that contains p
	 *
	 */
	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
	{
		// cell of geo_cell that contains the point
		const auto cell_id = geo_cell.getCell(p);

		return geo_cell.getIterator(cell_id);
	}

1109

1110 1111 1112
	////////////// Functions to get decomposition information ///////////////


1113
	/*! \brief Number of internal ghost boxes produced by one near processor
	 *
	 * \param id near processor list id (the id go from 0 to getNNProcessor())
	 * \return the number of internal ghost boxes (size of ibx)
	 *
	 */
	inline size_t getProcessorNIGhost(size_t id) const
	{
		// ghost boxes stored for the near processor id
		const auto & near_proc = proc_int_box.get(id);

		return near_proc.ibx.size();
	}

1124
	/*! \brief Number of external ghost boxes produced by one near processor
	 *
	 * \param id near processor list id (the id go from 0 to getNNProcessor())
	 * \return the number of external ghost boxes (size of ebx)
	 *
	 */
	inline size_t getProcessorNEGhost(size_t id) const
	{
		// ghost boxes stored for the near processor id
		const auto & near_proc = proc_int_box.get(id);

		return near_proc.ebx.size();
	}

	/*! \brief Get the internal ghost box number j of one near processor
	 *
	 * \param id near processor list id (the id go from 0 to getNNProcessor())
	 * \param j box (each near processor can produce more than one internal ghost box)
	 * \return the box, element j of ibx
	 *
	 */
	inline const ::Box<dim,T> & getProcessorIGhostBox(size_t id, size_t j) const
	{
		// ghost boxes stored for the near processor id
		const auto & near_proc = proc_int_box.get(id);

		return near_proc.ibx.get(j);
	}

1147
	/*! \brief Get the external ghost box number j of one near processor
	 *
	 * \param id near processor list id (the id go from 0 to getNNProcessor())
	 * \param j box (each near processor can produce more than one external ghost box)
	 * \return the box, element j of ebx
	 *
	 */
	inline const ::Box<dim,T> & getProcessorEGhostBox(size_t id, size_t j) const
	{
		// ghost boxes stored for the near processor id
		const auto & near_proc = proc_int_box.get(id);

		return near_proc.ebx.get(j);
	}

1159