CartDecomposition.hpp 32.2 KB
Newer Older
incardon's avatar
incardon committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * CartDecomposition.hpp
 *
 *  Created on: Aug 15, 2014
 *      Author: Pietro Incardona
 */

#ifndef CARTDECOMPOSITION_HPP
#define CARTDECOMPOSITION_HPP

#include "config.h"
#include "Decomposition.hpp"
incardon's avatar
incardon committed
13
#include "Vector/map_vector.hpp"
incardon's avatar
incardon committed
14 15 16 17 18 19 20
#include <vector>
#include "global_const.hpp"
#include <initializer_list>
#include "SubdomainGraphNodes.hpp"
#include "metis_util.hpp"
#include "dec_optimizer.hpp"
#include "Space/Shape/Box.hpp"
incardon's avatar
incardon committed
21
#include "Space/Shape/Point.hpp"
incardon's avatar
incardon committed
22
#include "NN/CellList/CellDecomposer.hpp"
incardon's avatar
incardon committed
23 24
#include <unordered_map>
#include "NN/CellList/CellList.hpp"
incardon's avatar
incardon committed
25
#include "Space/Ghost.hpp"
incardon's avatar
incardon committed
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43

/**
 * \brief This class decomposes a space into subspaces
 *
 * This class decomposes a space into regular hyper-cube subspaces, and gives the possibility to
 * select one subspace
 *
 * \tparam dim is the dimensionality of the physical domain we are going to decompose.
 * \tparam T type of the space we decompose, Real, Integer, Complex ...
 * \tparam layout to use
 * \tparam Memory Memory factory used to allocate memory
 * \tparam Domain Structure that contain the information of your physical domain
 * \tparam data type of structure that store the sub-domain decomposition can be an openfpm structure like
 *        vector, ...
 *
 * \note if PARALLEL_DECOMPOSITION macro is defined a parallel decomposition algorithm is used, basically
 *       each processor does not recompute the same decomposition
 *
incardon's avatar
incardon committed
44 45 46 47 48
 *  \note sub-sub-domain: portion of space at a finer level than the sub-domain (before optimization,
 *        or before sub-sub-domain merging)
 *  \note sub-domain: portion of space (after optimization)
 *  \note near processor sub-domain: a sub-domain that lives in a near (or contiguous) processor
 *
incardon's avatar
incardon committed
49 50
 */

incardon's avatar
incardon committed
51
template<unsigned int dim, typename T, template<typename> class device_l=openfpm::device_cpu, typename Memory=HeapMemory, template<unsigned int, typename> class Domain=Box, template<typename, typename, typename, typename, unsigned int> class data_s = openfpm::vector>
incardon's avatar
incardon committed
52 53
class CartDecomposition
{
incardon's avatar
incardon committed
54 55 56 57 58 59 60 61 62
	struct N_box
	{
		// id of the processor in the nn_processor list
		size_t id;

		// Near processor sub-domains
		typename openfpm::vector<::Box<dim,T>> bx;
	};

incardon's avatar
incardon committed
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
	/*! \brief Intersection boxes between the local sub-domains and the
	 *         sub-domains of one contiguous processor
	 *
	 */
	struct Box_proc
	{
		//! Intersection between the local sub-domain enlarged by the ghost
		//! and the contiguous processor sub-domains
		openfpm::vector< ::Box<dim,T> > bx;

		//! Intersection between the contiguous processor sub-domain enlarged
		//! by the ghost and the local sub-domain
		openfpm::vector< ::Box<dim,T> > nbx;

		//! processor
		size_t proc;
	};

incardon's avatar
incardon committed
78
public:
incardon's avatar
incardon committed
79

incardon's avatar
incardon committed
80 81 82 83 84 85 86 87
	//! Type of the domain we are going to decompose
	typedef T domain_type;

	//! It simplify to access the SpaceBox element
	typedef SpaceBox<dim,T> Box;

private:

incardon's avatar
incardon committed
88
	//! This is the key type to access  data_s, for example in the case of vector
incardon's avatar
incardon committed
89
	//! acc_key is size_t
incardon's avatar
incardon committed
90
	typedef typename data_s<SpaceBox<dim,T>,device_l<SpaceBox<dim,T>>,Memory,openfpm::vector_grow_policy_default,openfpm::vect_isel<SpaceBox<dim,T>>::value >::access_key acc_key;
incardon's avatar
incardon committed
91 92 93 94 95 96 97 98 99

	//! Subspace selected
	//! access_key in case of grid is just the set of the index to access the grid
	std::vector<acc_key> id_sub;

	//! the margin of the sub-domain selected
	SpaceBox<dim,T> sub_domain;

	//! the set of all local sub-domain as vector
incardon's avatar
incardon committed
100
	openfpm::vector<SpaceBox<dim,T>> sub_domains;
incardon's avatar
incardon committed
101

incardon's avatar
incardon committed
102 103 104 105 106
	//! List of near processors
	openfpm::vector<size_t> nn_processors;

	//! for each sub-domain, contain the list of the neighborhood processors
	//! and for each processor contain the boxes calculated from the intersection
incardon's avatar
incardon committed
107
	//! of the sub-domain + ghost with the near-by processor sub-domain ()
incardon's avatar
incardon committed
108 109
	openfpm::vector< openfpm::vector< Box_proc > > box_nn_processor_int;

incardon's avatar
incardon committed
110
	//! for each sub-domain, contain the list of the neighborhood processors
incardon's avatar
incardon committed
111 112 113
	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;

	// for each near-processor store the sub-domain of the near processor
incardon's avatar
incardon committed
114
	std::unordered_map<size_t, N_box> nn_processor_subdomains;
incardon's avatar
incardon committed
115

incardon's avatar
Add ORB  
incardon committed
116 117
	//! Structure that contain for each sub-domain box the processor id
	//! exist for efficient global communication
incardon's avatar
incardon committed
118 119
	openfpm::vector<size_t> fine_s;

incardon's avatar
incardon committed
120 121
	//! Structure that store the cartesian grid information
	grid_sm<dim,void> gr;
incardon's avatar
incardon committed
122

incardon's avatar
incardon committed
123 124 125
	//! Structure that decompose your structure into cell without creating them
	//! useful to convert positions to CellId or sub-domain id in this case
	CellDecomposer_sm<dim,T> cd;
incardon's avatar
incardon committed
126 127 128 129

	//! rectangular domain to decompose
	Domain<dim,T> domain;

incardon's avatar
incardon committed
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
	//! Ghost boxes of the processor
	//! for each Sub-domain it store the ghost boxes, or
	//! the set of boxes that enclose the ghost space
	//! Box cannot overlap, they contain one id that is the
	//! processor the information should come from
	openfpm::vector< openfpm::vector<Domain<dim,T>> > gh_dom;

	//! Internal boxes of the processor
	//! for each Sub-domain it store the boxes enclosing the
	//! space that must be communicated when another processor
	//! require the ghost
	//! Box can overlap, they contain one id that is the
	//! processor the information should be communicated to
	openfpm::vector< openfpm::vector< Domain<dim,T>> > int_box;

incardon's avatar
incardon committed
145 146 147 148 149 150
	//! Box Spacing
	T spacing[dim];

	//! Runtime virtual cluster machine
	Vcluster & v_cl;

incardon's avatar
incardon committed
151 152 153 154 155
	//! Structure that store the geometrical information about intersection between the local sub-domain
	//! and the near processor sub-domains
	CellList<dim,T,FAST> geo_cell;


incardon's avatar
incardon committed
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
	/*! \brief Create internally the decomposition
	 *
	 * The domain is divided in gr.size() sub-sub-domains, the resulting cartesian
	 * graph is partitioned with METIS across the processing units and the
	 * sub-sub-domains assigned to this processor are merged in bigger boxes by
	 * dec_optimizer. The function fills:
	 *
	 * - spacing: size of one sub-sub-domain on each direction
	 * - sub_domains: the local sub-domains
	 * - bbox / ss_box: box enclosing all the local sub-domains / box shrunk with
	 *   contained() against every local sub-domain
	 * - box_nn_processor, nn_processors and nn_processor_subdomains[].id
	 * - fine_s: processor id of every sub-sub-domain
	 *
	 * If no box is assigned to this processor sub_domains stays empty and
	 * bbox / ss_box are left untouched.
	 *
	 * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	void CreateDecomposition(Vcluster & v_cl)
	{
		// Get the box containing the domain
		SpaceBox<dim,T> bs = domain.getBox();

		// Calculate the spacing of the sub-sub-domains on each direction
		for (unsigned int i = 0; i < dim ; i++)
		{
			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
		}

		// Here we use METIS
		// Create a cartesian grid graph
		CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;

		// Processor graph
		Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(gr.getSize(),domain);

		// Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		// Get the processor id
		long int p_id = v_cl.getProcessUnitID();

		// Convert the graph to metis
		Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);

		// decompose
		met.decompose<nm_part_v::id>();

		// fill the structure that store the processor id for each sub-domain
		fine_s.resize(gr.size());

		// Optimize the decomposition creating bigger spaces
		// And reducing Ghost over-stress
		dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize());

		// set of Boxes produced by the decomposition optimizer
		openfpm::vector<::Box<dim,size_t>> loc_box;

		// optimize the decomposition
		d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor);

		// collect the contiguous processors of every local sub-domain (nn_processors)
		for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
		{
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				nn_processors.add(box_nn_processor.get(i).get(j));
			}
		}

		// make the list sorted and unique
		std::sort(nn_processors.begin(), nn_processors.end());
		auto last = std::unique(nn_processors.begin(), nn_processors.end());
		nn_processors.erase(last, nn_processors.end());

		// link nn_processor_subdomains to the processor list: for each contiguous
		// processor store its position inside nn_processors
		for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
		{
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// processor id near to this sub-domain
				size_t proc_id = box_nn_processor.get(i).get(j);

				size_t k = 0;
				// search inside near processor list
				for (k = 0 ; k < nn_processors.size() ; k++)
					if (nn_processors.get(k) == proc_id)	break;

				nn_processor_subdomains[proc_id].id = k;
			}
		}

		// convert the boxes into sub-domains, the first one initialize ss_box and bbox.
		// Looping from 0 (instead of reading loc_box.get(0) unconditionally) keeps
		// the function safe when no box has been assigned to this processor
		for (size_t s = 0 ; s < loc_box.size() ; s++)
		{
			SpaceBox<dim,T> sub_d(loc_box.get(s));

			// re-scale and add spacing (the end is the starting point of the next domain + spacing)
			sub_d.mul(spacing);
			sub_d.expand(spacing);

			// add the sub-domain
			sub_domains.add(sub_d);

			if (s == 0)
			{
				// Initialize ss_box and bbox
				ss_box = sub_d;
				bbox = sub_d;
			}
			else
			{
				// Calculate the bound box
				bbox.enclose(sub_d);

				// Create the smallest box contained in all sub-domain
				ss_box.contained(sub_d);
			}
		}

		//++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes
		{
		VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1;
		vtk_box1.add(sub_domains);
		vtk_box1.write(std::string("loc_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
		}
		//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

		// fill fine_s structure
		// fine_s structure contain the processor id for each sub-sub-domain
		// with sub-sub-domain we mean the sub-domain decomposition before
		// running dec_optimizer (before merging sub-domains)
		auto it = gp.getVertexIterator();

		while (it.isNext())
		{
			size_t key = it.get();

			// fill with the fine decomposition
			fine_s.get(key) = gp.template vertex_p<nm_part_v::id>(key);

			++it;
		}
	}

	/*! \brief Create the subspaces that decompose your domain
	 *
	 * Create the subspaces that decompose your domain
	 *
	 */

	void CreateSubspaces()
	{
		// Create a grid where each point is a space
306
		grid_sm<dim,void> g(div);
incardon's avatar
incardon committed
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334

		// create a grid_key_dx iterator
		grid_key_dx_iterator<dim> gk_it(g);

		// Divide the space into subspaces
		while (gk_it.isNext())
		{
			//! iterate through all subspaces
			grid_key_dx<dim> key = gk_it.get();

			//! Create a new subspace
			SpaceBox<dim,T> tmp;

			//! fill with the Margin of the box
			for (int i = 0 ; i < dim ; i++)
			{
				tmp.setHigh(i,(key.get(i)+1)*spacing[i]);
				tmp.setLow(i,key.get(i)*spacing[i]);
			}

			//! add the space box
			sub_domains.add(tmp);

			// add the iterator
			++gk_it;
		}
	}

incardon's avatar
incardon committed
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
	// Heap memory receiver
	HeapMemory hp_recv;

	// vector v_proc
	openfpm::vector<size_t> v_proc;

	// Receive counter
	size_t recv_cnt;

	/*! \brief Message allocation
	 *
	 * \param message size required to receive from i
	 * \param total message size to receive from all the processors
	 * \param the total number of processor want to communicate with you
	 * \param i processor id
incardon's avatar
incardon committed
350 351
	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
	 *           every time message_alloc is called)
incardon's avatar
incardon committed
352 353 354 355 356
	 * \param ptr a pointer to the vector_dist structure
	 *
	 * \return the pointer where to store the message
	 *
	 */
incardon's avatar
incardon committed
357
	static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
incardon's avatar
incardon committed
358 359 360 361
	{
		// cast the pointer
		CartDecomposition<dim,T,device_l,Memory,Domain,data_s> * cd = static_cast< CartDecomposition<dim,T,device_l,Memory,Domain,data_s> *>(ptr);

incardon's avatar
incardon committed
362 363 364 365 366
		if (cd->v_cl.getProcessUnitID() == 0)
		{
			std::cout << "Receiving from " << i << "       msg size: " << msg_i << "\n";
		}

incardon's avatar
incardon committed
367
		// Resize the memory
incardon's avatar
incardon committed
368
		cd->nn_processor_subdomains[i].bx.resize(msg_i);
incardon's avatar
incardon committed
369 370

		// Return the receive pointer
incardon's avatar
incardon committed
371
		return cd->nn_processor_subdomains[i].bx.getPointer();
incardon's avatar
incardon committed
372 373
	}

incardon's avatar
incardon committed
374 375 376 377 378 379 380 381
public:

	/*! \brief Cartesian decomposition move constructor
	 *
	 * sub_domain, gr, the cell decomposer, domain and the spacing are copied
	 * from the source, id_sub and sub_domains are swapped with the ones of the
	 * source, v_cl is bound to the same virtual cluster and bbox is reset to zero.
	 *
	 * NOTE(review): the other members (nn_processors, box_nn_processor,
	 * nn_processor_subdomains, fine_s, ...) are not transferred - confirm that
	 * this is intended
	 *
	 * \param cd decomposition to move from
	 *
	 */
	CartDecomposition(CartDecomposition<dim,T,device_l,Memory,Domain,data_s> && cd)
	:sub_domain(cd.sub_domain),gr(cd.gr),cd(cd.cd),domain(cd.domain),v_cl(cd.v_cl)
	{
		// Reset the box to zero
		bbox.zero();

		//! Subspace selected
		//! access_key in case of grid is just the set of the index to access the grid
		id_sub.swap(cd.id_sub);

		//! the set of all local sub-domain as vector
		sub_domains.swap(cd.sub_domains);

		// Inside the body `cd` is the constructor argument (it hides the member
		// with the same name), so the spacing must be read from it: the unqualified
		// spacing[i] is the member of this object and would be a self-assignment
		for (unsigned int i = 0 ; i < dim ; i++)
		{
			//! Box Spacing
			this->spacing[i] = cd.spacing[i];
		}
	}

	/*! \brief Cartesian decomposition constructor
	 *
	 * It only binds the decomposition to the virtual cluster, no decomposition
	 * is created
	 *
	 * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	CartDecomposition(Vcluster & v_cl)
	:id_sub(0),
	 v_cl(v_cl)
	{
		// The bounding box start from zero
		this->bbox.zero();
	}
incardon's avatar
incardon committed
412 413 414 415 416 417 418 419 420

	/*! \brief Cartesian decomposition constructor, it divide the space in boxes
	 *
	 * \param dec is a vector that store how to divide on each dimension
	 * \param domain is the domain to divide
	 * \param v_cl are information of the cluster runtime machine
	 *
	 */
	CartDecomposition(std::vector<size_t> dec, Domain<dim,T> domain, Vcluster & v_cl)
incardon's avatar
incardon committed
421
	:id_sub(0),gr(dec),cd(domain,dec,0),domain(domain),v_cl(v_cl)
incardon's avatar
incardon committed
422
	{
incardon's avatar
incardon committed
423 424
		// Reset the box to zero
		bbox.zero();
incardon's avatar
incardon committed
425

incardon's avatar
incardon committed
426
		// Create the decomposition
incardon's avatar
incardon committed
427 428 429 430 431 432 433
		CreateDecomposition(v_cl);
	}

	//! Cartesian decomposition destructor, nothing to release explicitly
	~CartDecomposition() = default;

incardon's avatar
incardon committed
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
	openfpm::vector<size_t> ids;

	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
	 *
	 * The elements stored in the geo_cell cell of the position are visited,
	 * each element is used as index inside vb_int and, when the position is
	 * inside the box, the processor of the box is collected. The result is
	 * accumulated in the member ids, that is cleared at each call
	 *
	 * NOTE(review): vb_int is declared as a vector of ::Box but it is accessed
	 * through .box and .proc - confirm the element type of vb_int
	 *
	 * \param p Particle position
	 *
	 * \return the processor ids
	 *
	 */
	const openfpm::vector<size_t> ghost_processorID(Point<dim,T> & p)
	{
		ids.clear();

		// Check with geo-cell if a particle is inside one Cell containing boxes

		auto cell_it = geo_cell.getCellIterator(p);

		// For each element in the cell, check if the point is inside the box
		// if it is store the processor id
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
				ids.add(vb_int.get(bid).proc);
			}

			// go to the next element of the cell, without it the loop
			// never terminates on a non-empty cell
			++cell_it;
		}

		return ids;
	}

	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
	 *
	 * Version for a position stored inside an encapsulated object. The elements
	 * stored in the geo_cell cell of the position are visited, each element is
	 * used as index inside vb_int and, when the position is inside the box, the
	 * processor of the box is collected. The result is accumulated in the member
	 * ids, that is cleared at each call
	 *
	 * NOTE(review): vb_int is declared as a vector of ::Box but it is accessed
	 * through .box and .proc - confirm the element type of vb_int
	 *
	 * \param p Particle position
	 *
	 * \return the processor ids
	 *
	 */
	template<typename Mem> inline const openfpm::vector<size_t> ghost_processorID(const encapc<1,Point<dim,T>,Mem> & p)
	{
		ids.clear();

		// Check with geo-cell if a particle is inside one Cell containing boxes

		auto cell_it = geo_cell.getCellIterator(p);

		// For each element in the cell, check if the point is inside the box
		// if it is, store the processor id
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
				ids.add(vb_int.get(bid).proc);
			}

			// go to the next element of the cell, without it the loop
			// never terminates on a non-empty cell
			++cell_it;
		}

		return ids;
	}

	// Internal boxes for this processor domain, indicated with B8_0 B9_0 ..... in the figure
	// below as a linear vector
	openfpm::vector<::Box<dim,T>> vb_int;

incardon's avatar
incardon committed
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
	/*! It calculate the ghost boxes and internal boxes
	 *
	 * Example: Processor 10 calculate
	 * B8_0 B9_0 B9_1 and B5_0
	 *
	 *
+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|                                                    |                                   |
|                                                    |                                   |
++--------------+---+---------------------------+----+        Processor 9                |
 |              |   |     B8_0                  |    |        Subdomain 0                |
 |              +------------------------------------+                                   |
 |              |   |                           |    |                                   |
 |              |   |  XXXXXXXXXXXXX XX         |B9_0|                                   |
 |              | B |  X Processor 10 X         |    |                                   |
 | Processor 5  | 5 |  X Sub-domain 0 X         |    |                                   |
 | Subdomain 0  | _ |  X              X         +----------------------------------------+
 |              | 0 |  XXXXXXXXXXXXXXXX         |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |        Processor 9                |
 |              |   |                           |B9_1|        Subdomain 1                |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 +--------------+---+---------------------------+----+                                   |
                                                     |                                   |
                                                     +-----------------------------------+

       and also
       G8_0 G9_0 G9_1 G5_0

+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|           +---------------------------------------------+                              |
|           |         G8_0                           |    |                              |
++--------------+------------------------------------+    |   Processor 9                |
 |          |   |                                    |    |   Subdomain 0                |
 |          |   |                                    |G9_0|                              |
 |          |   |                                    |    |                              |
 |          |   |      XXXXXXXXXXXXX XX              |    |                              |
 |          |   |      X Processor 10 X              |    |                              |
 | Processor|5  |      X Sub-domain 0 X              |    |                              |
 | Subdomain|0  |      X              X              +-----------------------------------+
 |          |   |      XXXXXXXXXXXXXXXX              |    |                              |
 |          | G |                                    |    |                              |
 |          | 5 |                                    |    |   Processor 9                |
 |          | | |                                    |    |   Subdomain 1                |
 |          | 0 |                                    |G9_1|                              |
 |          |   |                                    |    |                              |
 |          |   |                                    |    |                              |
 +--------------+------------------------------------+    |                              |
            |                                        |    |                              |
            +----------------------------------------+----+------------------------------+


	 *
	 *
	 *
	 * \param ghost margins for each dimensions (p1 negative part) (p2 positive part)
	 *
                ^ p2[1]
                |
                |
           +----+----+
           |         |
           |         |
p1[0]<-----+         +----> p2[0]
           |         |
           |         |
           +----+----+
                |
                v  p1[1]

	 *
	 *
	 */
	void calculateGhostBoxes(Ghost<dim,T> & ghost)
	{
#ifdef DEBUG
		// the ghost margins are assumed to be smaller
		// than one sub-domain

		for (size_t i = 0 ; i < dim ; i++)
		{
incardon's avatar
incardon committed
589
			if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i)  >= domain.template getHigh(i) / gr.size(i))
incardon's avatar
incardon committed
590 591 592 593 594 595
			{
				std::cerr << "Error: Ghost are bigger that one domain" << "\n";
			}
		}
#endif

incardon's avatar
incardon committed
596 597
		// create a buffer with the sub-domains of this processor, the informations ( the boxes )
		// of the sub-domains contiguous to the processor A are sent to the processor A and
incardon's avatar
incardon committed
598 599
		// the information of the contiguous sub-domains in the near processors are received
		//
incardon's avatar
incardon committed
600
		openfpm::vector< openfpm::vector< ::SpaceBox<dim,T>> > boxes(nn_processors.size());
incardon's avatar
incardon committed
601 602 603 604 605 606 607

		for (size_t b = 0 ; b < box_nn_processor.size() ; b++)
		{
			for (size_t p = 0 ; p < box_nn_processor.get(b).size() ; p++)
			{
				size_t prc = box_nn_processor.get(b).get(p);

incardon's avatar
incardon committed
608 609 610 611 612
				// id of the processor in the processor list
				// [value between 0 and the number of the near processors]
				size_t id = nn_processor_subdomains[prc].id;

				boxes.get(id).add(sub_domains.get(b));
incardon's avatar
incardon committed
613 614 615
			}
		}

incardon's avatar
incardon committed
616 617 618 619 620 621 622 623 624 625 626 627
		//++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes
		{
		for (size_t b = 0 ; b < boxes.size() ; b++)
		{
			VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1;
			vtk_box1.add(boxes.get(b));
			vtk_box1.write(std::string("Processor_") + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(nn_processors.get(b)) + std::string(".vtk"));
		}
		}

		//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

incardon's avatar
incardon committed
628 629 630
		// Intersect all the local sub-domains with the sub-domains of the contiguous processors

		// Get the sub-domains of the near processors
incardon's avatar
incardon committed
631 632 633 634 635 636 637 638 639 640 641 642
		v_cl.sendrecvMultipleMessages(nn_processors,boxes,CartDecomposition<dim,T,device_l,Memory,Domain,data_s>::message_alloc, this ,NEED_ALL_SIZE);

		// ++++++++++++++++++++++++++++++++++++++++++ Check received boxes

		{
		VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1;
		for (size_t p = 0 ; p < nn_processors.size() ; p++)
		{
			size_t prc = nn_processors.get(p);

			if (v_cl.getProcessUnitID() == 0)
				std::cout << "Received from " << prc << "      n_boxes: " << nn_processor_subdomains[prc].bx.size() << "\n";
incardon's avatar
incardon committed
643

incardon's avatar
incardon committed
644 645 646 647 648 649 650 651
			vtk_box1.add(nn_processor_subdomains[prc].bx);
		}
		vtk_box1.write(std::string("rb_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
		}

		// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

		box_nn_processor_int.resize(sub_domains.size());
incardon's avatar
incardon committed
652 653 654 655

		// For each sub-domain
		for (size_t i = 0 ; i < sub_domains.size() ; i++)
		{
incardon's avatar
incardon committed
656
			SpaceBox<dim,T> sub_with_ghost = sub_domains.get(i);
incardon's avatar
incardon committed
657 658

			// enlarge the sub-domain with the ghost
incardon's avatar
incardon committed
659 660 661 662
			sub_with_ghost.enlarge(ghost);

			// resize based on the number of contiguous processors
			box_nn_processor_int.get(i).resize(box_nn_processor.get(i).size());
incardon's avatar
incardon committed
663 664 665 666 667 668 669 670

			// For each processor contiguous to this sub-domain
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// Contiguous processor
				size_t p_id = box_nn_processor.get(i).get(j);

				// get the set of sub-domains of the contiguous processor p_id
incardon's avatar
incardon committed
671
				openfpm::vector< ::Box<dim,T> > & p_box = nn_processor_subdomains[p_id].bx;
incardon's avatar
incardon committed
672 673 674 675 676 677 678

				// near processor sub-domain intersections
				openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).bx;

				// for each near processor sub-domain intersect with the enlarged local sub-domain and store it
				for (size_t b = 0 ; b < p_box.size() ; b++)
				{
incardon's avatar
incardon committed
679
					::Box<dim,T> bi;
incardon's avatar
incardon committed
680

incardon's avatar
incardon committed
681
					bool intersect = sub_with_ghost.Intersect(::Box<dim,T>(p_box.get(b)),bi);
incardon's avatar
incardon committed
682 683 684 685 686 687 688 689 690 691 692 693 694

					if (intersect == true)
						p_box_int.add(bi);
				}
			}

			// For each processor contiguous to this sub-domain
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// Contiguous processor
				size_t p_id = box_nn_processor.get(i).get(j);

				// get the set of sub-domains of the contiguous processor p_id
incardon's avatar
incardon committed
695
				openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_processor_subdomains[p_id].bx;
incardon's avatar
incardon committed
696 697

				// near processor sub-domain intersections
incardon's avatar
incardon committed
698
				openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).nbx;
incardon's avatar
incardon committed
699 700 701 702

				// For each near processor sub-domains enlarge and intersect with the local sub-domain and store the result
				for (size_t k = 0 ; k < nn_p_box.size() ; k++)
				{
incardon's avatar
incardon committed
703
					// enlarge the near-processor sub-domain
incardon's avatar
incardon committed
704 705
					::Box<dim,T> n_sub = nn_p_box.get(k);

incardon's avatar
incardon committed
706 707 708
					// local sub-domain
					::SpaceBox<dim,T> l_sub = sub_domains.get(i);

incardon's avatar
incardon committed
709
					// Create a margin of ghost size around the near processor sub-domain
incardon's avatar
incardon committed
710
					n_sub.enlarge(ghost);
incardon's avatar
incardon committed
711 712 713

					// Intersect with the local sub-domain

incardon's avatar
incardon committed
714 715
					::Box<dim,T> b_int;
					bool intersect = n_sub.Intersect(l_sub,b_int);
incardon's avatar
incardon committed
716 717 718 719 720

					// store if it intersect
					if (intersect == true)
					{
						p_box_int.add(b_int);
incardon's avatar
incardon committed
721
						vb_int.add(b_int);
incardon's avatar
incardon committed
722 723 724 725

						// update the geo_cell list

						// get the boxes this box span
incardon's avatar
incardon committed
726 727
						const grid_key_dx<dim> p1 = geo_cell.getCellGrid(b_int.getP1());
						const grid_key_dx<dim> p2 = geo_cell.getCellGrid(b_int.getP2());
incardon's avatar
incardon committed
728 729 730 731 732 733 734 735

						// Get the grid and the sub-iterator
						auto & gi = geo_cell.getGrid();
						grid_key_dx_iterator_sub<dim> g_sub(gi,p1,p2);

						// add the box-id to the cell list
						while (g_sub.isNext())
						{
incardon's avatar
incardon committed
736 737
							auto key = g_sub.get();
							geo_cell.addCell(gi.LinId(key),vb_int.size()-1);
incardon's avatar
incardon committed
738 739 740 741 742
							++g_sub;
						}
					}
				}
			}
incardon's avatar
incardon committed
743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760


			// ++++++++++++++++++++++++++++++++++++++++ Debug +++++++++++++++++++++++++++++

			{
			VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1;
			for (size_t p = 0 ; p < box_nn_processor_int.size() ; p++)
			{
				for (size_t s = 0 ; s < box_nn_processor_int.get(p).size() ; s++)
				{
					vtk_box1.add(box_nn_processor_int.get(p).get(s).nbx);
				}
			}
			vtk_box1.write(std::string("inte_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
			}

			// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

incardon's avatar
incardon committed
761 762 763
		}
	}

incardon's avatar
incardon committed
764 765 766 767 768 769
	/*! \brief processorID return in which processor the particle should go
	 *
	 * \return processorID
	 *
	 */

incardon's avatar
incardon committed
770
	template<typename Mem> size_t inline processorID(encapc<1, Point<dim,T>, Mem> p)
incardon's avatar
incardon committed
771
	{
incardon's avatar
incardon committed
772
		return fine_s.get(cd.getCell(p));
incardon's avatar
incardon committed
773 774
	}

incardon's avatar
incardon committed
775 776 777 778 779 780 781 782 783 784 785 786 787
	// Smallest subdivision on each direction, handed out by reference
	// through getSmallestSubdivision()
	::Box<dim,T> ss_box;

	/*! \brief Get the smallest subdivision of the domain on each direction
	 *
	 * The method only reads ss_box, so it is const-qualified and can also be
	 * called on a const decomposition.
	 *
	 * \return a box p1 is set to zero
	 *
	 */
	const ::Box<dim,T> & getSmallestSubdivision() const
	{
		return ss_box;
	}

	/*! \brief processorID return in which processor the particle should go
	 *
	 * The cell containing the point is computed with cd.getCell() and the
	 * value stored in fine_s for that cell is returned.
	 *
	 * \param p coordinates of the point, one entry per dimension
	 *
	 * \return processorID
	 *
	 */
	size_t inline processorID(T (&p)[dim])
	{
		return fine_s.get(cd.getCell(p));
	}

	/*! \brief Store the decomposition parameters and create the decomposition
	 *
	 * \param div_ std::vector storing into how many domain to decompose on each dimension
	 * \param domain_ domain to decompose
	 *
	 * NOTE(review): this overload stores div_ in the member div, while the
	 * fixed-size array overload configures gr and cd instead; confirm that
	 * div is still what CreateDecomposition() reads.
	 *
	 */
	void setParameters(std::vector<size_t> div_, Domain<dim,T> domain_)
	{
		// remember the requested subdivision and the domain
		div = div_;
		domain = domain_;

		// build the decomposition, handing over v_cl
		CreateDecomposition(v_cl);
	}

	/*! \brief Set the parameter of the decomposition
	 *
     * \param div_ std::vector storing into how many domain to decompose on each dimension
     * \param domain_ domain to decompose
	 *
	 */
incardon's avatar
incardon committed
823
	void setParameters(const size_t (& div_)[dim], Domain<dim,T> domain_)
incardon's avatar
incardon committed
824 825 826
	{
		// Set the decomposition parameters

incardon's avatar
incardon committed
827
		gr.setDimensions(div_);
incardon's avatar
incardon committed
828
		domain = domain_;
incardon's avatar
incardon committed
829
		cd.setDimensions(domain,div_,0);
incardon's avatar
incardon committed
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934

		//! Create the decomposition

		CreateDecomposition(v_cl);
	}

	/*! \brief Number of local hyper-cubes (sub-domains)
	 *
	 * \return the size of sub_domains
	 *
	 */
	size_t getNLocalHyperCube()
	{
		const size_t n_local = sub_domains.size();

		return n_local;
	}

	/*! \brief The bulk part of the data set, or the data that
	 * does not depend from the ghosts layers
	 *
	 * Not implemented yet. A value-initialized T is returned so that calling
	 * the function no longer flows off the end of a non-void function, which
	 * is undefined behaviour.
	 *
	 * \return the bulk of your data (currently always T())
	 *
	 */
	T getBulk()
	{
		return T();
	}

	/*! \brief This function divide the data set into bulk, border, external and internal part
	 *
	 * \tparam dim dimensionality of the structure storing your data
	 *         (example if they are in 3D grid, has to be 3)
	 * \tparam T type of object we are dividing
	 * \tparam device type of layout selected
	 * \param data 1-dimensional grid of point
	 * \param nb define the neighborhood of all the points
	 * \return a structure with the set of objects divided
	 *
	 */

//	dataDiv<T> CartDecomposition<dim,T,layout>::divide(layout::grid<1,Point<dim,T>> & data, neighborhood & nb);

	/*! \brief The internal part of the data set, or the data that
	 * are inside the local space
	 *
	 * Not implemented yet. A value-initialized T is returned so that calling
	 * the function no longer flows off the end of a non-void function, which
	 * is undefined behaviour.
	 *
	 * \return the internal part of your data (currently always T())
	 *
	 */
	T getInternal()
	{
		return T();
	}

	/*! \brief Get the border part of the dataset, or the data that
	 * depend from the ghost layers
	 *
	 * Not implemented yet. A value-initialized T is returned so that calling
	 * the function no longer flows off the end of a non-void function, which
	 * is undefined behaviour.
	 *
	 * \return the ghost part of your data (currently always T())
	 *
	 */

	T getBorder()
	{
		return T();
	}

	/*! \brief Get the external part of the dataset, or the data that
	 * are outside localSpace including ghost
	 *
	 * Not implemented yet. A value-initialized T is returned so that calling
	 * the function no longer flows off the end of a non-void function, which
	 * is undefined behaviour.
	 *
	 * \return the external part of your data (currently always T())
	 *
	 */
	T getExternal()
	{
		return T();
	}

	/*! \brief Number of hyper-cubes enclosing one particular subspace
	 *
	 * A subspace could in general be enclosed by more than one hyper-cube
	 * to increase occupancy. In case of Cartesian decomposition each
	 * subspace has exactly one hyper-cube and occupancy 1, so the answer
	 * does not depend on the subspace.
	 *
	 * \param id of the subspace (not used)
	 * \return the number of hyper-cube enclosing your space, always 1
	 *
	 */
	size_t getNHyperCube(size_t id)
	{
		// the subspace id plays no role in the Cartesian case
		static_cast<void>(id);

		const size_t n_cubes = 1;

		return n_cubes;
	}

	/*! \brief Get the hyper-cube margins id_c has to be 0
	 *
	 * Get the hyper-cube margins id_c has to be 0, each subspace
	 * has one hyper-cube
	 *
	 * When DEBUG is defined, an id that is not smaller than gr.size() or a
	 * non-zero id_c is reported on std::cerr; the lookup is performed anyway.
	 *
	 * NOTE(review): the check uses gr.size() while the lookup indexes
	 * sub_domains; confirm that gr.size() is the intended bound. Object is
	 * not declared in the visible part of the file; confirm what it names.
	 *
	 * \param id of the subspace
	 * \param id_c
	 * \return The specified hyper-cube space
	 *
	 */
	SpaceBox<dim,T> & getHyperCubeMargins(size_t id, size_t id_c)
	{
#ifdef DEBUG
		// Check if this subspace exist
		if (id >= gr.size())
		{
			std::cerr << "Error CartDecomposition: id >= N_tot" << "\n";
		}
		else if (id_c > 0)
		{
			// Each subspace is an hyper-cube so return error if id_c > 0
			std::cerr << "Error CartDecomposition: id_c > 0" << "\n";
		}
#endif

		// "template" is required to call a member template on sub_domains,
		// as done in getLocalHyperCube()
		return sub_domains.template get<Object>(id);
	}

	/*! \brief Get the total number of Hyper-cube
	 *
	 * Get the total number of Hyper-cube
	 *
	 * \return The total number of hyper-cube
	 *
	 */

	size_t getNHyperCube()
	{
incardon's avatar
incardon committed
959
		return gr.size();
incardon's avatar
incardon committed
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
	}

	/*! \brief Produce an hyper-cube approximation of the space decomposition
	 *
	 * The function body is empty: calling it has no effect.
	 *
	 */

	void hyperCube()
	{
		// intentionally left without statements
	}

	/*! \brief Select a local sub-space
	 *
	 * The id of the selected sub-space is appended to id_sub
	 *
	 * \param sub id of the sub-space to select
	 *
	 */
	void setSpace(size_t sub)
	{
		// record the selection
		id_sub.push_back(sub);
	}


	/*! \brief Access the container of the local sub-domains
	 *
	 * \return a reference to sub_domains
	 *
	 */

	auto getLocalHyperCubes() -> decltype(sub_domains) &
	{
		// no copy is made, the caller gets the member itself
		return sub_domains;
	}

	/*! \brief Build a copy of one local hyper-cube
	 *
	 * The low and high margins of the sub-domain lc are read from
	 * sub_domains, one dimension at a time, and written into a new SpaceBox
	 *
	 * NOTE(review): Box is written here without template arguments while the
	 * rest of the file spells ::Box<dim,T>; confirm which Box is meant.
	 *
	 * \param lc is the id of the space
	 * \return the local hyper-cube
	 *
	 */

	SpaceBox<dim,T> getLocalHyperCube(size_t lc)
	{
		SpaceBox<dim,T> cube;

		// copy the margins dimension by dimension
		size_t d = 0;
		while (d < dim)
		{
			cube.setLow(d,sub_domains.template get<Box::p1>(lc)[d]);
			cube.setHigh(d,sub_domains.template get<Box::p2>(lc)[d]);
			++d;
		}

		return cube;
	}

	/*! \brief Return the structure that store the physical domain
	 *
	 * The member domain is returned by non-const reference, so the caller
	 * can modify it.
	 *
	 * \return The physical domain
	 *
	 */

	Domain<dim,T> & getDomain()
	{
		return domain;
	}

	/*! \brief Check if the particle is local
	 *
	 * \param p object position
incardon's avatar
incardon committed
1038
	 *
incardon's avatar
incardon committed
1039
	 * \return true if it is local
incardon's avatar
incardon committed
1040 1041
	 *
	 */
incardon's avatar
incardon committed
1042
	template<typename Mem> bool isLocal(encapc<1, Point<dim,T>, Mem> p)
incardon's avatar
incardon committed
1043
	{
incardon's avatar
incardon committed
1044 1045
		return processorID<Mem>() == v_cl.getProcessUnitID();
	}
incardon's avatar
incardon committed
1046

incardon's avatar
incardon committed
1047
	/*! \brief Check if the particle is local
incardon's avatar
incardon committed
1048
	 *
incardon's avatar
incardon committed
1049
	 * \param p object position
incardon's avatar
incardon committed
1050
	 *
incardon's avatar
incardon committed
1051
	 * \return true if it is local
incardon's avatar
incardon committed
1052 1053
	 *
	 */
incardon's avatar
incardon committed
1054
	bool isLocal(T (&pos)[dim])
incardon's avatar
incardon committed
1055
	{
incardon's avatar
incardon committed
1056 1057
		return processorID(pos) == v_cl.getProcessUnitID();
	}
incardon's avatar
incardon committed
1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069

	// Bounding box handed out by reference through getProcessorBounds()
	::Box<dim,T> bbox;

	/*! \brief Return the bounding box containing the processor box + smallest subdomain spacing
	 *
	 * The member bbox is returned by non-const reference, so the caller can
	 * modify it.
	 *
	 * \return The bounding box
	 *
	 */
	::Box<dim,T> & getProcessorBounds()
	{
		return bbox;
	}

	/*! \brief Iterate over the geo_cell entries of the cell that contains a point
	 *
	 * If the point fall into the ghost of some near processor the returned
	 * iterator runs over the ids stored for that cell.
	 *
	 * NOTE(review): calculateGhostBoxes() stores indices into vb_int in
	 * geo_cell, so the iterated values look like ghost-box ids rather than
	 * processor ids; confirm against the callers.
	 *
	 * \param p Point
	 * \return iterator over the ids stored in the cell of p
	 *
	 */
	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
	{
		// the iterator is requested for the cell the point falls in
		return geo_cell.getIterator(
				geo_cell.getCell(p)
		);
	}

	/*! \brief Number of geo_cell entries of the cell that contains a point
	 *
	 * If the point fall into the ghost of some near processor this is the
	 * number of ids stored for the cell of the point.
	 *
	 * NOTE(review): calculateGhostBoxes() stores indices into vb_int in
	 * geo_cell, so this looks like a count of ghost boxes rather than of
	 * processors; confirm against the callers.
	 *
	 * \param p Point
	 * \return number of elements stored in the cell of p
	 *
	 */
	inline size_t labelPointNp(Point<dim,T> & p)
	{
		// count the elements of the cell the point falls in
		return geo_cell.getNelements(
				geo_cell.getCell(p)
		);
	}

	/*! \brief Cell of geo_cell that contains a point
	 *
	 * The labeling of a point p is regulated by a Cell list: given a point
	 * the cell list gives back a cell-id
	 *
	 * \param p Point
	 * \return cell-id
	 *
	 */
	inline size_t labelPointCell(Point<dim,T> & p)
	{
		const size_t cell_id = geo_cell.getCell(p);

		return cell_id;
	}

	/*! \brief Fill the ghost buffer
	 *
	 * \tparam one or more properties to get
	 *
	 */
/*	template<unsigned int ...i> void ghost_get()
	{
		// first check if a local particle must be sent to another processor
		for (size_t i = 0 ; i < ; i++)
		{

		}
	}*/

	/*! \brief Fill the ghost buffer
	 *
	 * \tparam one or more properties to get
	 *
	 */
/*	template<unsigned int ...i> void ghost_put()
	{

	}*/

};


#endif