CartDecomposition.hpp 32.2 KB
Newer Older
incardon's avatar
incardon committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * CartDecomposition.hpp
 *
 *  Created on: Aug 15, 2014
 *      Author: Pietro Incardona
 */

#ifndef CARTDECOMPOSITION_HPP
#define CARTDECOMPOSITION_HPP

#include "config.h"
#include "Decomposition.hpp"
incardon's avatar
incardon committed
13
#include "Vector/map_vector.hpp"
incardon's avatar
incardon committed
14
15
16
17
18
19
20
#include <vector>
#include "global_const.hpp"
#include <initializer_list>
#include "SubdomainGraphNodes.hpp"
#include "metis_util.hpp"
#include "dec_optimizer.hpp"
#include "Space/Shape/Box.hpp"
incardon's avatar
incardon committed
21
#include "Space/Shape/Point.hpp"
incardon's avatar
incardon committed
22
#include "NN/CellList/CellDecomposer.hpp"
incardon's avatar
incardon committed
23
24
#include <unordered_map>
#include "NN/CellList/CellList.hpp"
incardon's avatar
incardon committed
25
#include "Space/Ghost.hpp"
incardon's avatar
incardon committed
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

/**
 * \brief This class decomposes a space into subspaces
 *
 * This class decomposes a space into regular hyper-cube subspaces, and gives the possibility to
 * select one subspace
 *
 * \tparam dim is the dimensionality of the physical domain we are going to decompose.
 * \tparam T type of the space we decompose, Real, Integer, Complex ...
 * \tparam layout to use
 * \tparam Memory Memory factory used to allocate memory
 * \tparam Domain Structure that contain the information of your physical domain
 * \tparam data type of structure that store the sub-domain decomposition can be an openfpm structure like
 *        vector, ...
 *
 * \note if PARALLEL_DECOMPOSITION macro is defined a parallel decomposition algorithm is used, basically
 *       each processor does not recompute the same decomposition
 *
incardon's avatar
incardon committed
44
45
46
47
48
 *  \note sub-sub-domain portion of space at finer level than the sub-domain (before optimization)
 *        (or before sub-sub-domain merging)
 *  \note sub-domain portion of space (after optimization)
 *  \note near processor sub-domain a sub-domain that lives in a near (or contiguous) processor
 *
incardon's avatar
incardon committed
49
50
 */

incardon's avatar
incardon committed
51
template<unsigned int dim, typename T, template<typename> class device_l=openfpm::device_cpu, typename Memory=HeapMemory, template<unsigned int, typename> class Domain=Box, template<typename, typename, typename, typename, unsigned int> class data_s = openfpm::vector>
incardon's avatar
incardon committed
52
53
class CartDecomposition
{
incardon's avatar
incardon committed
54
55
56
57
58
59
60
61
62
	struct N_box
	{
		// id of the processor in the nn_processor list
		size_t id;

		// Near processor sub-domains
		typename openfpm::vector<::Box<dim,T>> bx;
	};

incardon's avatar
incardon committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
	/*! \brief Intersection boxes between the local sub-domains and one contiguous processor
	 *
	 * Element type of box_nn_processor_int
	 *
	 */
	struct Box_proc
	{
		//! Intersection between the local sub-domain enlarged by the ghost and the contiguous processor
		//! sub-domains
		openfpm::vector<::Box<dim,T>> bx;

		//! Intersection between the contiguous processor sub-domain enlarged by the ghost with the
		//! local sub-domain
		openfpm::vector<::Box<dim,T>> nbx;


		//! processor the boxes refer to
		//! NOTE(review): presumably the processor id, the code filling it is not visible here -- confirm
		size_t proc;
	};

incardon's avatar
incardon committed
78
public:
incardon's avatar
incardon committed
79

incardon's avatar
incardon committed
80
81
82
83
84
85
86
87
	//! Type of the domain we are going to decompose
	typedef T domain_type;

	//! It simplify to access the SpaceBox element
	typedef SpaceBox<dim,T> Box;

private:

incardon's avatar
incardon committed
88
	//! This is the key type to access  data_s, for example in the case of vector
incardon's avatar
incardon committed
89
	//! acc_key is size_t
incardon's avatar
incardon committed
90
	typedef typename data_s<SpaceBox<dim,T>,device_l<SpaceBox<dim,T>>,Memory,openfpm::vector_grow_policy_default,openfpm::vect_isel<SpaceBox<dim,T>>::value >::access_key acc_key;
incardon's avatar
incardon committed
91
92
93
94
95
96
97
98
99

	//! Subspace selected
	//! access_key in case of grid is just the set of the index to access the grid
	std::vector<acc_key> id_sub;

	//! the margin of the sub-domain selected
	SpaceBox<dim,T> sub_domain;

	//! the set of all local sub-domain as vector
incardon's avatar
incardon committed
100
	openfpm::vector<SpaceBox<dim,T>> sub_domains;
incardon's avatar
incardon committed
101

incardon's avatar
incardon committed
102
103
104
105
106
	//! List of near processors
	openfpm::vector<size_t> nn_processors;

	//! for each sub-domain, contain the list of the neighborhood processors
	//! and for each processor contain the boxes calculated from the intersection
incardon's avatar
incardon committed
107
	//! of the sub-domain + ghost with the near-by processor sub-domain ()
incardon's avatar
incardon committed
108
109
	openfpm::vector< openfpm::vector< Box_proc > > box_nn_processor_int;

incardon's avatar
incardon committed
110
	//! for each sub-domain, contain the list of the neighborhood processors
incardon's avatar
incardon committed
111
112
113
	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;

	// for each near-processor store the sub-domain of the near processor
incardon's avatar
incardon committed
114
	std::unordered_map<size_t, N_box> nn_processor_subdomains;
incardon's avatar
incardon committed
115

incardon's avatar
Add ORB    
incardon committed
116
117
	//! Structure that contain for each sub-domain box the processor id
	//! exist for efficient global communication
incardon's avatar
incardon committed
118
119
	openfpm::vector<size_t> fine_s;

incardon's avatar
incardon committed
120
121
	//! Structure that store the cartesian grid information
	grid_sm<dim,void> gr;
incardon's avatar
incardon committed
122

incardon's avatar
incardon committed
123
124
125
	//! Structure that decompose your structure into cell without creating them
	//! useful to convert positions to CellId or sub-domain id in this case
	CellDecomposer_sm<dim,T> cd;
incardon's avatar
incardon committed
126
127
128
129

	//! rectangular domain to decompose
	Domain<dim,T> domain;

incardon's avatar
incardon committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
	//! Ghost boxes of the processor
	//! for each Sub-domain it store the ghost boxes, or
	//! the set of boxes that enclose the ghost space
	//! Box cannot overlap, they contain one id that is the
	//! processor the information should come from
	openfpm::vector< openfpm::vector<Domain<dim,T>> > gh_dom;

	//! Internal boxes of the processor
	//! for each Sub-domain it store the boxes enclosing the
	//! space that must be communicated when another processor
	//! require the ghost
	//! Box can overlap, they contain one id that is the
	//! processor the information should be communicated to
	openfpm::vector< openfpm::vector< Domain<dim,T>> > int_box;

incardon's avatar
incardon committed
145
146
147
148
149
150
	//! Box Spacing
	T spacing[dim];

	//! Runtime virtual cluster machine
	Vcluster & v_cl;

incardon's avatar
incardon committed
151
152
153
154
155
	//! Structure that store the geometrical information about intersection between the local sub-domain
	//! and the near processor sub-domains
	CellList<dim,T,FAST> geo_cell;


incardon's avatar
incardon committed
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
	/*! \brief Create internally the decomposition
	 *
     * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	void CreateDecomposition(Vcluster & v_cl)
	{
		// Calculate the total number of box and and the spacing
		// on each direction
		// Get the box containing the domain
		SpaceBox<dim,T> bs = domain.getBox();

		for (unsigned int i = 0; i < dim ; i++)
		{
			// Calculate the spacing
incardon's avatar
incardon committed
171
			spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
incardon's avatar
incardon committed
172
173
174
175
		}

		// Here we use METIS
		// Create a cartesian grid graph
incardon's avatar
incardon committed
176
		CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
incardon's avatar
incardon committed
177
178

		// Processor graph
incardon's avatar
incardon committed
179
		Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,dim-1>(gr.getSize(),domain);
incardon's avatar
incardon committed
180
181
182
183
184
185
186
187

		// Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		// Get the processor id
		long int p_id = v_cl.getProcessUnitID();

		// Convert the graph to metis
incardon's avatar
incardon committed
188
		Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);
incardon's avatar
incardon committed
189
190

		// decompose
incardon's avatar
incardon committed
191
		met.decompose<nm_part_v::id>();
incardon's avatar
incardon committed
192

incardon's avatar
Add ORB    
incardon committed
193
		// fill the structure that store the processor id for each sub-domain
incardon's avatar
incardon committed
194
		fine_s.resize(gr.size());
incardon's avatar
Add ORB    
incardon committed
195

incardon's avatar
incardon committed
196
197
		// Optimize the decomposition creating bigger spaces
		// And reducing Ghost over-stress
incardon's avatar
incardon committed
198
		dec_optimizer<dim,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize());
incardon's avatar
incardon committed
199
200
201
202

		// set of Boxes produced by the decomposition optimizer
		openfpm::vector<::Box<dim,size_t>> loc_box;

incardon's avatar
incardon committed
203
		// optimize the decomposition
incardon's avatar
incardon committed
204
205
		d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor);

incardon's avatar
incardon committed
206
207
		// produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the
		// processor list
incardon's avatar
incardon committed
208
209
210
211
212
213
214
215
216
217
218
219
		for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
		{
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				nn_processors.add(box_nn_processor.get(i).get(j));
			}
		}

		// make the list sorted and unique
	    std::sort(nn_processors.begin(), nn_processors.end());
	    auto last = std::unique(nn_processors.begin(), nn_processors.end());
	    nn_processors.erase(last, nn_processors.end());
incardon's avatar
incardon committed
220

incardon's avatar
incardon committed
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
		// produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the
		// processor list
		for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
		{
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// processor id near to this sub-domain
				size_t proc_id = box_nn_processor.get(i).get(j);

				size_t k = 0;
				// search inside near processor list
				for (k = 0 ; k < nn_processors.size() ; k++)
					if (nn_processors.get(k) == proc_id)	break;

				nn_processor_subdomains[proc_id].id = k;
			}
		}

		// Initialize ss_box and bbox
		if (loc_box.size() >= 0)
		{
			SpaceBox<dim,T> sub_d(loc_box.get(0));
			sub_d.mul(spacing);
			sub_d.expand(spacing);

			// add the sub-domain
			sub_domains.add(sub_d);

			ss_box = sub_d;
			bbox = sub_d;
		}

incardon's avatar
incardon committed
253
		// convert into sub-domain
incardon's avatar
incardon committed
254
		for (size_t s = 1 ; s < loc_box.size() ; s++)
incardon's avatar
incardon committed
255
256
257
		{
			SpaceBox<dim,T> sub_d(loc_box.get(s));

incardon's avatar
incardon committed
258
259
260
			// re-scale and add spacing (the end is the starting point of the next domain + spacing)
			sub_d.mul(spacing);
			sub_d.expand(spacing);
incardon's avatar
incardon committed
261
262
263

			// add the sub-domain
			sub_domains.add(sub_d);
incardon's avatar
incardon committed
264
265
266
267
268
269

			// Calculate the bound box
			bbox.enclose(sub_d);

			// Create the smallest box contained in all sub-domain
			ss_box.contained(sub_d);
incardon's avatar
incardon committed
270
		}
incardon's avatar
incardon committed
271

incardon's avatar
incardon committed
272
273
274
275
276
277
278
279
		//++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes
		{
		VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1;
		vtk_box1.add(sub_domains);
		vtk_box1.write(std::string("loc_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
		}
		//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

incardon's avatar
incardon committed
280
		// fill fine_s structure
incardon's avatar
incardon committed
281
282
283
		// fine_s structure contain the processor id for each sub-sub-domain
		// with sub-sub-domain we mean the sub-domain decomposition before
		// running dec_optimizer (before merging sub-domains)
incardon's avatar
incardon committed
284
285
286
287
288
289
290
291
292
293
294
		auto it = gp.getVertexIterator();

		while (it.isNext())
		{
			size_t key = it.get();

			// fill with the fine decomposition
			fine_s.get(key) = gp.template vertex_p<nm_part_v::id>(key);

			++it;
		}
incardon's avatar
incardon committed
295
296
297
298
299
300
301
302
303
304
305
	}

	/*! \brief Create the subspaces that decompose your domain
	 *
	 * Create the subspaces that decompose your domain
	 *
	 */

	void CreateSubspaces()
	{
		// Create a grid where each point is a space
306
		grid_sm<dim,void> g(div);
incardon's avatar
incardon committed
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

		// create a grid_key_dx iterator
		grid_key_dx_iterator<dim> gk_it(g);

		// Divide the space into subspaces
		while (gk_it.isNext())
		{
			//! iterate through all subspaces
			grid_key_dx<dim> key = gk_it.get();

			//! Create a new subspace
			SpaceBox<dim,T> tmp;

			//! fill with the Margin of the box
			for (int i = 0 ; i < dim ; i++)
			{
				tmp.setHigh(i,(key.get(i)+1)*spacing[i]);
				tmp.setLow(i,key.get(i)*spacing[i]);
			}

			//! add the space box
			sub_domains.add(tmp);

			// add the iterator
			++gk_it;
		}
	}

incardon's avatar
incardon committed
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
	// Heap memory receiver
	HeapMemory hp_recv;

	// vector v_proc
	openfpm::vector<size_t> v_proc;

	// Receive counter
	size_t recv_cnt;

	/*! \brief Message allocation
	 *
	 * \param message size required to receive from i
	 * \param total message size to receive from all the processors
	 * \param the total number of processor want to communicate with you
	 * \param i processor id
incardon's avatar
incardon committed
350
351
	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
	 *           every time message_alloc is called)
incardon's avatar
incardon committed
352
353
354
355
356
	 * \param ptr a pointer to the vector_dist structure
	 *
	 * \return the pointer where to store the message
	 *
	 */
incardon's avatar
incardon committed
357
	static void * message_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
incardon's avatar
incardon committed
358
359
360
361
	{
		// cast the pointer
		CartDecomposition<dim,T,device_l,Memory,Domain,data_s> * cd = static_cast< CartDecomposition<dim,T,device_l,Memory,Domain,data_s> *>(ptr);

incardon's avatar
incardon committed
362
363
364
365
366
		if (cd->v_cl.getProcessUnitID() == 0)
		{
			std::cout << "Receiving from " << i << "       msg size: " << msg_i << "\n";
		}

incardon's avatar
incardon committed
367
		// Resize the memory
incardon's avatar
incardon committed
368
		cd->nn_processor_subdomains[i].bx.resize(msg_i);
incardon's avatar
incardon committed
369
370

		// Return the receive pointer
incardon's avatar
incardon committed
371
		return cd->nn_processor_subdomains[i].bx.getPointer();
incardon's avatar
incardon committed
372
373
	}

incardon's avatar
incardon committed
374
375
376
377
378
379
380
381
public:

	/*! \brief Cartesian decomposition move constructor
	 *
     * \param cd decomposition to move from
	 *
	 */
	CartDecomposition(CartDecomposition<dim,T,device_l,Memory,Domain,data_s> && cd)
	:sub_domain(cd.sub_domain),gr(cd.gr),cd(cd.cd),domain(cd.domain),v_cl(cd.v_cl)
	{
		// NOTE: the parameter "cd" hides the member with the same name, inside this
		// body "cd" is always the source object

		// Reset the box to zero
		bbox.zero();

		// take the selected subspace keys from the source
		id_sub.swap(cd.id_sub);

		// take the set of all local sub-domains from the source
		sub_domains.swap(cd.sub_domains);

		for (unsigned int i = 0 ; i < dim ; i++)
		{
			// Box Spacing: read it from the source object, an unqualified spacing[i]
			// is this->spacing[i] and made the statement a self-assignment
			this->spacing[i] = cd.spacing[i];
		}

		// NOTE(review): the other members (nn_processors, box_nn_processor, fine_s, ...)
		// are not transferred -- confirm this is intended
	}

	/*! \brief Cartesian decomposition constructor
	 *
     * \param v_cl Virtual cluster, used internally to handle or pipeline communication
	 *
	 */
	CartDecomposition(Vcluster & v_cl)
	: id_sub(0),
	  v_cl(v_cl)
	{
		// no decomposition is created here, only the bounding box is zeroed
		bbox.zero();
	}
incardon's avatar
incardon committed
412
413
414
415
416
417
418
419
420

	/*! \brief Cartesian decomposition constructor, it divide the space in boxes
	 *
	 * \param dec is a vector that store how to divide on each dimension
	 * \param domain is the domain to divide
	 * \param v_cl are information of the cluster runtime machine
	 *
	 */
	CartDecomposition(std::vector<size_t> dec, Domain<dim,T> domain, Vcluster & v_cl)
incardon's avatar
incardon committed
421
	:id_sub(0),gr(dec),cd(domain,dec,0),domain(domain),v_cl(v_cl)
incardon's avatar
incardon committed
422
	{
incardon's avatar
incardon committed
423
424
		// Reset the box to zero
		bbox.zero();
incardon's avatar
incardon committed
425

incardon's avatar
incardon committed
426
		// Create the decomposition
incardon's avatar
incardon committed
427
428
429
430
431
432
433
		CreateDecomposition(v_cl);
	}

	//! Cartesian decomposition destructor
	~CartDecomposition() = default;

incardon's avatar
incardon committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
	openfpm::vector<size_t> ids;

	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
	 *
	 * \param p Particle position
	 *
	 * \param return the processor ids
	 *
	 */
	const openfpm::vector<size_t> ghost_processorID(Point<dim,T> & p)
	{
		// ids is a member reused at every call, the result is returned as a copy
		ids.clear();

		// Ask geo_cell for the boxes stored in the cell containing the particle
		auto cell_it = geo_cell.getCellIterator(p);

		// For each element in the cell, check if the point is inside the box
		// if it is, store the processor id
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
				ids.add(vb_int.get(bid).proc);
			}

			// advance to the next box of the cell, without this step the loop
			// never terminates when the cell is not empty
			++cell_it;
		}

		return ids;
	}

	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
	 *
	 * \param p Particle position
	 *
	 * \param return the processor ids
	 *
	 */
	template<typename Mem> inline const openfpm::vector<size_t> ghost_processorID(const encapc<1,Point<dim,T>,Mem> & p)
	{
		// ids is a member reused at every call, the result is returned as a copy
		ids.clear();

		// Ask geo_cell for the boxes stored in the cell containing the particle
		auto cell_it = geo_cell.getCellIterator(p);

		// For each element in the cell, check if the point is inside the box
		// if it is, store the processor id
		while (cell_it.isNext())
		{
			size_t bid = cell_it.get();

			if (vb_int.get(bid).box.isInside(p) == true)
			{
				ids.add(vb_int.get(bid).proc);
			}

			// advance to the next box of the cell, without this step the loop
			// never terminates when the cell is not empty
			++cell_it;
		}

		return ids;
	}

	// Internal boxes for this processor domain, indicated with B8_0 B9_0 ..... in the figure
	// below as a linear vector
	openfpm::vector<::Box<dim,T>> vb_int;

incardon's avatar
incardon committed
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
	/*! It calculate the ghost boxes and internal boxes
	 *
	 * Example: Processor 10 calculate
	 * B8_0 B9_0 B9_1 and B5_0
	 *
	 *
+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|                                                    |                                   |
|                                                    |                                   |
++--------------+---+---------------------------+----+        Processor 9                |
 |              |   |     B8_0                  |    |        Subdomain 0                |
 |              +------------------------------------+                                   |
 |              |   |                           |    |                                   |
 |              |   |  XXXXXXXXXXXXX XX         |B9_0|                                   |
 |              | B |  X Processor 10 X         |    |                                   |
 | Processor 5  | 5 |  X Sub-domain 0 X         |    |                                   |
 | Subdomain 0  | _ |  X              X         +----------------------------------------+
 |              | 0 |  XXXXXXXXXXXXXXXX         |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |        Processor 9                |
 |              |   |                           |B9_1|        Subdomain 1                |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 |              |   |                           |    |                                   |
 +--------------+---+---------------------------+----+                                   |
                                                     |                                   |
                                                     +-----------------------------------+

       and also
       G8_0 G9_0 G9_1 G5_0

+----------------------------------------------------+
|                                                    |
|                 Processor 8                        |
|                 Sub-domain 0                       +-----------------------------------+
|           +---------------------------------------------+                              |
|           |         G8_0                           |    |                              |
++--------------+------------------------------------+    |   Processor 9                |
 |          |   |                                    |    |   Subdomain 0                |
 |          |   |                                    |G9_0|                              |
 |          |   |                                    |    |                              |
 |          |   |      XXXXXXXXXXXXX XX              |    |                              |
 |          |   |      X Processor 10 X              |    |                              |
 | Processor|5  |      X Sub-domain 0 X              |    |                              |
 | Subdomain|0  |      X              X              +-----------------------------------+
 |          |   |      XXXXXXXXXXXXXXXX              |    |                              |
 |          | G |                                    |    |                              |
 |          | 5 |                                    |    |   Processor 9                |
 |          | | |                                    |    |   Subdomain 1                |
 |          | 0 |                                    |G9_1|                              |
 |          |   |                                    |    |                              |
 |          |   |                                    |    |                              |
 +--------------+------------------------------------+    |                              |
            |                                        |    |                              |
            +----------------------------------------+----+------------------------------+


	 *
	 *
	 *
	 * \param ghost margins for each dimensions (p1 negative part) (p2 positive part)
	 *
                ^ p2[1]
                |
                |
           +----+----+
           |         |
           |         |
p1[0]<-----+         +----> p2[0]
           |         |
           |         |
           +----+----+
                |
                v  p1[1]

	 *
	 *
	 */
	void calculateGhostBoxes(Ghost<dim,T> & ghost)
	{
#ifdef DEBUG
		// the ghost margins are assumed to be smaller
		// than one sub-domain

		for (size_t i = 0 ; i < dim ; i++)
		{
incardon's avatar
incardon committed
589
			if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i)  >= domain.template getHigh(i) / gr.size(i))
incardon's avatar
incardon committed
590
591
592
593
594
595
			{
				std::cerr << "Error: Ghost are bigger that one domain" << "\n";
			}
		}
#endif

incardon's avatar
incardon committed
596
597
		// create a buffer with the sub-domains of this processor, the informations ( the boxes )
		// of the sub-domains contiguous to the processor A are sent to the processor A and
incardon's avatar
incardon committed
598
599
		// the information of the contiguous sub-domains in the near processors are received
		//
incardon's avatar
incardon committed
600
		openfpm::vector< openfpm::vector< ::SpaceBox<dim,T>> > boxes(nn_processors.size());
incardon's avatar
incardon committed
601
602
603
604
605
606
607

		for (size_t b = 0 ; b < box_nn_processor.size() ; b++)
		{
			for (size_t p = 0 ; p < box_nn_processor.get(b).size() ; p++)
			{
				size_t prc = box_nn_processor.get(b).get(p);

incardon's avatar
incardon committed
608
609
610
611
612
				// id of the processor in the processor list
				// [value between 0 and the number of the near processors]
				size_t id = nn_processor_subdomains[prc].id;

				boxes.get(id).add(sub_domains.get(b));
incardon's avatar
incardon committed
613
614
615
			}
		}

incardon's avatar
incardon committed
616
617
618
619
620
621
622
623
624
625
626
627
		//++++++++++++++++++++++++++++++++++++++++ Debug output NN boxes
		{
		for (size_t b = 0 ; b < boxes.size() ; b++)
		{
			VTKWriter<openfpm::vector<::SpaceBox<dim,T>>,VECTOR_BOX> vtk_box1;
			vtk_box1.add(boxes.get(b));
			vtk_box1.write(std::string("Processor_") + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(nn_processors.get(b)) + std::string(".vtk"));
		}
		}

		//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

incardon's avatar
incardon committed
628
629
630
		// Intersect all the local sub-domains with the sub-domains of the contiguous processors

		// Get the sub-domains of the near processors
incardon's avatar
incardon committed
631
632
633
634
635
636
637
638
639
640
641
642
		v_cl.sendrecvMultipleMessages(nn_processors,boxes,CartDecomposition<dim,T,device_l,Memory,Domain,data_s>::message_alloc, this ,NEED_ALL_SIZE);

		// ++++++++++++++++++++++++++++++++++++++++++ Check received boxes

		{
		VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1;
		for (size_t p = 0 ; p < nn_processors.size() ; p++)
		{
			size_t prc = nn_processors.get(p);

			if (v_cl.getProcessUnitID() == 0)
				std::cout << "Received from " << prc << "      n_boxes: " << nn_processor_subdomains[prc].bx.size() << "\n";
incardon's avatar
incardon committed
643

incardon's avatar
incardon committed
644
645
646
647
648
649
650
651
			vtk_box1.add(nn_processor_subdomains[prc].bx);
		}
		vtk_box1.write(std::string("rb_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
		}

		// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

		box_nn_processor_int.resize(sub_domains.size());
incardon's avatar
incardon committed
652
653
654
655

		// For each sub-domain
		for (size_t i = 0 ; i < sub_domains.size() ; i++)
		{
incardon's avatar
incardon committed
656
			SpaceBox<dim,T> sub_with_ghost = sub_domains.get(i);
incardon's avatar
incardon committed
657
658

			// enlarge the sub-domain with the ghost
incardon's avatar
incardon committed
659
660
661
662
			sub_with_ghost.enlarge(ghost);

			// resize based on the number of contiguous processors
			box_nn_processor_int.get(i).resize(box_nn_processor.get(i).size());
incardon's avatar
incardon committed
663
664
665
666
667
668
669
670

			// For each processor contiguous to this sub-domain
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// Contiguous processor
				size_t p_id = box_nn_processor.get(i).get(j);

				// get the set of sub-domains of the contiguous processor p_id
incardon's avatar
incardon committed
671
				openfpm::vector< ::Box<dim,T> > & p_box = nn_processor_subdomains[p_id].bx;
incardon's avatar
incardon committed
672
673
674
675
676
677
678

				// near processor sub-domain intersections
				openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).bx;

				// for each near processor sub-domain intersect with the enlarged local sub-domain and store it
				for (size_t b = 0 ; b < p_box.size() ; b++)
				{
incardon's avatar
incardon committed
679
					::Box<dim,T> bi;
incardon's avatar
incardon committed
680

incardon's avatar
incardon committed
681
					bool intersect = sub_with_ghost.Intersect(::Box<dim,T>(p_box.get(b)),bi);
incardon's avatar
incardon committed
682
683
684
685
686
687
688
689
690
691
692
693
694

					if (intersect == true)
						p_box_int.add(bi);
				}
			}

			// For each processor contiguous to this sub-domain
			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
			{
				// Contiguous processor
				size_t p_id = box_nn_processor.get(i).get(j);

				// get the set of sub-domains of the contiguous processor p_id
incardon's avatar
incardon committed
695
				openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_processor_subdomains[p_id].bx;
incardon's avatar
incardon committed
696
697

				// near processor sub-domain intersections
incardon's avatar
incardon committed
698
				openfpm::vector< ::Box<dim,T> > & p_box_int = box_nn_processor_int.get(i).get(j).nbx;
incardon's avatar
incardon committed
699
700
701
702

				// For each near processor sub-domains enlarge and intersect with the local sub-domain and store the result
				for (size_t k = 0 ; k < nn_p_box.size() ; k++)
				{
incardon's avatar
incardon committed
703
					// enlarge the near-processor sub-domain
incardon's avatar
incardon committed
704
705
					::Box<dim,T> n_sub = nn_p_box.get(k);

incardon's avatar
incardon committed
706
707
708
					// local sub-domain
					::SpaceBox<dim,T> l_sub = sub_domains.get(i);

incardon's avatar
incardon committed
709
					// Create a margin of ghost size around the near processor sub-domain
incardon's avatar
incardon committed
710
					n_sub.enlarge(ghost);
incardon's avatar
incardon committed
711
712
713

					// Intersect with the local sub-domain

incardon's avatar
incardon committed
714
715
					::Box<dim,T> b_int;
					bool intersect = n_sub.Intersect(l_sub,b_int);
incardon's avatar
incardon committed
716
717
718
719
720

					// store if it intersect
					if (intersect == true)
					{
						p_box_int.add(b_int);
incardon's avatar
incardon committed
721
						vb_int.add(b_int);
incardon's avatar
incardon committed
722
723
724
725

						// update the geo_cell list

						// get the boxes this box span
incardon's avatar
incardon committed
726
727
						const grid_key_dx<dim> p1 = geo_cell.getCellGrid(b_int.getP1());
						const grid_key_dx<dim> p2 = geo_cell.getCellGrid(b_int.getP2());
incardon's avatar
incardon committed
728
729
730
731
732
733
734
735

						// Get the grid and the sub-iterator
						auto & gi = geo_cell.getGrid();
						grid_key_dx_iterator_sub<dim> g_sub(gi,p1,p2);

						// add the box-id to the cell list
						while (g_sub.isNext())
						{
incardon's avatar
incardon committed
736
737
							auto key = g_sub.get();
							geo_cell.addCell(gi.LinId(key),vb_int.size()-1);
incardon's avatar
incardon committed
738
739
740
741
742
							++g_sub;
						}
					}
				}
			}
incardon's avatar
incardon committed
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760


			// ++++++++++++++++++++++++++++++++++++++++ Debug +++++++++++++++++++++++++++++

			{
			VTKWriter<openfpm::vector<::Box<dim,T>>,VECTOR_BOX> vtk_box1;
			for (size_t p = 0 ; p < box_nn_processor_int.size() ; p++)
			{
				for (size_t s = 0 ; s < box_nn_processor_int.get(p).size() ; s++)
				{
					vtk_box1.add(box_nn_processor_int.get(p).get(s).nbx);
				}
			}
			vtk_box1.write(std::string("inte_Processor_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
			}

			// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

incardon's avatar
incardon committed
761
762
763
		}
	}

incardon's avatar
incardon committed
764
765
766
767
768
769
	/*! \brief processorID return in which processor the particle should go
	 *
	 * \return processorID
	 *
	 */

incardon's avatar
incardon committed
770
	template<typename Mem> size_t inline processorID(encapc<1, Point<dim,T>, Mem> p)
incardon's avatar
incardon committed
771
	{
incardon's avatar
incardon committed
772
		return fine_s.get(cd.getCell(p));
incardon's avatar
incardon committed
773
774
	}

incardon's avatar
incardon committed
775
776
777
778
779
780
781
782
783
784
785
786
787
	// Smallest subdivision on each direction, it is the box returned by
	// getSmallestSubdivision() (documented there as having p1 set to zero)
	::Box<dim,T> ss_box;

	/*! \brief Get the smallest subdivision of the domain on each direction
	 *
	 * The box is returned by reference, no copy is created
	 *
	 * \return a box p1 is set to zero
	 *
	 */
	const ::Box<dim,T> & getSmallestSubdivision()
	{
		return this->ss_box;
	}

incardon's avatar
incardon committed
788
789
790
791
792
793
794
795
	/*! \brief processorID return in which processor the particle should go
	 *
	 * The position is converted into a cell id by the cell decomposer cd, the
	 * value stored in fine_s for that cell is returned
	 *
	 * \param p position of the particle, one coordinate for each dimension
	 *
	 * \return processorID
	 *
	 */

	size_t inline processorID(T (&p)[dim])
	{
		// cell in which the particle falls
		const auto cell_id = cd.getCell(p);

		return fine_s.get(cell_id);
	}

incardon's avatar
incardon committed
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
	/*! \brief Set the parameter of the decomposition
	 *
     * \param div_ std::vector storing into how many domain to decompose on each dimension
     * \param domain_ domain to decompose
	 *
	 */
	void setParameters(std::vector<size_t> div_, Domain<dim,T> domain_)
	{
		// Set the decomposition parameters

		div = div_;
		domain = domain_;

		//! Create the decomposition

		CreateDecomposition(v_cl);
	}

	/*! \brief Set the parameter of the decomposition
	 *
     * \param div_ std::vector storing into how many domain to decompose on each dimension
     * \param domain_ domain to decompose
	 *
	 */
incardon's avatar
incardon committed
823
	void setParameters(const size_t (& div_)[dim], Domain<dim,T> domain_)
incardon's avatar
incardon committed
824
825
826
	{
		// Set the decomposition parameters

incardon's avatar
incardon committed
827
		gr.setDimensions(div_);
incardon's avatar
incardon committed
828
		domain = domain_;
incardon's avatar
incardon committed
829
		cd.setDimensions(domain,div_,0);
incardon's avatar
incardon committed
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934

		//! Create the decomposition

		CreateDecomposition(v_cl);
	}

	/*! \brief Get the number of local hyper-cubes or sub-domains
	 *
	 * \return the number of elements stored in sub_domains
	 *
	 */
	size_t getNLocalHyperCube()
	{
		const size_t n_local = sub_domains.size();

		return n_local;
	}

	/*! \brief Get the bulk part of the data set, or the data that
	 * does not depend from the ghosts layers
	 *
	 * \note Not implemented: a value-initialized T is returned as placeholder.
	 *       The body was empty before, flowing off the end of a function
	 *       returning a value is undefined behavior
	 *
	 * \return the bulk of your data
	 *
	 */
	T getBulk()
	{
		return T();
	}

	/*! \brief This function divide the data set into bulk, border, external and internal part
	 *
	 * \tparam dim dimensionality of the structure storing your data
	 *         (example if they are in 3D grid, has to be 3)
	 * \tparam T type of object we are dividing
	 * \tparam device type of layout selected
	 * \param data 1-dimensional grid of point
	 * \param nb define the neighborhood of all the points
	 * \return a structure with the set of objects divided
	 *
	 */

//	dataDiv<T> CartDecomposition<dim,T,layout>::divide(layout::grid<1,Point<dim,T>> & data, neighborhood & nb);

	/*! \brief Get the internal part of the data set, or the data that
	 * are inside the local space
	 *
	 * \note Not implemented: a value-initialized T is returned as placeholder.
	 *       The body was empty before, flowing off the end of a function
	 *       returning a value is undefined behavior
	 *
	 * \return the internal part of your data
	 *
	 */
	T getInternal()
	{
		return T();
	}

	/*! \brief Get the border part of the dataset, or the data that
	 * depend from the ghost layers
	 *
	 * \note Not implemented: a value-initialized T is returned as placeholder.
	 *       The body was empty before, flowing off the end of a function
	 *       returning a value is undefined behavior
	 *
	 * \return the ghost part of your data
	 *
	 */

	T getBorder()
	{
		return T();
	}

	/*! \brief Get the external part of the dataset, or the data that
	 * are outside localSpace including ghost
	 *
	 * \note Not implemented: a value-initialized T is returned as placeholder.
	 *       The body was empty before, flowing off the end of a function
	 *       returning a value is undefined behavior
	 *
	 * \return the external part of your data
	 *
	 */
	T getExternal()
	{
		return T();
	}

	/*! \brief Get the number of hyper-cubes enclosing one particular subspace
	 *
	 * The hyper-cubes enclose your space, even if one box is enough
	 * can be more that one to increase occupancy
	 *
	 * In case of Cartesian decomposition it just return 1, each subspace
	 * has one hyper-cube, and occupancy 1
	 *
	 * \param id of the subspace (not used, the result is the same for every subspace)
	 * \return the number of hyper-cube enclosing your space
	 *
	 */
	size_t getNHyperCube(size_t id)
	{
		// the result does not depend on the subspace
		(void)id;

		const size_t n_hyper_cube = 1;

		return n_hyper_cube;
	}

	/*! \brief Get the hyper-cube margins id_c has to be 0
	 *
	 * Each subspace has one hyper-cube, for this reason id_c has to be 0.
	 * The arguments are checked only when DEBUG is defined, in this case
	 * an error is printed but the function continues anyway
	 *
	 * \param id of the subspace
	 * \param id_c id of the hyper-cube inside the subspace
	 * \return The specified hyper-cube space
	 *
	 */
	SpaceBox<dim,T> & getHyperCubeMargins(size_t id, size_t id_c)
	{
#ifdef DEBUG
		// NOTE(review): id is checked against gr.size() while it is used to index
		// sub_domains - confirm that this is the intended bound
		const bool id_out_of_range = (id >= gr.size());

		// Each subspace is an hyper-cube so return error if id_c > 0
		const bool id_c_not_zero = (id_c > 0);

		if (id_out_of_range)
		{
			// Check if this subspace exist
			std::cerr << "Error CartDecomposition: id > N_tot";
		}
		else if (id_c_not_zero)
		{
			std::cerr << "Error CartDecomposition: id_c > 0";
		}
#endif

		// NOTE(review): Object is declared outside this block and get<...> is called on a
		// dependent type without the template keyword (getLocalHyperCube uses
		// sub_domains.template get<...>) - confirm that this expression is the intended one
		return sub_domains.get<Object>(id);
	}

	/*! \brief Get the total number of Hyper-cube
	 *
	 * Get the total number of Hyper-cube
	 *
	 * \return The total number of hyper-cube
	 *
	 */

	size_t getNHyperCube()
	{
incardon's avatar
incardon committed
959
		return gr.size();
incardon's avatar
incardon committed
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
	}

	/*! \brief produce an hyper-cube approximation of the space decomposition
	 *
	 * \note the body is empty, calling this function has no effect
	 *
	 */

	void hyperCube()
	{
	}

	/*! \brief Select the local space
	 *
	 * Select the local space
	 *
	 * \param sub select the sub-space
	 *
	 */
	void setSpace(size_t sub)
	{
		id_sub.push_back(sub);
	}


	/*! \brief Get the local hyper-cubes
	 *
	 * It gives access to sub_domains, the structure is returned by reference
	 * and no copy is created
	 *
	 * \return a reference to sub_domains
	 *
	 */

	auto getLocalHyperCubes() -> decltype(sub_domains) &
	{
		return this->sub_domains;
	}

	/*! \brief Get the local hyper-cube
	 *
	 * A new SpaceBox is filled, dimension by dimension, with the p1 and p2
	 * properties of the element lc of sub_domains
	 *
	 * \param lc is the id of the space
	 * \return the local hyper-cube (a copy)
	 *
	 */

	SpaceBox<dim,T> getLocalHyperCube(size_t lc)
	{
		// Space box to fill
		SpaceBox<dim,T> box;

		size_t d = 0;

		while (d < dim)
		{
			// Low and High of the SpaceBox on the dimension d
			box.setLow(d,sub_domains.template get<Box::p1>(lc)[d]);
			box.setHigh(d,sub_domains.template get<Box::p2>(lc)[d]);

			++d;
		}

		return box;
	}

	/*! \brief Return the structure that store the physical domain
	 *
	 * The domain is returned by reference, no copy is created
	 *
	 * \return The physical domain
	 *
	 */

	Domain<dim,T> & getDomain()
	{
		return this->domain;
	}

incardon's avatar
incardon committed
1035
1036
1037
	/*! \brief Check if the particle is local
	 *
	 * \param p object position
incardon's avatar
incardon committed
1038
	 *
incardon's avatar
incardon committed
1039
	 * \return true if it is local
incardon's avatar
incardon committed
1040
1041
	 *
	 */
incardon's avatar
incardon committed
1042
	template<typename Mem> bool isLocal(encapc<1, Point<dim,T>, Mem> p)
incardon's avatar
incardon committed
1043
	{
incardon's avatar
incardon committed
1044
1045
		return processorID<Mem>() == v_cl.getProcessUnitID();
	}
incardon's avatar
incardon committed
1046

incardon's avatar
incardon committed
1047
	/*! \brief Check if the particle is local
incardon's avatar
incardon committed
1048
	 *
incardon's avatar
incardon committed
1049
	 * \param p object position
incardon's avatar
incardon committed
1050
	 *
incardon's avatar
incardon committed
1051
	 * \return true if it is local
incardon's avatar
incardon committed
1052
1053
	 *
	 */
incardon's avatar
incardon committed
1054
	bool isLocal(T (&pos)[dim])
incardon's avatar
incardon committed
1055
	{
incardon's avatar
incardon committed
1056
1057
		return processorID(pos) == v_cl.getProcessUnitID();
	}
incardon's avatar
incardon committed
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069

	// Box returned by getProcessorBounds(), documented there as the bounding box
	// containing the processor box + smallest subdomain spacing
	::Box<dim,T> bbox;

	/*! \brief Return the bounding box containing the processor box + smallest subdomain spacing
	 *
	 * The box is returned by reference, no copy is created
	 *
	 * \return The bounding box
	 *
	 */
	::Box<dim,T> & getProcessorBounds()
	{
		return this->bbox;
	}
incardon's avatar
incardon committed
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131

	/*! \brief if the point fall into the ghost of some near processor it return the processors id's in which
	 *  it fall
	 *
	 * The point is converted into a cell of geo_cell, the iterator on the
	 * elements stored in that cell is returned
	 *
	 * \param p Point
	 * \return iterator of the processors id's
	 *
	 */
	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
	{
		// cell of geo_cell in which the point falls
		auto cell_id = geo_cell.getCell(p);

		return geo_cell.getIterator(cell_id);
	}

	/*! \brief if the point fall into the ghost of some near processor it return the processor number in which
	 *  it fall
	 *
	 * The point is converted into a cell of geo_cell, the number of elements
	 * stored in that cell is returned
	 *
	 * \param p Point
	 * \return number of processors
	 *
	 */
	inline size_t labelPointNp(Point<dim,T> & p)
	{
		// cell of geo_cell in which the point falls
		auto cell_id = geo_cell.getCell(p);

		return geo_cell.getNelements(cell_id);
	}

	/*! \brief It return the label point cell
	 *
	 * The labeling of a point p is regulated by a Cell list (geo_cell), given a point
	 * it gives the id of the cell in which the point falls
	 *
	 * \param p Point
	 * \return cell-id
	 *
	 */
	inline size_t labelPointCell(Point<dim,T> & p)
	{
		const size_t cell_id = geo_cell.getCell(p);

		return cell_id;
	}

	/*! \brief Fill the ghost buffer
	 *
	 * \tparam one or more properties to get
	 *
	 */
/*	template<unsigned int ...i> void ghost_get()
	{
		// first check if a local particle must be sent to another processor
		for (size_t i = 0 ; i < ; i++)
		{

		}
	}*/

	/*! \brief Fill the ghost buffer
	 *
	 * \tparam one or more properties to get
	 *
	 */
/*	template<unsigned int ...i> void ghost_put()
	{

	}*/

incardon's avatar
incardon committed
1132
1133
1134
1135
};


#endif