vector_dist_comm.hpp 55.8 KB
Newer Older
Pietro Incardona's avatar
Pietro Incardona committed
1
2
3
4
5
6
7
8
9
10
/*
 * vector_dist_comm.hpp
 *
 *  Created on: Aug 18, 2016
 *      Author: i-bird
 */

#ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_
#define SRC_VECTOR_VECTOR_DIST_COMM_HPP_

incardon's avatar
incardon committed
11
#define TEST1
incardon's avatar
incardon committed
12

incardon's avatar
incardon committed
13
14
15
#if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
16
#include "util/cuda/moderngpu/kernel_scan.hxx"
incardon's avatar
incardon committed
17
18
19
#endif

#include "Vector/util/vector_dist_funcs.hpp"
incardon's avatar
incardon committed
20
#include "cuda/vector_dist_comm_util_funcs.cuh"
incardon's avatar
incardon committed
21

Pietro Incardona's avatar
Pietro Incardona committed
22
23
#define NO_POSITION 1
#define WITH_POSITION 2
incardon's avatar
incardon committed
24
#define NO_CHANGE_ELEMENTS 4
Pietro Incardona's avatar
Pietro Incardona committed
25

26
27
#define BIND_DEC_TO_GHOST 1

incardon's avatar
incardon committed
28
#define RUN_ON_DEVICE 1024
incardon's avatar
incardon committed
29
#define MAP_LOCAL 2
incardon's avatar
Latest    
incardon committed
30

incardon's avatar
incardon committed
31
32
33
34
35
36
37
/*! \brief compute the communication options from the ghost_get/put options
 *
 *
 */
inline static size_t compute_options(size_t opt)
{
	size_t opt_ = NONE;
38
39
40
41
42
43
44
45
46
47
48
49
	if (opt & NO_CHANGE_ELEMENTS && opt & SKIP_LABELLING)
	{opt_ = RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE;}

	if (opt & RUN_ON_DEVICE)
	{
#if defined(CUDA_GPU) && defined(__NVCC__)
		// Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
		opt_ |= MPI_GPU_DIRECT;
#else
		std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
	}
incardon's avatar
incardon committed
50
51
52
53

	return opt_;
}

Pietro Incardona's avatar
Pietro Incardona committed
54
55
56
57
58
59
60
61
62
63
64
65
/*! \brief This class is an helper for the communication of vector_dist
 *
 * \tparam dim Dimensionality of the space where the elements lives
 * \tparam St type of space float, double ...
 * \tparam prop properties the vector element store in OpenFPM data structure format
 * \tparam Decomposition Decomposition strategy to use CartDecomposition ...
 * \tparam Memory Memory pool where store the information HeapMemory ...
 *
 * \see vector_dist
 *
 */

incardon's avatar
incardon committed
66
67
68
69
70
71
template<unsigned int dim,
         typename St,
         typename prop,
         typename Decomposition = CartDecomposition<dim,St>,
         typename Memory = HeapMemory,
         template<typename> class layout_base = memory_traits_lin>
Pietro Incardona's avatar
Pietro Incardona committed
72
73
class vector_dist_comm
{
incardon's avatar
incardon committed
74
75
76
	//! Number of units for each sub-domain
	size_t v_sub_unit_factor = 64;

incardon's avatar
incardon committed
77
	//! definition of the send vector for position
78
	typedef openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity> send_pos_vector;
incardon's avatar
incardon committed
79

Pietro Incardona's avatar
Pietro Incardona committed
80
	//! VCluster
81
	Vcluster<Memory> & v_cl;
Pietro Incardona's avatar
Pietro Incardona committed
82
83
84
85
86
87
88

	//! Domain decomposition
	Decomposition dec;

	//! It map the processor id with the communication request into map procedure
	openfpm::vector<size_t> p_map_req;

Pietro Incardona's avatar
Pietro Incardona committed
89
	//! For each near processor, outgoing particle id
incardon's avatar
incardon committed
90
91
92
93
	//! \warning opart is assumed to be an ordered list
	//! first id particle id
	//! second id shift id
	//! third id is the processor id
incardon's avatar
incardon committed
94
95
96
97
	openfpm::vector<aggregate<int,int,int>,
					Memory,
					typename layout_base<aggregate<int,int,int>>::type,
					layout_base > m_opart;
Pietro Incardona's avatar
Pietro Incardona committed
98

incardon's avatar
incardon committed
99
	//! Per processor ordered particles id for ghost_get (see prc_g_opart)
incardon's avatar
incardon committed
100
101
102
	//! For each processor the internal vector store the id of the
	//! particles that must be communicated to the other processors
	openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart;
Pietro Incardona's avatar
Pietro Incardona committed
103

104
105
106
107
108
109
	//! Same as g_opart but on device, the vector of vector is flatten into a single vector
    openfpm::vector<aggregate<unsigned int,unsigned long int>,
                    CudaMemory,
                    typename memory_traits_inte<aggregate<unsigned int,unsigned long int>>::type,
                    memory_traits_inte> g_opart_device;

110
111
112
113
114
115
	//! Helper buffer for computation (on GPU) of local particles (position)
	openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> v_pos_tmp;

	//! Helper buffer for computation (on GPU) of local particles (properties)
	openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> v_prp_tmp;

incardon's avatar
incardon committed
116
117
118
	//! Per processor number of particle g_opart_sz.get(i) = g_opart.get(i).size()
	openfpm::vector<size_t> g_opart_sz;

incardon's avatar
incardon committed
119
	//! processor rank list of g_opart
incardon's avatar
incardon committed
120
121
	openfpm::vector<size_t> prc_g_opart;

incardon's avatar
incardon committed
122
123
124
	//! It store the list of processor that communicate with us (local processor)
	//! from the last ghost get
	openfpm::vector<size_t> prc_recv_get;
Pietro Incardona's avatar
Pietro Incardona committed
125

incardon's avatar
incardon committed
126
	//! the same as prc_recv_get but for put
Pietro Incardona's avatar
Pietro Incardona committed
127
128
	openfpm::vector<size_t> prc_recv_put;

incardon's avatar
incardon committed
129
130
	//! the same as prc_recv_get but for map
	openfpm::vector<size_t> prc_recv_map;
Pietro Incardona's avatar
Pietro Incardona committed
131

incardon's avatar
incardon committed
132
133
134
	//! It store the size of the elements added for each processor that communicate with us (local processor)
	//! from the last ghost get
	openfpm::vector<size_t> recv_sz_get;
incardon's avatar
incardon committed
135
136
	//! Conversion to byte of recv_sz_get
	openfpm::vector<size_t> recv_sz_get_byte;
incardon's avatar
incardon committed
137

Pietro Incardona's avatar
Pietro Incardona committed
138

incardon's avatar
incardon committed
139
	//! The same as recv_sz_get but for put
Pietro Incardona's avatar
Pietro Incardona committed
140
141
	openfpm::vector<size_t> recv_sz_put;

incardon's avatar
incardon committed
142
143
	//! The same as recv_sz_get but for map
	openfpm::vector<size_t> recv_sz_map;
Pietro Incardona's avatar
Pietro Incardona committed
144

incardon's avatar
incardon committed
145
146
147
	//! elements sent for each processors (ghost_get)
	openfpm::vector<size_t> prc_sz_gg;

incardon's avatar
incardon committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
	//! temporary buffer to processors ids
    openfpm::vector<aggregate<unsigned int>,
                            Memory,
                            typename layout_base<aggregate<unsigned int>>::type,
                            layout_base> proc_id_out;

    //! temporary buffer for the scan result
	openfpm::vector<aggregate<unsigned int>,
                             Memory,
                             typename layout_base<aggregate<unsigned int>>::type,
                             layout_base> starts;

	//! Processor communication size
	openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> prc_offset;


	//! Temporary CudaMemory to do stuff
	CudaMemory mem;

Pietro Incardona's avatar
Pietro Incardona committed
167
168
169
170
	//! Local ghost marker (across the ghost particles it mark from where we have the)
	//! replicated ghost particles that are local
	size_t lg_m;

171
	//! Sending buffer
incardon's avatar
incardon committed
172
	openfpm::vector_fr<Memory> hsmem;
173

incardon's avatar
incardon committed
174
	//! process the particle with properties
175
176
177
	template<typename prp_object, int ... prp>
	struct proc_with_prp
	{
incardon's avatar
incardon committed
178
		//! process the particle
179
180
181
182
183
184
185
186
187
188
189
190
		template<typename T1, typename T2> inline static void proc(size_t lbl, size_t cnt, size_t id, T1 & v_prp, T2 & m_prp)
		{
			// source object type
			typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
			// destination object type
			typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;

			// Copy only the selected properties
			object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(lbl).get(cnt));
		}
	};

incardon's avatar
incardon committed
191
192
193
194
195
196
197
198
199
200
	/*! \brief Calculate sending buffer size for each processor
	 *
	 * \param prc_sz_r processor size
	 * \param prc_r processor ids
	 *
	 */
	inline void calc_send_buffers(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_sz,
								  openfpm::vector<size_t> & prc_sz_r,
								  openfpm::vector<size_t> & prc_r,
								  size_t opt)
201
	{
incardon's avatar
incardon committed
202
		if (opt & RUN_ON_DEVICE)
203
		{
incardon's avatar
incardon committed
204
#ifndef TEST1
205
206
			size_t prev_off = 0;
			for (size_t i = 0; i < prc_sz.size() ; i++)
207
			{
208
209
210
211
212
213
				if (prc_sz.template get<1>(i) != (unsigned int)-1)
				{
					prc_r.add(prc_sz.template get<1>(i));
					prc_sz_r.add(prc_sz.template get<0>(i) - prev_off);
				}
				prev_off = prc_sz.template get<0>(i);
214
			}
incardon's avatar
incardon committed
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#else

			// Calculate the sending buffer size for each processor, put this information in
			// a contiguous buffer

			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
			{
				if (prc_sz.template get<0>(i) != 0 && v_cl.rank() != i)
				{
					prc_r.add(i);
					prc_sz_r.add(prc_sz.template get<0>(i));
				}
			}

#endif
230
231
232
		}
		else
		{
incardon's avatar
incardon committed
233
234
			// Calculate the sending buffer size for each processor, put this information in
			// a contiguous buffer
235

incardon's avatar
incardon committed
236
237
			p_map_req.resize(v_cl.getProcessingUnits());
			for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
238
			{
incardon's avatar
incardon committed
239
240
241
242
243
244
				if (prc_sz.template get<0>(i) != 0)
				{
					p_map_req.get(i) = prc_r.size();
					prc_r.add(i);
					prc_sz_r.add(prc_sz.template get<0>(i));
				}
245
246
247
248
			}
		}
	}

249
250
	//! From which decomposition the shift boxes are calculated
	long int shift_box_ndec = -1;
Pietro Incardona's avatar
Pietro Incardona committed
251

Pietro Incardona's avatar
Pietro Incardona committed
252
	//! this map is used to check if a combination is already present
Pietro Incardona's avatar
Pietro Incardona committed
253
254
	std::unordered_map<size_t, size_t> map_cmb;

Pietro Incardona's avatar
Pietro Incardona committed
255
256
	//! The boxes touching the border of the domain are divided in groups (first vector)
	//! each group contain internal ghost coming from sub-domains of the same section
incardon's avatar
incardon committed
257
	openfpm::vector_std<openfpm::vector_std<Box<dim, St>>> box_f;
Pietro Incardona's avatar
Pietro Incardona committed
258

259
260
261
262
	//! The boxes touching the border of the domain + shift vector linearized from where they come from
	openfpm::vector<Box<dim, St>,Memory,typename layout_base<Box<dim,St>>::type,layout_base> box_f_dev;
	openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> box_f_sv;

Pietro Incardona's avatar
Pietro Incardona committed
263
	//! Store the sector for each group (previous vector)
Pietro Incardona's avatar
Pietro Incardona committed
264
265
	openfpm::vector_std<comb<dim>> box_cmb;

Pietro Incardona's avatar
Pietro Incardona committed
266
	//! Id of the local particle to replicate for ghost_get
267
	openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> o_part_loc;
Pietro Incardona's avatar
Pietro Incardona committed
268

incardon's avatar
incardon committed
269
270
271
	//! Processor communication size
	openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> prc_sz;

Pietro Incardona's avatar
Pietro Incardona committed
272
273
274
275
276
277
	/*! \brief For every internal ghost box we create a structure that order such internal local ghost box in
	 *         shift vectors
	 *
	 */
	void createShiftBox()
	{
278
		if (shift_box_ndec == (long int)dec.get_ndec())
279
		{return;}
Pietro Incardona's avatar
Pietro Incardona committed
280

incardon's avatar
incardon committed
281
282
283
284
285
286
287
		struct sh_box
		{
			size_t shift_id;

			unsigned int box_f_sv;
			Box<dim,St> box_f_dev;

incardon's avatar
incardon committed
288
			bool operator<(const sh_box & tmp) const
incardon's avatar
incardon committed
289
290
291
292
293
			{
				return shift_id < tmp.shift_id;
			}

		};
incardon's avatar
incardon committed
294
		openfpm::vector<sh_box> reord_shift;
incardon's avatar
incardon committed
295

Pietro Incardona's avatar
Pietro Incardona committed
296
		// Add local particles coming from periodic boundary, the only boxes that count are the one
297
		// touching the border
Pietro Incardona's avatar
Pietro Incardona committed
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
		for (size_t i = 0; i < dec.getNLocalSub(); i++)
		{
			size_t Nl = dec.getLocalNIGhost(i);

			for (size_t j = 0; j < Nl; j++)
			{
				// If the ghost does not come from the intersection with an out of
				// border sub-domain the combination is all zero and n_zero return dim
				if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
					continue;

				// Check if we already have boxes with such combination
				auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
				if (it == map_cmb.end())
				{
					// we do not have it
					box_f.add();
					box_f.last().add(dec.getLocalIGhostBox(i, j));
					box_cmb.add(dec.getLocalIGhostPos(i, j));
					map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
				}
				else
				{
					// we have it
					box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
				}

incardon's avatar
incardon committed
325
326
327
328
				reord_shift.add();
				reord_shift.last().shift_id = dec.getLocalIGhostPos(i, j).lin();
				reord_shift.last().box_f_dev = dec.getLocalIGhostBox(i, j);
				reord_shift.last().box_f_sv = dec.convertShift(dec.getLocalIGhostPos(i, j));
Pietro Incardona's avatar
Pietro Incardona committed
329
330
331
			}
		}

incardon's avatar
incardon committed
332
333
334
335
336
337
338
339
340
341
342
343
		// now we sort box_f by shift_id, the reason is that we have to avoid duplicated particles
		reord_shift.sort();

		box_f_dev.resize(reord_shift.size());
		box_f_sv.resize(reord_shift.size());

		for (size_t i = 0 ; i < reord_shift.size() ; i++)
		{
			box_f_dev.get(i) = reord_shift.get(i).box_f_dev;
			box_f_sv.template get<0>(i) = reord_shift.get(i).box_f_sv;
		}

incardon's avatar
incardon committed
344
345
#ifdef CUDA_GPU

346
		// move box_f_dev and box_f_sv to device
incardon's avatar
incardon committed
347
348
		box_f_dev.template hostToDevice<0,1>();
		box_f_sv.template hostToDevice<0>();
349

incardon's avatar
incardon committed
350
351
#endif

352
		shift_box_ndec = dec.get_ndec();
Pietro Incardona's avatar
Pietro Incardona committed
353
354
355
356
	}

	/*! \brief Local ghost from labeled particles
	 *
Pietro Incardona's avatar
Pietro Incardona committed
357
358
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particles properties
incardon's avatar
incardon committed
359
	 * \param opt options
Pietro Incardona's avatar
Pietro Incardona committed
360
361
	 *
	 */
362
	void local_ghost_from_opart(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
incardon's avatar
incardon committed
363
364
			                    openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
			                    size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
365
366
	{
		// get the shift vectors
367
		const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
368

incardon's avatar
incardon committed
369
		if (!(opt & NO_POSITION))
Pietro Incardona's avatar
Pietro Incardona committed
370
		{
371
			if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
372
			{
incardon's avatar
incardon committed
373
374
				local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
				::run(o_part_loc,shifts,v_pos,v_prp,opt);
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
			}
			else
			{
				for (size_t i = 0 ; i < o_part_loc.size() ; i++)
				{
					size_t lin_id = o_part_loc.template get<1>(i);
					size_t key = o_part_loc.template get<0>(i);

					Point<dim, St> p = v_pos.get(key);
					// shift
					p -= shifts.get(lin_id);

					// add this particle shifting its position
					v_pos.add(p);
					v_prp.get(lg_m+i) = v_prp.get(key);
				}
incardon's avatar
incardon committed
391
392
393
394
			}
		}
		else
		{
395
			if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
396
			{
incardon's avatar
incardon committed
397
398
				local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
				::run(o_part_loc,shifts,v_pos,v_prp,opt);
399
400
401
402
403
404
405
406
407
			}
			else
			{
				for (size_t i = 0 ; i < o_part_loc.size() ; i++)
				{
					size_t key = o_part_loc.template get<0>(i);

					v_prp.get(lg_m+i) = v_prp.get(key);
				}
incardon's avatar
incardon committed
408
			}
Pietro Incardona's avatar
Pietro Incardona committed
409
410
411
412
413
414
415
		}
	}

	/*! \brief Local ghost from decomposition
	 *
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particle properties
Pietro Incardona's avatar
Pietro Incardona committed
416
	 * \param g_m ghost marker
Pietro Incardona's avatar
Pietro Incardona committed
417
418
	 *
	 */
419
420
	void local_ghost_from_dec(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			                  openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
421
			                  size_t g_m,size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
422
423
424
425
	{
		o_part_loc.clear();

		// get the shift vectors
426
		const openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
427

428
429
		if (opt & RUN_ON_DEVICE)
		{
incardon's avatar
incardon committed
430
			local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
incardon's avatar
incardon committed
431
			::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,starts,v_pos,v_prp,g_m,opt);
432
433
		}
		else
Pietro Incardona's avatar
Pietro Incardona committed
434
		{
435
436
			// Label the internal (assigned) particles
			auto it = v_pos.getIteratorTo(g_m);
Pietro Incardona's avatar
Pietro Incardona committed
437

438
			while (it.isNext())
Pietro Incardona's avatar
Pietro Incardona committed
439
			{
440
441
442
443
				auto key = it.get();

				// If particles are inside these boxes
				for (size_t i = 0; i < box_f.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
444
				{
445
					for (size_t j = 0; j < box_f.get(i).size(); j++)
Pietro Incardona's avatar
Pietro Incardona committed
446
					{
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
						if (box_f.get(i).get(j).isInsideNP(v_pos.get(key)) == true)
						{
							size_t lin_id = dec.convertShift(box_cmb.get(i));

							o_part_loc.add();
							o_part_loc.template get<0>(o_part_loc.size()-1) = key;
							o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id;

							Point<dim, St> p = v_pos.get(key);
							// shift
							p -= shifts.get(lin_id);

							// add this particle shifting its position
							v_pos.add(p);
							v_prp.add();
							v_prp.last() = v_prp.get(key);

							// boxes in one group can be overlapping
							// we do not have to search for the other
							// boxes otherwise we will have duplicate particles
							//
							// A small note overlap of boxes across groups is fine
							// (and needed) because each group has different shift
							// producing non overlapping particles
							//
							break;
						}
Pietro Incardona's avatar
Pietro Incardona committed
474
475
476
					}
				}

477
478
				++it;
			}
Pietro Incardona's avatar
Pietro Incardona committed
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
		}
	}

	/*! \brief Add local particles based on the boundary conditions
	 *
	 * In order to understand what this function use the following
	 *
	 \verbatim

	 [1,1]
	 +---------+------------------------+---------+
	 | (1,-1)  |                        | (1,1)   |
	 |   |     |    (1,0) --> 7         |   |     |
	 |   v     |                        |   v     |
	 |   6     |                        |   8     |
	 +--------------------------------------------+
	 |         |                        |         |
	 |         |                        |         |
	 |         |                        |         |
	 | (-1,0)  |                        | (1,0)   |
	 |    |    |                        |   |     |
	 |    v    |      (0,0) --> 4       |   v     |
	 |    3    |                        |   5     |
	 |         |                        |         |
 B	 |         |                        |     A   |
 *	 |         |                        |    *    |
	 |         |                        |         |
	 |         |                        |         |
	 |         |                        |         |
	 +--------------------------------------------+
	 | (-1,-1) |                        | (-1,1)  |
	 |    |    |   (-1,0) --> 1         |    |    |
	 |    v    |                        |    v    |
	 |    0    |                        |    2    |
	 +---------+------------------------+---------+


	 \endverbatim

	 *
	 *  The box is the domain, while all boxes at the border (so not (0,0) ) are the
	 *  ghost part at the border of the domain. If a particle A is in the position in figure
	 *  a particle B must be created. This function duplicate the particle A, if A and B are
	 *  local
	 *
	 * \param v_pos vector of particle of positions
	 * \param v_prp vector of particle properties
	 * \param g_m ghost marker
	 * \param opt options
	 *
	 */
530
531
532
533
	void add_loc_particles_bc(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			                  openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp ,
			                  size_t & g_m,
			                  size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
534
535
536
537
	{
		// Create the shift boxes
		createShiftBox();

538
		if (!(opt & SKIP_LABELLING))
incardon's avatar
incardon committed
539
			lg_m = v_prp.size();
Pietro Incardona's avatar
Pietro Incardona committed
540

Pietro Incardona's avatar
Pietro Incardona committed
541
542
543
544
545
		if (box_f.size() == 0)
			return;
		else
		{
			if (opt & SKIP_LABELLING)
incardon's avatar
incardon committed
546
			{local_ghost_from_opart(v_pos,v_prp,opt);}
Pietro Incardona's avatar
Pietro Incardona committed
547
			else
548
			{local_ghost_from_dec(v_pos,v_prp,g_m,opt);}
Pietro Incardona's avatar
Pietro Incardona committed
549
550
551
552
553
554
555
556
557
		}
	}

	/*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles
	 *
	 * \param v_pos vector of particle positions
	 * \param g_pos_send Send buffer to fill
	 *
	 */
558
	void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
559
560
561
								 openfpm::vector<size_t> & prc_sz,
			                     openfpm::vector<send_pos_vector> & g_pos_send,
			                     size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
562
563
	{
		// get the shift vectors
564
		const openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
565
566

		// create a number of send buffers equal to the near processors
incardon's avatar
incardon committed
567
		g_pos_send.resize(prc_sz.size());
568
569
570

		resize_retained_buffer(hsmem,g_pos_send.size());

Pietro Incardona's avatar
Pietro Incardona committed
571
572
		for (size_t i = 0; i < g_pos_send.size(); i++)
		{
573
574
575
			// Buffer must retained and survive the destruction of the
			// vector
			if (hsmem.get(i).ref() == 0)
incardon's avatar
incardon committed
576
			{hsmem.get(i).incRef();}
577
578
579
580

			// Set the memory for retain the send buffer
			g_pos_send.get(i).setMemory(hsmem.get(i));

Pietro Incardona's avatar
Pietro Incardona committed
581
			// resize the sending vector (No allocation is produced)
582
			g_pos_send.get(i).resize(prc_sz.get(i));
Pietro Incardona's avatar
Pietro Incardona committed
583
584
		}

585
		if (opt & RUN_ON_DEVICE)
Pietro Incardona's avatar
Pietro Incardona committed
586
		{
587
588
589
590
591
592
#if defined(CUDA_GPU) && defined(__NVCC__)

			size_t offset = 0;

			// Fill the sending buffers
			for (size_t i = 0 ; i < g_pos_send.size() ; i++)
Pietro Incardona's avatar
Pietro Incardona committed
593
			{
594
595
				auto ite = g_pos_send.get(i).getGPUIterator();

incardon's avatar
incardon committed
596
597
598
				CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
				ite.wthr,ite.thr,
				g_opart_device.toKernel(), g_pos_send.get(i).toKernel(),
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
				 v_pos.toKernel(),shifts.toKernel(),offset);

				offset += prc_sz.get(i);
			}

#else

			std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl;

#endif
		}
		else
		{
			// Fill the send buffer
			for (size_t i = 0; i < g_opart.size(); i++)
			{
				for (size_t j = 0; j < g_opart.get(i).size(); j++)
				{
					Point<dim, St> s = v_pos.get(g_opart.get(i).template get<0>(j));
					s -= shifts.get(g_opart.get(i).template get<1>(j));
					g_pos_send.get(i).set(j, s);
				}
Pietro Incardona's avatar
Pietro Incardona committed
621
622
623
624
			}
		}
	}

Pietro Incardona's avatar
Pietro Incardona committed
625
626
627
628
629
630
631
632
633
634
635
	/*! \brief This function fill the send buffer for ghost_put
	 *
	 * \tparam send_vector type used to send data
	 * \tparam prp_object object containing only the properties to send
	 * \tparam prp set of properties to send
	 *
	 * \param v_prp vector of particle properties
	 * \param g_send_prp Send buffer to fill
	 * \param g_m ghost marker
	 *
	 */
incardon's avatar
incardon committed
636
	template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_put_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp, size_t & g_m)
Pietro Incardona's avatar
Pietro Incardona committed
637
638
639
	{
		// create a number of send buffers equal to the near processors
		// from which we received
incardon's avatar
incardon committed
640
		g_send_prp.resize(prc_recv_get.size());
641
642
643

		resize_retained_buffer(hsmem,g_send_prp.size());

Pietro Incardona's avatar
Pietro Incardona committed
644
645
		for (size_t i = 0; i < g_send_prp.size(); i++)
		{
646
647
648
649
650
651
652
653
			// Buffer must retained and survive the destruction of the
			// vector
			if (hsmem.get(i).ref() == 0)
				hsmem.get(i).incRef();

			// Set the memory for retain the send buffer
			g_send_prp.get(i).setMemory(hsmem.get(i));

Pietro Incardona's avatar
Pietro Incardona committed
654
			// resize the sending vector (No allocation is produced)
incardon's avatar
incardon committed
655
			g_send_prp.get(i).resize(recv_sz_get.get(i));
Pietro Incardona's avatar
Pietro Incardona committed
656
657
658
659
660
		}

		size_t accum = g_m;

		// Fill the send buffer
incardon's avatar
incardon committed
661
		for (size_t i = 0; i < prc_recv_get.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
662
663
		{
			size_t j2 = 0;
incardon's avatar
incardon committed
664
			for (size_t j = accum; j < accum + recv_sz_get.get(i); j++)
Pietro Incardona's avatar
Pietro Incardona committed
665
666
667
668
669
670
671
672
673
674
675
676
			{
				// source object type
				typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
				// destination object type
				typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;

				// Copy only the selected properties
				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(j), g_send_prp.get(i).get(j2));

				j2++;
			}

incardon's avatar
incardon committed
677
			accum = accum + recv_sz_get.get(i);
Pietro Incardona's avatar
Pietro Incardona committed
678
679
680
		}
	}

681
682
683
684
	/*! \brief resize the retained buffer by nbf
	 *
	 *
	 */
incardon's avatar
incardon committed
685
	void resize_retained_buffer(openfpm::vector_fr<Memory> & rt_buf, size_t nbf)
686
687
688
689
690
691
692
693
694
695
	{
		// Release all the buffer that are going to be deleted
		for (size_t i = nbf ; i < rt_buf.size() ; i++)
		{
			rt_buf.get(i).decRef();
		}

		hsmem.resize(nbf);
	}

696
697
698
699
700
701
702
703
704
705
706
	/*! \brief Set the buffer for each property
	 *
	 *
	 */
	template<typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers_inte
	{
		openfpm::vector<send_vector> & g_send_prp;

		size_t i;

incardon's avatar
incardon committed
707
		openfpm::vector_fr<Memory> & hsmem;
708
709
710
711

		size_t j;

		set_mem_retained_buffers_inte(openfpm::vector<send_vector> & g_send_prp, size_t i ,
incardon's avatar
incardon committed
712
				                      openfpm::vector_fr<Memory> & hsmem, size_t j)
713
714
		:g_send_prp(g_send_prp),i(i),hsmem(hsmem),j(j)
		{}
715
716
717
718
719

		//! It call the setMemory function for each property
		template<typename T>
		inline void operator()(T& t)
		{
incardon's avatar
incardon committed
720
			g_send_prp.get(i).template setMemory<T::value>(hsmem.get(j));
721
722
723
724
725
726
727
728
729

			j++;
		}
	};

	template<bool inte_or_lin,typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers
	{
		static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
730
				     	 	 	 	 	 	 openfpm::vector<size_t> & prc_sz,
731
											 size_t i,
incardon's avatar
incardon committed
732
											 openfpm::vector_fr<Memory> & hsmem,
733
734
735
736
737
738
											 size_t j)
		{
			// Set the memory for retain the send buffer
			g_send_prp.get(i).setMemory(hsmem.get(j));

			// resize the sending vector (No allocation is produced)
739
			g_send_prp.get(i).resize(prc_sz.get(i));
740
741
742
743
744
745
746
747
748

			return j+1;
		}
	};

	template<typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers<true,send_vector,v_mpl>
	{
		static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
749
											 openfpm::vector<size_t> & prc_sz,
750
				 	 	 	 	 	 	 	 size_t i,
incardon's avatar
incardon committed
751
				 	 	 	 	 	 	 	 openfpm::vector_fr<Memory> & hsmem,
752
753
754
755
				 	 	 	 	 	 	 	 size_t j)
		{
			set_mem_retained_buffers_inte<send_vector,v_mpl> smrbi(g_send_prp,i,hsmem,j);

incardon's avatar
incardon committed
756
			boost::mpl::for_each_ref<boost::mpl::range_c<int,0,boost::mpl::size<v_mpl>::type::value>>(smrbi);
757

incardon's avatar
incardon committed
758
759
760
761
762
763
			// if we do not send properties do not reallocate
			if (boost::mpl::size<v_mpl>::type::value != 0)
			{
				// resize the sending vector (No allocation is produced)
				g_send_prp.get(i).resize(prc_sz.get(i));
			}
764
765
766
767
768

			return smrbi.j;
		}
	};

Pietro Incardona's avatar
Pietro Incardona committed
769
770
771
772
773
774
775
776
777
778
	/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
	 *
	 * \tparam send_vector type used to send data
	 * \tparam prp_object object containing only the properties to send
	 * \tparam prp set of properties to send
	 *
	 * \param v_prp vector of particle properties
	 * \param g_send_prp Send buffer to fill
	 *
	 */
779
780
	template<typename send_vector, typename prp_object, int ... prp>
	void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
781
782
783
								 openfpm::vector<size_t> & prc_sz,
			                     openfpm::vector<send_vector> & g_send_prp,
			                     size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
784
	{
785
786
787
788
789
790
		size_t factor = 1;

		typedef typename to_boost_vmpl<prp...>::type v_mpl;

		if (is_layout_inte<layout_base<prop>>::value == true) {factor *= sizeof...(prp);}

Pietro Incardona's avatar
Pietro Incardona committed
791
		// create a number of send buffers equal to the near processors
792
		g_send_prp.resize(prc_sz.size());
793

794
		resize_retained_buffer(hsmem,g_send_prp.size()*factor);
795

796
		for (size_t i = 0; i < hsmem.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
797
		{
798
799
800
			// Buffer must retained and survive the destruction of the
			// vector
			if (hsmem.get(i).ref() == 0)
801
802
			{hsmem.get(i).incRef();}
		}
803

804
805
806
		size_t j = 0;
		for (size_t i = 0; i < g_send_prp.size(); i++)
		{
807
			j = set_mem_retained_buffers<is_layout_inte<layout_base<prop>>::value,send_vector,v_mpl>::set_mem_retained_buffers_(g_send_prp,prc_sz,i,hsmem,j);
Pietro Incardona's avatar
Pietro Incardona committed
808
809
		}

810
		if (opt & RUN_ON_DEVICE)
Pietro Incardona's avatar
Pietro Incardona committed
811
		{
812
813
814
815
#if defined(CUDA_GPU) && defined(__NVCC__)

			size_t offset = 0;

incardon's avatar
incardon committed
816
			if (sizeof...(prp) != 0)
Pietro Incardona's avatar
Pietro Incardona committed
817
			{
incardon's avatar
incardon committed
818
819
820
821
				// Fill the sending buffers
				for (size_t i = 0 ; i < g_send_prp.size() ; i++)
				{
					auto ite = g_send_prp.get(i).getGPUIterator();
Pietro Incardona's avatar
Pietro Incardona committed
822

incardon's avatar
incardon committed
823
824
825
					CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
					ite.wthr,ite.thr,
					g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
incardon's avatar
incardon committed
826
					 v_prp.toKernel(),offset);
827

incardon's avatar
incardon committed
828
829
					offset += prc_sz.get(i);
				}
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
			}

#else

			std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl;

#endif
		}
		else
		{
			// Fill the send buffer
			for (size_t i = 0; i < g_opart.size(); i++)
			{
				for (size_t j = 0; j < g_opart.get(i).size(); j++)
				{
					// source object type
					typedef decltype(v_prp.get(g_opart.get(i).template get<0>(j))) encap_src;
					// destination object type
					typedef decltype(g_send_prp.get(i).get(j)) encap_dst;

					// Copy only the selected properties
					object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(g_opart.get(i).template get<0>(j)), g_send_prp.get(i).get(j));
				}
Pietro Incardona's avatar
Pietro Incardona committed
853
854
855
856
857
858
859
860
861
			}
		}
	}

	/*! \brief allocate and fill the send buffer for the map function
	 *
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particles properties
	 * \param prc_sz_r For each processor in the list the size of the message to send
incardon's avatar
incardon committed
862
863
	 * \param m_pos sending buffer for position
	 * \param m_prp sending buffer for properties
incardon's avatar
incardon committed
864
	 * \param offset from where start the list of the particles that migrate in o_part
incardon's avatar
incardon committed
865
	 *        This parameter is used only in case of RUN_ON_DEVICE option
Pietro Incardona's avatar
Pietro Incardona committed
866
867
	 *
	 */
incardon's avatar
incardon committed
868
869
870
	void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			               openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
			               openfpm::vector<size_t> & prc_sz_r,
incardon's avatar
incardon committed
871
			               openfpm::vector<size_t> & prc_r,
incardon's avatar
incardon committed
872
			               openfpm::vector<openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity>> & m_pos,
incardon's avatar
incardon committed
873
			               openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>> & m_prp,
874
			               openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> & prc_sz,
incardon's avatar
incardon committed
875
			               size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
876
	{
incardon's avatar
incardon committed
877
878
879
		m_prp.resize(prc_sz_r.size());
		m_pos.resize(prc_sz_r.size());
		openfpm::vector<size_t> cnt(prc_sz_r.size());
Pietro Incardona's avatar
Pietro Incardona committed
880

incardon's avatar
incardon committed
881
		for (size_t i = 0; i < prc_sz_r.size() ; i++)
Pietro Incardona's avatar
Pietro Incardona committed
882
883
		{
			// set the size and allocate, using mem warant that pos and prp is contiguous
incardon's avatar
incardon committed
884
885
886
			m_pos.get(i).resize(prc_sz_r.get(i));
			m_prp.get(i).resize(prc_sz_r.get(i));
			cnt.get(i) = 0;
Pietro Incardona's avatar
Pietro Incardona committed
887
888
		}

incardon's avatar
incardon committed
889
		if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
890
		{
incardon's avatar
incardon committed
891
892
893
			if (v_cl.size() == 1)
			{return;}

incardon's avatar
incardon committed
894
#if defined(CUDA_GPU) && defined(__NVCC__)
Pietro Incardona's avatar
Pietro Incardona committed
895

896
897
			// The first part of m_opart and prc_sz contain the local particles

incardon's avatar
incardon committed
898
899
			#ifndef TEST1

900
901
902
903
904
			v_pos_tmp.resize(prc_sz.template get<0>(0));
			v_prp_tmp.resize(prc_sz.template get<0>(0));

			auto ite = v_pos_tmp.getGPUIterator();

incardon's avatar
incardon committed
905
			// fill v_pos_tmp and v_prp_tmp with local particles
906
907
908
909
910
911
912
913
			process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
					                                           decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
			<<<ite.wthr,ite.thr>>>
			(m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
					            v_pos.toKernel(),v_prp.toKernel(),0);

			size_t offset = prc_sz.template get<0>(0);

incardon's avatar
incardon committed
914
			// Fill the sending buffers
incardon's avatar
incardon committed
915
916
917
			for (size_t i = 0 ; i < m_pos.size() ; i++)
			{
				auto ite = m_pos.get(i).getGPUIterator();
incardon's avatar
incardon committed
918

incardon's avatar
incardon committed
919
920
921
922
923
924
925
926
927
				process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
						                                           decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
				<<<ite.wthr,ite.thr>>>
				(m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
						            v_pos.toKernel(),v_prp.toKernel(),offset);

				offset += prc_sz_r.size();
			}

928
929
930
931
			// old local particles with the actual local particles
			v_pos_tmp.swap(v_pos);
			v_prp_tmp.swap(v_prp);

incardon's avatar
incardon committed
932
933
934
935
936
937
938
939
940
941
942
943
			#else

			int rank = v_cl.rank();

			v_pos_tmp.resize(prc_sz.template get<0>(rank));
			v_prp_tmp.resize(prc_sz.template get<0>(rank));

			auto ite = v_pos_tmp.getGPUIterator();

			starts.template deviceToHost<0>();
			size_t offset = starts.template get<0>(rank);

incardon's avatar
incardon committed
944
945
946
947
			// no work to do
			if (ite.wthr.x != 0)
			{
				// fill v_pos_tmp and v_prp_tmp with local particles
incardon's avatar
incardon committed
948
949
950
951
				CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
					                                           decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
				ite.wthr,ite.thr,
				m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
incardon's avatar
incardon committed
952
					            v_pos.toKernel(),v_prp.toKernel(),offset);
incardon's avatar
incardon committed
953
			}
incardon's avatar
incardon committed
954
955
956
957
958
959
960
961

			// Fill the sending buffers
			for (size_t i = 0 ; i < m_pos.size() ; i++)
			{
				size_t offset = starts.template get<0>(prc_r.template get<0>(i));

				auto ite = m_pos.get(i).getGPUIterator();

incardon's avatar
incardon committed
962
963
964
965
				// no work to do
				if (ite.wthr.x != 0)
				{

incardon's avatar
incardon committed
966
967
968
969
					CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
						                                           decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
					ite.wthr,ite.thr,
					m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
incardon's avatar
incardon committed
970
						            v_pos.toKernel(),v_prp.toKernel(),offset);
incardon's avatar
incardon committed
971
972

				}
incardon's avatar
incardon committed
973
974
975
976
977
978
979
			}

			// old local particles with the actual local particles
			v_pos_tmp.swap(v_pos);
			v_prp_tmp.swap(v_prp);

			#endif
incardon's avatar
incardon committed
980
981
#else

incardon's avatar
incardon committed
982
			std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl;
incardon's avatar
incardon committed
983
984
985
986

#endif
		}
		else
Pietro Incardona's avatar
Pietro Incardona committed
987
		{
incardon's avatar
incardon committed
988
989
990
991
992
993
994
995
996
997
998
			// end vector point
			long int id_end = v_pos.size();

			// end opart point
			long int end = m_opart.size()-1;

			// Run through all the particles and fill the sending buffer
			for (size_t i = 0; i < m_opart.size(); i++)
			{
				process_map_particle<proc_without_prp>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
			}
incardon's avatar
incardon committed
999

1000
1001
1002
			v_pos.resize(v_pos.size() - m_opart.size());
			v_prp.resize(v_prp.size() - m_opart.size());
		}
Pietro Incardona's avatar
Pietro Incardona committed
1003
1004
	}

incardon's avatar
incardon committed
1005

Pietro Incardona's avatar
Pietro Incardona committed
1006
	/*! \brief allocate and fill the send buffer for the map function
incardon's avatar
incardon committed
1007
1008
1009
	 *
	 * \tparam prp_object object type to send
	 * \tparam prp properties to send
Pietro Incardona's avatar
Pietro Incardona committed
1010
1011
1012
	 *
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particle properties
incardon's avatar
incardon committed
1013
	 * \param prc_sz_r number of particles to send for each processor
incardon's avatar
incardon committed
1014
1015
	 * \param m_pos sending buffer for position
	 * \param m_prp sending buffer for properties
Pietro Incardona's avatar
Pietro Incardona committed
1016
1017
	 *
	 */
incardon's avatar
incardon committed
1018
1019
1020
1021
1022
1023
	template<typename prp_object,int ... prp>
	void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos,
			                    openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
								openfpm::vector<size_t> & prc_sz_r,
								openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos,
								openfpm::vector<openfpm::vector<prp_object>> & m_prp)
Pietro Incardona's avatar
Pietro Incardona committed
1024
	{
1025
1026
1027
		m_prp.resize(prc_sz_r.size());
		m_pos.resize(prc_sz_r.size());
		openfpm::vector<size_t> cnt(prc_sz_r.size());
Pietro Incardona's avatar
Pietro Incardona committed
1028

1029
		for (size_t i = 0; i < prc_sz_r.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
1030
1031
		{
			// set the size and allocate, using mem warant that pos and prp is contiguous
incardon's avatar
incardon committed
1032
1033
			m_pos.get(i).resize(prc_sz_r.get(i));
			m_prp.get(i).resize(prc_sz_r.get(i));
1034
			cnt.get(i) = 0;
Pietro Incardona's avatar
Pietro Incardona committed
1035
1036
		}

incardon's avatar
incardon committed
1037
1038
1039
1040
1041
		// end vector point
		long int id_end = v_pos.size();

		// end opart point
		long int end = m_opart.size()-1;
Pietro Incardona's avatar
Pietro Incardona committed
1042

incardon's avatar
incardon committed
1043
1044
		// Run through all the particles and fill the sending buffer
		for (size_t i = 0; i < m_opart.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
1045
		{
incardon's avatar
incardon committed
1046
			process_map_particle<proc_with_prp<prp_object,prp...>>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
Pietro Incardona's avatar
Pietro Incardona committed
1047
		}
incardon's avatar
incardon committed
1048

1049
1050
		v_pos.resize(v_pos.size() - m_opart.size());
		v_prp.resize(v_prp.size() - m_opart.size());
Pietro Incardona's avatar
Pietro Incardona committed
1051
1052
1053
1054
1055
1056
1057
	}

	/*! \brief Label particles for mappings
	 *
	 * \param v_pos vector of particle positions
	 * \param lbl_p Particle labeled
	 * \param prc_sz For each processor the number of particles to send
incardon's avatar
Latest    
incardon committed
1058
	 * \param opt options
Pietro Incardona's avatar
Pietro Incardona committed
1059
1060
	 *
	 */
incardon's avatar
incardon committed
1061
	template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
incardon's avatar
incardon committed
1062
1063
1064
1065
1066
			                                           openfpm::vector<aggregate<int,int,int>,
			                                                           Memory,
			                                                           typename layout_base<aggregate<int,int,int>>::type,
			                                                           layout_base> & lbl_p,
			                                           openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_sz,
incardon's avatar
Latest    
incardon committed
1067
			                                           size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
1068
	{
incardon's avatar
incardon committed
1069
		if (opt == RUN_ON_DEVICE)
incardon's avatar
Latest    
incardon committed
1070
		{
incardon's avatar
incardon committed
1071
#ifdef __NVCC__
incardon's avatar
Latest    
incardon committed
1072

incardon's avatar
incardon committed
1073
			// Map directly on gpu
incardon's avatar
Latest    
incardon committed
1074

incardon's avatar
incardon committed
1075
			lbl_p.resize(v_pos.size());
Pietro Incardona's avatar
Pietro Incardona committed
1076

incardon's avatar
incardon committed
1077
			// labelling kernel
Pietro Incardona's avatar
Pietro Incardona committed
1078

incardon's avatar
incardon committed
1079
1080
			prc_sz.template fill<0>(0);

incardon's avatar
incardon committed
1081
1082
1083
1084
1085
1086
1087
1088
			auto ite = v_pos.getGPUIterator();
			if (ite.wthr.x == 0)
			{
				starts.resize(v_cl.size());
				starts.template fill<0>(0);
				return;
			}

incardon's avatar
incardon committed
1089
1090
1091
1092
1093
1094
1095
1096
1097
			// we have one process we can skip ...
			if (v_cl.size() == 1)
			{
				// ... but we have to apply the boundary conditions

				periodicity_int<dim> bc;

				for (size_t i = 0 ; i < dim ; i++)	{bc.bc[i] = dec.periodicity(i);}

incardon's avatar
incardon committed
1098
				CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),ite.wthr,ite.thr,dec.getDomain(),bc,v_pos.toKernel());
incardon's avatar
incardon committed
1099
1100
1101
1102

				return;
			}

incardon's avatar
incardon committed
1103
			// label particle processor
incardon's avatar
incardon committed
1104
1105
1106
			CUDA_LAUNCH((process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>),
			ite.wthr,ite.thr,
			dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),v_cl.rank());
incardon's avatar
incardon committed
1107

incardon's avatar
incardon committed
1108

incardon's avatar
incardon committed
1109
			#ifndef TEST1
Pietro Incardona's avatar
Pietro Incardona committed
1110

incardon's avatar
incardon committed
1111
1112
			// sort particles
			mergesort((int *)lbl_p.template getDeviceBuffer<1>(),(int *)lbl_p.template getDeviceBuffer<0>(), lbl_p.size(), mgpu::template less_t<int>(), v_cl.getmgpuContext());
Pietro Incardona's avatar
Pietro Incardona committed
1113

incardon's avatar
incardon committed
1114
			mem.allocate(sizeof(int));
1115
			mem.fill(0);
Pietro Incardona's avatar
Pietro Incardona committed
1116

incardon's avatar
incardon committed
1117
			// Find the buffer bases
1118
			find_buffer_offsets<1,decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())><<<ite.wthr,ite.thr>>>
incardon's avatar
incardon committed
1119
1120
1121
1122
					           (lbl_p.toKernel(),(int *)mem.getDevicePointer(),prc_sz.toKernel());

			// Trasfer the number of offsets on CPU
			mem.deviceToHost();
1123
1124
1125
			prc_sz.template deviceToHost<0,1>();
			// get also the last element from lbl_p;
			lbl_p.template deviceToHost<1>(lbl_p.size()-1,lbl_p.size()-1);
incardon's avatar
incardon committed
1126

incardon's avatar
incardon committed
1127
			mem.deviceToHost();
incardon's avatar
incardon committed
1128
			int noff = *(int *)mem.getPointer();
1129
1130
1131
			prc_sz.resize(noff+1);
			prc_sz.template get<0>(prc_sz.size()-1) = lbl_p.size();
			prc_sz.template get<1>(prc_sz.size()-1) = lbl_p.template get<1>(lbl_p.size()-1);
Pietro Incardona's avatar
Pietro Incardona committed
1132

incardon's avatar
incardon committed
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
			#else

			starts.resize(v_cl.size());
			mgpu::scan((unsigned int *)prc_sz.template getDeviceBuffer<0>(), prc_sz.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getmgpuContext());

			// move prc_sz to host
			prc_sz.template deviceToHost<0>();

			ite = lbl_p.getGPUIterator();

			// we order lbl_p
incardon's avatar
incardon committed
1144
			CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
incardon's avatar
incardon committed
1145
1146
1147

			#endif

incardon's avatar
incardon committed
1148
1149
#else

incardon's avatar
incardon committed
1150
			std::cout << __FILE__ << ":" << __LINE__ << " error, it seems you tried to call map with RUN_ON_DEVICE option, this requires to compile the program with NVCC" << std::endl;
incardon's avatar
incardon committed
1151
1152
1153
1154
1155
1156
1157

#endif
		}
		else
		{
			// reset lbl_p
			lbl_p.clear();
incardon's avatar
incardon committed
1158
			prc_sz_gg.clear();
incardon's avatar
incardon committed
1159
1160
			o_part_loc.clear();
			g_opart.clear();
incardon's avatar
incardon committed
1161
			prc_g_opart.clear();
incardon's avatar
incardon committed
1162
1163

			// resize the label buffer
incardon's avatar
incardon committed
1164
			prc_sz.template fill<0>(0);
incardon's avatar
incardon committed
1165
1166
1167
1168
1169

			auto it = v_pos.getIterator();

			// Label all the particles with the processor id where they should go
			while (it.isNext