vector_dist_comm.hpp 68.2 KB
Newer Older
Pietro Incardona's avatar
Pietro Incardona committed
1 2 3 4 5 6 7 8 9 10
/*
 * vector_dist_comm.hpp
 *
 *  Created on: Aug 18, 2016
 *      Author: i-bird
 */

#ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_
#define SRC_VECTOR_VECTOR_DIST_COMM_HPP_

incardon's avatar
incardon committed
11
#define TEST1
incardon's avatar
incardon committed
12

incardon's avatar
incardon committed
13 14 15
#if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
16
#include "util/cuda/moderngpu/kernel_scan.hxx"
incardon's avatar
incardon committed
17
#include "util/cuda/kernels.cuh"
incardon's avatar
incardon committed
18 19 20
#endif

#include "Vector/util/vector_dist_funcs.hpp"
incardon's avatar
incardon committed
21
#include "cuda/vector_dist_comm_util_funcs.cuh"
incardon's avatar
incardon committed
22

incardon's avatar
incardon committed
23 24 25
//! Option flag tested with `opt & NO_POSITION` by the ghost routines: when set, the position branch is skipped
constexpr int NO_POSITION = 1;
//! NOTE(review): presumably the counterpart of NO_POSITION (positions are exchanged) — not used in the visible code, confirm
constexpr int WITH_POSITION = 2;
//! Option flag read by compute_options(): together with SKIP_LABELLING it selects RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE
constexpr int NO_CHANGE_ELEMENTS = 4;
Pietro Incardona's avatar
Pietro Incardona committed
26

incardon's avatar
incardon committed
27
//! NOTE(review): not used in the visible part of the file; presumably an option that ties the decomposition to the ghost size — confirm against the users of this flag
constexpr int BIND_DEC_TO_GHOST = 1;
28

incardon's avatar
incardon committed
29 30
//! Option flag selecting the device code paths; compute_options() adds MPI_GPU_DIRECT for it when compiled with NVCC
constexpr int RUN_ON_DEVICE = 1024;
//! NOTE(review): not used in the visible part of the file; presumably an option of the map procedure — it shares the value 2 with WITH_POSITION, confirm they are never combined
constexpr int MAP_LOCAL = 2;
incardon's avatar
Latest  
incardon committed
31

incardon's avatar
incardon committed
32 33 34
//! Selector value for ghost_exchange_comm_impl: it has no dedicated specialization, so the primary template is used
constexpr int GHOST_SYNC = 0;
//! Selector value for ghost_exchange_comm_impl: picks the specialization built on the *Async / *Wait Vcluster calls
constexpr int GHOST_ASYNC = 1;

incardon's avatar
incardon committed
35 36 37 38 39 40 41
/*! \brief compute the communication options from the ghost_get/put options
 *
 *
 */
inline static size_t compute_options(size_t opt)
{
	size_t opt_ = NONE;
42 43 44 45 46 47 48 49 50 51 52 53
	if (opt & NO_CHANGE_ELEMENTS && opt & SKIP_LABELLING)
	{opt_ = RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE;}

	if (opt & RUN_ON_DEVICE)
	{
#if defined(CUDA_GPU) && defined(__NVCC__)
		// Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
		opt_ |= MPI_GPU_DIRECT;
#else
		std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
	}
incardon's avatar
incardon committed
54 55 56 57

	return opt_;
}

incardon's avatar
incardon committed
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
/*! \brief template selector for asynchronous or not asynchronous
 *
 * \tparam impl implementation
 * \tparam prp properties
 *
 */
template<unsigned int impl, template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl
{
	template<typename Vcluster_type, typename vector_prop_type,
	         typename vector_pos_type, typename send_vector,
	         typename prc_recv_get_type, typename prc_g_opart_type,
	         typename recv_sz_get_type, typename recv_sz_get_byte_type,
	         typename g_opart_sz_type>
	static inline void sendrecv_prp(Vcluster_type & v_cl,
						 openfpm::vector<send_vector> & g_send_prp,
						 vector_prop_type & v_prp,
						 vector_pos_type & v_pos,
						 prc_g_opart_type & prc_g_opart,
						 prc_recv_get_type & prc_recv_get,
						 recv_sz_get_type & recv_sz_get,
						 recv_sz_get_byte_type & recv_sz_get_byte,
						 g_opart_sz_type & g_opart_sz,
						 size_t g_m,
						 size_t opt)
	{
		// if there are no properties skip
		// SSendRecvP send everything when we do not give properties

		if (sizeof...(prp) != 0)
		{
			size_t opt_ = compute_options(opt);
			if (opt & SKIP_LABELLING)
			{
				if (opt & RUN_ON_DEVICE)
				{
					op_ssend_gg_recv_merge_run_device opm(g_m);
					v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
				else
				{
					op_ssend_gg_recv_merge opm(g_m);
					v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
			}
			else
			{v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}

			// fill g_opart_sz
			g_opart_sz.resize(prc_g_opart.size());

			for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
				g_opart_sz.get(i) = g_send_prp.get(i).size();
		}
	}

	template<typename Vcluster_type, typename vector_prop_type,
		     typename vector_pos_type, typename send_pos_vector,
		     typename prc_recv_get_type, typename prc_g_opart_type,
		     typename recv_sz_get_type>
	static inline void sendrecv_pos(Vcluster_type & v_cl,
									openfpm::vector<send_pos_vector> & g_pos_send,
									vector_prop_type & v_prp,
									vector_pos_type & v_pos,
									prc_recv_get_type & prc_recv_get,
									recv_sz_get_type & recv_sz_get,
									prc_g_opart_type & prc_g_opart,
									size_t opt)
	{
		size_t opt_ = compute_options(opt);
		if (opt & SKIP_LABELLING)
		{
			v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
		else
		{
			prc_recv_get.clear();
			recv_sz_get.clear();
			v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
	}
incardon's avatar
incardon committed
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170

	template<typename Vcluster_type, typename vector_prop_type,
		     typename vector_pos_type, typename send_pos_vector,
		     typename prc_recv_get_type, typename prc_g_opart_type,
		     typename recv_sz_get_type>
	static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
										 openfpm::vector<send_pos_vector> & g_pos_send,
										 vector_prop_type & v_prp,
										 vector_pos_type & v_pos,
										 prc_recv_get_type & prc_recv_get,
										 recv_sz_get_type & recv_sz_get,
										 prc_g_opart_type & prc_g_opart,
										 size_t opt)
	{}

	template<typename Vcluster_type, typename vector_prop_type,
	         typename vector_pos_type, typename send_vector,
	         typename prc_recv_get_type, typename prc_g_opart_type,
	         typename recv_sz_get_type, typename recv_sz_get_byte_type,
	         typename g_opart_sz_type>
	static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
			 	 	 	 	 	 	 	 openfpm::vector<send_vector> & g_send_prp,
			 	 	 	 	 	 	 	 vector_prop_type & v_prp,
			 	 	 	 	 	 	 	 vector_pos_type & v_pos,
			 	 	 	 	 	 	 	 prc_g_opart_type & prc_g_opart,
			 	 	 	 	 	 	 	 prc_recv_get_type & prc_recv_get,
			 	 	 	 	 	 	 	 recv_sz_get_type & recv_sz_get,
			 	 	 	 	 	 	 	 recv_sz_get_byte_type & recv_sz_get_byte,
			 	 	 	 	 	 	 	 g_opart_sz_type & g_opart_sz,
			 	 	 	 	 	 	 	 size_t g_m,
			 	 	 	 	 	 	 	 size_t opt)
	{}
incardon's avatar
incardon committed
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
};


template<template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl<GHOST_ASYNC,layout_base, prp ... >
{
	template<typename Vcluster_type, typename vector_prop_type,
	         typename vector_pos_type, typename send_vector,
	         typename prc_recv_get_type, typename prc_g_opart_type,
	         typename recv_sz_get_type, typename recv_sz_get_byte_type,
	         typename g_opart_sz_type>
	static inline void sendrecv_prp(Vcluster_type & v_cl,
						 openfpm::vector<send_vector> & g_send_prp,
						 vector_prop_type & v_prp,
						 vector_pos_type & v_pos,
						 prc_g_opart_type & prc_g_opart,
						 prc_recv_get_type & prc_recv_get,
						 recv_sz_get_type & recv_sz_get,
						 recv_sz_get_byte_type & recv_sz_get_byte,
						 g_opart_sz_type & g_opart_sz,
						 size_t g_m,
						 size_t opt)
	{
incardon's avatar
incardon committed
194 195 196
		prc_recv_get.clear();
		recv_sz_get.clear();

incardon's avatar
incardon committed
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
		// if there are no properties skip
		// SSendRecvP send everything when we do not give properties

		if (sizeof...(prp) != 0)
		{
			size_t opt_ = compute_options(opt);
			if (opt & SKIP_LABELLING)
			{
				if (opt & RUN_ON_DEVICE)
				{
					op_ssend_gg_recv_merge_run_device opm(g_m);
					v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
				else
				{
					op_ssend_gg_recv_merge opm(g_m);
					v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
			}
			else
			{v_cl.template SSendRecvPAsync<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
incardon's avatar
incardon committed
218
		}
incardon's avatar
incardon committed
219

incardon's avatar
incardon committed
220 221
		// fill g_opart_sz
		g_opart_sz.resize(prc_g_opart.size());
incardon's avatar
incardon committed
222

incardon's avatar
incardon committed
223 224
		for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
		{g_opart_sz.get(i) = g_send_prp.get(i).size();}
incardon's avatar
incardon committed
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
	}

	template<typename Vcluster_type, typename vector_prop_type,
		     typename vector_pos_type, typename send_pos_vector,
		     typename prc_recv_get_type, typename prc_g_opart_type,
		     typename recv_sz_get_type>
	static inline void sendrecv_pos(Vcluster_type & v_cl,
									openfpm::vector<send_pos_vector> & g_pos_send,
									vector_prop_type & v_prp,
									vector_pos_type & v_pos,
									prc_recv_get_type & prc_recv_get,
									recv_sz_get_type & recv_sz_get,
									prc_g_opart_type & prc_g_opart,
									size_t opt)
	{
incardon's avatar
incardon committed
240 241 242
		prc_recv_get.clear();
		recv_sz_get.clear();

incardon's avatar
incardon committed
243 244 245 246 247 248 249 250 251 252 253 254
		size_t opt_ = compute_options(opt);
		if (opt & SKIP_LABELLING)
		{
			v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
		else
		{
			prc_recv_get.clear();
			recv_sz_get.clear();
			v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
	}
incardon's avatar
incardon committed
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319

	template<typename Vcluster_type, typename vector_prop_type,
		     typename vector_pos_type, typename send_pos_vector,
		     typename prc_recv_get_type, typename prc_g_opart_type,
		     typename recv_sz_get_type>
	static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
										 openfpm::vector<send_pos_vector> & g_pos_send,
										 vector_prop_type & v_prp,
										 vector_pos_type & v_pos,
										 prc_recv_get_type & prc_recv_get,
										 recv_sz_get_type & recv_sz_get,
										 prc_g_opart_type & prc_g_opart,
										 size_t opt)
	{
		size_t opt_ = compute_options(opt);
		if (opt & SKIP_LABELLING)
		{
			v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
		else
		{
			v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
		}
	}

	template<typename Vcluster_type, typename vector_prop_type,
	         typename vector_pos_type, typename send_vector,
	         typename prc_recv_get_type, typename prc_g_opart_type,
	         typename recv_sz_get_type, typename recv_sz_get_byte_type,
	         typename g_opart_sz_type>
	static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
			 	 	 	 	 	 	 	 openfpm::vector<send_vector> & g_send_prp,
			 	 	 	 	 	 	 	 vector_prop_type & v_prp,
			 	 	 	 	 	 	 	 vector_pos_type & v_pos,
			 	 	 	 	 	 	 	 prc_g_opart_type & prc_g_opart,
			 	 	 	 	 	 	 	 prc_recv_get_type & prc_recv_get,
			 	 	 	 	 	 	 	 recv_sz_get_type & recv_sz_get,
			 	 	 	 	 	 	 	 recv_sz_get_byte_type & recv_sz_get_byte,
			 	 	 	 	 	 	 	 g_opart_sz_type & g_opart_sz,
			 	 	 	 	 	 	 	 size_t g_m,
			 	 	 	 	 	 	 	 size_t opt)
	{
		// if there are no properties skip
		// SSendRecvP send everything when we do not give properties

		if (sizeof...(prp) != 0)
		{
			size_t opt_ = compute_options(opt);
			if (opt & SKIP_LABELLING)
			{
				if (opt & RUN_ON_DEVICE)
				{
					op_ssend_gg_recv_merge_run_device opm(g_m);
					v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
				else
				{
					op_ssend_gg_recv_merge opm(g_m);
					v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
				}
			}
			else
			{v_cl.template SSendRecvPWait<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
		}
	}
incardon's avatar
incardon committed
320 321 322
};


Pietro Incardona's avatar
Pietro Incardona committed
323 324 325 326 327 328 329 330 331 332 333 334
/*! \brief This class is a helper for the communication of vector_dist
 *
 * \tparam dim Dimensionality of the space where the elements live
 * \tparam St type of space float, double ...
 * \tparam prop properties the vector elements store, in OpenFPM data structure format
 * \tparam Decomposition Decomposition strategy to use CartDecomposition ...
 * \tparam Memory Memory pool where the information is stored HeapMemory ...
 * \tparam layout_base memory layout of the internal vectors (memory_traits_lin by default)
 *
 * \see vector_dist
 *
 */

incardon's avatar
incardon committed
335 336 337 338 339 340
template<unsigned int dim,
         typename St,
         typename prop,
         typename Decomposition = CartDecomposition<dim,St>,
         typename Memory = HeapMemory,
         template<typename> class layout_base = memory_traits_lin>
Pietro Incardona's avatar
Pietro Incardona committed
341 342
class vector_dist_comm
{
incardon's avatar
incardon committed
343 344 345
	//! Number of units for each sub-domain
	size_t v_sub_unit_factor = 64;

incardon's avatar
incardon committed
346
	//! definition of the send vector for position
347
	typedef openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity> send_pos_vector;
incardon's avatar
incardon committed
348

Pietro Incardona's avatar
Pietro Incardona committed
349
	//! VCluster
350
	Vcluster<Memory> & v_cl;
Pietro Incardona's avatar
Pietro Incardona committed
351 352 353 354 355 356 357

	//! Domain decomposition
	Decomposition dec;

	//! It map the processor id with the communication request into map procedure
	openfpm::vector<size_t> p_map_req;

Pietro Incardona's avatar
Pietro Incardona committed
358
	//! For each near processor, outgoing particle id
incardon's avatar
incardon committed
359 360 361 362
	//! \warning opart is assumed to be an ordered list
	//! first id particle id
	//! second id shift id
	//! third id is the processor id
incardon's avatar
incardon committed
363 364 365 366
	openfpm::vector<aggregate<int,int,int>,
					Memory,
					typename layout_base<aggregate<int,int,int>>::type,
					layout_base > m_opart;
Pietro Incardona's avatar
Pietro Incardona committed
367

incardon's avatar
incardon committed
368
	//! Per processor ordered particles id for ghost_get (see prc_g_opart)
incardon's avatar
incardon committed
369 370 371
	//! For each processor the internal vector store the id of the
	//! particles that must be communicated to the other processors
	openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart;
Pietro Incardona's avatar
Pietro Incardona committed
372

373 374 375 376 377 378
	//! Same as g_opart but on device, the vector of vector is flatten into a single vector
    openfpm::vector<aggregate<unsigned int,unsigned long int>,
                    CudaMemory,
                    typename memory_traits_inte<aggregate<unsigned int,unsigned long int>>::type,
                    memory_traits_inte> g_opart_device;

379 380 381 382 383 384
	//! Helper buffer for computation (on GPU) of local particles (position)
	openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> v_pos_tmp;

	//! Helper buffer for computation (on GPU) of local particles (properties)
	openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> v_prp_tmp;

incardon's avatar
incardon committed
385 386 387
	//! Per processor number of particle g_opart_sz.get(i) = g_opart.get(i).size()
	openfpm::vector<size_t> g_opart_sz;

incardon's avatar
incardon committed
388
	//! processor rank list of g_opart
incardon's avatar
incardon committed
389 390
	openfpm::vector<size_t> prc_g_opart;

incardon's avatar
incardon committed
391 392
	//! It store the list of processor that communicate with us (local processor)
	//! from the last ghost get
incardon's avatar
incardon committed
393 394
	openfpm::vector<size_t> prc_recv_get_pos;
	openfpm::vector<size_t> prc_recv_get_prp;
Pietro Incardona's avatar
Pietro Incardona committed
395

incardon's avatar
incardon committed
396
	//! the same as prc_recv_get but for put
Pietro Incardona's avatar
Pietro Incardona committed
397 398
	openfpm::vector<size_t> prc_recv_put;

incardon's avatar
incardon committed
399 400
	//! the same as prc_recv_get but for map
	openfpm::vector<size_t> prc_recv_map;
Pietro Incardona's avatar
Pietro Incardona committed
401

incardon's avatar
incardon committed
402 403
	//! It store the size of the elements added for each processor that communicate with us (local processor)
	//! from the last ghost get
incardon's avatar
incardon committed
404 405
	openfpm::vector<size_t> recv_sz_get_pos;
	openfpm::vector<size_t> recv_sz_get_prp;
incardon's avatar
incardon committed
406 407
	//! Conversion to byte of recv_sz_get
	openfpm::vector<size_t> recv_sz_get_byte;
incardon's avatar
incardon committed
408

Pietro Incardona's avatar
Pietro Incardona committed
409

incardon's avatar
incardon committed
410
	//! The same as recv_sz_get but for put
Pietro Incardona's avatar
Pietro Incardona committed
411 412
	openfpm::vector<size_t> recv_sz_put;

incardon's avatar
incardon committed
413 414
	//! The same as recv_sz_get but for map
	openfpm::vector<size_t> recv_sz_map;
Pietro Incardona's avatar
Pietro Incardona committed
415

incardon's avatar
incardon committed
416 417 418
	//! elements sent for each processors (ghost_get)
	openfpm::vector<size_t> prc_sz_gg;

incardon's avatar
incardon committed
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
	//! temporary buffer to processors ids
    openfpm::vector<aggregate<unsigned int>,
                            Memory,
                            typename layout_base<aggregate<unsigned int>>::type,
                            layout_base> proc_id_out;

    //! temporary buffer for the scan result
	openfpm::vector<aggregate<unsigned int>,
                             Memory,
                             typename layout_base<aggregate<unsigned int>>::type,
                             layout_base> starts;

	//! Processor communication size
	openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> prc_offset;


	//! Temporary CudaMemory to do stuff
	CudaMemory mem;

Pietro Incardona's avatar
Pietro Incardona committed
438 439 440 441
	//! Local ghost marker (across the ghost particles it mark from where we have the)
	//! replicated ghost particles that are local
	size_t lg_m;

442
	//! Sending buffer
incardon's avatar
incardon committed
443
	openfpm::vector_fr<Memory> hsmem;
444

incardon's avatar
incardon committed
445
	//! process the particle with properties
446 447 448
	template<typename prp_object, int ... prp>
	struct proc_with_prp
	{
incardon's avatar
incardon committed
449
		//! process the particle
450 451 452 453 454 455 456 457 458 459 460 461
		template<typename T1, typename T2> inline static void proc(size_t lbl, size_t cnt, size_t id, T1 & v_prp, T2 & m_prp)
		{
			// source object type
			typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src;
			// destination object type
			typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst;

			// Copy only the selected properties
			object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(lbl).get(cnt));
		}
	};

incardon's avatar
incardon committed
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
	/*! \brief Number of particles received from one processor during the last ghost_get
	 *
	 * \param i processor (index in the list of processors, not the rank)
	 *
	 * \return the number of particles
	 *
	 */
	size_t get_last_ghost_get_received_parts(size_t i)
	{
		// When recv_sz_get_prp is empty (the last ghost_get did not have properties)
		// the number of received particles is taken from recv_sz_get_pos
		if (recv_sz_get_prp.size() == 0)
		{return recv_sz_get_pos.get(i);}

		return recv_sz_get_prp.get(i);
	}

	/*! \brief Number of processors involved during the last ghost_get
	 *
	 * The size of the property list is returned if it is not empty,
	 * otherwise the size of the position list
	 *
	 * \return the number of processors
	 *
	 */
	size_t get_last_ghost_get_num_proc()
	{
		if (prc_recv_get_prp.size() == 0)
		{return prc_recv_get_pos.size();}

		return prc_recv_get_prp.size();
	}

	/*! \brief List of the processors involved during the last ghost_get
	 *
	 * The property list is returned if it is not empty,
	 * otherwise the position list
	 *
	 * \return a reference to the internal list of processors
	 *
	 */
	openfpm::vector<size_t> & get_last_ghost_get_num_proc_vector()
	{
		if (prc_recv_get_prp.size() == 0)
		{return prc_recv_get_pos;}

		return prc_recv_get_prp;
	}

incardon's avatar
incardon committed
502 503 504 505 506 507 508 509 510 511
	/*! \brief Calculate the sending buffer size for each processor
	 *
	 * The processors that have something to receive and the matching sizes are
	 * appended to prc_r and prc_sz_r. Without RUN_ON_DEVICE also p_map_req is filled.
	 *
	 * \param prc_sz per processor information (property 0 is read as size, or as
	 *        offset when TEST1 is not defined; property 1 is read as processor id
	 *        only when TEST1 is not defined)
	 * \param prc_sz_r processor size
	 * \param prc_r processor ids
	 * \param opt options (only RUN_ON_DEVICE is checked)
	 *
	 */
	inline void calc_send_buffers(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> & prc_sz,
								  openfpm::vector<size_t> & prc_sz_r,
								  openfpm::vector<size_t> & prc_r,
								  size_t opt)
	{
		if (!(opt & RUN_ON_DEVICE))
		{
			// Host path: every processor with a non-zero size gets an entry, and
			// p_map_req remembers at which position of prc_r the processor is stored

			const size_t n_proc = v_cl.getProcessingUnits();
			p_map_req.resize(n_proc);

			for (size_t p = 0; p < n_proc; p++)
			{
				if (prc_sz.template get<0>(p) == 0)
				{continue;}

				p_map_req.get(p) = prc_r.size();
				prc_r.add(p);
				prc_sz_r.add(prc_sz.template get<0>(p));
			}

			return;
		}

#ifndef TEST1

		// property 0 is an offset: the size is the difference with the previous offset,
		// entries with processor id (unsigned int)-1 are skipped
		size_t last_off = 0;
		for (size_t k = 0; k < prc_sz.size() ; k++)
		{
			if (prc_sz.template get<1>(k) != (unsigned int)-1)
			{
				prc_r.add(prc_sz.template get<1>(k));
				prc_sz_r.add(prc_sz.template get<0>(k) - last_off);
			}
			last_off = prc_sz.template get<0>(k);
		}

#else

		// Device path: every processor with a non-zero size gets an entry,
		// the local processor excluded

		const size_t n_proc = v_cl.getProcessingUnits();

		for (size_t p = 0; p < n_proc; p++)
		{
			if (prc_sz.template get<0>(p) != 0 && v_cl.rank() != p)
			{
				prc_r.add(p);
				prc_sz_r.add(prc_sz.template get<0>(p));
			}
		}

#endif
	}

560 561
	//! Decomposition number (dec.get_ndec()) for which the shift boxes were calculated, -1 if never calculated
	long int shift_box_ndec{-1};

	//! Map used to check if a combination is already present (linearized combination -> group index in box_f)
	std::unordered_map<size_t, size_t> map_cmb;

	//! The boxes touching the border of the domain are divided in groups (first vector),
	//! each group contains the internal ghost boxes coming from sub-domains of the same section
	openfpm::vector_std<openfpm::vector_std<Box<dim, St>>> box_f;

	//! The boxes touching the border of the domain + the linearized shift vector they come from
	openfpm::vector<Box<dim, St>,Memory,typename layout_base<Box<dim,St>>::type,layout_base> box_f_dev;
	openfpm::vector<aggregate<unsigned int>,Memory,typename layout_base<aggregate<unsigned int>>::type,layout_base> box_f_sv;

	//! Sector of each group (one entry for each group of box_f)
	openfpm::vector_std<comb<dim>> box_cmb;

	//! Ids of the local particles to replicate for ghost_get
	openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,typename layout_base<aggregate<unsigned int,unsigned int>>::type,layout_base> o_part_loc;

	//! Processor communication size
	openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> prc_sz;

Pietro Incardona's avatar
Pietro Incardona committed
583 584 585 586 587 588
	/*! \brief For every internal ghost box we create a structure that order such internal local ghost box in
	 *         shift vectors
	 *
	 */
	void createShiftBox()
	{
589
		if (shift_box_ndec == (long int)dec.get_ndec())
590
		{return;}
Pietro Incardona's avatar
Pietro Incardona committed
591

incardon's avatar
incardon committed
592 593 594 595 596 597 598
		struct sh_box
		{
			size_t shift_id;

			unsigned int box_f_sv;
			Box<dim,St> box_f_dev;

incardon's avatar
incardon committed
599
			bool operator<(const sh_box & tmp) const
incardon's avatar
incardon committed
600 601 602 603 604
			{
				return shift_id < tmp.shift_id;
			}

		};
incardon's avatar
incardon committed
605
		openfpm::vector<sh_box> reord_shift;
incardon's avatar
incardon committed
606 607 608
		box_f.clear();
		map_cmb.clear();
		box_cmb.clear();
incardon's avatar
incardon committed
609

Pietro Incardona's avatar
Pietro Incardona committed
610
		// Add local particles coming from periodic boundary, the only boxes that count are the one
611
		// touching the border
Pietro Incardona's avatar
Pietro Incardona committed
612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
		for (size_t i = 0; i < dec.getNLocalSub(); i++)
		{
			size_t Nl = dec.getLocalNIGhost(i);

			for (size_t j = 0; j < Nl; j++)
			{
				// If the ghost does not come from the intersection with an out of
				// border sub-domain the combination is all zero and n_zero return dim
				if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
					continue;

				// Check if we already have boxes with such combination
				auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
				if (it == map_cmb.end())
				{
					// we do not have it
					box_f.add();
					box_f.last().add(dec.getLocalIGhostBox(i, j));
					box_cmb.add(dec.getLocalIGhostPos(i, j));
					map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
				}
				else
				{
					// we have it
					box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
				}

incardon's avatar
incardon committed
639 640 641 642
				reord_shift.add();
				reord_shift.last().shift_id = dec.getLocalIGhostPos(i, j).lin();
				reord_shift.last().box_f_dev = dec.getLocalIGhostBox(i, j);
				reord_shift.last().box_f_sv = dec.convertShift(dec.getLocalIGhostPos(i, j));
Pietro Incardona's avatar
Pietro Incardona committed
643 644 645
			}
		}

incardon's avatar
incardon committed
646 647 648 649 650 651 652 653 654 655 656 657
		// now we sort box_f by shift_id, the reason is that we have to avoid duplicated particles
		reord_shift.sort();

		box_f_dev.resize(reord_shift.size());
		box_f_sv.resize(reord_shift.size());

		for (size_t i = 0 ; i < reord_shift.size() ; i++)
		{
			box_f_dev.get(i) = reord_shift.get(i).box_f_dev;
			box_f_sv.template get<0>(i) = reord_shift.get(i).box_f_sv;
		}

incardon's avatar
incardon committed
658 659
#ifdef CUDA_GPU

660
		// move box_f_dev and box_f_sv to device
incardon's avatar
incardon committed
661 662
		box_f_dev.template hostToDevice<0,1>();
		box_f_sv.template hostToDevice<0>();
663

incardon's avatar
incardon committed
664 665
#endif

666
		shift_box_ndec = dec.get_ndec();
Pietro Incardona's avatar
Pietro Incardona committed
667 668 669 670
	}

	/*! \brief Local ghost from labeled particles
	 *
Pietro Incardona's avatar
Pietro Incardona committed
671 672
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particles properties
incardon's avatar
incardon committed
673
	 * \param opt options
Pietro Incardona's avatar
Pietro Incardona committed
674 675
	 *
	 */
676
	void local_ghost_from_opart(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
incardon's avatar
incardon committed
677 678
			                    openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
			                    size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
679 680
	{
		// get the shift vectors
681
		const openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
682

incardon's avatar
incardon committed
683
		if (!(opt & NO_POSITION))
Pietro Incardona's avatar
Pietro Incardona committed
684
		{
685
			if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
686
			{
incardon's avatar
incardon committed
687 688
				local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
				::run(o_part_loc,shifts,v_pos,v_prp,opt);
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
			}
			else
			{
				for (size_t i = 0 ; i < o_part_loc.size() ; i++)
				{
					size_t lin_id = o_part_loc.template get<1>(i);
					size_t key = o_part_loc.template get<0>(i);

					Point<dim, St> p = v_pos.get(key);
					// shift
					p -= shifts.get(lin_id);

					// add this particle shifting its position
					v_pos.add(p);
					v_prp.get(lg_m+i) = v_prp.get(key);
				}
incardon's avatar
incardon committed
705 706 707 708
			}
		}
		else
		{
709
			if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
710
			{
incardon's avatar
incardon committed
711 712
				local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
				::run(o_part_loc,shifts,v_pos,v_prp,opt);
713 714 715 716 717 718 719 720 721
			}
			else
			{
				for (size_t i = 0 ; i < o_part_loc.size() ; i++)
				{
					size_t key = o_part_loc.template get<0>(i);

					v_prp.get(lg_m+i) = v_prp.get(key);
				}
incardon's avatar
incardon committed
722
			}
Pietro Incardona's avatar
Pietro Incardona committed
723 724 725 726 727 728 729
		}
	}

	/*! \brief Local ghost from decomposition
	 *
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particle properties
Pietro Incardona's avatar
Pietro Incardona committed
730
	 * \param g_m ghost marker
Pietro Incardona's avatar
Pietro Incardona committed
731 732
	 *
	 */
733 734
	void local_ghost_from_dec(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			                  openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
735
			                  size_t g_m,size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
736 737 738 739
	{
		o_part_loc.clear();

		// get the shift vectors
740
		const openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
741

742 743
		if (opt & RUN_ON_DEVICE)
		{
incardon's avatar
incardon committed
744
			local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
incardon's avatar
incardon committed
745
			::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,starts,v_pos,v_prp,g_m,opt);
746 747
		}
		else
Pietro Incardona's avatar
Pietro Incardona committed
748
		{
749 750
			// Label the internal (assigned) particles
			auto it = v_pos.getIteratorTo(g_m);
Pietro Incardona's avatar
Pietro Incardona committed
751

752
			while (it.isNext())
Pietro Incardona's avatar
Pietro Incardona committed
753
			{
754 755 756 757
				auto key = it.get();

				// If particles are inside these boxes
				for (size_t i = 0; i < box_f.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
758
				{
759
					for (size_t j = 0; j < box_f.get(i).size(); j++)
Pietro Incardona's avatar
Pietro Incardona committed
760
					{
761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
						if (box_f.get(i).get(j).isInsideNP(v_pos.get(key)) == true)
						{
							size_t lin_id = dec.convertShift(box_cmb.get(i));

							o_part_loc.add();
							o_part_loc.template get<0>(o_part_loc.size()-1) = key;
							o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id;

							Point<dim, St> p = v_pos.get(key);
							// shift
							p -= shifts.get(lin_id);

							// add this particle shifting its position
							v_pos.add(p);
							v_prp.add();
							v_prp.last() = v_prp.get(key);

							// boxes in one group can be overlapping
							// we do not have to search for the other
							// boxes otherwise we will have duplicate particles
							//
							// A small note overlap of boxes across groups is fine
							// (and needed) because each group has different shift
							// producing non overlapping particles
							//
							break;
						}
Pietro Incardona's avatar
Pietro Incardona committed
788 789 790
					}
				}

791 792
				++it;
			}
Pietro Incardona's avatar
Pietro Incardona committed
793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843
		}
	}

	/*! \brief Add local particles based on the boundary conditions
	 *
	 * In order to understand what this function does, refer to the following figure
	 *
	 \verbatim

	 [1,1]
	 +---------+------------------------+---------+
	 | (1,-1)  |                        | (1,1)   |
	 |   |     |    (1,0) --> 7         |   |     |
	 |   v     |                        |   v     |
	 |   6     |                        |   8     |
	 +--------------------------------------------+
	 |         |                        |         |
	 |         |                        |         |
	 |         |                        |         |
	 | (0,-1)  |                        | (0,1)   |
	 |    |    |                        |   |     |
	 |    v    |      (0,0) --> 4       |   v     |
	 |    3    |                        |   5     |
	 |         |                        |         |
 B	 |         |                        |     A   |
 *	 |         |                        |    *    |
	 |         |                        |         |
	 |         |                        |         |
	 |         |                        |         |
	 +--------------------------------------------+
	 | (-1,-1) |                        | (-1,1)  |
	 |    |    |   (-1,0) --> 1         |    |    |
	 |    v    |                        |    v    |
	 |    0    |                        |    2    |
	 +---------+------------------------+---------+


	 \endverbatim

	 *
	 *  The box is the domain, while all the boxes at the border (so not (0,0) ) are the
	 *  ghost part at the border of the domain. If a particle A is in the position shown in
	 *  the figure, a particle B must be created. This function duplicates the particle A
	 *  if A and B are local
	 *
	 * \param v_pos vector of particle of positions
	 * \param v_prp vector of particle properties
	 * \param g_m ghost marker
	 * \param opt options
	 *
	 */
844 845 846 847
	void add_loc_particles_bc(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			                  openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp ,
			                  size_t & g_m,
			                  size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
848 849 850 851
	{
		// Create the shift boxes
		createShiftBox();

852
		if (!(opt & SKIP_LABELLING))
incardon's avatar
incardon committed
853
			lg_m = v_prp.size();
Pietro Incardona's avatar
Pietro Incardona committed
854

Pietro Incardona's avatar
Pietro Incardona committed
855 856 857 858 859
		if (box_f.size() == 0)
			return;
		else
		{
			if (opt & SKIP_LABELLING)
incardon's avatar
incardon committed
860
			{local_ghost_from_opart(v_pos,v_prp,opt);}
Pietro Incardona's avatar
Pietro Incardona committed
861
			else
862
			{local_ghost_from_dec(v_pos,v_prp,g_m,opt);}
Pietro Incardona's avatar
Pietro Incardona committed
863 864 865 866 867 868 869 870 871
		}
	}

	/*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles
	 *
	 * \param v_pos vector of particle positions
	 * \param g_pos_send Send buffer to fill
	 *
	 */
872
	void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
873 874
								 openfpm::vector<size_t> & prc_sz,
			                     openfpm::vector<send_pos_vector> & g_pos_send,
incardon's avatar
incardon committed
875 876
			                     size_t opt,
			                     bool async)
Pietro Incardona's avatar
Pietro Incardona committed
877 878
	{
		// get the shift vectors
879
		const openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & shifts = dec.getShiftVectors();
Pietro Incardona's avatar
Pietro Incardona committed
880 881

		// create a number of send buffers equal to the near processors
incardon's avatar
incardon committed
882
		g_pos_send.resize(prc_sz.size());
883

incardon's avatar
incardon committed
884 885 886 887 888 889 890 891 892 893
		size_t old_hsmem_size = 0;

		// if we do async
		if (async == true)
		{
			old_hsmem_size = hsmem.size();
			resize_retained_buffer(hsmem,g_pos_send.size() + hsmem.size());
		}
		else
		{resize_retained_buffer(hsmem,g_pos_send.size());}
894

Pietro Incardona's avatar
Pietro Incardona committed
895 896
		for (size_t i = 0; i < g_pos_send.size(); i++)
		{
897 898
			// Buffer must retained and survive the destruction of the
			// vector
incardon's avatar
incardon committed
899 900
			if (hsmem.get(i+old_hsmem_size).ref() == 0)
			{hsmem.get(i+old_hsmem_size).incRef();}
901 902

			// Set the memory for retain the send buffer
incardon's avatar
incardon committed
903
			g_pos_send.get(i).setMemory(hsmem.get(i+old_hsmem_size));
904

Pietro Incardona's avatar
Pietro Incardona committed
905
			// resize the sending vector (No allocation is produced)
906
			g_pos_send.get(i).resize(prc_sz.get(i));
Pietro Incardona's avatar
Pietro Incardona committed
907 908
		}

909
		if (opt & RUN_ON_DEVICE)
Pietro Incardona's avatar
Pietro Incardona committed
910
		{
911 912 913 914 915 916
#if defined(CUDA_GPU) && defined(__NVCC__)

			size_t offset = 0;

			// Fill the sending buffers
			for (size_t i = 0 ; i < g_pos_send.size() ; i++)
Pietro Incardona's avatar
Pietro Incardona committed
917
			{
918 919
				auto ite = g_pos_send.get(i).getGPUIterator();

incardon's avatar
incardon committed
920
				CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
incardon's avatar
incardon committed
921
				ite,
incardon's avatar
incardon committed
922
				g_opart_device.toKernel(), g_pos_send.get(i).toKernel(),
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
				 v_pos.toKernel(),shifts.toKernel(),offset);

				offset += prc_sz.get(i);
			}

#else

			std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl;

#endif
		}
		else
		{
			// Fill the send buffer
			for (size_t i = 0; i < g_opart.size(); i++)
			{
				for (size_t j = 0; j < g_opart.get(i).size(); j++)
				{
					Point<dim, St> s = v_pos.get(g_opart.get(i).template get<0>(j));
					s -= shifts.get(g_opart.get(i).template get<1>(j));
					g_pos_send.get(i).set(j, s);
				}
Pietro Incardona's avatar
Pietro Incardona committed
945 946 947 948
			}
		}
	}

Pietro Incardona's avatar
Pietro Incardona committed
949 950 951 952 953 954 955 956 957 958 959
	/*! \brief This function fill the send buffer for ghost_put
	 *
	 * \tparam send_vector type used to send data
	 * \tparam prp_object object containing only the properties to send
	 * \tparam prp set of properties to send
	 *
	 * \param v_prp vector of particle properties
	 * \param g_send_prp Send buffer to fill
	 * \param g_m ghost marker
	 *
	 */
incardon's avatar
incardon committed
960 961 962 963
	template<typename send_vector, typename prp_object, int ... prp>
	void fill_send_ghost_put_prp_buf(openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
									 openfpm::vector<send_vector> & g_send_prp,
									 size_t & g_m)
Pietro Incardona's avatar
Pietro Incardona committed
964 965 966
	{
		// create a number of send buffers equal to the near processors
		// from which we received
incardon's avatar
incardon committed
967 968 969 970 971 972

		// NOTE in some case the information can be in prc_recv_get_pos

		size_t nproc = get_last_ghost_get_num_proc();

		g_send_prp.resize(nproc);
973 974 975

		resize_retained_buffer(hsmem,g_send_prp.size());

Pietro Incardona's avatar
Pietro Incardona committed
976 977
		for (size_t i = 0; i < g_send_prp.size(); i++)
		{
978 979 980 981 982 983 984 985
			// Buffer must retained and survive the destruction of the
			// vector
			if (hsmem.get(i).ref() == 0)
				hsmem.get(i).incRef();

			// Set the memory for retain the send buffer
			g_send_prp.get(i).setMemory(hsmem.get(i));

incardon's avatar
incardon committed
986 987
			size_t n_part_recv = get_last_ghost_get_received_parts(i);

Pietro Incardona's avatar
Pietro Incardona committed
988
			// resize the sending vector (No allocation is produced)
incardon's avatar
incardon committed
989
			g_send_prp.get(i).resize(n_part_recv);
Pietro Incardona's avatar
Pietro Incardona committed
990 991 992 993 994
		}

		size_t accum = g_m;

		// Fill the send buffer
incardon's avatar
incardon committed
995
		for (size_t i = 0; i < g_send_prp.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
996 997
		{
			size_t j2 = 0;
incardon's avatar
incardon committed
998 999 1000
			size_t n_part_recv = get_last_ghost_get_received_parts(i);

			for (size_t j = accum; j < accum + n_part_recv; j++)
Pietro Incardona's avatar
Pietro Incardona committed
1001 1002
			{
				// source object type
incardon's avatar
incardon committed
1003
				typedef encapc<1, prop, typename openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>::layout_type> encap_src;
Pietro Incardona's avatar
Pietro Incardona committed
1004
				// destination object type
incardon's avatar
incardon committed
1005
				typedef encapc<1, prp_object, typename openfpm::vector<prp_object,Memory,typename layout_base<prp_object>::type,layout_base>::layout_type> encap_dst;
Pietro Incardona's avatar
Pietro Incardona committed
1006 1007 1008 1009 1010 1011 1012

				// Copy only the selected properties
				object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(j), g_send_prp.get(i).get(j2));

				j2++;
			}

incardon's avatar
incardon committed
1013
			accum = accum + n_part_recv;
Pietro Incardona's avatar
Pietro Incardona committed
1014 1015 1016
		}
	}

1017 1018 1019 1020
	/*! \brief resize the retained buffer by nbf
	 *
	 *
	 */
incardon's avatar
incardon committed
1021
	void resize_retained_buffer(openfpm::vector_fr<Memory> & rt_buf, size_t nbf)
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031
	{
		// Release all the buffer that are going to be deleted
		for (size_t i = nbf ; i < rt_buf.size() ; i++)
		{
			rt_buf.get(i).decRef();
		}

		hsmem.resize(nbf);
	}

1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
	/*! \brief Set the buffer for each property
	 *
	 *
	 */
	template<typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers_inte
	{
		openfpm::vector<send_vector> & g_send_prp;

		size_t i;

incardon's avatar
incardon committed
1043
		openfpm::vector_fr<Memory> & hsmem;
1044 1045 1046 1047

		size_t j;

		set_mem_retained_buffers_inte(openfpm::vector<send_vector> & g_send_prp, size_t i ,
incardon's avatar
incardon committed
1048
				                      openfpm::vector_fr<Memory> & hsmem, size_t j)
1049 1050
		:g_send_prp(g_send_prp),i(i),hsmem(hsmem),j(j)
		{}
1051 1052 1053 1054 1055

		//! It call the setMemory function for each property
		template<typename T>
		inline void operator()(T& t)
		{
incardon's avatar
incardon committed
1056
			g_send_prp.get(i).template setMemory<T::value>(hsmem.get(j));
1057 1058 1059 1060 1061 1062 1063 1064 1065

			j++;
		}
	};

	template<bool inte_or_lin,typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers
	{
		static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
1066
				     	 	 	 	 	 	 openfpm::vector<size_t> & prc_sz,
1067
											 size_t i,
incardon's avatar
incardon committed
1068
											 openfpm::vector_fr<Memory> & hsmem,
1069 1070 1071 1072 1073 1074
											 size_t j)
		{
			// Set the memory for retain the send buffer
			g_send_prp.get(i).setMemory(hsmem.get(j));

			// resize the sending vector (No allocation is produced)
1075
			g_send_prp.get(i).resize(prc_sz.get(i));
1076 1077 1078 1079 1080 1081 1082 1083 1084

			return j+1;
		}
	};

	template<typename send_vector, typename v_mpl>
	struct set_mem_retained_buffers<true,send_vector,v_mpl>
	{
		static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
1085
											 openfpm::vector<size_t> & prc_sz,
1086
				 	 	 	 	 	 	 	 size_t i,
incardon's avatar
incardon committed
1087
				 	 	 	 	 	 	 	 openfpm::vector_fr<Memory> & hsmem,
1088 1089 1090 1091
				 	 	 	 	 	 	 	 size_t j)
		{
			set_mem_retained_buffers_inte<send_vector,v_mpl> smrbi(g_send_prp,i,hsmem,j);

incardon's avatar
incardon committed
1092
			boost::mpl::for_each_ref<boost::mpl::range_c<int,0,boost::mpl::size<v_mpl>::type::value>>(smrbi);
1093

incardon's avatar
incardon committed
1094 1095 1096 1097 1098 1099
			// if we do not send properties do not reallocate
			if (boost::mpl::size<v_mpl>::type::value != 0)
			{
				// resize the sending vector (No allocation is produced)
				g_send_prp.get(i).resize(prc_sz.get(i));
			}
1100 1101 1102 1103 1104

			return smrbi.j;
		}
	};

Pietro Incardona's avatar
Pietro Incardona committed
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114
	/*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles
	 *
	 * \tparam send_vector type used to send data
	 * \tparam prp_object object containing only the properties to send
	 * \tparam prp set of properties to send
	 *
	 * \param v_prp vector of particle properties
	 * \param g_send_prp Send buffer to fill
	 *
	 */
1115 1116
	template<typename send_vector, typename prp_object, int ... prp>
	void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
1117 1118 1119
								 openfpm::vector<size_t> & prc_sz,
			                     openfpm::vector<send_vector> & g_send_prp,
			                     size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
1120
	{
1121 1122 1123 1124 1125 1126
		size_t factor = 1;

		typedef typename to_boost_vmpl<prp...>::type v_mpl;

		if (is_layout_inte<layout_base<prop>>::value == true) {factor *= sizeof...(prp);}

Pietro Incardona's avatar
Pietro Incardona committed
1127
		// create a number of send buffers equal to the near processors
1128
		g_send_prp.resize(prc_sz.size());
1129

1130
		resize_retained_buffer(hsmem,g_send_prp.size()*factor);
1131

1132
		for (size_t i = 0; i < hsmem.size(); i++)
Pietro Incardona's avatar
Pietro Incardona committed
1133
		{
1134 1135 1136
			// Buffer must retained and survive the destruction of the
			// vector
			if (hsmem.get(i).ref() == 0)
1137 1138
			{hsmem.get(i).incRef();}
		}
1139

1140 1141 1142
		size_t j = 0;
		for (size_t i = 0; i < g_send_prp.size(); i++)
		{
1143
			j = set_mem_retained_buffers<is_layout_inte<layout_base<prop>>::value,send_vector,v_mpl>::set_mem_retained_buffers_(g_send_prp,prc_sz,i,hsmem,j);
Pietro Incardona's avatar
Pietro Incardona committed
1144 1145
		}

1146
		if (opt & RUN_ON_DEVICE)
Pietro Incardona's avatar
Pietro Incardona committed
1147
		{
1148 1149 1150 1151
#if defined(CUDA_GPU) && defined(__NVCC__)

			size_t offset = 0;

incardon's avatar
incardon committed
1152
			if (sizeof...(prp) != 0)
Pietro Incardona's avatar
Pietro Incardona committed
1153
			{
incardon's avatar
incardon committed
1154 1155 1156 1157
				// Fill the sending buffers
				for (size_t i = 0 ; i < g_send_prp.size() ; i++)
				{
					auto ite = g_send_prp.get(i).getGPUIterator();
Pietro Incardona's avatar
Pietro Incardona committed
1158

incardon's avatar
incardon committed
1159
					CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
incardon's avatar
incardon committed
1160
					ite,
incardon's avatar
incardon committed
1161
					g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
incardon's avatar
incardon committed
1162
					 v_prp.toKernel(),offset);
1163

incardon's avatar
incardon committed
1164 1165
					offset += prc_sz.get(i);
				}
1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188
			}

#else

			std::cout << __FILE__ << ":" << __LINE__ << " error RUN_ON_DEVICE require that you compile with NVCC, but it seem compiled with a normal compiler" << std::endl;

#endif
		}
		else
		{
			// Fill the send buffer
			for (size_t i = 0; i < g_opart.size(); i++)
			{
				for (size_t j = 0; j < g_opart.get(i).size(); j++)
				{
					// source object type
					typedef decltype(v_prp.get(g_opart.get(i).template get<0>(j))) encap_src;
					// destination object type
					typedef decltype(g_send_prp.get(i).get(j)) encap_dst;

					// Copy only the selected properties
					object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(g_opart.get(i).template get<0>(j)), g_send_prp.get(i).get(j));
				}
Pietro Incardona's avatar
Pietro Incardona committed
1189 1190 1191 1192 1193 1194 1195 1196 1197
			}
		}
	}

	/*! \brief allocate and fill the send buffer for the map function
	 *
	 * \param v_pos vector of particle positions
	 * \param v_prp vector of particles properties
	 * \param prc_sz_r For each processor in the list the size of the message to send
incardon's avatar
incardon committed
1198 1199
	 * \param m_pos sending buffer for position
	 * \param m_prp sending buffer for properties
incardon's avatar
incardon committed
1200
	 * \param offset from where start the list of the particles that migrate in o_part
incardon's avatar
incardon committed
1201
	 *        This parameter is used only in case of RUN_ON_DEVICE option
Pietro Incardona's avatar
Pietro Incardona committed
1202 1203
	 *
	 */
incardon's avatar
incardon committed
1204 1205 1206
	void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
			               openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp,
			               openfpm::vector<size_t> & prc_sz_r,
incardon's avatar
incardon committed
1207
			               openfpm::vector<size_t> & prc_r,
incardon's avatar
incardon committed
1208
			               openfpm::vector<openfpm::vector<Point<dim,St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base,openfpm::grow_policy_identity>> & m_pos,
incardon's avatar
incardon committed
1209
			               openfpm::vector<openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base,openfpm::grow_policy_identity>> & m_prp,
1210
			               openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,typename layout_base<aggregate<unsigned int, unsigned int>>::type,layout_base> & prc_sz,
incardon's avatar
incardon committed
1211
			               size_t opt)
Pietro Incardona's avatar
Pietro Incardona committed
1212
	{
incardon's avatar
incardon committed
1213 1214 1215
		m_prp.resize(prc_sz_r.size());
		m_pos.resize(prc_sz_r.size());
		openfpm::vector<size_t> cnt(prc_sz_r.size());
Pietro Incardona's avatar
Pietro Incardona committed
1216

incardon's avatar
incardon committed
1217
		for (size_t i = 0; i < prc_sz_r.size() ; i++)
Pietro Incardona's avatar
Pietro Incardona committed
1218 1219
		{
			// set the size and allocate, using mem warant that pos and prp is contiguous
incardon's avatar
incardon committed
1220 1221 1222
			m_pos.get(i).resize(prc_sz_r.get(i));
			m_prp.get(i).resize(prc_sz_r.get(i));
			cnt.get(i) = 0;
Pietro Incardona's avatar
Pietro Incardona committed
1223 1224
		}

incardon's avatar
incardon committed
1225
		if (opt & RUN_ON_DEVICE)
incardon's avatar
incardon committed
1226
		{
incardon's avatar
incardon committed
1227 1228 1229
			if (v_cl.size() == 1)
			{return;}