diff --git a/example/Vector/3_molecular_dynamic/Makefile b/example/Vector/3_molecular_dynamic/Makefile index e9ce585fd6c8b94c615662f222b82fddd65f2a79..cc252770e8128e156b313910f200a693a552f837 100644 --- a/example/Vector/3_molecular_dynamic/Makefile +++ b/example/Vector/3_molecular_dynamic/Makefile @@ -6,11 +6,13 @@ LDIR = OBJ = main.o OBJ_EXPR = main_expr.o +OBJ_VL = main_vl.o +OBJ_VL_SYM = main_vl_sym.o -all: md_dyn md_dyn_expr +all: md_dyn md_dyn_expr md_dyn_vl md_dyn_vl_sym %.o: %.cpp - $(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + $(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH) md_dyn: $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) @@ -18,11 +20,17 @@ md_dyn: $(OBJ) md_dyn_expr: $(OBJ_EXPR) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) +md_dyn_vl: $(OBJ_VL) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + +md_dyn_vl_sym: $(OBJ_VL_SYM) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + run: all - source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr + source $$HOME/openfpm_vars; mpirun -np 3 ./md_dyn; mpirun -np 3 ./md_dyn_expr; mpirun -np 3 ./md_dyn_vl; mpirun -np 3 ./md_dyn_vl_sym .PHONY: clean all run clean: - rm -f *.o *~ core md_dyn md_dyn_expr + rm -f *.o *~ core md_dyn md_dyn_expr md_dyn_vl md_dyn_vl_sym diff --git a/example/Vector/3_molecular_dynamic/main.cpp b/example/Vector/3_molecular_dynamic/main.cpp index 81188e97d753597ca1bc9cfeec51cb8fbd127e85..223e63adbbb078bc6ea2cf20f82a334aa216fc51 100644 --- a/example/Vector/3_molecular_dynamic/main.cpp +++ b/example/Vector/3_molecular_dynamic/main.cpp @@ -1,9 +1,9 @@ - #include "Vector/vector_dist.hpp" #include "Decomposition/CartDecomposition.hpp" #include "data_type/aggregate.hpp" #include "Plot/GoogleChart.hpp" #include "Plot/util.hpp" +#include "timer.hpp" /*! * \page Vector_3_md Vector 3 molecular dynamic @@ -48,7 +48,7 @@ constexpr int force = 1; //! \cond [calc forces] \endcond -void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6) +void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut2) { //! \cond [calc forces] \endcond @@ -127,6 +127,9 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce // take the norm of this vector double rn = norm2(r); + if (rn > r_cut2) + {++Np; continue;}; + // Calculate the force, using pow is slower Point<3,double> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r; @@ -165,8 +168,10 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ce //! \cond [calc energy] \endcond -double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6) +double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, CellList<3, double, FAST, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut2) { + double rc = r_cut2; + double shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) ); //! \cond [calc energy] \endcond @@ -185,7 +190,7 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, //! \cond [up cell ene] \endcond double E = 0.0; - vd.updateCellList(NN); +// vd.updateCellList(NN); //! \cond [up cell ene] \endcond @@ -236,8 +241,11 @@ double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, // take the normalized direction double rn = norm2(xp - xq); + if (rn > r_cut2) + {++Np;continue;} + // potential energy (using pow is slower) - E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ); + E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift; // Next neighborhood ++Np; @@ -414,13 +422,16 @@ int main(int argc, char* argv[]) * */ + timer tsim; + tsim.start(); + //! \cond [md steps] \endcond // Get the Cell list structure auto NN = vd.getCellList(r_cut); // calculate forces - calc_forces(vd,NN,sigma12,sigma6); + calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut); unsigned long int f = 0; // MD time stepping @@ -452,7 +463,7 @@ int main(int argc, char* argv[]) vd.template ghost_get<>(); // calculate forces or a(tn + 1) Step 2 - calc_forces(vd,NN,sigma12,sigma6); + calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut); // Integrate the velocity Step 3 @@ -481,7 +492,7 @@ int main(int argc, char* argv[]) vd.ghost_get<>(); // We calculate the energy - double energy = calc_energy(vd,NN,sigma12,sigma6); + double energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut); auto & vcl = create_vcluster(); vcl.sum(energy); vcl.execute(); @@ -501,6 +512,9 @@ int main(int argc, char* argv[]) //! \cond [md steps] \endcond + tsim.stop(); + std::cout << "Time: " << tsim.getwct() << std::endl; + /*! * \page Vector_3_md Vector 3 molecular dynamic * diff --git a/example/Vector/3_molecular_dynamic/main_expr.cpp b/example/Vector/3_molecular_dynamic/main_expr.cpp index 57f474efe8f2c12ffd35f2f253e8d4afab28f348..5e214636672e0866c109717a775c9fd444f9f1bf 100644 --- a/example/Vector/3_molecular_dynamic/main_expr.cpp +++ b/example/Vector/3_molecular_dynamic/main_expr.cpp @@ -1,22 +1,23 @@ #include "Vector/vector_dist.hpp" #include "Plot/GoogleChart.hpp" #include "Operators/Vector/vector_dist_operators.hpp" +#include "timer.hpp" constexpr int velocity = 0; constexpr int force = 1; struct ln_potential { - double sigma12,sigma6; + double sigma12,sigma6,r_cut2,shift; - ln_potential(double sigma12_, double sigma6_) {sigma12 = sigma12_, sigma6 = sigma6_;} + ln_potential(double sigma12_, double sigma6_, double r_cut2_, double shift_) {sigma12 = sigma12_; sigma6 = sigma6_; r_cut2 = r_cut2_;shift = shift_;} Point<2,double> value(const Point<3,double> & xp, const Point<3,double> xq) { double rn = norm2(xp - xq); + if (rn >= r_cut2) return 0.0; - Point<2,double> E({2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ), - 0.0}); + Point<2,double> E({2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift,0.0}); return E; } @@ -24,24 +25,28 @@ struct ln_potential struct ln_force { - double sigma12,sigma6; + double sigma12,sigma6,r_cut2; - ln_force(double sigma12_, double sigma6_) {sigma12 = sigma12_; sigma6 = sigma6_;} + ln_force(double sigma12_, double sigma6_, double r_cut2_) {sigma12 = sigma12_; sigma6 = sigma6_;r_cut2 = r_cut2_;} Point<3,double> value(const Point<3,double> & xp, const Point<3,double> xq) { Point<3,double> r = xp - xq; double rn = norm2(r); + if (rn > r_cut2) return 0.0; + return 24.0*(2.0 * sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r; } }; int main(int argc, char* argv[]) { - double dt = 0.0005, sigma = 0.1, r_cut = 0.3; + double dt = 0.0005, sigma = 0.1, r_cut = 3.0*sigma; double sigma6 = pow(sigma,6), sigma12 = pow(sigma,12); + double rc2 = r_cut * r_cut; + double shift = 2.0 * ( sigma12 / (rc2*rc2*rc2*rc2*rc2*rc2) - sigma6 / ( rc2*rc2*rc2) ); openfpm::vector<double> x; openfpm::vector<openfpm::vector<double>> y; @@ -55,8 +60,8 @@ int main(int argc, char* argv[]) size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; Ghost<3,float> ghost(r_cut); - ln_force lf(sigma12,sigma6); - ln_potential lp(sigma12,sigma6); + ln_force lf(sigma12,sigma6,r_cut*r_cut); + ln_potential lp(sigma12,sigma6,r_cut*r_cut,shift); vector_dist<3,double, aggregate<Point<3,double>,Point<3,double>> > vd(0,box,bc,ghost); @@ -82,6 +87,9 @@ int main(int argc, char* argv[]) v_force = 0; v_velocity = 0; + timer tsim; + tsim.start(); + auto NN = vd.getCellList(r_cut); vd.updateCellList(NN); @@ -109,6 +117,7 @@ int main(int argc, char* argv[]) vd.write("particles_",f); vd.ghost_get<>(); + vd.updateCellList(NN); Point<2,double> E = rsum(applyKernel_in_sim(vd,NN,lp) + (v_velocity * v_velocity)/2.0,vd).get(); vcl.sum(E.get(0));vcl.sum(E.get(1)); @@ -125,6 +134,9 @@ int main(int argc, char* argv[]) } } + tsim.stop(); + std::cout << "Time: " << tsim.getwct() << std::endl; + GCoptions options; options.title = std::string("Energy with time"); options.yAxis = std::string("Energy"); diff --git a/openfpm_data b/openfpm_data index f075dd5da60f5170e01a3265a88881e98fc3b7e4..9301fe459ca2ef81115c642ee7a54c983ddfd6f1 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit f075dd5da60f5170e01a3265a88881e98fc3b7e4 +Subproject commit 9301fe459ca2ef81115c642ee7a54c983ddfd6f1 diff --git a/src/Makefile.am b/src/Makefile.am index d61146d9da87b3f2187f8199ff4aa44fbf81ae53..143ccee42292fdbd051b5aa7dda3223dad5f18b2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -9,7 +9,7 @@ nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/CartD Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/ORB.hpp \ Graph/CartesianGraphFactory.hpp \ Grid/grid_dist_id.hpp Grid/grid_dist_id_iterator_dec.hpp Grid/grid_dist_util.hpp Grid/grid_dist_id_iterator_sub.hpp Grid/grid_dist_id_iterator.hpp Grid/grid_dist_key.hpp Grid/staggered_dist_grid.hpp Grid/staggered_dist_grid_util.hpp Grid/staggered_dist_grid_copy.hpp \ - Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \ + Vector/vector_dist_comm.hpp Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \ config/config.h \ example.mk \ Decomposition/Distribution/metis_util.hpp Decomposition/Distribution/parmetis_dist_util.hpp Decomposition/Distribution/parmetis_util.hpp Decomposition/Distribution/MetisDistribution.hpp Decomposition/Distribution/ParMetisDistribution.hpp Decomposition/Distribution/DistParMetisDistribution.hpp dec_optimizer.hpp SubdomainGraphNodes.hpp \ diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index f5348417c3882d42b06ff68b1443fae8d1017143..0dc710bf845b8bb8c0fc51aced5dbca3c52c64f5 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -30,8 +30,7 @@ #include "Decomposition/CartDecomposition.hpp" #include "data_type/aggregate.hpp" #include "NN/VerletList/VerletList.hpp" - -#define V_SUB_UNIT_FACTOR 64 +#include "vector_dist_comm.hpp" #define NO_ID false #define ID true @@ -40,9 +39,6 @@ #define GET 1 #define PUT 2 -#define NO_POSITION 1 -#define WITH_POSITION 2 - // Write the particles with ghost #define NO_GHOST 0 #define WITH_GHOST 2 @@ -73,16 +69,13 @@ */ template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> -class vector_dist +class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory> { private: //! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle size_t g_m = 0; - //! Space Decomposition - Decomposition dec; - //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles openfpm::vector<Point<dim, St>> v_pos; @@ -94,708 +87,6 @@ private: //! Virtual cluster Vcluster & v_cl; - //! definition of the send vector for position - typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> send_pos_vector; - - ////////////////////////////// - // COMMUNICATION variables - ////////////////////////////// - - //! It map the processor id with the communication request into map procedure - openfpm::vector<size_t> p_map_req; - - //! For each near processor, outgoing particle id and shift vector - openfpm::vector<openfpm::vector<size_t>> opart; - - //! For each near processor, particle shift vector - openfpm::vector<openfpm::vector<size_t>> oshift; - - //! For each adjacent processor the size of the ghost sending buffer - openfpm::vector<size_t> ghost_prc_sz; - - //! Sending buffer for the ghost particles properties - BHeapMemory g_prp_mem; - - //! Sending buffer for the ghost particles position - BHeapMemory g_pos_mem; - - //! For each adjacent processor it store the size of the receiving message in byte - openfpm::vector<size_t> recv_sz; - - //! For each adjacent processor it store the received message for ghost get - openfpm::vector<BHeapMemory> recv_mem_gg; - - //! For each processor it store the received message for global map - openfpm::vector<BHeapMemory> recv_mem_gm; - - /*! \brief It store for each processor the position and properties vector of the particles - * - * This structure is used in the map function - * - */ - struct pos_prop - { - //! position vector - openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos; - //! properties vector - openfpm::vector<prop, PreAllocHeapMemory<2>, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp; - }; - - /*! \brief for each processor store 2 vector containing the sending buffers - * - * This structure is used in the map_list function - * - */ - template <typename sel_prop> - struct pos_prop_sel - { - //! position vector - openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos; - //! properties vector - openfpm::vector<sel_prop, PreAllocHeapMemory<2>, typename memory_traits_lin<sel_prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp; - }; - - /*! \brief Label particles for mappings - * - * \param lbl_p Particle labeled - * \param prc_sz For each processor the number of particles to send - * \param opart id of the particles to send - * - */ - template<typename obp> void labelParticleProcessor(openfpm::vector<openfpm::vector<size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz, openfpm::vector<size_t> & opart) - { - // reset lbl_p - lbl_p.resize(v_cl.getProcessingUnits()); - for (size_t i = 0; i < lbl_p.size(); i++) - lbl_p.get(i).clear(); - - // resize the label buffer - prc_sz.resize(v_cl.getProcessingUnits()); - - auto it = v_pos.getIterator(); - - // Label all the particles with the processor id where they should go - while (it.isNext()) - { - auto key = it.get(); - - // Apply the boundary conditions - dec.applyPointBC(v_pos.get(key)); - - size_t p_id = 0; - - // Check if the particle is inside the domain - if (dec.getDomain().isInside(v_pos.get(key)) == true) - p_id = dec.processorIDBC(v_pos.get(key)); - else - p_id = obp::out(key, v_cl.getProcessUnitID()); - - // Particle to move - if (p_id != v_cl.getProcessUnitID()) - { - if ((long int) p_id != -1) - { - prc_sz.get(p_id)++;lbl_p - .get(p_id).add(key); - opart.add(key); - } - else - { - opart.add(key); - } - } - - // Add processors and add size - - ++it; - } - } - - /*! \brief Label the particles - * - * It count the number of particle to send to each processors and save its ids - * - * \see nn_prcs::getShiftvectors() - * - */ - void labelParticlesGhost() - { - // Buffer that contain the number of elements to send for each processor - ghost_prc_sz.clear(); - ghost_prc_sz.resize(dec.getNNProcessors()); - - // Buffer that contain for each processor the id of the particle to send - opart.clear(); - opart.resize(dec.getNNProcessors()); - - // Buffer that contain for each processor the id of the shift vector - oshift.clear(); - oshift.resize(dec.getNNProcessors()); - - // Iterate over all particles - auto it = v_pos.getIteratorTo(g_m); - while (it.isNext()) - { - auto key = it.get(); - - // Given a particle, it return which processor require it (first id) and shift id, second id - // For an explanation about shifts vectors please consult getShiftVector in ie_ghost - const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE); - - for (size_t i = 0; i < vp_id.size(); i++) - { - // processor id - size_t p_id = vp_id.get(i).first; - - // add particle to communicate - ghost_prc_sz.get(p_id)++; - opart.get(p_id).add(key); - oshift.get(p_id).add(vp_id.get(i).second); - } - - ++it; - } - } - - /*! \brief Add local particles based on the boundary conditions - * - * In order to understand what this function use the following - * - \verbatim - - [1,1] - +---------+------------------------+---------+ - | (1,-1) | | (1,1) | - | | | (1,0) --> 7 | | | - | v | | v | - | 6 | | 8 | - +--------------------------------------------+ - | | | | - | | | | - | | | | - | (-1,0) | | (1,0) | - | | | | | | - | v | (0,0) --> 4 | v | - | 3 | | 5 | - | | | | - B | | | A | - * | | | * | - | | | | - | | | | - | | | | - +--------------------------------------------+ - | (-1,-1) | | (-1,1) | - | | | (-1,0) --> 1 | | | - | v | | v | - | 0 | | 2 | - +---------+------------------------+---------+ - - - \endverbatim - - * - * The box is the domain, while all boxes at the border (so not (0,0) ) are the - * ghost part at the border of the domain. If a particle A is in the position in figure - * a particle B must be created. This function duplicate the particle A, if A and B are - * local - * - */ - void add_loc_particles_bc() - { - // get the shift vectors - const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); - - // this map is used to check if a combination is already present - std::unordered_map<size_t, size_t> map_cmb; - - // Add local particles coming from periodic boundary, the only boxes that count are the one - // touching the border, filter them - - // The boxes touching the border of the domain are divided in groups (first vector) - // each group contain internal ghost coming from sub-domain of the same section - openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f; - openfpm::vector_std<comb<dim>> box_cmb; - - for (size_t i = 0; i < dec.getNLocalSub(); i++) - { - size_t Nl = dec.getLocalNIGhost(i); - - for (size_t j = 0; j < Nl; j++) - { - // If the ghost does not come from the intersection with an out of - // border sub-domain the combination is all zero and n_zero return dim - if (dec.getLocalIGhostPos(i, j).n_zero() == dim) - continue; - - // Check if we already have boxes with such combination - auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin()); - if (it == map_cmb.end()) - { - // we do not have it - box_f.add(); - box_f.last().add(dec.getLocalIGhostBox(i, j)); - box_cmb.add(dec.getLocalIGhostPos(i, j)); - map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1; - } - else - { - // we have it - box_f.get(it->second).add(dec.getLocalIGhostBox(i, j)); - } - - } - } - - if (box_f.size() == 0) - return; - else - { - // Label the internal (assigned) particles - auto it = v_pos.getIteratorTo(g_m); - - while (it.isNext()) - { - auto key = it.get(); - - // If particles are inside these boxes - for (size_t i = 0; i < box_f.size(); i++) - { - for (size_t j = 0; j < box_f.get(i).size(); j++) - { - if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true) - { - Point<dim, St> p = v_pos.get(key); - // shift - p -= shifts.get(box_cmb.get(i).lin()); - - // add this particle shifting its position - v_pos.add(p); - v_prp.add(); - v_prp.last() = v_prp.get(key); - - // boxes in one group can be overlapping - // we do not have to search for the other - // boxes otherwise we will have duplicate particles - // - // A small note overlap of boxes across groups is fine - // (and needed) because each group has different shift - // producing non overlapping particles - // - break; - } - } - } - - ++it; - } - } - } - - /*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles - * - * \param g_pos_send Send buffer to fill - * \param prAlloc_pos Memory object for the send buffer - * - */ - void fill_send_ghost_pos_buf(openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos) - { - // get the shift vectors - const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); - - // create a number of send buffers equal to the near processors - g_pos_send.resize(ghost_prc_sz.size()); - for (size_t i = 0; i < g_pos_send.size(); i++) - { - // set the preallocated memory to ensure contiguity - g_pos_send.get(i).setMemory(*prAlloc_pos); - - // resize the sending vector (No allocation is produced) - g_pos_send.get(i).resize(ghost_prc_sz.get(i)); - } - - // Fill the send buffer - for (size_t i = 0; i < opart.size(); i++) - { - for (size_t j = 0; j < opart.get(i).size(); j++) - { - Point<dim, St> s = v_pos.get(opart.get(i).get(j)); - s -= shifts.get(oshift.get(i).get(j)); - g_pos_send.get(i).set(j, s); - } - } - } - - /*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles - * - * \tparam send_vector type used to send data - * \tparam prp_object object containing only the properties to send - * \tparam prp set of properties to send - * - * \param g_send_prp Send buffer to fill - * \param prAlloc_prp Memory object for the send buffer - * - */ - template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp) - { - // create a number of send buffers equal to the near processors - g_send_prp.resize(ghost_prc_sz.size()); - for (size_t i = 0; i < g_send_prp.size(); i++) - { - // set the preallocated memory to ensure contiguity - g_send_prp.get(i).setMemory(*prAlloc_prp); - - // resize the sending vector (No allocation is produced) - g_send_prp.get(i).resize(ghost_prc_sz.get(i)); - } - - // Fill the send buffer - for (size_t i = 0; i < opart.size(); i++) - { - for (size_t j = 0; j < opart.get(i).size(); j++) - { - // source object type - typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src; - // destination object type - typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst; - - // Copy only the selected properties - object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(opart.get(i).get(j)), g_send_prp.get(i).get(j)); - } - } - } - - /*! \brief allocate and fill the send buffer for the map function - * - * \param prc_r List of processor rank involved in the send - * \param prc_sz_r For each processor in the list the size of the message to send - * \param pb send buffer - * - */ - void fill_send_map_buf(openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop> & pb) - { - pb.resize(prc_r.size()); - - for (size_t i = 0; i < prc_r.size(); i++) - { - // Create the size required to store the particles position and properties to communicate - size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); - size_t s2 = openfpm::vector<prop, HeapMemory, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); - - // Preallocate the memory - size_t sz[2] = { s1, s2 }; - PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz); - - // Set the memory allocator - pb.get(i).pos.setMemory(*mem); - pb.get(i).prp.setMemory(*mem); - - // set the size and allocate, using mem warant that pos and prp is contiguous - pb.get(i).pos.resize(prc_sz_r.get(i)); - pb.get(i).prp.resize(prc_sz_r.get(i)); - } - - // Run through all the particles and fill the sending buffer - - for (size_t i = 0; i < opart.size(); i++) - { - auto it = opart.get(i).getIterator(); - size_t lbl = p_map_req.get(i); - - while (it.isNext()) - { - size_t key = it.get(); - size_t id = opart.get(i).get(key); - - pb.get(lbl).pos.set(key, v_pos.get(id)); - pb.get(lbl).prp.set(key, v_prp.get(id)); - - ++it; - } - } - } - - /*! \brief allocate and fill the send buffer for the map function - * - * \param prc_r List of processor rank involved in the send - * \param prc_sz_r For each processor in the list the size of the message to send - * \param pb send buffer - * - */ - template<typename prp_object,int ... prp> void fill_send_map_buf_list(openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop_sel<prp_object>> & pb) - { - pb.resize(prc_r.size()); - - for (size_t i = 0; i < prc_r.size(); i++) - { - // Create the size required to store the particles position and properties to communicate - size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); - size_t s2 = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); - - // Preallocate the memory - size_t sz[2] = { s1, s2 }; - PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz); - - // Set the memory allocator - pb.get(i).pos.setMemory(*mem); - pb.get(i).prp.setMemory(*mem); - - // set the size and allocate, using mem warant that pos and prp is contiguous - pb.get(i).pos.resize(prc_sz_r.get(i)); - pb.get(i).prp.resize(prc_sz_r.get(i)); - } - - // Run through all the particles and fill the sending buffer - - for (size_t i = 0; i < opart.size(); i++) - { - auto it = opart.get(i).getIterator(); - size_t lbl = p_map_req.get(i); - - while (it.isNext()) - { - size_t key = it.get(); - size_t id = opart.get(i).get(key); - - pb.get(lbl).pos.set(key, v_pos.get(id)); - - // source object type - typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src; - // destination object type - typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst; - - // Copy only the selected properties - object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), pb.get(lbl).prp.get(key)); - - ++it; - } - } - } - - /*! \brief This function process the receiced data for the properties and populate the ghost - * - * \tparam send_vector type used to send data - * \tparam prp_object object containing only the properties to send - * \tparam prp set of properties to send - * - */ - template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp() - { - // Mark the ghost part - g_m = v_prp.size(); - - // Process the received data (recv_mem_gg != 0 if you have data) - for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) - { - // calculate the number of received elements - size_t n_ele = recv_sz.get(i) / sizeof(prp_object); - - // add the received particles to the vector - PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); - - // create vector representation to a piece of memory already allocated - openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity> v2; - - v2.setMemory(*ptr1); - - // resize with the number of elements - v2.resize(n_ele); - - // Add the ghost particle - v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2); - } - } - - /*! \brief This function process the received data for the properties and populate the ghost - * - */ - void process_received_ghost_pos() - { - // Process the received data (recv_mem_gg != 0 if you have data) - for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) - { - // calculate the number of received elements - size_t n_ele = recv_sz.get(i) / sizeof(Point<dim, St> ); - - // add the received particles to the vector - PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); - - // create vector representation to a piece of memory already allocated - - openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> v2; - - v2.setMemory(*ptr1); - - // resize with the number of elements - v2.resize(n_ele); - - // Add the ghost particle - v_pos.template add<PtrMemory, openfpm::grow_policy_identity>(v2); - } - } - - /*! \brief Process the received particles - * - * \param out_part list of the out-going particles - * - */ - void process_received_map(openfpm::vector<size_t> & out_part) - { - size_t o_p_id = 0; - - for (size_t i = 0; i < recv_mem_gm.size(); i++) - { - // Get the number of elements - - size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prop)); - - // Pointer of the received positions for each near processor - void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer(); - // Pointer of the received properties for each near processor - void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> ); - - PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> )); - PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prop)); - - // create vector representation to a piece of memory already allocated - - openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos; - openfpm::vector<prop, PtrMemory, typename memory_traits_lin<prop>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp; - - vpos.setMemory(*ptr1); - vprp.setMemory(*ptr2); - - vpos.resize(n_ele); - vprp.resize(n_ele); - - // Add the received particles to v_pos and v_prp - - size_t j = 0; - for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++) - { - v_pos.set(out_part.get(o_p_id), vpos.get(j)); - v_prp.set(out_part.get(o_p_id), vprp.get(j)); - } - - for (; j < vpos.size(); j++) - { - v_pos.add(); - v_pos.set(v_pos.size() - 1, vpos.get(j)); - v_prp.add(); - v_prp.set(v_prp.size() - 1, vprp.get(j)); - } - } - - // remove the (out-going particles) in the vector - - v_pos.remove(out_part, o_p_id); - v_prp.remove(out_part, o_p_id); - } - - /*! \brief Process the received particles - * - * \param out_part list of the out-going particles - * - */ - template<typename prp_object , int ... prp> void process_received_map_list(openfpm::vector<size_t> & out_part) - { - size_t o_p_id = 0; - - for (size_t i = 0; i < recv_mem_gm.size(); i++) - { - // Get the number of elements - - size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prp_object)); - - // Pointer of the received positions for each near processor - void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer(); - // Pointer of the received properties for each near processor - void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> ); - - PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> )); - PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prp_object)); - - // create vector representation to a piece of memory already allocated - - openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos; - openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp; - - vpos.setMemory(*ptr1); - vprp.setMemory(*ptr2); - - vpos.resize(n_ele); - vprp.resize(n_ele); - - // Add the received particles to v_pos and v_prp - - size_t j = 0; - for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++) - { - v_pos.set(out_part.get(o_p_id), vpos.get(j)); - v_prp.template set_o<decltype(vprp.get(j)), prp... >(out_part.get(o_p_id), vprp.get(j)); - } - - for (; j < vpos.size(); j++) - { - v_pos.add(); - v_pos.set(v_pos.size() - 1, vpos.get(j)); - v_prp.template set_o<decltype(vprp.get(j)), prp... >(v_prp.size() - 1, vprp.get(j)); - } - } - - // remove the (out-going particles) in the vector - - v_pos.remove(out_part, o_p_id); - v_prp.remove(out_part, o_p_id); - } - - /*! \brief Calculate send buffers total size and allocation - * - * \tparam prp_object object containing only the properties to send - * - * \param size_byte_prp total size for the property buffer - * \param size_byte_pos total size for the position buffer - * - */ - template<typename prp_object> void calc_send_ghost_buf(size_t & size_byte_prp, size_t & size_byte_pos) - { - // Calculate the total size required for the sending buffer - for (size_t i = 0; i < ghost_prc_sz.size(); i++) - { - size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); - size_byte_prp += alloc_ele; - - alloc_ele = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); - size_byte_pos += alloc_ele; - } - } - - /*! \brief Calculate parameters for the cell list - * - * \param div Division array - * \param r_cut interation radius or size of each cell - * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost. This parameter says how much must be enlarged - * - * \return the processor bounding box - */ -/* inline Box<dim, St> cl_param_calculate(size_t (&div)[dim], St r_cut, const Ghost<dim, St> & enlarge) - { - // calculate the parameters of the cell list - - // get the processor bounding box - Box<dim, St> pbox = dec.getProcessorBounds(); - - // extend by the ghost - pbox.enlarge(enlarge); - - // Calculate the division array and the cell box - for (size_t i = 0; i < dim; i++) - { - div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i)) / r_cut); - div[i]++; - pbox.setHigh(i,pbox.getLow(i) + div[i]*r_cut); - } - return pbox; - }*/ /*! \brief Initialize the structures * @@ -823,34 +114,6 @@ private: g_m = p_np; } - /*! \brief Initialize the decomposition - * - * \param box domain - * \param bc boundary conditions - * \param g ghost extension - * - */ - void init_decomposition(Box<dim,St> & box, const size_t (& bc)[dim],const Ghost<dim,St> & g) - { - // Create a valid decomposition of the space - // Get the number of processor and calculate the number of sub-domain - // for decomposition - size_t n_proc = v_cl.getProcessingUnits(); - size_t n_sub = n_proc * getDefaultNsubsub(); - - // Calculate the maximum number (before merging) of sub-domain on - // each dimension - size_t div[dim]; - for (size_t i = 0; i < dim; i++) - { - div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim)); - } - - // Create the sub-domains - dec.setParameters(div, box, bc, g); - dec.decompose(); - } - public: //! space type @@ -868,7 +131,7 @@ public: */ vector_dist<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist<dim,St,prop,Decomposition,Memory> & v) { - dec = v.dec; + static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory>>(v)); g_m = v.g_m; v_pos = v.v_pos; @@ -886,7 +149,7 @@ public: */ vector_dist<dim,St,prop,Decomposition,Memory> & operator=(vector_dist<dim,St,prop,Decomposition,Memory> && v) { - dec.swap(v.dec); + static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> >(v)); g_m = v.g_m; v_pos.swap(v.v_pos); @@ -901,7 +164,7 @@ public: * */ vector_dist(const vector_dist<dim,St,prop,Decomposition,Memory> & v) - :dec(v.v_cl),v_cl(v.v_cl) + :vector_dist_comm<dim,St,prop,Decomposition,Memory>(v.dec),v_cl(v.v_cl) { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -932,7 +195,7 @@ public: * */ vector_dist(const Decomposition & dec, size_t np) : - dec(dec), v_cl(create_vcluster()) + vector_dist_comm<dim,St,prop,Decomposition,Memory>(dec), v_cl(create_vcluster()) { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -950,15 +213,15 @@ public: * \param g Ghost margins * */ - vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g) : - dec(create_vcluster()), v_cl(create_vcluster()) + vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g) + :v_cl(create_vcluster()) { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); #endif init_structures(np); - init_decomposition(box,bc,g); + this->init_decomposition(box,bc,g); } ~vector_dist() @@ -968,14 +231,14 @@ public: #endif } - /*! \brief Get the number of minimum sub-domain + /*! \brief return the local size of the vector * - * \return minimum number + * \return local size * */ - static size_t getDefaultNsubsub() + size_t size_local() { - return V_SUB_UNIT_FACTOR; + return g_m; } /*! \brief return the local size of the vector @@ -983,9 +246,9 @@ public: * \return local size * */ - size_t size_local() + size_t size_local_with_ghost() { - return g_m; + return v_pos.size(); } /*! \brief Get the position of an element @@ -1046,292 +309,6 @@ public: return v_prp.template get<id>(vec_key.getKey()); } - /*! \brief It move all the particles that does not belong to the local processor to the respective processor - * - * \tparam out of bound policy it specify what to do when the particles are detected out of bound - * - * In general this function is called after moving the particles to move the - * elements out the local processor. Or just after initialization if each processor - * contain non local particles - * - */ - template<typename obp = KillParticle> void map() - { - // outgoing particles-id - openfpm::vector<size_t> out_part; - - // Processor communication size - openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); - - // It contain the list of the processors this processor should to communicate with - openfpm::vector<size_t> p_list; - - // map completely reset the ghost part - v_pos.resize(g_m); - v_prp.resize(g_m); - - // Contain the processor id of each particle (basically where they have to go) - labelParticleProcessor<obp>(opart, prc_sz, out_part); - - // Calculate the sending buffer size for each processor, put this information in - // a contiguous buffer - p_map_req.resize(v_cl.getProcessingUnits()); - openfpm::vector<size_t> prc_sz_r; - openfpm::vector<size_t> prc_r; - - for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) - { - if (prc_sz.get(i) != 0) - { - p_map_req.get(i) = prc_r.size(); - prc_r.add(i); - prc_sz_r.add(prc_sz.get(i)); - } - } - - // Allocate the send buffers - - openfpm::vector<pos_prop> pb; - - // fill the send buffers - fill_send_map_buf(prc_r, prc_sz_r, pb); - - // Create the set of pointers - openfpm::vector<void *> ptr(prc_r.size()); - for (size_t i = 0; i < prc_r.size(); i++) - { - ptr.get(i) = pb.get(i).pos.getPointer(); - } - - // convert the particle number to buffer size - for (size_t i = 0; i < prc_sz_r.size(); i++) - { - prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prop) + sizeof(Point<dim, St> )); - } - - // Send and receive the particles - - recv_mem_gm.clear(); - v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist::message_alloc_map, this, NEED_ALL_SIZE); - - // Process the incoming particles - - process_received_map(out_part); - - // mark the ghost part - - g_m = v_pos.size(); - } - - /*! \brief It move all the particles that does not belong to the local processor to the respective processor - * - * \tparam out of bound policy it specify what to do when the particles are detected out of bound - * - * In general this function is called after moving the particles to move the - * elements out the local processor. Or just after initialization if each processor - * contain non local particles - * - */ - template<unsigned int ... prp> void map_list() - { - typedef KillParticle obp; - - // outgoing particles-id - openfpm::vector<size_t> out_part; - - // Processor communication size - openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); - - // It contain the list of the processors this processor should to communicate with - openfpm::vector<size_t> p_list; - - // map completely reset the ghost part - v_pos.resize(g_m); - v_prp.resize(g_m); - - // Contain the processor id of each particle (basically where they have to go) - labelParticleProcessor<obp>(opart, prc_sz, out_part); - - // Calculate the sending buffer size for each processor, put this information in - // a contiguous buffer - p_map_req.resize(v_cl.getProcessingUnits()); - openfpm::vector<size_t> prc_sz_r; - openfpm::vector<size_t> prc_r; - - for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) - { - if (prc_sz.get(i) != 0) - { - p_map_req.get(i) = prc_r.size(); - prc_r.add(i); - prc_sz_r.add(prc_sz.get(i)); - } - } - - // Sending property object - typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; - - // Allocate the send buffers - - openfpm::vector<pos_prop_sel<prp_object>> pb; - - // fill the send buffers - fill_send_map_buf_list<prp_object,prp...>(prc_r, prc_sz_r, pb); - - // Create the set of pointers - openfpm::vector<void *> ptr(prc_r.size()); - for (size_t i = 0; i < prc_r.size(); i++) - { - ptr.get(i) = pb.get(i).pos.getPointer(); - } - - // convert the particle number to buffer size - for (size_t i = 0; i < prc_sz_r.size(); i++) - { - prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prp_object) + sizeof(Point<dim, St> )); - } - - // Send and receive the particles - - recv_mem_gm.clear(); - v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist::message_alloc_map, this, NEED_ALL_SIZE); - - // Process the incoming particles - - process_received_map_list<prp_object, prp...>(out_part); - - // mark the ghost part - - g_m = v_pos.size(); - } - - /*! \brief It synchronize the properties and position of the ghost particles - * - * \tparam prp list of properties to get synchronize - * \param opt options WITH_POSITION, it send also the positional information of the particles - * - */ - template<int ... prp> void ghost_get(size_t opt = WITH_POSITION) - { - // Unload receive buffer - for (size_t i = 0 ; i < recv_mem_gg.size() ; i++) - recv_sz.get(i) = 0; - - // Sending property object - typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; - - // send vector for each processor - typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity> send_vector; - - // reset the ghost part - v_pos.resize(g_m); - v_prp.resize(g_m); - - // Label all the particles - labelParticlesGhost(); - - // Calculate memory and allocation for the send buffers - - // Total size - size_t size_byte_prp = 0; - size_t size_byte_pos = 0; - - calc_send_ghost_buf<prp_object>(size_byte_prp, size_byte_pos); - - // Create memory for the send buffer - - g_prp_mem.resize(size_byte_prp); - if (opt != NO_POSITION) - g_pos_mem.resize(size_byte_pos); - - // Create and fill send buffer for particle properties - - ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(size_byte_prp, g_prp_mem); - - openfpm::vector<send_vector> g_send_prp; - fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(g_send_prp, prAlloc_prp); - - // Create and fill the send buffer for the particle position - - ExtPreAlloc<Memory> * prAlloc_pos; - openfpm::vector<send_pos_vector> g_pos_send; - if (opt != NO_POSITION) - { - prAlloc_pos = new ExtPreAlloc<Memory>(size_byte_pos, g_pos_mem); - fill_send_ghost_pos_buf(g_pos_send, prAlloc_pos); - } - - // Create processor list - openfpm::vector<size_t> prc; - for (size_t i = 0; i < opart.size(); i++) - prc.add(dec.IDtoProc(i)); - - // Send/receive the particle properties information - v_cl.sendrecvMultipleMessagesNBX(prc, g_send_prp, msg_alloc_ghost_get, this); - process_received_ghost_prp<send_vector, prp_object, prp...>(); - - if (opt != NO_POSITION) - { - // Send/receive the particle properties information - v_cl.sendrecvMultipleMessagesNBX(prc, g_pos_send, msg_alloc_ghost_get, this); - process_received_ghost_pos(); - } - - add_loc_particles_bc(); - } - - /*! \brief Call-back to allocate buffer to receive incoming elements (particles) - * - * \param msg_i message size required to receive from i - * \param total_msg message size to receive from all the processors - * \param total_p the total number of processor want to communicate with you - * \param i processor id - * \param ri request id (it is an id that goes from 0 to total_p, and is unique - * every time message_alloc is called) - * \param ptr void pointer parameter for additional data to pass to the call-back - * - */ - static void * msg_alloc_ghost_get(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) - { - vector_dist<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr); - - v->recv_sz.resize(v->dec.getNNProcessors()); - v->recv_mem_gg.resize(v->dec.getNNProcessors()); - - // Get the local processor id - size_t lc_id = v->dec.ProctoID(i); - - // resize the receive buffer - v->recv_mem_gg.get(lc_id).resize(msg_i); - v->recv_sz.get(lc_id) = msg_i; - - return v->recv_mem_gg.get(lc_id).getPointer(); - } - - /*! \brief Call-back to allocate buffer to receive incoming elements (particles) - * - * \param msg_i size required to receive the message from i - * \param total_msg total size to receive from all the processors - * \param total_p the total number of processor that want to communicate with you - * \param i processor id - * \param ri request id (it is an id that goes from 0 to total_p, and is unique - * every time message_alloc is called) - * \param ptr a pointer to the vector_dist structure - * - * \return the pointer where to store the message for the processor i - * - */ - static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) - { - // cast the pointer - vector_dist<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist<dim, St, prop, Decomposition, Memory> *>(ptr); - - vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits()); - vd->recv_mem_gm.get(i).resize(msg_i); - - return vd->recv_mem_gm.get(i).getPointer(); - } - /*! \brief Add local particle * * It add a local particle, with "local" we mean in this processor @@ -1382,7 +359,7 @@ public: template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut) { // Get ghost and anlarge by 1% - Ghost<dim,St> g = dec.getGhost(); + Ghost<dim,St> g = getDecomposition().getGhost(); g.magnify(1.013); return getCellList(r_cut, g); @@ -1400,7 +377,7 @@ public: template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut) { // Get ghost and anlarge by 1% - Ghost<dim,St> g = dec.getGhost(); + Ghost<dim,St> g = getDecomposition().getGhost(); g.magnify(1.013); return getCellList_hilb(r_cut, g); @@ -1457,7 +434,7 @@ public: size_t div[dim]; // get the processor bounding box - Box<dim, St> pbox = dec.getProcessorBounds(); + Box<dim, St> pbox = getDecomposition().getProcessorBounds(); // Processor bounding box cl_param_calculate(pbox, div, r_cut, enlarge); @@ -1492,7 +469,7 @@ public: size_t div[dim]; // get the processor bounding box - Box<dim, St> pbox = dec.getProcessorBounds(); + Box<dim, St> pbox = getDecomposition().getProcessorBounds(); // Processor bounding box cl_param_calculate(pbox,div, r_cut, enlarge); @@ -1505,29 +482,45 @@ public: } + /*! \brief for each particle get the verlet list * * \param r_cut cut-off radius + * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC * */ - VerletList<dim,St,FAST,shift<dim,St> > getVerlet(St r_cut) + VerletList<dim,St,FAST,shift<dim,St> > getVerlet(St r_cut, size_t opt = VL_NON_SYMMETRIC) { VerletList<dim,St,FAST,shift<dim,St>> ver; - // Division array - size_t div[dim]; - // get the processor bounding box - Box<dim, St> bt = dec.getProcessorBounds(); + Box<dim, St> bt = getDecomposition().getProcessorBounds(); + + // Get the ghost + Ghost<dim,St> g = getDecomposition().getGhost(); + g.magnify(1.013); - // Calculate the divisions for the Cell-lists - cl_param_calculate(bt,div,r_cut,Ghost<dim,St>(0.0)); + // enlarge the box where the Verlet is defined + bt.enlarge(g); - ver.Initialize(bt,r_cut,v_pos,g_m); + ver.Initialize(bt,getDecomposition().getProcessorBounds(),r_cut,v_pos,g_m,opt); return ver; } + /*! \brief for each particle get the verlet list + * + * \param r_cut cut-off radius + * \param ver Verlet to update + * \param r_cut cutoff radius + * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC + * + */ + void updateVerlet(VerletList<dim,St,FAST,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC) + { + ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); + } + /*! \brief for each particle get the verlet list * * \param verlet output verlet list for each particle @@ -1598,7 +591,7 @@ public: */ template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder (int32_t m) { - reorder(m,dec.getGhost()); + reorder(m,getDecomposition().getGhost()); } @@ -1626,7 +619,7 @@ public: // calculate the parameters of the cell list // get the processor bounding box - Box<dim,St> pbox = dec.getProcessorBounds(); + Box<dim,St> pbox = getDecomposition().getProcessorBounds(); // extend by the ghost pbox.enlarge(enlarge); @@ -1762,7 +755,7 @@ public: stop.set_d(i, sz[i] - 1); } - grid_dist_id_iterator_dec<Decomposition> it_dec(dec, sz, start, stop); + grid_dist_id_iterator_dec<Decomposition> it_dec(getDecomposition(), sz, start, stop); return it_dec; } @@ -1786,14 +779,69 @@ public: return vector_dist_iterator(0, g_m); } + /*! \brief Get an iterator that traverse the particles in the domain + * + * \return an iterator + * + */ + vector_dist_iterator getDomainAndGhostIterator() const + { + return vector_dist_iterator(0, v_pos.size()); + } + /*! \brief Get the decomposition * * \return * */ - Decomposition & getDecomposition() + inline Decomposition & getDecomposition() + { + return vector_dist_comm<dim,St,prop,Decomposition,Memory>::getDecomposition(); + } + + /*! \brief It move all the particles that does not belong to the local processor to the respective processor + * + * \tparam out of bound policy it specify what to do when the particles are detected out of bound + * + * In general this function is called after moving the particles to move the + * elements out the local processor. Or just after initialization if each processor + * contain non local particles + * + * \tparam prp properties to communicate + * + * + */ + template<unsigned int ... prp> void map_list() { - return dec; + this->template map_list_<prp...>(v_pos,v_prp,g_m); + } + + + /*! \brief It move all the particles that does not belong to the local processor to the respective processor + * + * \tparam out of bound policy it specify what to do when the particles are detected out of bound + * + * In general this function is called after moving the particles to move the + * elements out the local processor. Or just after initialization if each processor + * contain non local particles + * + * + */ + template<typename obp = KillParticle> void map() + { + this->template map_<obp>(v_pos,v_prp,g_m); + } + + /*! \brief It synchronize the properties and position of the ghost particles + * + * \tparam prp list of properties to get synchronize + * + * \param opt options WITH_POSITION, it send also the positional information of the particles + * + */ + template<int ... prp> inline void ghost_get(size_t opt = WITH_POSITION) + { + this->template ghost_get_<prp...>(v_pos,v_prp,g_m,opt); } /*! \brief Remove a set of elements from the distributed vector @@ -1832,9 +880,11 @@ public: { CellDecomposer_sm<dim, St> cdsm; + Decomposition & dec = getDecomposition(); + cdsm.setDimensions(dec.getDomain(), dec.getGrid().getSize(), 0); - for (size_t i = 0; i < dec.getNSubSubDomains(); i++) + for (size_t i = 0; i < getDecomposition().getNSubSubDomains(); i++) { dec.setSubSubDomainComputationCost(i, 1); } diff --git a/src/Vector/vector_dist_cell_list_tests.hpp b/src/Vector/vector_dist_cell_list_tests.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ca3a1b49f96ae8848debaf94fd7c03099635b0d7 --- /dev/null +++ b/src/Vector/vector_dist_cell_list_tests.hpp @@ -0,0 +1,925 @@ +/* + * vector_dist_cell_list_tests.hpp + * + * Created on: Aug 16, 2016 + * Author: i-bird + */ + +#ifndef SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ +#define SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ + + +///////////////////////// test hilb /////////////////////////////// + +BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 48) + return; + + // set the seed + // create the random generator engine + std::srand(v_cl.getProcessUnitID()); + std::default_random_engine eg; + std::uniform_real_distribution<float> ud(0.0f, 1.0f); + + long int k = 524288 * v_cl.getProcessingUnits(); + + long int big_step = k / 4; + big_step = (big_step == 0)?1:big_step; + + print_test_v( "Testing 2D vector with hilbert curve reordering k<=",k); + + // 2D test + for ( ; k >= 2 ; k-= decrement(k,big_step) ) + { + BOOST_TEST_CHECKPOINT( "Testing 2D vector with hilbert curve reordering k=" << k ); + + Box<2,float> box({0.0,0.0},{1.0,1.0}); + + // Boundary conditions + size_t bc[2]={NON_PERIODIC,NON_PERIODIC}; + + vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box,bc,Ghost<2,float>(0.0)); + + auto it = vd.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.getPos(key)[0] = ud(eg); + vd.getPos(key)[1] = ud(eg); + + ++it; + } + + vd.map(); + + // Create first cell list + + auto NN1 = vd.getCellList(0.01); + + //An order of a curve + int32_t m = 6; + + //Reorder a vector + vd.reorder(m); + + // Create second cell list + auto NN2 = vd.getCellList(0.01); + + //Check equality of cell sizes + for (size_t i = 0 ; i < NN1.getGrid().size() ; i++) + { + size_t n1 = NN1.getNelements(i); + size_t n2 = NN2.getNelements(i); + + BOOST_REQUIRE_EQUAL(n1,n2); + } + } +} + +BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 48) + return; + + ///////////////////// INPUT DATA ////////////////////// + + // Dimensionality of the space + const size_t dim = 3; + // Cut-off radiuses. Can be put different number of values + openfpm::vector<float> cl_r_cutoff {0.05}; + // The starting amount of particles (remember that this number is multiplied by number of processors you use for testing) + size_t cl_k_start = 10000; + // The lower threshold for number of particles + size_t cl_k_min = 1000; + // Ghost part of distributed vector + double ghost_part = 0.05; + + /////////////////////////////////////////////////////// + + //For different r_cut + for (size_t r = 0; r < cl_r_cutoff.size(); r++ ) + { + //Cut-off radius + float r_cut = cl_r_cutoff.get(r); + + //Number of particles + size_t k = cl_k_start * v_cl.getProcessingUnits(); + + std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs hilb celllist) k<="); + + vector_dist_test::print_test_v(str,k); + + //For different number of particles + for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) + { + BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces (random vs hilb celllist) k<=" << k_int ); + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + for (size_t i = 0; i < dim; i++) + bc[i] = PERIODIC; + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + // Initialize dist vectors + vd_initialize_double<dim>(vd, vd2, v_cl, k_int); + + vd.template ghost_get<0>(); + vd2.template ghost_get<0>(); + + //Get a cell list + + auto NN = vd.getCellList(r_cut); + + //Calculate forces + + calc_forces<dim>(NN,vd,r_cut); + + //Get a cell list hilb + + auto NN_hilb = vd2.getCellList_hilb(r_cut); + + //Calculate forces + calc_forces_hilb<dim>(NN_hilb,vd2,r_cut); + + // Calculate average + size_t count = 1; + Point<dim,float> avg; + for (size_t i = 0 ; i < dim ; i++) {avg.get(i) = 0.0;} + + auto it_v2 = vd.getIterator(); + while (it_v2.isNext()) + { + //key + vect_dist_key_dx key = it_v2.get(); + + for (size_t i = 0; i < dim; i++) + avg.get(i) += fabs(vd.template getProp<0>(key)[i]); + + ++count; + ++it_v2; + } + + for (size_t i = 0 ; i < dim ; i++) {avg.get(i) /= count;} + + auto it_v = vd.getIterator(); + while (it_v.isNext()) + { + //key + vect_dist_key_dx key = it_v.get(); + + for (size_t i = 0; i < dim; i++) + { + auto a1 = vd.template getProp<0>(key)[i]; + auto a2 = vd2.template getProp<0>(key)[i]; + + //Check that the forces are (almost) equal + float per = 0.1; + if (a1 != 0.0) + per = fabs(0.1*avg.get(i)/a1); + + BOOST_REQUIRE_CLOSE(a1,a2,per); + } + + ++it_v; + } + } + } +} + +BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 48) + return; + + ///////////////////// INPUT DATA ////////////////////// + + // Dimensionality of the space + const size_t dim = 3; + // Cut-off radiuses. Can be put different number of values + openfpm::vector<float> cl_r_cutoff {0.01}; + // The starting amount of particles (remember that this number is multiplied by number of processors you use for testing) + size_t cl_k_start = 10000; + // The lower threshold for number of particles + size_t cl_k_min = 1000; + // Ghost part of distributed vector + double ghost_part = 0.01; + + /////////////////////////////////////////////////////// + + //For different r_cut + for (size_t r = 0; r < cl_r_cutoff.size(); r++ ) + { + //Cut-off radius + float r_cut = cl_r_cutoff.get(r); + + //Number of particles + size_t k = cl_k_start * v_cl.getProcessingUnits(); + + std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs reorder) k<="); + + vector_dist_test::print_test_v(str,k); + + //For different number of particles + for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) + { + BOOST_TEST_CHECKPOINT( "Testing " << dim << "D vector's forces (random vs reorder) k<=" << k_int ); + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + for (size_t i = 0; i < dim; i++) + bc[i] = PERIODIC; + + vector_dist<dim,float, aggregate<float[dim], float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + + // Initialize vd + vd_initialize<dim,decltype(vd)>(vd, v_cl, k_int); + + vd.template ghost_get<0>(); + + //Get a cell list + + auto NN1 = vd.getCellList(r_cut); + + //Calculate forces '0' + + calc_forces<dim>(NN1,vd,r_cut); + + //Reorder and get a cell list again + + vd.reorder(4); + + vd.template ghost_get<0>(); + + auto NN2 = vd.getCellList(r_cut); + + //Calculate forces '1' + calc_forces<dim,1>(NN2,vd,r_cut); + + // Calculate average (For Coverty scan we start from 1) + size_t count = 1; + Point<dim,float> avg; + for (size_t i = 0 ; i < dim ; i++) {avg.get(i) = 0.0;} + + auto it_v2 = vd.getIterator(); + while (it_v2.isNext()) + { + //key + vect_dist_key_dx key = it_v2.get(); + + for (size_t i = 0; i < dim; i++) + avg.get(i) += fabs(vd.template getProp<0>(key)[i]); + + ++count; + ++it_v2; + } + + for (size_t i = 0 ; i < dim ; i++) {avg.get(i) /= count;} + + //Test for equality of forces + auto it_v = vd.getDomainIterator(); + + while (it_v.isNext()) + { + //key + vect_dist_key_dx key = it_v.get(); + + for (size_t i = 0; i < dim; i++) + { + auto a1 = vd.template getProp<0>(key)[i]; + auto a2 = vd.template getProp<1>(key)[i]; + + //Check that the forces are (almost) equal + float per = 0.1; + if (a1 != 0.0) + per = fabs(0.1*avg.get(i)/a1); + + BOOST_REQUIRE_CLOSE(a1,a2,per); + } + + ++it_v; + } + } + } +} + + +BOOST_AUTO_TEST_CASE( vector_dist_sym_cell_list_test ) +{ + long int k = 4096*create_vcluster().getProcessingUnits(); + + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + print_test( "Testing vector symmetric cell-list k<=",k); + + // 3D test + for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step ) + { + double r_cut = 0.1; + + // domain + Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost, big enough to contain the interaction radius + Ghost<3,float> ghost(r_cut); + + vector_dist<3,double, aggregate<double> > vd(k,box,bc,ghost); + + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (double)rand()/RAND_MAX; + vd.getPos(p)[1] = (double)rand()/RAND_MAX; + vd.getPos(p)[2] = (double)rand()/RAND_MAX; + + ++it; + } + } + + vd.map(); + vd.ghost_get<>(); + + // Get the Cell list structure + auto NN = vd.getCellList(r_cut); + + // Get an iterator over particles + auto it2 = vd.getDomainAndGhostIterator(); + + openfpm::vector<openfpm::vector<size_t>> idx; + idx.resize(vd.size_local_with_ghost()); + + /////////// SYMMETRIC CASE CELL-LIST //////// + + // For each particle ... + while (it2.isNext()) + { + // ... p + auto p = it2.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p symmetric + auto NpSym = NN.template getNNIteratorSym<NO_CHECK>(NN.getCell(vd.getPos(p)),p.getKey()); + + // For each neighborhood of the particle p + while (NpSym.isNext()) + { + // Neighborhood particle q + auto q = NpSym.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++NpSym; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + // potential energy (using pow is slower) + vd.getProp<0>(p) += rn; + vd.getProp<0>(q) += rn; + + idx.get(p.getKey()).add(q); + idx.get(q).add(p.getKey()); + + + // Next neighborhood + ++NpSym; + } + + // Next Particle + ++it2; + } + + /////////////// NON SYMMETRIC CASE //////////////////////// + + openfpm::vector<openfpm::vector<size_t>> idx2; + idx2.resize(vd.size_local()); + + auto it = vd.getDomainIterator(); + + // For each particle ... + while (it.isNext()) + { + // ... p + auto p = it.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(vd.getPos(p))); + + double Ep = 0.0; + + // For each neighborhood of the particle p + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++Np; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + idx2.get(p.getKey()).add(q); + + // potential energy (using pow is slower) + Ep += rn; + + // Next neighborhood + ++Np; + } + + idx.get(p.getKey()).sort(); + idx2.get(p.getKey()).sort(); + + bool ret = true; + + for (size_t i = 0 ; i < idx.get(p.getKey()).size() ; i++) + ret &= idx.get(p.getKey()).get(i) == idx2.get(p.getKey()).get(i); + + BOOST_REQUIRE_EQUAL(ret,true); + + BOOST_REQUIRE_CLOSE(Ep,vd.getProp<0>(p),0.01); + + // Next Particle + ++it; + } + } +} + + +BOOST_AUTO_TEST_CASE( vector_dist_sym_verlet_list_test ) +{ + if (create_vcluster().getProcessingUnits() > 12) + return; + + long int k = 4096*create_vcluster().getProcessingUnits(); + + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + print_test( "Testing vector symmetric verlet-list k<=",k); + + // 3D test + for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step ) + { + double r_cut = 0.1; + + // domain + Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost, big enough to contain the interaction radius + Ghost<3,float> ghost(r_cut); + + vector_dist<3,double, aggregate<double> > vd(k,box,bc,ghost); + + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (double)rand()/RAND_MAX; + vd.getPos(p)[1] = (double)rand()/RAND_MAX; + vd.getPos(p)[2] = (double)rand()/RAND_MAX; + + ++it; + } + } + + vd.map(); + vd.ghost_get<>(); + + // Get the Cell list structure + auto NN = vd.getVerlet(r_cut,VL_SYMMETRIC); + auto NNc = vd.getVerlet(r_cut); + + // Get an iterator over particles + auto it2 = vd.getDomainAndGhostIterator(); + + openfpm::vector<openfpm::vector<size_t>> idx; + idx.resize(vd.size_local_with_ghost()); + + /////////// SYMMETRIC CASE VERLET-LIST //////// + + // For each particle ... + while (it2.isNext()) + { + // ... p + auto p = it2.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p symmetric + auto NpSym = NN.template getNNIterator<NO_CHECK>(p.getKey()); + + // For each neighborhood of the particle p + while (NpSym.isNext()) + { + // Neighborhood particle q + auto q = NpSym.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++NpSym; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + // potential energy (using pow is slower) + vd.getProp<0>(p) += rn; + vd.getProp<0>(q) += rn; + + idx.get(p.getKey()).add(q); + idx.get(q).add(p.getKey()); + + + // Next neighborhood + ++NpSym; + } + + // Next Particle + ++it2; + } + + /////////////// NON SYMMETRIC CASE VERLET-LIST //////////////////////// + + openfpm::vector<openfpm::vector<size_t>> idx2; + idx2.resize(vd.size_local()); + + auto it = vd.getDomainIterator(); + + // For each particle ... + while (it.isNext()) + { + // ... p + auto p = it.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p + auto Np = NNc.template getNNIterator<NO_CHECK>(p.getKey()); + + double Ep = 0.0; + + // For each neighborhood of the particle p + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++Np; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + idx2.get(p.getKey()).add(q); + + // potential energy (using pow is slower) + Ep += rn; + + // Next neighborhood + ++Np; + } + + idx.get(p.getKey()).sort(); + idx2.get(p.getKey()).sort(); + + bool ret = true; + + BOOST_REQUIRE_EQUAL(idx.get(p.getKey()).size(),idx2.get(p.getKey()).size()); + + for (size_t i = 0 ; i < idx.get(p.getKey()).size() ; i++) + { + ret &= idx.get(p.getKey()).get(i) == idx2.get(p.getKey()).get(i); + } + + BOOST_REQUIRE_EQUAL(ret,true); + + BOOST_REQUIRE_CLOSE(Ep,vd.getProp<0>(p),0.01); + + // Next Particle + ++it; + } + } +} + +struct couple_contrib +{ + size_t i; + size_t j; + size_t ir; + size_t jr; + double E; + + bool operator<(couple_contrib & ct) + { + return E < ct.E; + } +}; + +BOOST_AUTO_TEST_CASE( vector_dist_sym_tot_red_computation ) +{ + if (create_vcluster().getProcessingUnits() > 12) + return; + + long int k = 4096*create_vcluster().getProcessingUnits(); + + size_t base = create_vcluster().getProcessUnitID() * 4096; + + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + print_test( "Testing vector symmetric verlet-list k<=",k); + + openfpm::vector<couple_contrib> cct; + openfpm::vector<couple_contrib> cct2; + + // 3D test +/* for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step ) + {*/ + double r_cut = 0.05; + + // domain + Box<3,double> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost, big enough to contain the interaction radius + Ghost<3,float> ghost(r_cut); + + vector_dist<3,double, aggregate<size_t> > vd(k,box,bc,ghost); + + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] = (double)rand()/RAND_MAX; + vd.getPos(p)[1] = (double)rand()/RAND_MAX; + vd.getPos(p)[2] = (double)rand()/RAND_MAX; + + vd.getProp<0>(p) = p.getKey() + base; + + ++it; + } + } + + vd.map(); + vd.ghost_get<0>(); + + // Get the Cell list structure + auto NN = vd.getVerlet(r_cut,VL_SYMMETRIC_RED); + auto NNc = vd.getVerlet(r_cut); + + // Get an iterator over particles + auto it2 = vd.getDomainAndGhostIterator(); + + /////////// SYMMETRIC CASE VERLET-LIST REDUCTION //////// + + { + auto it2 = vd.getDomainAndGhostIterator(); + + // DEBUG + + // For each particle ... + while (it2.isNext()) + { + // ... p + auto p = it2.get(); + + if( vd.getProp<0>(p) == 2524 || vd.getProp<0>(p) == 4135 ) + { + Vcluster & v_cl = create_vcluster(); + Point<3,double> xp = vd.getPos(p); + + std::cout << "Particle " << p.getKey() << " " << xp.toString() << " " << v_cl.getProcessUnitID() << std::endl; + } + + ++it2; + } + } + + + openfpm::vector<couple_contrib> ctt; + + double tot_sym = 0.0; + size_t tot_n_sym = 0; + + // For each particle ... + while (it2.isNext()) + { + // ... p + auto p = it2.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p symmetric + auto NpSym = NN.template getNNIterator<NO_CHECK>(p.getKey()); + + // For each neighborhood of the particle p + while (NpSym.isNext()) + { + // Neighborhood particle q + auto q = NpSym.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++NpSym; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + ////// debug //////////// + + couple_contrib cc; + cc.i = vd.getProp<0>(p); + cc.j = vd.getProp<0>(q); + cc.ir = p.getKey(); + cc.jr = q; + cc.E = rn; + + cct.add(cc); + + ///////////////////////// + + // potential energy (using pow is slower) + tot_sym += rn; + tot_n_sym++; + + // Next neighborhood + ++NpSym; + } + + // Next Particle + ++it2; + } + + /////////////// NON SYMMETRIC CASE VERLET-LIST //////////////////////// + + double tot = 0.0; + size_t tot_n = 0; + + auto it = vd.getDomainIterator(); + + // For each particle ... + while (it.isNext()) + { + // ... p + auto p = it.get(); + + // Get the position of the particle p + Point<3,double> xp = vd.getPos(p); + + // Get an iterator over the neighborhood of the particle p + auto Np = NNc.template getNNIterator<NO_CHECK>(p.getKey()); + + // For each neighborhood of the particle p + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + // if p == q skip this particle + if (q == p.getKey()) {++Np; continue;}; + + // Get position of the particle q + Point<3,double> xq = vd.getPos(q); + + // take the normalized direction + double rn = norm2(xp - xq); + + // potential energy (using pow is slower) + tot += rn/2.0; + tot_n++; + + ////// debug //////////// + + couple_contrib cc; + cc.i = vd.getProp<0>(p); + cc.j = vd.getProp<0>(q); + cc.E = rn; + + cct2.add(cc); + + ///////////////////////// + + // Next neighborhood + ++Np; + } + + // Next Particle + ++it; + } + + Vcluster & v_cl = create_vcluster(); + + v_cl.sum(tot_n); + v_cl.sum(tot_n_sym); + v_cl.sum(tot); + v_cl.sum(tot_sym); + v_cl.execute(); + + openfpm::vector<couple_contrib> cc_collect; + openfpm::vector<couple_contrib> cc_collect2; + + v_cl.SGather(cct,cc_collect,0); + v_cl.SGather(cct2,cc_collect2,0); + + if (v_cl.getProcessUnitID() == 0) + { + cc_collect.sort(); + cc_collect2.sort(); + + for (size_t i = 0 ; i < cc_collect2.size() ; i++) + { + if (i % 10000 == 0) + std::cout << i << " " << cc_collect2.size() << std::endl; + + for (size_t k = i+1 ; k < i+2 && k < cc_collect2.size() ; k++) + { + if (cc_collect2.get(i).i == cc_collect2.get(k).j && cc_collect2.get(i).j == cc_collect2.get(k).i) + { + cc_collect2.remove(k); + } + } + } + + for (size_t i = 0 ; i < cc_collect.size() && i < cc_collect2.size() ; i++) + std::cout << "E1: " << cc_collect.get(i).E << " " << cc_collect.get(i).i << "(" << cc_collect.get(i).ir << ")" << " " << cc_collect.get(i).j << "(" << cc_collect.get(i).jr << ")" << " E2: " << cc_collect2.get(i).E << " " << cc_collect2.get(i).i << " " << cc_collect2.get(i).j << std::endl; + } + +// BOOST_REQUIRE_EQUAL(2*tot_n_sym,tot_n); +// BOOST_REQUIRE_CLOSE(tot,tot_sym,0.00001); + +/* }*/ +} + +#endif /* SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ */ diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fa8dd9c417058b187173ec5c86545f6c2b4d9895 --- /dev/null +++ b/src/Vector/vector_dist_comm.hpp @@ -0,0 +1,1217 @@ +/* + * vector_dist_comm.hpp + * + * Created on: Aug 18, 2016 + * Author: i-bird + */ + +#ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_ +#define SRC_VECTOR_VECTOR_DIST_COMM_HPP_ + +#define V_SUB_UNIT_FACTOR 64 + +#define SKIP_LABELLING 512 + +#define NO_POSITION 1 +#define WITH_POSITION 2 + +/*! \brief This class is an helper for the communication of vector_dist + * + * \tparam dim Dimensionality of the space where the elements lives + * \tparam St type of space float, double ... + * \tparam prop properties the vector element store in OpenFPM data structure format + * \tparam Decomposition Decomposition strategy to use CartDecomposition ... + * \tparam Memory Memory pool where store the information HeapMemory ... + * + * \see vector_dist + * + */ + +template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> +class vector_dist_comm +{ + //! VCluster + Vcluster & v_cl; + + //! Domain decomposition + Decomposition dec; + + //! It map the processor id with the communication request into map procedure + openfpm::vector<size_t> p_map_req; + + //! For each near processor, outgoing particle id and shift vector + openfpm::vector<openfpm::vector<size_t>> opart; + + //! For each near processor, particle shift vector + openfpm::vector<openfpm::vector<size_t>> oshift; + + //! For each adjacent processor the size of the ghost sending buffer + openfpm::vector<size_t> ghost_prc_sz; + + //! Sending buffer for the ghost particles properties + BHeapMemory g_prp_mem; + + //! Sending buffer for the ghost particles position + BHeapMemory g_pos_mem; + + //! For each adjacent processor it store the size of the receiving message in byte + openfpm::vector<size_t> recv_sz; + + //! For each adjacent processor it store the received message for ghost get + openfpm::vector<BHeapMemory> recv_mem_gg; + + //! For each processor it store the received message for global map + openfpm::vector<BHeapMemory> recv_mem_gm; + + /*! \brief It store for each processor the position and properties vector of the particles + * + * This structure is used in the map function + * + */ + struct pos_prop + { + //! position vector + openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos; + //! properties vector + openfpm::vector<prop, PreAllocHeapMemory<2>, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp; + }; + + /*! \brief for each processor store 2 vector containing the sending buffers + * + * This structure is used in the map_list function + * + */ + template <typename sel_prop> + struct pos_prop_sel + { + //! position vector + openfpm::vector<Point<dim, St>, PreAllocHeapMemory<2>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity> pos; + //! properties vector + openfpm::vector<sel_prop, PreAllocHeapMemory<2>, typename memory_traits_lin<sel_prop>::type, memory_traits_lin, openfpm::grow_policy_identity> prp; + }; + + //! definition of the send vector for position + typedef openfpm::vector<Point<dim, St>, ExtPreAlloc<Memory>, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> send_pos_vector; + + bool is_shift_box_created = false; + + // this map is used to check if a combination is already present + std::unordered_map<size_t, size_t> map_cmb; + + // The boxes touching the border of the domain are divided in groups (first vector) + // each group contain internal ghost coming from sub-domains of the same section + openfpm::vector_std<openfpm::vector_std<Box<dim, St>>>box_f; + + // Store the sector for each group (previous vector) + openfpm::vector_std<comb<dim>> box_cmb; + + // + openfpm::vector<aggregate<size_t,size_t>> o_part_loc; + + /*! \brief For every internal ghost box we create a structure that order such internal local ghost box in + * shift vectors + * + * \param shifts vectors + * + */ + void createShiftBox() + { + if (is_shift_box_created == true) + return; + + // Add local particles coming from periodic boundary, the only boxes that count are the one + // touching the border, filter them + for (size_t i = 0; i < dec.getNLocalSub(); i++) + { + size_t Nl = dec.getLocalNIGhost(i); + + for (size_t j = 0; j < Nl; j++) + { + // If the ghost does not come from the intersection with an out of + // border sub-domain the combination is all zero and n_zero return dim + if (dec.getLocalIGhostPos(i, j).n_zero() == dim) + continue; + + // Check if we already have boxes with such combination + auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin()); + if (it == map_cmb.end()) + { + // we do not have it + box_f.add(); + box_f.last().add(dec.getLocalIGhostBox(i, j)); + box_cmb.add(dec.getLocalIGhostPos(i, j)); + map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1; + } + else + { + // we have it + box_f.get(it->second).add(dec.getLocalIGhostBox(i, j)); + } + + } + } + + is_shift_box_created = true; + } + + /*! \brief Local ghost from labeled particles + * + * + */ + void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp) + { + // get the shift vectors + const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); + + for (size_t i = 0 ; i < o_part_loc.size() ; i++) + { + size_t lin_id = o_part_loc.get<1>(i); + size_t key = o_part_loc.template get<0>(i); + + Point<dim, St> p = v_pos.get(key); + // shift + p -= shifts.get(lin_id); + + // add this particle shifting its position + v_pos.add(p); + v_prp.add(); + v_prp.last() = v_prp.get(key); + } + } + + /*! \brief Local ghost from decomposition + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \g_m ghost marker + * + */ + void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t g_m) + { + o_part_loc.clear(); + + // get the shift vectors + const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); + + // Label the internal (assigned) particles + auto it = v_pos.getIteratorTo(g_m); + + while (it.isNext()) + { + auto key = it.get(); + + // If particles are inside these boxes + for (size_t i = 0; i < box_f.size(); i++) + { + for (size_t j = 0; j < box_f.get(i).size(); j++) + { + if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true) + { + size_t lin_id = box_cmb.get(i).lin(); + + o_part_loc.add(); + o_part_loc.template get<0>(o_part_loc.size()-1) = key; + o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id; + + Point<dim, St> p = v_pos.get(key); + // shift + p -= shifts.get(lin_id); + + // add this particle shifting its position + v_pos.add(p); + v_prp.add(); + v_prp.last() = v_prp.get(key); + + // boxes in one group can be overlapping + // we do not have to search for the other + // boxes otherwise we will have duplicate particles + // + // A small note overlap of boxes across groups is fine + // (and needed) because each group has different shift + // producing non overlapping particles + // + break; + } + } + } + + ++it; + } + } + + /*! \brief Add local particles based on the boundary conditions + * + * In order to understand what this function use the following + * + \verbatim + + [1,1] + +---------+------------------------+---------+ + | (1,-1) | | (1,1) | + | | | (1,0) --> 7 | | | + | v | | v | + | 6 | | 8 | + +--------------------------------------------+ + | | | | + | | | | + | | | | + | (-1,0) | | (1,0) | + | | | | | | + | v | (0,0) --> 4 | v | + | 3 | | 5 | + | | | | + B | | | A | + * | | | * | + | | | | + | | | | + | | | | + +--------------------------------------------+ + | (-1,-1) | | (-1,1) | + | | | (-1,0) --> 1 | | | + | v | | v | + | 0 | | 2 | + +---------+------------------------+---------+ + + + \endverbatim + + * + * The box is the domain, while all boxes at the border (so not (0,0) ) are the + * ghost part at the border of the domain. If a particle A is in the position in figure + * a particle B must be created. This function duplicate the particle A, if A and B are + * local + * + * \param v_pos vector of particle of positions + * \param v_prp vector of particle properties + * \param g_m ghost marker + * \param opt options + * + */ + void add_loc_particles_bc(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp ,size_t & g_m, size_t opt) + { + // Create the shift boxes + createShiftBox(); + + if (box_f.size() == 0) + return; + else + { + if (opt & SKIP_LABELLING) + local_ghost_from_opart(v_pos,v_prp); + else + local_ghost_from_dec(v_pos,v_prp,g_m); + } + } + + /*! \brief This function fill the send buffer for the particle position after the particles has been label with labelParticles + * + * \param v_pos vector of particle positions + * \param g_pos_send Send buffer to fill + * \param prAlloc_pos Memory object for the send buffer + * + */ + void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>> & v_pos,openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos) + { + // get the shift vectors + const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); + + // create a number of send buffers equal to the near processors + g_pos_send.resize(ghost_prc_sz.size()); + for (size_t i = 0; i < g_pos_send.size(); i++) + { + // set the preallocated memory to ensure contiguity + g_pos_send.get(i).setMemory(*prAlloc_pos); + + // resize the sending vector (No allocation is produced) + g_pos_send.get(i).resize(ghost_prc_sz.get(i)); + } + + // Fill the send buffer + for (size_t i = 0; i < opart.size(); i++) + { + for (size_t j = 0; j < opart.get(i).size(); j++) + { + Point<dim, St> s = v_pos.get(opart.get(i).get(j)); + s -= shifts.get(oshift.get(i).get(j)); + g_pos_send.get(i).set(j, s); + } + } + } + + /*! \brief This function fill the send buffer for properties after the particles has been label with labelParticles + * + * \tparam send_vector type used to send data + * \tparam prp_object object containing only the properties to send + * \tparam prp set of properties to send + * + * \param v_prp vector of particle properties + * \param g_send_prp Send buffer to fill + * \param prAlloc_prp Memory object for the send buffer + * + */ + template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp) + { + // create a number of send buffers equal to the near processors + g_send_prp.resize(ghost_prc_sz.size()); + for (size_t i = 0; i < g_send_prp.size(); i++) + { + // set the preallocated memory to ensure contiguity + g_send_prp.get(i).setMemory(*prAlloc_prp); + + // resize the sending vector (No allocation is produced) + g_send_prp.get(i).resize(ghost_prc_sz.get(i)); + } + + // Fill the send buffer + for (size_t i = 0; i < opart.size(); i++) + { + for (size_t j = 0; j < opart.get(i).size(); j++) + { + // source object type + typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src; + // destination object type + typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst; + + // Copy only the selected properties + object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(opart.get(i).get(j)), g_send_prp.get(i).get(j)); + } + } + } + + /*! \brief allocate and fill the send buffer for the map function + * + * \param v_pos vector of particle positions + * \param v_prp vector of particles properties + * \param prc_r List of processor rank involved in the send + * \param prc_sz_r For each processor in the list the size of the message to send + * \param pb send buffer + * + */ + void fill_send_map_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp,openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop> & pb) + { + pb.resize(prc_r.size()); + + for (size_t i = 0; i < prc_r.size(); i++) + { + // Create the size required to store the particles position and properties to communicate + size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); + size_t s2 = openfpm::vector<prop, HeapMemory, typename memory_traits_lin<prop>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); + + // Preallocate the memory + size_t sz[2] = { s1, s2 }; + PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz); + + // Set the memory allocator + pb.get(i).pos.setMemory(*mem); + pb.get(i).prp.setMemory(*mem); + + // set the size and allocate, using mem warant that pos and prp is contiguous + pb.get(i).pos.resize(prc_sz_r.get(i)); + pb.get(i).prp.resize(prc_sz_r.get(i)); + } + + // Run through all the particles and fill the sending buffer + + for (size_t i = 0; i < opart.size(); i++) + { + auto it = opart.get(i).getIterator(); + size_t lbl = p_map_req.get(i); + + while (it.isNext()) + { + size_t key = it.get(); + size_t id = opart.get(i).get(key); + + pb.get(lbl).pos.set(key, v_pos.get(id)); + pb.get(lbl).prp.set(key, v_prp.get(id)); + + ++it; + } + } + } + + /*! \brief allocate and fill the send buffer for the map function + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param prc_r List of processor rank involved in the send + * \param prc_sz_r For each processor in the list the size of the message to send + * \param pb send buffer + * + */ + template<typename prp_object,int ... prp> void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop_sel<prp_object>> & pb) + { + pb.resize(prc_r.size()); + + for (size_t i = 0; i < prc_r.size(); i++) + { + // Create the size required to store the particles position and properties to communicate + size_t s1 = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); + size_t s2 = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(prc_sz_r.get(i), 0); + + // Preallocate the memory + size_t sz[2] = { s1, s2 }; + PreAllocHeapMemory<2> * mem = new PreAllocHeapMemory<2>(sz); + + // Set the memory allocator + pb.get(i).pos.setMemory(*mem); + pb.get(i).prp.setMemory(*mem); + + // set the size and allocate, using mem warant that pos and prp is contiguous + pb.get(i).pos.resize(prc_sz_r.get(i)); + pb.get(i).prp.resize(prc_sz_r.get(i)); + } + + // Run through all the particles and fill the sending buffer + + for (size_t i = 0; i < opart.size(); i++) + { + auto it = opart.get(i).getIterator(); + size_t lbl = p_map_req.get(i); + + while (it.isNext()) + { + size_t key = it.get(); + size_t id = opart.get(i).get(key); + + pb.get(lbl).pos.set(key, v_pos.get(id)); + + // source object type + typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src; + // destination object type + typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst; + + // Copy only the selected properties + object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), pb.get(lbl).prp.get(key)); + + ++it; + } + } + } + + /*! \brief Label particles for mappings + * + * \param v_pos vector of particle positions + * \param lbl_p Particle labeled + * \param prc_sz For each processor the number of particles to send + * \param opart id of the particles to send + * + */ + template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>> & v_pos,openfpm::vector<openfpm::vector<size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz, openfpm::vector<size_t> & opart) + { + // reset lbl_p + lbl_p.resize(v_cl.getProcessingUnits()); + for (size_t i = 0; i < lbl_p.size(); i++) + lbl_p.get(i).clear(); + + // resize the label buffer + prc_sz.resize(v_cl.getProcessingUnits()); + + auto it = v_pos.getIterator(); + + // Label all the particles with the processor id where they should go + while (it.isNext()) + { + auto key = it.get(); + + // Apply the boundary conditions + dec.applyPointBC(v_pos.get(key)); + + size_t p_id = 0; + + // Check if the particle is inside the domain + if (dec.getDomain().isInside(v_pos.get(key)) == true) + p_id = dec.processorIDBC(v_pos.get(key)); + else + p_id = obp::out(key, v_cl.getProcessUnitID()); + + // Particle to move + if (p_id != v_cl.getProcessUnitID()) + { + if ((long int) p_id != -1) + { + prc_sz.get(p_id)++;lbl_p + .get(p_id).add(key); + opart.add(key); + } + else + { + opart.add(key); + } + } + + // Add processors and add size + + ++it; + } + } + + /*! \brief This function process the receiced data for the properties and populate the ghost + * + * \tparam send_vector type used to send data + * \tparam prp_object object containing only the properties to send + * \tparam prp set of properties to send + * + * \param v_prp vector of particle properties + * \param g_m ghost marker + * + */ + template<typename send_vector, typename prp_object, int ... prp> void process_received_ghost_prp(openfpm::vector<prop> & v_prp, size_t & g_m) + { + // Mark the ghost part + g_m = v_prp.size(); + + // Process the received data (recv_mem_gg != 0 if you have data) + for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) + { + // calculate the number of received elements + size_t n_ele = recv_sz.get(i) / sizeof(prp_object); + + // add the received particles to the vector + PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); + + // create vector representation to a piece of memory already allocated + openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity> v2; + + v2.setMemory(*ptr1); + + // resize with the number of elements + v2.resize(n_ele); + + // Add the ghost particle + v_prp.template add_prp<prp_object, PtrMemory, openfpm::grow_policy_identity, prp...>(v2); + } + } + + /*! \brief This function process the received data for the properties and populate the ghost + * + * \param v_pos vector of particle positions + * + */ + void process_received_ghost_pos(openfpm::vector<Point<dim, St>> & v_pos) + { + // Process the received data (recv_mem_gg != 0 if you have data) + for (size_t i = 0; i < dec.getNNProcessors() && recv_mem_gg.size() != 0; i++) + { + // calculate the number of received elements + size_t n_ele = recv_sz.get(i) / sizeof(Point<dim, St> ); + + // add the received particles to the vector + PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(), recv_sz.get(i)); + + // create vector representation to a piece of memory already allocated + + openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin , openfpm::grow_policy_identity> v2; + + v2.setMemory(*ptr1); + + // resize with the number of elements + v2.resize(n_ele); + + // Add the ghost particle + v_pos.template add<PtrMemory, openfpm::grow_policy_identity>(v2); + } + } + + /*! \brief Process the received particles + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param out_part list of the out-going particles + * + */ + void process_received_map(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp,openfpm::vector<size_t> & out_part) + { + size_t o_p_id = 0; + + for (size_t i = 0; i < recv_mem_gm.size(); i++) + { + // Get the number of elements + + size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prop)); + + // Pointer of the received positions for each near processor + void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer(); + // Pointer of the received properties for each near processor + void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> ); + + PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> )); + PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prop)); + + // create vector representation to a piece of memory already allocated + + openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos; + openfpm::vector<prop, PtrMemory, typename memory_traits_lin<prop>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp; + + vpos.setMemory(*ptr1); + vprp.setMemory(*ptr2); + + vpos.resize(n_ele); + vprp.resize(n_ele); + + // Add the received particles to v_pos and v_prp + + size_t j = 0; + for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++) + { + v_pos.set(out_part.get(o_p_id), vpos.get(j)); + v_prp.set(out_part.get(o_p_id), vprp.get(j)); + } + + for (; j < vpos.size(); j++) + { + v_pos.add(); + v_pos.set(v_pos.size() - 1, vpos.get(j)); + v_prp.add(); + v_prp.set(v_prp.size() - 1, vprp.get(j)); + } + } + + // remove the (out-going particles) in the vector + + v_pos.remove(out_part, o_p_id); + v_prp.remove(out_part, o_p_id); + } + + /*! \brief Process the received particles + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param out_part list of the out-going particles + * + */ + template<typename prp_object , int ... prp> void process_received_map_list(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & out_part) + { + size_t o_p_id = 0; + + for (size_t i = 0; i < recv_mem_gm.size(); i++) + { + // Get the number of elements + + size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim, St> ) + sizeof(prp_object)); + + // Pointer of the received positions for each near processor + void * ptr_pos = (unsigned char *) recv_mem_gm.get(i).getPointer(); + // Pointer of the received properties for each near processor + void * ptr_prp = (unsigned char *) recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim, St> ); + + PtrMemory * ptr1 = new PtrMemory(ptr_pos, n_ele * sizeof(Point<dim, St> )); + PtrMemory * ptr2 = new PtrMemory(ptr_prp, n_ele * sizeof(prp_object)); + + // create vector representation to a piece of memory already allocated + + openfpm::vector<Point<dim, St>, PtrMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin ,openfpm::grow_policy_identity> vpos; + openfpm::vector<prp_object, PtrMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin ,openfpm::grow_policy_identity> vprp; + + vpos.setMemory(*ptr1); + vprp.setMemory(*ptr2); + + vpos.resize(n_ele); + vprp.resize(n_ele); + + // Add the received particles to v_pos and v_prp + + size_t j = 0; + for (; j < vpos.size() && o_p_id < out_part.size(); j++, o_p_id++) + { + v_pos.set(out_part.get(o_p_id), vpos.get(j)); + v_prp.template set_o<decltype(vprp.get(j)), prp... >(out_part.get(o_p_id), vprp.get(j)); + } + + for (; j < vpos.size(); j++) + { + v_pos.add(); + v_pos.set(v_pos.size() - 1, vpos.get(j)); + v_prp.template set_o<decltype(vprp.get(j)), prp... >(v_prp.size() - 1, vprp.get(j)); + } + } + + // remove the (out-going particles) in the vector + + v_pos.remove(out_part, o_p_id); + v_prp.remove(out_part, o_p_id); + } + + /*! \brief Calculate send buffers total size and allocation + * + * \tparam prp_object object containing only the properties to send + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param size_byte_prp total size for the property buffer + * \param size_byte_pos total size for the position buffer + * + */ + template<typename prp_object> void calc_send_ghost_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & size_byte_prp, size_t & size_byte_pos) + { + // Calculate the total size required for the sending buffer + for (size_t i = 0; i < ghost_prc_sz.size(); i++) + { + size_t alloc_ele = openfpm::vector<prp_object, HeapMemory, typename memory_traits_lin<prp_object>::type, memory_traits_lin , openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); + size_byte_prp += alloc_ele; + + alloc_ele = openfpm::vector<Point<dim, St>, HeapMemory, typename memory_traits_lin<Point<dim, St>>::type, memory_traits_lin, openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i), 0); + size_byte_pos += alloc_ele; + } + } + + /*! \brief Label the particles + * + * It count the number of particle to send to each processors and save its ids + * + * \see nn_prcs::getShiftvectors() + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param g_m ghost marker + * + */ + void labelParticlesGhost(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + { + // Buffer that contain the number of elements to send for each processor + ghost_prc_sz.clear(); + ghost_prc_sz.resize(dec.getNNProcessors()); + + // Buffer that contain for each processor the id of the particle to send + opart.clear(); + opart.resize(dec.getNNProcessors()); + + // Buffer that contain for each processor the id of the shift vector + oshift.clear(); + oshift.resize(dec.getNNProcessors()); + + // Iterate over all particles + auto it = v_pos.getIteratorTo(g_m); + while (it.isNext()) + { + auto key = it.get(); + + // Given a particle, it return which processor require it (first id) and shift id, second id + // For an explanation about shifts vectors please consult getShiftVector in ie_ghost + const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE); + + for (size_t i = 0; i < vp_id.size(); i++) + { + // processor id + size_t p_id = vp_id.get(i).first; + + // add particle to communicate + ghost_prc_sz.get(p_id)++; + opart.get(p_id).add(key); + oshift.get(p_id).add(vp_id.get(i).second); + } + + ++it; + } + } + + /*! \brief Call-back to allocate buffer to receive incoming elements (particles) + * + * \param msg_i message size required to receive from i + * \param total_msg message size to receive from all the processors + * \param total_p the total number of processor want to communicate with you + * \param i processor id + * \param ri request id (it is an id that goes from 0 to total_p, and is unique + * every time message_alloc is called) + * \param ptr void pointer parameter for additional data to pass to the call-back + * + */ + static void * msg_alloc_ghost_get(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) + { + vector_dist_comm<dim, St, prop, Decomposition, Memory> * v = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr); + + v->recv_sz.resize(v->dec.getNNProcessors()); + v->recv_mem_gg.resize(v->dec.getNNProcessors()); + + // Get the local processor id + size_t lc_id = v->dec.ProctoID(i); + + // resize the receive buffer + v->recv_mem_gg.get(lc_id).resize(msg_i); + v->recv_sz.get(lc_id) = msg_i; + + return v->recv_mem_gg.get(lc_id).getPointer(); + } + + + /*! \brief Call-back to allocate buffer to receive incoming elements (particles) + * + * \param msg_i size required to receive the message from i + * \param total_msg total size to receive from all the processors + * \param total_p the total number of processor that want to communicate with you + * \param i processor id + * \param ri request id (it is an id that goes from 0 to total_p, and is unique + * every time message_alloc is called) + * \param ptr a pointer to the vector_dist structure + * + * \return the pointer where to store the message for the processor i + * + */ + static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) + { + // cast the pointer + vector_dist_comm<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr); + + vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits()); + vd->recv_mem_gm.get(i).resize(msg_i); + + return vd->recv_mem_gm.get(i).getPointer(); + } + +public: + + /*! \brief Constructor + * + * \param dec Domain decompositon + * + */ + vector_dist_comm(const Decomposition & dec) + :v_cl(create_vcluster()),dec(dec) + { + + } + + /*! \brief Constructor + * + * \param dec Domain decompositon + * + */ + vector_dist_comm(Decomposition && dec) + :v_cl(create_vcluster),dec(dec) + { + + } + + /*! \brief Constructor + * + */ + vector_dist_comm() + :v_cl(create_vcluster()),dec(create_vcluster()) + { + } + + /*! \brief Get the number of minimum sub-domain + * + * \return minimum number + * + */ + static size_t getDefaultNsubsub() + { + return V_SUB_UNIT_FACTOR; + } + + /*! \brief Initialize the decomposition + * + * \param box domain + * \param bc boundary conditions + * \param g ghost extension + * + */ + void init_decomposition(Box<dim,St> & box, const size_t (& bc)[dim],const Ghost<dim,St> & g) + { + // Create a valid decomposition of the space + // Get the number of processor and calculate the number of sub-domain + // for decomposition + size_t n_proc = v_cl.getProcessingUnits(); + size_t n_sub = n_proc * getDefaultNsubsub(); + + // Calculate the maximum number (before merging) of sub-domain on + // each dimension + size_t div[dim]; + for (size_t i = 0; i < dim; i++) + { + div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim)); + } + + // Create the sub-domains + dec.setParameters(div, box, bc, g); + dec.decompose(); + } + + /*! \brief It synchronize the properties and position of the ghost particles + * + * \tparam prp list of properties to get synchronize + * + * \param opt options WITH_POSITION, it send also the positional information of the particles + * \param v_pos vector of position to update + * \param v_prp vector of properties to update + * \param g_m marker between real and ghost particles + * + */ + template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = WITH_POSITION) + { + // Unload receive buffer + for (size_t i = 0 ; i < recv_mem_gg.size() ; i++) + recv_sz.get(i) = 0; + + // Sending property object + typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; + + // send vector for each processor + typedef openfpm::vector<prp_object, ExtPreAlloc<Memory>, typename memory_traits_lin<prp_object>::type, memory_traits_lin, openfpm::grow_policy_identity> send_vector; + + // reset the ghost part + v_pos.resize(g_m); + v_prp.resize(g_m); + + // Label all the particles + if ((opt & SKIP_LABELLING) == false) + labelParticlesGhost(v_pos,v_prp,g_m); + + // Calculate memory and allocation for the send buffers + + // Total size + size_t size_byte_prp = 0; + size_t size_byte_pos = 0; + + calc_send_ghost_buf<prp_object>(v_pos,v_prp,size_byte_prp, size_byte_pos); + + // Create memory for the send buffer + + g_prp_mem.resize(size_byte_prp); + if (opt != NO_POSITION) + g_pos_mem.resize(size_byte_pos); + + // Create and fill send buffer for particle properties + + ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(size_byte_prp, g_prp_mem); + + openfpm::vector<send_vector> g_send_prp; + fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(v_prp,g_send_prp, prAlloc_prp); + + // Create and fill the send buffer for the particle position + + ExtPreAlloc<Memory> * prAlloc_pos; + openfpm::vector<send_pos_vector> g_pos_send; + if (opt != NO_POSITION) + { + prAlloc_pos = new ExtPreAlloc<Memory>(size_byte_pos, g_pos_mem); + fill_send_ghost_pos_buf(v_pos,g_pos_send, prAlloc_pos); + } + + // Create processor list + openfpm::vector<size_t> prc; + for (size_t i = 0; i < opart.size(); i++) + prc.add(dec.IDtoProc(i)); + + // Send/receive the particle properties information + v_cl.sendrecvMultipleMessagesNBX(prc, g_send_prp, msg_alloc_ghost_get, this); + process_received_ghost_prp<send_vector, prp_object, prp...>(v_prp,g_m); + + if (opt != NO_POSITION) + { + // Send/receive the particle properties information + v_cl.sendrecvMultipleMessagesNBX(prc, g_pos_send, msg_alloc_ghost_get, this); + process_received_ghost_pos(v_pos); + } + + add_loc_particles_bc(v_pos,v_prp,g_m,opt); + } + + + /*! \brief It move all the particles that does not belong to the local processor to the respective processor + * + * \tparam out of bound policy it specify what to do when the particles are detected out of bound + * + * In general this function is called after moving the particles to move the + * elements out the local processor. Or just after initialization if each processor + * contain non local particles + * + * \tparam prp properties to communicate + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param g_m ghost marker + * + */ + template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + { + typedef KillParticle obp; + + // outgoing particles-id + openfpm::vector<size_t> out_part; + + // Processor communication size + openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); + + // It contain the list of the processors this processor should to communicate with + openfpm::vector<size_t> p_list; + + // map completely reset the ghost part + v_pos.resize(g_m); + v_prp.resize(g_m); + + // Contain the processor id of each particle (basically where they have to go) + labelParticleProcessor<obp>(v_pos,opart, prc_sz, out_part); + + // Calculate the sending buffer size for each processor, put this information in + // a contiguous buffer + p_map_req.resize(v_cl.getProcessingUnits()); + openfpm::vector<size_t> prc_sz_r; + openfpm::vector<size_t> prc_r; + + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + if (prc_sz.get(i) != 0) + { + p_map_req.get(i) = prc_r.size(); + prc_r.add(i); + prc_sz_r.add(prc_sz.get(i)); + } + } + + // Sending property object + typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; + + // Allocate the send buffers + + openfpm::vector<pos_prop_sel<prp_object>> pb; + + // fill the send buffers + fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_r, prc_sz_r, pb); + + // Create the set of pointers + openfpm::vector<void *> ptr(prc_r.size()); + for (size_t i = 0; i < prc_r.size(); i++) + { + ptr.get(i) = pb.get(i).pos.getPointer(); + } + + // convert the particle number to buffer size + for (size_t i = 0; i < prc_sz_r.size(); i++) + { + prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prp_object) + sizeof(Point<dim, St> )); + } + + // Send and receive the particles + + recv_mem_gm.clear(); + v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist_comm::message_alloc_map, this, NEED_ALL_SIZE); + + // Process the incoming particles + + process_received_map_list<prp_object, prp...>(v_pos,v_prp,out_part); + + // mark the ghost part + + g_m = v_pos.size(); + } + + /*! \brief It move all the particles that does not belong to the local processor to the respective processor + * + * \tparam out of bound policy it specify what to do when the particles are detected out of bound + * + * In general this function is called after moving the particles to move the + * elements out the local processor. Or just after initialization if each processor + * contain non local particles + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param g_m ghost marker + * + */ + template<typename obp = KillParticle> void map_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + { + // outgoing particles-id + openfpm::vector<size_t> out_part; + + // Processor communication size + openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); + + // It contain the list of the processors this processor should to communicate with + openfpm::vector<size_t> p_list; + + // map completely reset the ghost part + v_pos.resize(g_m); + v_prp.resize(g_m); + + // Contain the processor id of each particle (basically where they have to go) + labelParticleProcessor<obp>(v_pos,opart, prc_sz, out_part); + + // Calculate the sending buffer size for each processor, put this information in + // a contiguous buffer + p_map_req.resize(v_cl.getProcessingUnits()); + openfpm::vector<size_t> prc_sz_r; + openfpm::vector<size_t> prc_r; + + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + if (prc_sz.get(i) != 0) + { + p_map_req.get(i) = prc_r.size(); + prc_r.add(i); + prc_sz_r.add(prc_sz.get(i)); + } + } + + // Allocate the send buffers + + openfpm::vector<pos_prop> pb; + + // fill the send buffers + fill_send_map_buf(v_pos,v_prp,prc_r, prc_sz_r, pb); + + // Create the set of pointers + openfpm::vector<void *> ptr(prc_r.size()); + for (size_t i = 0; i < prc_r.size(); i++) + { + ptr.get(i) = pb.get(i).pos.getPointer(); + } + + // convert the particle number to buffer size + for (size_t i = 0; i < prc_sz_r.size(); i++) + { + prc_sz_r.get(i) = prc_sz_r.get(i) * (sizeof(prop) + sizeof(Point<dim, St> )); + } + + // Send and receive the particles + + recv_mem_gm.clear(); + v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *) prc_sz_r.getPointer(), (size_t *) prc_r.getPointer(), (void **) ptr.getPointer(), vector_dist_comm::message_alloc_map, this, NEED_ALL_SIZE); + + // Process the incoming particles + + process_received_map(v_pos,v_prp,out_part); + + // mark the ghost part + + g_m = v_pos.size(); + } + + /*! \brief Get the decomposition + * + * \return + * + */ + inline Decomposition & getDecomposition() + { + return dec; + } + + /*! \brief Copy a vector + * + * \param vc vector to copy + * + */ + vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory> & vc) + { + dec = vc.dec; + + return *this; + } + + /*! \brief Copy a vector + * + * \param vc vector to copy + * + */ + vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory> && vc) + { + dec.swap(vc.dec); + + return *this; + } +}; + + +#endif /* SRC_VECTOR_VECTOR_DIST_COMM_HPP_ */ diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp index 6c300d2643b599a2b4d47dedccf9136e41b549a6..f3bb94365c6d065ea84ed48e2c395767c3975b98 100644 --- a/src/Vector/vector_dist_unit_test.hpp +++ b/src/Vector/vector_dist_unit_test.hpp @@ -1389,7 +1389,192 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_map_list ) } } + +BOOST_AUTO_TEST_CASE( vector_dist_ghost_with_ghost_buffering ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 3) + return; + + // set the seed + // create the random generator engine + std::srand(v_cl.getProcessUnitID()); + std::default_random_engine eg; + std::uniform_real_distribution<float> ud(0.0f, 1.0f); + + long int k = 524288 * v_cl.getProcessingUnits(); + + long int big_step = k / 4; + big_step = (big_step == 0)?1:big_step; + + print_test("Testing 3D periodic vector with ghost buffering k=",k); + BOOST_TEST_CHECKPOINT( "Testing 3D periodic with ghost buffering k=" << k ); + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.1); + + typedef aggregate<float> part_prop; + + // Distributed vector + vector_dist<3,float, part_prop > vd(k,box,bc,ghost); + + auto it = vd.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.getPos(key)[0] = ud(eg); + vd.getPos(key)[1] = ud(eg); + vd.getPos(key)[2] = ud(eg); + + // Fill some properties randomly + + vd.getProp<0>(key) = 0.0; + + ++it; + } + + vd.map(); + + // sync the ghost + vd.ghost_get<0>(); + + openfpm::vector<size_t> list_idx; + openfpm::vector<size_t> list_idx2; + + auto it3 = vd.getGhostIterator(); + while (it3.isNext()) + { + auto key = it3.get(); + + list_idx.add(key.getKey()); + + ++it3; + } + + list_idx.sort(); + + for (size_t i = 0 ; i < 10 ; i++) + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.getPos(key)[0] = ud(eg); + vd.getPos(key)[1] = ud(eg); + vd.getPos(key)[2] = ud(eg); + + // Fill some properties randomly + + vd.getProp<0>(key) = i; + + ++it; + } + + vd.ghost_get<0>(SKIP_LABELLING); + + list_idx2.clear(); + auto it2 = vd.getGhostIterator(); + bool ret = true; + + while (it2.isNext()) + { + auto key = it2.get(); + + list_idx2.add(key.getKey()); + ret &= vd.getProp<0>(key) == i; + + ++it2; + } + + BOOST_REQUIRE_EQUAL(ret,true); + BOOST_REQUIRE_EQUAL(list_idx.size(),list_idx2.size()); + + list_idx2.sort(); + + ret = true; + for (size_t i = 0 ; i < list_idx.size() ; i++) + ret &= list_idx.get(i) == list_idx2.get(i); + + BOOST_REQUIRE_EQUAL(ret,true); + } + + vd.map(); + vd.ghost_get<0>(); + + list_idx.clear(); + + auto it4 = vd.getGhostIterator(); + while (it4.isNext()) + { + auto key = it4.get(); + + list_idx.add(key.getKey()); + + ++it4; + } + + list_idx.sort(); + + for (size_t i = 0 ; i < 10 ; i++) + { + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + vd.getPos(key)[0] = ud(eg); + vd.getPos(key)[1] = ud(eg); + vd.getPos(key)[2] = ud(eg); + + // Fill some properties randomly + + vd.getProp<0>(key) = i; + + ++it; + } + + vd.ghost_get<0>(SKIP_LABELLING); + + list_idx2.clear(); + auto it2 = vd.getGhostIterator(); + bool ret = true; + + while (it2.isNext()) + { + auto key = it2.get(); + + list_idx2.add(key.getKey()); + ret &= vd.getProp<0>(key) == i; + + ++it2; + } + + BOOST_REQUIRE_EQUAL(ret,true); + BOOST_REQUIRE_EQUAL(list_idx.size(),list_idx2.size()); + + list_idx2.sort(); + + ret = true; + for (size_t i = 0 ; i < list_idx.size() ; i++) + ret &= list_idx.get(i) == list_idx2.get(i); + + BOOST_REQUIRE_EQUAL(ret,true); + } +} + #include "vector_dist_cell_list_tests.hpp" +#include "vector_dist_NN_tests.hpp" BOOST_AUTO_TEST_SUITE_END() diff --git a/src/pdata_performance.hpp b/src/pdata_performance.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8c74c90fbee1027354f5058672cef96d41d2a5d --- /dev/null +++ b/src/pdata_performance.hpp @@ -0,0 +1,20 @@ +/* + * performance.hpp + * + * Created on: Mar 9, 2016 + * Author: yaroslav + */ + +#ifndef SRC_PDATA_PERFORMANCE_HPP_ +#define SRC_PDATA_PERFORMANCE_HPP_ + +BOOST_AUTO_TEST_SUITE( performance ) + +#include "Vector/vector_dist_verlet_performance_tests.hpp" +#include "Vector/vector_dist_cl_performance_tests.hpp" +#include "Vector/vector_dist_cl_hilb_performance_tests.hpp" + + +BOOST_AUTO_TEST_SUITE_END() + +#endif /* SRC_PDATA_PERFORMANCE_HPP_ */