From 0c66b3b5a82764046b40050c988efdecc3c94d08 Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Wed, 8 Feb 2017 10:23:21 +0100 Subject: [PATCH] Several fixes for DLB --- example/Vector/7_SPH_dlb/main.cpp | 344 +++++++++++------- src/Decomposition/CartDecomposition.hpp | 10 +- .../Domain_NN_calculator_cart.hpp | 36 +- src/Vector/vector_dist.hpp | 148 +++++++- 4 files changed, 376 insertions(+), 162 deletions(-) diff --git a/example/Vector/7_SPH_dlb/main.cpp b/example/Vector/7_SPH_dlb/main.cpp index 83c5a0a0..c7ada2e6 100644 --- a/example/Vector/7_SPH_dlb/main.cpp +++ b/example/Vector/7_SPH_dlb/main.cpp @@ -12,7 +12,11 @@ * Load balancing and Dynamic load balancing we indicate the possibility of the system to re-adapt the domain * decomposition to keep all the processor load and reduce idle time. * - * ## inclusion ## {#e0_v_inclusion} + * \htmlonly + * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/> + * \endhtmlonly + * + * ## Inclusion ## {#e7_sph_inclusion} * * In order to use distributed vectors in our code we have to include the file Vector/vector_dist.hpp * we also include DrawParticles that has nice utilities to draw particles in parallel accordingly @@ -34,7 +38,7 @@ /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balacing * - * ## Parameters {#e7_sph_parameters} + * ## SPH simulation {#e7_sph_parameters} * * The SPH formulation used in this example code follow these equations * @@ -59,10 +63,15 @@ * \f$ H = \sqrt{3 \cdot dp} \tag{7} \f$ * * Explain the equations is out of the context of this tutorial. An introduction - * can be found in the original Monghagan SPH paper. In this example we use the version + * can be found regarding SPH in general in the original Monghagan SPH paper. + * In this example we use the sligtly modified version * used by Dual-SPH (http://www.dual.sphysics.org/). A summary of the equation and constants can be founded in * their User Manual and the XML user Manual. - * In the following we define all the constants required by the simulation + * + * ### Parameters {#e7_sph_parameters} + * + * Based on the equation + * reported before several constants must be defined. * * \snippet Vector/7_SPH_dlb/main.cpp sim parameters * @@ -79,10 +88,10 @@ // initial spacing between particles dp in the formulas const double dp = 0.0085; // Maximum height of the fluid water -// is coing to be calculated and filled later on +// is going to be calculated and filled later on double h_swl = 0.0; -// in the formulas indicated with c_s (constant used to calculate the sound speed) +// c_s in the formulas (constant used to calculate the sound speed) const double coeff_sound = 20.0; // gamma in the formulas @@ -94,8 +103,10 @@ const double H = 0.0147224318643; // Eta in the formulas const double Eta2 = 0.01 * H*H; - +// alpha in the formula const double visco = 0.1; + +// cbar in the formula (calculated later) double cbar = 0.0; // Mass of the fluid particles @@ -157,6 +168,8 @@ const int velocity = 6; // velocity at previous step const int velocity_prev = 7; +/*! \cond [sim parameters] \endcond */ + /*! 
\cond [vector_dist_def] \endcond */ // Type of the vector containing particles @@ -175,9 +188,9 @@ struct ModelCustom template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec, const vector & vd, size_t v, size_t p) { if (vd.template getProp<type>(p) == FLUID) - dec.addComputationCost(v,3); + dec.addComputationCost(v,4); else - dec.addComputationCost(v,2); + dec.addComputationCost(v,3); } template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v) @@ -191,7 +204,7 @@ struct ModelCustom /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balacing * - * ## Equation of state and SPH Kernels {#e7_sph_equation_state} + * ### Equation of state {#e7_sph_equation_state} * * This function implement the formula 3 in the set of equations. It calculate the * pressure of each particle based on the local density of each particle. @@ -225,7 +238,9 @@ inline void EqState(particles & vd) /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * This function define the Cubic kernel or \f$ W_{ab} \f$ in the set of equations. The cubic kernel is + * ### Cubic SPH kernel and derivatives {#e7_sph_kernel} + * + * This function define the Cubic kernel or \f$ W_{ab} \f$. The cubic kernel is * defined as * * \f$ \begin{cases} 1.0 - \frac{3}{2} q^2 + \frac{3}{4} q^3 & 0 < q < 1 \\ (2 - q)^3 & 1 < q < 2 \\ 0 & q > 2 \end{cases} \f$ @@ -255,7 +270,7 @@ inline double Wab(double r) /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * This function define the derivative of the Cubic kernel function \f$ W_{ab} \f$ in the set of equations. + * This function define the gradient of the Cubic kernel function \f$ W_{ab} \f$. * * \f$ \nabla W_{ab} = \beta (x,y,z) \f$ * @@ -309,6 +324,8 @@ inline void DWab(Point<3,double> & dx, Point<3,double> & DW, double r, bool prin /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * + * ### Tensile correction {#e7_sph_tensile} + * * This function define the Tensile term. An explanation of the Tensile term is out of the * context of this tutorial, but in brief is an additional repulsive term that avoid the particles * to get too near. Can be considered at small scale like a repulsive force that avoid @@ -359,7 +376,9 @@ inline double Tensile(double r, double rhoa, double rhob, double prs1, double pr * * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * This function is the implementation of the viscous term \f$ \Pi_{ab} \f$ + * ### Viscous term {#e7_sph_viscous} + * + * This function implement the viscous term \f$ \Pi_{ab} \f$ * * \snippet Vector/7_SPH_dlb/main.cpp viscous_term * @@ -392,7 +411,7 @@ inline double Pi(const Point<3,double> & dr, double rr2, Point<3,double> & dv, d * * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * ## Force calculation {#e7_force_calc} + * ### Force calculation {#e7_force_calc} * * Calculate forces. It calculate equation 1 and 2 in the set of formulas * @@ -555,7 +574,10 @@ template<typename CellList> inline double calc_forces(particles & vd, CellList & * * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * + * ### Integration and dynamic time integration {#e7_delta_time_t} + * * This function calculate the Maximum acceleration and velocity across the particles. + * It is used to calculate a dynamic time-stepping. 
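+ *
+ * A time-step restriction of the kind used by Dual-SPHysics-style codes (the exact expression used
+ * here is implemented in calc_deltaT below) combines a force criterion and a Courant/viscous criterion:
+ *
+ * \f$ \delta t_f = \min_a \sqrt{H / \|\vec{f}_a\|} \qquad \delta t_{cv} = \min_a \frac{H}{c_s + \max_b \left|\frac{H \vec{v}_{ab} \cdot \vec{r}_{ab}}{r_{ab}^2}\right|} \qquad \delta t = CFL \cdot \min(\delta t_f, \delta t_{cv}) \f$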
* * \snippet Vector/7_SPH_dlb/main.cpp max_acc_vel * @@ -589,6 +611,11 @@ void max_acceleration_and_velocity(particles & vd, double & max_acc, double & ma } max_acc = sqrt(max_acc); max_vel = sqrt(max_vel); + + Vcluster & v_cl = create_vcluster(); + v_cl.max(max_acc); + v_cl.max(max_vel); + v_cl.execute(); } /*! \cond [max_acc_vel] \endcond */ @@ -658,7 +685,7 @@ double calc_deltaT(particles & vd, double ViscDtMax) * * \f$ \rho_a^{n+1} = \rho_a^n + \delta t D_a^n \f$ * - * More the integration this function also check that no particles go outside the simulation + * This function also check that no particles go outside the simulation * domain or their density go dangerously out of range. If a particle go out of range is removed * from the simulation * @@ -673,39 +700,34 @@ openfpm::vector<size_t> to_remove; size_t cnt = 0; -void verlet_int(particles & vd, double dt, bool VerletStep) +void verlet_int(particles & vd, double dt) { + // list of the particle to remove to_remove.clear(); - // Calculate the maximum acceleration + // particle iterator auto part = vd.getDomainIterator(); double dt205 = dt*dt*0.5; double dt2 = dt*2.0; + // For each particle ... while (part.isNext()) { + // ... a auto a = part.get(); + // if the particle is boundary if (vd.template getProp<type>(a) == BOUNDARY) { + // Update rho double rhop = vd.template getProp<rho>(a); // Update only the density - if (VerletStep == true) - { - vd.template getProp<velocity>(a)[0] = 0.0; - vd.template getProp<velocity>(a)[1] = 0.0; - vd.template getProp<velocity>(a)[2] = 0.0; - vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); - } - else - { - vd.template getProp<velocity>(a)[0] = 0.0; - vd.template getProp<velocity>(a)[1] = 0.0; - vd.template getProp<velocity>(a)[2] = 0.0; - vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); - } + vd.template getProp<velocity>(a)[0] = 0.0; + vd.template getProp<velocity>(a)[1] = 0.0; + vd.template getProp<velocity>(a)[2] = 0.0; + vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); vd.template getProp<rho_prev>(a) = rhop; @@ -727,23 +749,89 @@ void verlet_int(particles & vd, double dt, bool VerletStep) double velZ = vd.template getProp<velocity>(a)[2]; double rhop = vd.template getProp<rho>(a); - if (VerletStep == true) + vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2; + vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2; + vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2; + vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); + + // Check if the particle go out of range in space and in density + if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || + vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || + vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) { - vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2; - vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2; - vd.template getProp<velocity>(a)[2] = vd.template 
getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2; - vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a); + to_remove.add(a.getKey()); } - else - { - vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt; - vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt; - vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt; + + vd.template getProp<velocity_prev>(a)[0] = velX; + vd.template getProp<velocity_prev>(a)[1] = velY; + vd.template getProp<velocity_prev>(a)[2] = velZ; + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + } + + // remove the particles + vd.remove(to_remove,0); + + // increment the iteration counter + cnt++; +} + +void euler_int(particles & vd, double dt) +{ + // list of the particle to remove + to_remove.clear(); + + // particle iterator + auto part = vd.getDomainIterator(); + + double dt205 = dt*dt*0.5; + double dt2 = dt*2.0; + + // For each particle ... + while (part.isNext()) + { + // ... a + auto a = part.get(); + + // if the particle is boundary + if (vd.template getProp<type>(a) == BOUNDARY) + { + // Update rho + double rhop = vd.template getProp<rho>(a); + + // Update only the density + vd.template getProp<velocity>(a)[0] = 0.0; + vd.template getProp<velocity>(a)[1] = 0.0; + vd.template getProp<velocity>(a)[2] = 0.0; vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); - } - // Check if there are particles to remove + vd.template getProp<rho_prev>(a) = rhop; + + ++part; + continue; + } + //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion. 
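+		// The update below advances the particle in a single step:
+		//   r^{n+1}   = r^n   + dt * v^n + 0.5 * dt^2 * F^n
+		//   v^{n+1}   = v^n   + dt * F^n
+		//   rho^{n+1} = rho^n + dt * drho^n
+		// The main loop takes this step every 40 iterations in place of verlet_int(), whose
+		// update spans 2*dt, to keep the two staggered sequences from drifting apart.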
+ double dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205; + double dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205; + double dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205; + + vd.getPos(a)[0] += dx; + vd.getPos(a)[1] += dy; + vd.getPos(a)[2] += dz; + + double velX = vd.template getProp<velocity>(a)[0]; + double velY = vd.template getProp<velocity>(a)[1]; + double velZ = vd.template getProp<velocity>(a)[2]; + double rhop = vd.template getProp<rho>(a); + + vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt; + vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt; + vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt; + vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a); + + // Check if the particle go out of range in space and in density if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 || vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 || vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax) @@ -759,8 +847,10 @@ void verlet_int(particles & vd, double dt, bool VerletStep) ++part; } + // remove the particles vd.remove(to_remove,0); + // increment the iteration counter cnt++; } @@ -772,7 +862,7 @@ int main(int argc, char* argv[]) * * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * ## Main function ## + * ## Main function {#e7_sph_main} * * Here we Initialize the library, we create a Box that define our domain, boundary conditions and ghost * @@ -805,11 +895,12 @@ int main(int argc, char* argv[]) /*! * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing * - * ## %Vector create ## + * ### Vector create {#e7_sph_vcreate} * * Here we define a distributed vector in 3D, we use the particles type that we defined previously. - * Each particle contain the following properties const size_t type = 0; - * * **rho** Density of the particle; + * Each particle contain the following properties + * * **type** Type of the particle + * * **rho** Density of the particle * * **rho_prev** Density at previous timestep * * **Pressure** Pressure of the particle * * **drho** Derivative of the density over time @@ -820,7 +911,16 @@ int main(int argc, char* argv[]) * * In this case the vector contain 0 particles initially * - * \see \ref vector_inst + * \see \ref e0_s_vector_inst + * + * The option DEC_GRAN(512) is related to the Load-Balancing decomposition + * granularity. It indicate that the space must be decomposed by at least + * + * \f$ N_{subsub} = 512 \cdot N_p \f$ + * + * Where \f$ N_{subsub} \f$ is the number of sub-sub-domain in which the space + * must be decomposed and \f$ N_p \f$ is the number of processors. (The concept + * of sub-sub-domain will be explained leter) * * \snippet Vector/7_SPH_dlb/main.cpp vector inst * \snippet Vector/7_SPH_dlb/main.cpp vector_dist_def @@ -829,25 +929,26 @@ int main(int argc, char* argv[]) //! \cond [vector inst] \endcond - particles vd(0,domain,bc,g,DEC_GRAN(4096)); + particles vd(0,domain,bc,g,DEC_GRAN(512)); //! \cond [vector inst] \endcond /*! 
 * \page Vector_7_sph_dlb Vector 7 SPH Dam break simulation with Dynamic load balancing
 *
- * ## Draw particles and initialization ##
+ * ### Draw particles and initialization ### {#e7_sph_draw_part_init}
 *
- * In this part we initialize the problem creating particles in the position that
- * we want. In order to do it we use the class DrawParticles. Because some of
- * the simulation constants require the maximum height of the fluid to be calculated
- * and the maximum fluid height is determined at runtime, some of the constants are
- * calculated here. In this case the vector contain 0 particles initially.
+ * In this part we initialize the problem by creating particles. To do this we use the class DrawParticles. Because some of
+ * the simulation constants require the maximum height \f$ h_{swl} \f$ of the fluid,
+ * and the maximum fluid height is only determined at runtime, those constants are calculated just after we create the
+ * fluid particles.
 *
- * ### Draw Fluid ###
+ * ### Draw Fluid ### {#e7_sph_draw_part_fluid}
 *
- * We start drawing the fluid particles, the initial pressure is initialized accordingly to the
- * Hydrostatic pressure given by:
+ * The function DrawParticles::DrawBox returns an iterator that can be used to create particles in a predefined
+ * box (smaller than the simulation domain) with a predefined spacing.
+ * We start by drawing the fluid particles; the initial pressure is initialized according to the
+ * hydrostatic pressure given by:
 *
 * \f$ P = \rho_{0} g (h_{max} - z) \f$
 *
@@ -868,14 +969,11 @@ int main(int argc, char* argv[])

	//! \cond [draw fluid] \endcond

-	// the scalar is the element at position 0 in the aggregate
-	const int type = 0;
-
+	// You can ignore the dp/2.0 terms: they are a trick to reach the same initialization
+	// as Dual-SPH, which uses a different criterion to draw particles
	Box<3,double> fluid_box({dp/2.0,dp/2.0,dp/2.0},{0.4+dp/2.0,0.67-dp/2.0,0.3+dp/2.0});

-	// first we create Fluid particles
-	// Fluid particles are created
-
+	// return an iterator over the fluid particles to add to vd
	auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);

	// here we fill some of the constants needed by the simulation
@@ -884,14 +982,18 @@
	B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
	cbar = coeff_sound * sqrt(gravity * h_swl);

+	// for each particle inside the fluid box ...
	while (fluid_it.isNext())
	{
+		// ... add a particle ...
		vd.add();

+		// ... set its position ...
		vd.getLastPos()[0] = fluid_it.get().get(0);
		vd.getLastPos()[1] = fluid_it.get().get(1);
		vd.getLastPos()[2] = fluid_it.get().get(2);

+		// ... and its type.
		vd.template getLastProp<type>() = FLUID;

		// We also initialize the density of the particle and the hydro-static pressure given by
@@ -913,6 +1015,7 @@
		vd.template getLastProp<velocity_prev>()[1] = 0.0;
		vd.template getLastProp<velocity_prev>()[2] = 0.0;

+		// next fluid particle
		++fluid_it;
	}

@@ -923,9 +1026,9 @@
 *
 * ### Draw Recipient ###
 *
- * Here we draw the recipient using DrawSkin function. This function can draw a
- * box of particles removed of a second box or an array of boxes. So all the particles in the area included
- * in the shape A - B - C. There is no restriction that B or C must be included into A.
+ * Here we draw the recipient using the function DrawParticles::DrawSkin. This function can draw a set
+ * of particles inside a box A with a second box B or an array of boxes C removed, i.e. all the particles
+ * in the region A - B - C. There is no restriction that B or C must be contained in A.
 *
 * \htmlonly
 * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/recipient.jpg"/>
 * \endhtmlonly
 *
@@ -1035,46 +1138,50 @@
 *
 * If at this point we output the particles and we visualize where they are accordingly
 * to their processor id we can easily see that particles are distributed unevenly. The
- * processor that has particle in while has few particles and all of them are non fluid.
+ * processor whose particles are shown in white has few particles and all of them are non-fluid.
 * This mean that it will be almost in idle. This situation is not ideal
 *
 * \htmlonly
 * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/unbalanced_particles.jpg"/>
 * \endhtmlonly
 *
- * In order to reach an optimal situation we have to distribute the particles in order to
- * reach a balanced situation. In order to do this we have to set the computation of each
- * sub-sub-domain, redecompose the space and distributed the particles accordingly to this
- * new configuration. In order to do this we need a model. A model specify how to set
- * the computational cost in each sub-subdomains (Quadratic, Linear, with the number of
- * particles ...). In this special case where we have two type of particles, that different
- * computational weight we use a custom model in order to reach an optimal configuration.
- * A custom model is nothing else than a structure with 3 methods.
+ * In order to reach an optimal situation we have to redistribute the particles to
+ * achieve a balanced configuration. To do this we have to set the computational cost of each
+ * sub-sub-domain, redecompose the space and redistribute the particles according to this
+ * new configuration. For this we need a model. A model specifies how to set
+ * the computational cost of each sub-sub-domain (for example, it specifies whether the computational cost to
+ * process a sub-sub-domain is quadratic or linear in the number of
+ * particles ...). A model looks like this.
 *
 * \snippet Vector/7_SPH_dlb/main.cpp model custom
 *
- * Setting the the computational cost on sub-domains is performed running across the particles.
- * For each particles, it is calculated on which sub-sub-domain it belong. Than the function
- * **addComputation** is called. Inside this call we can set the weight in the way we prefer.
- * In this case we set the weight as:
+ * Setting the computational cost on the sub-sub-domains is performed by running
+ * across the particles. For each one of them, the sub-sub-domain it belongs to is calculated.
+ * Then the function **addComputation** is called. Inside this call we can set the weight
+ * in the way we prefer. In this case we set the weight as:
 *
- * \f$ w_v = \sum_{N_s} 3 N_{fluid} + 2N_{boundary} \f$
+ * \f$ w_v = 4 N_{fluid} + 3 N_{boundary} \f$
 *
- * Where \f$ N_{fluid} \f$ Is the number of fluid particles in the sub-sub-domain and \f$ N_{boundary} \f$
- * are the number of boundary particles. While \f$ N_s = N_{fluid} + N_{boundary} \f$.
- * This number is also used to calculate the cost in communication and in migration. The cost in communication
- * is given by \f$ \frac{V_{ghost}}{V_{sub-sub}} w_v t_s \f$, while the migration cost is given by
- * \f$ v_{sub-sub} w_v \f$. In general\f$ t_s \f$ is the number of ghost get between two rebalance.
+ * Where \f$ N_{fluid} \f$ Is the number of fluid particles in the sub-sub-domains and \f$ N_{boundary} \f$ + * are the number of boundary particles. For example in our ModelCustom we square this number, + * because the computation is proportional to the square of the number of particles in each sub-sub-domain. * A second cycle is performed in order to calculate a complex function of this number (for example squaring). - * In our ModelCustom we square this number, because the computation is proportional to the square of the number - * of particles in each sub-sub-domain. After filled the computational cost based on out model - * we can decompose the problem in computational equal chunk for each processor. After - * we decomposed using the function **decompose()** we use the map function to redistribute + * + * Implicitly the communication cost is given by \f$ \frac{V_{ghost}}{V_{sub-sub}} + * t_s \f$, while the migration cost is given by \f$ v_{sub-sub} \f$. In general\f$ t_s \f$ is the number + * of ghost get between two rebalance. In this special case where we have two type of particles, + * we have two different computation for each of them, this mean that fluid particles + * and boundary particles has different computation cost. + * + * After filling the computational cost based on our model + * we can decompose the problem in computationally equal chunk for each processor. + * We use the function **decomposed** to redecompose the space and subsequently we use + * the function map to redistribute * the particles. * * \note All processors now has part of the fluid. It is good to note that the computationaly * balanced configuration does not correspond to the evenly distributed particles to know - * more about that please follow the video tutorial + * more about that please follow the video tutorials * * \snippet Vector/7_SPH_dlb/main.cpp load balancing * @@ -1084,35 +1191,17 @@ int main(int argc, char* argv[]) * */ - vd.getDecomposition().write("Decomposition_before_load_bal"); - vd.write("Geometry_before"); - //! \cond [load balancing] \endcond // Now that we fill the vector with particles ModelCustom md; vd.addComputationCosts(md); - vd.getDecomposition().getDistribution().write("Distribution_BEFORE_DECOMPOSE"); vd.getDecomposition().decompose(); vd.map(); //! \cond [load balancing] \endcond - vd.addComputationCosts(md); -// vd.getDecomposition().getDistribution().write("AFTER_DECOMPOSE1"); - -// vd.getDecomposition().rebalance(1); - -// vd.map(); -// vd.getDecomposition().getDistribution().write("Distrobution_AFTER_DECOMPOSE"); - - std::cout << "N particles: " << vd.size_local() << " " << create_vcluster().getProcessUnitID() << " " << "Get processor Load " << vd.getDecomposition().getDistribution().getProcessorLoad() << std::endl; - - vd.write("Geometry_after"); - vd.getDecomposition().write("Decomposition_after_load_bal"); - vd.getDecomposition().getDistribution().write("Distribution_load_bal"); - vd.ghost_get<type,rho,Pressure,velocity>(); auto NN = vd.getCellList(2*H); @@ -1127,7 +1216,7 @@ int main(int argc, char* argv[]) * The main loop do time integration. It calculate the pressure based on the * density, than calculate the forces, than we calculate delta time, and finally update position * and velocity. After 200 time-step we do a rebalancing. And we save the configuration - * avery 0.01 seconds + * every 0.01 seconds * * \snippet Vector/7_SPH_dlb/main.cpp main loop * @@ -1135,9 +1224,6 @@ int main(int argc, char* argv[]) //! 
\cond [main loop] \endcond - double time_mean = 0.0; - double time_min = 1000.0; - double time_max = 0.0; size_t write = 0; size_t it = 0; size_t it_reb = 0; @@ -1153,8 +1239,7 @@ int main(int argc, char* argv[]) { vd.map(); - vd.getDecomposition().write("DLB_BEFORE_"); -// it_reb = 0; + it_reb = 0; ModelCustom md; vd.addComputationCosts(md); vd.getDecomposition().decompose(); @@ -1165,51 +1250,30 @@ int main(int argc, char* argv[]) vd.map(); - - if (it_reb == 200) - { - vd.getDecomposition().write("DLB_AFTER_"); - - it_reb = 0; - vd.addComputationCosts(md); - std::cout << "PROCESSOR LOAD: " << vd.getDecomposition().getDistribution().getProcessorLoad() << " MEAN: " << time_mean / 200 << " MIN: " << time_min << " MAX: " << time_max << std::endl; - time_mean = 0.0; - time_min = 1000.0; - time_max = 0.0; - } - - vd.ghost_get<type,rho,Pressure,velocity>(); - // Calculate pressure from the density EqState(vd); double max_visc = 0.0; - it_time.start(); + vd.ghost_get<type,rho,Pressure,velocity>(); // Calc forces calc_forces(vd,NN,max_visc); - it_time.stop(); // Get the maximum viscosity term across processors v_cl.max(max_visc); v_cl.execute(); - time_mean += it_time.getwct(); - time_min = std::min(time_min,it_time.getwct()); - time_max = std::max(time_max,it_time.getwct()); // Calculate delta t integration double dt = calc_deltaT(vd,max_visc); -// std::cout << "Calculate deltaT: " << dt << " " << DtMin << std::endl; - - // VerletStep + // VerletStep or euler step it++; if (it < 40) - verlet_int(vd,dt,true); + verlet_int(vd,dt); else { - verlet_int(vd,dt,false); + euler_int(vd,dt); it = 0; } diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index b63439f2..d3daf4d1 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -351,11 +351,13 @@ public: for (size_t i = 0; i < dist.getNSubSubDomains(); i++) { - dist.setMigrationCost(i, norm * migration * dist.getSubSubDomainComputationCost(i) ); + dist.setMigrationCost(i, norm * migration /* * dist.getSubSubDomainComputationCost(i)*/ ); for (size_t s = 0; s < dist.getNSubSubDomainNeighbors(i); s++) { - dist.setCommunicationCost(i, s, 1 * dist.getSubSubDomainComputationCost(i) * ts); + // We have to remove dist.getSubSubDomainComputationCost(i) otherwise the graph is + // not directed + dist.setCommunicationCost(i, s, 1 /** dist.getSubSubDomainComputationCost(i)*/ * ts); } prev += dist.getNSubSubDomainNeighbors(i); } @@ -1017,6 +1019,8 @@ public: createSubdomains(v_cl,bc); calculateGhostBoxes(); + + domain_nn_calculator_cart<dim>::reset(); } /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call @@ -1036,6 +1040,8 @@ public: createSubdomains(v_cl,bc); calculateGhostBoxes(); + + domain_nn_calculator_cart<dim>::reset(); } /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call diff --git a/src/Decomposition/Domain_NN_calculator_cart.hpp b/src/Decomposition/Domain_NN_calculator_cart.hpp index 8df2ba8a..f3cb87da 100644 --- a/src/Decomposition/Domain_NN_calculator_cart.hpp +++ b/src/Decomposition/Domain_NN_calculator_cart.hpp @@ -20,6 +20,12 @@ class domain_nn_calculator_cart //! True if domain and anomalous domain cells are computed bool are_domain_anom_computed; + //! Are linearized the domain cell + bool are_dom_lin; + + //! are linearized the anomalous cells + bool are_anom_lin; + //! 
anomalous cell neighborhood openfpm::vector<subsub<dim>> anom; @@ -95,8 +101,16 @@ class domain_nn_calculator_cart * * */ - void CalculateDomAndAnomCells(openfpm::vector<subsub<dim>> & sub_keys, openfpm::vector<grid_key_dx<dim>> & dom_subsub, const ::Box<dim,long int> & proc_box, grid_key_dx<dim> & shift, const openfpm::vector<::Box<dim, size_t>> & loc_box) + void CalculateDomAndAnomCells(openfpm::vector<subsub<dim>> & sub_keys, + openfpm::vector<grid_key_dx<dim>> & dom_subsub, + const ::Box<dim,long int> & proc_box, + grid_key_dx<dim> & shift, + const openfpm::vector<::Box<dim, size_t>> & loc_box) { + // Reset dom and dom_subsub + sub_keys.clear(); + dom_subsub.clear(); + size_t sz[dim]; for (size_t j = 0 ; j < dim ; j++) @@ -169,7 +183,7 @@ class domain_nn_calculator_cart public: domain_nn_calculator_cart() - :are_domain_anom_computed(false) + :are_domain_anom_computed(false),are_dom_lin(false),are_anom_lin(false) {} /*! \brief Get the domain Cells @@ -190,13 +204,15 @@ public: are_domain_anom_computed = true; } - if (shift != shift_calc_dom || gs != gs_calc_dom || dom_lin.size() == 0) + if (are_dom_lin == false) { dom_lin.clear(); shift_calc_dom = shift; gs_calc_dom = gs; for (size_t i = 0 ; i < dom.size() ; i++) dom_lin.add(gs.LinId(dom.get(i) - cell_shift)); + + are_dom_lin = true; } return dom_lin; @@ -221,10 +237,7 @@ public: are_domain_anom_computed = true; } - // Convert the list of the neighborhood sub-sub-domains for each sub-sub-domains - // into a linearized sub-sub-domain index. Again we check that this operation has not - // been already executed - if (shift != shift_calc_anom || gs != gs_calc_anom || anom_lin.size() == 0) + if (are_anom_lin == false) { anom_lin.clear(); shift_calc_anom = shift; @@ -257,10 +270,19 @@ public: anom_lin.get(i).NN_subsub.get(self_cell) = tmp; } } + + are_anom_lin = true; } return anom_lin; } + + void reset() + { + are_domain_anom_computed = false; + are_dom_lin = false; + are_anom_lin = false; + } }; #endif /* SRC_DECOMPOSITION_DOMAIN_NN_CALCULATOR_CART_HPP_ */ diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 9bcffed3..f077e701 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -193,6 +193,39 @@ private: return !bx.isContained(pbox); } + template<typename CellList> bool check_cl_reconstruct_sym(const CellList & NN, St r_cut) + { + Box<dim,long int> pbox_prev; + + for (size_t i = 0 ; i < dim ; i++) + { + pbox_prev.setLow(i,NN.getStartDomainCell().get(i)); + pbox_prev.setHigh(i,NN.getStopDomainCell().get(i)); + } + + // get the processor bounding box + Box<dim, St> pbox = getDecomposition().getProcessorBounds(); + + CellDecomposer_sm<dim,St,shift<dim,St>> cd2; + size_t pad = 0; + + cl_param_calculateSym(NN.getDomain(),cd2,getDecomposition().getGhost(),r_cut,pad); + + CellDecomposer_sm<dim,St,shift<dim,St>> cd3; + + NN.setCellDecomposer(cd3,cd2,pbox,pad); + + Box<dim,long int> pbox_disc; + + for (size_t i = 0 ; i < dim ; i++) + { + pbox_disc.setLow(i,cd3.getStartDomainCell().get(i)); + pbox_disc.setHigh(i,cd3.getStopDomainCell().get(i)); + } + + return !pbox_prev.isContained(pbox_disc); + } + public: //! 
space type @@ -708,32 +741,121 @@ public: * \param r_cut cut-off radius * \param ver Verlet to update * \param r_cut cutoff radius - * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC + * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC or VL_CRS_SYMMETRIC * */ void updateVerlet(VerletList<dim,St,FAST,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC) { - if (opt == VL_CRS_SYMMETRIC) + if (opt == VL_SYMMETRIC) + { + auto & NN = ver.getInternalCellList(); + + // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this + // processor. if it is not like that we have to completely reconstruct from stratch + bool to_reconstruct = check_cl_reconstruct_sym(NN,r_cut); + + if (to_reconstruct == false) + ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); + else + { + VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + + ver_tmp = getVerlet(r_cut); + ver.swap(ver); + } + } + else if (opt == VL_CRS_SYMMETRIC) + { + auto & NN = ver.getInternalCellList(); + + // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this + // processor. if it is not like that we have to completely reconstruct from stratch + bool to_reconstruct = check_cl_reconstruct_sym(NN,r_cut); + + if (to_reconstruct == false) + { + // Shift + grid_key_dx<dim> cell_shift = NN.getShift(); + + // Shift + grid_key_dx<dim> shift = NN.getShift(); + + // Add padding + for (size_t i = 0 ; i < dim ; i++) + shift.set_d(i,shift.get(i) + NN.getPadding(i)); + + grid_sm<dim,void> gs = NN.getInternalGrid(); + + ver.updateCrs(getDecomposition().getDomain(),r_cut,v_pos,g_m,getDecomposition().getDomainCells(shift,cell_shift,gs),getDecomposition().getAnomDomainCells(shift,cell_shift,gs)); + } + else + { + VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + + ver_tmp = getVerletCrs(r_cut); + ver.swap(ver_tmp); + } + } + else { - // Get the internal cell list auto & NN = ver.getInternalCellList(); - // Shift - grid_key_dx<dim> cell_shift = NN.getShift(); + // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this + // processor. if it is not like that we have to completely reconstruct from stratch + bool to_reconstruct = check_cl_reconstruct(NN.getDomain(),r_cut); - // Shift - grid_key_dx<dim> shift = NN.getShift(); + if (to_reconstruct == false) + ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); + else + { + VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; - // Add padding - for (size_t i = 0 ; i < dim ; i++) - shift.set_d(i,shift.get(i) + NN.getPadding(i)); + ver_tmp = getVerlet(r_cut); + ver.swap(ver_tmp); + } + } - grid_sm<dim,void> gs = NN.getInternalGrid(); +/* // Get the internal cell list + auto & NN = ver.getInternalCellList(); + + // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this + // processor. 
if it is not like that we have to completely reconstruct from stratch + bool to_reconstruct = check_cl_reconstruct(NN.getDomain(),r_cut); + + if (to_reconstruct == false) + { + if (opt == VL_CRS_SYMMETRIC) + { + // Shift + grid_key_dx<dim> cell_shift = NN.getShift(); - ver.updateCrs(getDecomposition().getDomain(),r_cut,v_pos,g_m,getDecomposition().getDomainCells(shift,cell_shift,gs),getDecomposition().getAnomDomainCells(shift,cell_shift,gs)); + // Shift + grid_key_dx<dim> shift = NN.getShift(); + + // Add padding + for (size_t i = 0 ; i < dim ; i++) + shift.set_d(i,shift.get(i) + NN.getPadding(i)); + + grid_sm<dim,void> gs = NN.getInternalGrid(); + + ver.updateCrs(getDecomposition().getDomain(),r_cut,v_pos,g_m,getDecomposition().getDomainCells(shift,cell_shift,gs),getDecomposition().getAnomDomainCells(shift,cell_shift,gs)); + } + else + ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); } else - ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); + { + VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + + if (opt == VL_SYMMETRIC) + ver_tmp = getVerletSym(r_cut); + else if (opt == VL_CRS_SYMMETRIC) + ver_tmp = getVerletCrs(r_cut); + else + ver_tmp = getVerlet(r_cut); + + ver.swap(ver_tmp); + }*/ } /*! \brief for each particle get the verlet list -- GitLab
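
A minimal usage sketch of the reworked updateVerlet() after a dynamic load balancing step. This is an assumed workflow assembled from the 7_SPH_dlb example and the calls above, not part of the patch; vd, H, ModelCustom and the property indices are the ones defined in example/Vector/7_SPH_dlb/main.cpp:

	// rebalance: assign costs, redecompose the space and redistribute the particles
	ModelCustom md;
	vd.addComputationCosts(md);
	vd.getDecomposition().decompose();
	vd.map();
	vd.ghost_get<type,rho,Pressure,velocity>();

	// build a CRS symmetric Verlet list once ...
	auto verlet = vd.getVerletCrs(2.0*H);

	// ... and later, after particles have moved and been redistributed again, refresh it;
	// if the cell decomposition changed in the meantime, updateVerlet() now rebuilds the
	// list from scratch instead of updating it against a stale decomposition
	vd.map();
	vd.ghost_get<type,rho,Pressure,velocity>();
	vd.updateVerlet(verlet,2.0*H,VL_CRS_SYMMETRIC);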