diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f25e2668ba27e464683408fd60b065c23ccb3d4..f7b13ee3269a818ae03faebbc81dd9692b38164e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,19 +1,25 @@ # Change Log All notable changes to this project will be documented in this file. -## [1.1.0] +## [1.1.0] February 2018 ### Added - Interface for Multi-vector dynamic load balancing +- Increased performance for grid ghost get +- Introduced forms to increase the performance of the grid iterator in case of stencil code (see example 5_GrayScott) +- EMatrix wrapped eigen matrices compatible with vector_dist_id +- General tuning for high dimension vector_dist_id (up to 50 dimensions) +- Added Discrete element Method example (8_DEM) ### Fixed - Installation/detection of PETSC -- 2D Fixing IO in binary for vector - CRITICAL-BUG scalar product in combination with vector product is broken (it return 0) +- Fixing 2D IO in binary for vector +- Fixing 1D grid writer in ASCII mode -## [1.0.0] 13 September 2017 +## [1.0.0] 13 September 2017 (Codename: Vortex) ### Added - Introduced getDomainIterator for Cell-list diff --git a/configure.ac b/configure.ac index 519f74fcbea28eb455cb7b4502eee2aee3d061bc..75e0932e20c6b990155e39add389456d6f622531 100644 --- a/configure.ac +++ b/configure.ac @@ -64,8 +64,7 @@ INCLUDES_PATH=" " echo "$base" > install_dir # Needed for build library -AC_PROG_RANLIB -AM_PROG_AR +LT_INIT # Checks for programs.
AC_PROG_CXX diff --git a/dep_dir b/dep_dir new file mode 100644 index 0000000000000000000000000000000000000000..1f7391f92b6a3792204e07e99f71f643cc35e7e1 --- /dev/null +++ b/dep_dir @@ -0,0 +1 @@ +master diff --git a/example/Grid/0_simple/main.cpp b/example/Grid/0_simple/main.cpp index 30ae5a9472b27365b89de2a6e0e4a5147fdcdafa..339d420e1d5b4d7738ec5c0d259ec0f5b6e8b232 100644 --- a/example/Grid/0_simple/main.cpp +++ b/example/Grid/0_simple/main.cpp @@ -9,6 +9,7 @@ * \subpage Grid_2_solve_eq * \subpage Grid_3_gs * \subpage Grid_3_gs_3D + * \subpage Grid_3_gs_3D_vector * */ diff --git a/example/Grid/3_gray_scott_3d/Makefile b/example/Grid/3_gray_scott_3d/Makefile index 170e428569ba200362dfa7bbc4a5d533f4006696..04db7e20fa78c9a519f254e71d61f27e47df69f8 100644 --- a/example/Grid/3_gray_scott_3d/Makefile +++ b/example/Grid/3_gray_scott_3d/Makefile @@ -7,7 +7,7 @@ LDIR = OBJ = main.o %.o: %.cpp - $(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + $(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) gray_scott: $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) diff --git a/example/Grid/3_gray_scott_3d/main.cpp b/example/Grid/3_gray_scott_3d/main.cpp index 88e814cba1bc08693a77912e74c9049be081d104..7c7e8f4f2b842b709bd80b4413273f0d3943881e 100644 --- a/example/Grid/3_gray_scott_3d/main.cpp +++ b/example/Grid/3_gray_scott_3d/main.cpp @@ -6,6 +6,8 @@ * * \page Grid_3_gs_3D Gray Scott in 3D * + * [TOC] + * * # Solving a gray scott-system in 3D # {#e3_gs_gray_scott} * * This example is just an extension of the 2D Gray scott example. @@ -17,9 +19,25 @@ * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/> * \endhtmlonly * + * More or less this example is the adaptation of the previous example to 3D + * with the improvement of using stencil iterator. 
+ * + * ## Stencil iterator {#e3_gs_grat_scott_si} + * + * Stencil iterator require that you define a stencil, + * + * \snippet Grid/3_gray_scott_3d/main.cpp stencil def + * + * once is defined it is + * possible get and use a stencil iterator + * + * \snippet Grid/3_gray_scott_3d/main.cpp stencil get and use + * + * The rest of the example remain the same with the exception + * that the code has been extended in 3D. + * * \see \ref Grid_2_solve_eq * - * \snippet Grid/3_gray_scott/main.cpp constants * */ @@ -110,26 +128,10 @@ int main(int argc, char* argv[]) double K = 0.053; double F = 0.014; - //! \cond [init lib] \endcond - - /*! - * \page Grid_3_gs_3D Gray Scott in 3D - * - * Here we create 2 distributed grid in 2D Old and New. In particular because we want that - * the second grid is distributed across processors in the same way we pass the decomposition - * of the Old grid to the New one in the constructor with **Old.getDecomposition()**. Doing this, - * we force the two grid to have the same decomposition. - * - * \snippet Grid/3_gray_scott/main.cpp init grid - * - */ - - //! \cond [init grid] \endcond - grid_dist_id<3, double, aggregate<double,double>> Old(sz,domain,g,bc); // New grid with the decomposition of the old grid - grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g); + grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g); // spacing of the grid on x and y @@ -149,33 +151,39 @@ int main(int argc, char* argv[]) timer tot_sim; tot_sim.start(); + //! \cond [stencil def] \endcond + static grid_key_dx<3> star_stencil_3D[7] = {{0,0,0}, {0,0,-1}, - {0,0,1}, - {0,-1,0}, - {0,1,0}, - {-1,0,0}, - {1,0,0}}; + {0,0,1}, + {0,-1,0}, + {0,1,0}, + {-1,0,0}, + {1,0,0}}; + + //! \cond [stencil def] \endcond for (size_t i = 0; i < timeSteps; ++i) { if (i % 300 == 0) std::cout << "STEP: " << i << std::endl; + //! 
\cond [stencil get and use] \endcond + auto it = Old.getDomainIteratorStencil(star_stencil_3D); while (it.isNext()) { // center point - auto Cp = it.getStencil<0>(); + auto Cp = it.getStencilGrid<0>(); // plus,minus X,Y,Z - auto mx = it.getStencil<1>(); - auto px = it.getStencil<2>(); - auto my = it.getStencil<3>(); - auto py = it.getStencil<4>(); - auto mz = it.getStencil<5>(); - auto pz = it.getStencil<6>(); + auto mx = it.getStencilGrid<1>(); + auto px = it.getStencilGrid<2>(); + auto my = it.getStencilGrid<3>(); + auto py = it.getStencilGrid<4>(); + auto mz = it.getStencilGrid<5>(); + auto pz = it.getStencilGrid<6>(); // update based on Eq 2 New.get<U>(Cp) = Old.get<U>(Cp) + uFactor * ( @@ -206,6 +214,8 @@ int main(int argc, char* argv[]) ++it; } + //! \cond [stencil get and use] \endcond + // Here we copy New into the old grid in preparation of the new step // It would be better to alternate, but using this we can show the usage // of the function copy. To note that copy work only on two grid of the same @@ -216,11 +226,11 @@ int main(int argc, char* argv[]) // After copy we synchronize again the ghost part U and V Old.ghost_get<U,V>(); - // Every 30 time step we output the configuration for + // Every 500 time step we output the configuration for // visualization - if (i % 60 == 0) + if (i % 500 == 0) { - Old.write_frame("output",count,VTK_WRITER | FORMAT_BINARY); + Old.save("output_" + std::to_string(count)); count++; } } @@ -246,4 +256,13 @@ int main(int argc, char* argv[]) openfpm_finalize(); //! \cond [finalize] \endcond + + /*! 
+ * \page Grid_3_gs_3D Gray Scott in 3D + * + * # Full code # {#code} + * + * \include Grid/3_gray_scott_3d/main.cpp + * + */ } diff --git a/example/Grid/3_gray_scott_3d_vectorization/Makefile b/example/Grid/3_gray_scott_3d_vectorization/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9488bea01f7cb9eb49d5570ea692d8091ff9cfcb --- /dev/null +++ b/example/Grid/3_gray_scott_3d_vectorization/Makefile @@ -0,0 +1,27 @@ +include ../../example.mk + +CC=mpic++ + +LDIR = + +OBJ = main.o update_new.o + +%.o: %.f90 + mpif90 -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none -mavx -O3 -c -g -o $@ $< + +%.o: %.cpp + $(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/home/i-bird/VC/include + +gray_scott: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) -L/home/i-bird/VC/lib -lVc + +all: gray_scott + +run: all + mpirun -np 4 ./gray_scott + +.PHONY: clean all run + +clean: + rm -f *.o *~ core gray_scott + diff --git a/example/Grid/3_gray_scott_3d_vectorization/config.cfg b/example/Grid/3_gray_scott_3d_vectorization/config.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880 --- /dev/null +++ b/example/Grid/3_gray_scott_3d_vectorization/config.cfg @@ -0,0 +1,2 @@ +[pack] +files = main.cpp Makefile diff --git a/example/Grid/3_gray_scott_3d_vectorization/main.cpp b/example/Grid/3_gray_scott_3d_vectorization/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f153be0dcdf63e7403b9570a72f96d68317c532c --- /dev/null +++ b/example/Grid/3_gray_scott_3d_vectorization/main.cpp @@ -0,0 +1,416 @@ +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" +#include "timer.hpp" +#include "Vc/Vc" + +/*! 
+ * + * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * # Solving a gray scott-system in 3D # {#e3_gs_gray_scott_vector} + * + * This example is just an improved version of the previous 3D Gray scott example. + * In particular we make the following improvements: we separate U and V in two grids + * in order to vectorize. Every loop iteration now handles 4 doubles in case of AVX-256 and 2 doubles + * in case of SSE. We also avoid using the function copy and we alternate the use of the + * fields New and Old. If at the first iteration we read from Old and we write on New, in + * the second iteration we read from New and we write on Old. The last improvement is to write + * on HDF5 rather than VTK. VTK writers are convenient but slow. HDF5 + * files can be saved with **save()**, reloaded with **load()**, and after loading can be written + * on VTK with **write**; this means that HDF5 files can be easily converted into VTK at a later time. + * Moreover, because HDF5 files can be saved on multiple processors and reloaded on a different + * number of processors, you can use this method to stitch VTK files together. + * + * + * The figure shows the final solution of the problem + * + * \htmlonly + * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/> + * \endhtmlonly + * + * \see \ref Grid_2_solve_eq + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp constants + * + */ + +//! \cond [constants] \endcond + +//#define FORTRAN_UPDATE + +constexpr int x = 0; +constexpr int y = 1; +constexpr int z = 2; + +extern "C" void update_new(const int* lo, const int* hi, + double* u, const int* ulo, const int* uhi, + double* v, const int* vlo, const int* vhi, + double* flu, const int* fulo, const int* fuhi, + double* flv, const int* fvlo, const int* fvhi, + const double * dt, const double * uFactor, const double * vFactor, const double * F, + const double * K); + + +//!
\cond [constants] \endcond + +void init(grid_dist_id<3,double,aggregate<double> > & OldU, + grid_dist_id<3,double,aggregate<double> > & OldV, + grid_dist_id<3,double,aggregate<double> > & NewU, + grid_dist_id<3,double,aggregate<double> > & NewV, + Box<3,double> & domain) +{ + auto it = OldU.getDomainIterator(); + + while (it.isNext()) + { + // Get the local grid key + auto key = it.get(); + + // Old values U and V + OldU.get(key) = 1.0; + OldV.get(key) = 0.0; + + // New values U and V + NewU.get(key) = 0.0; + NewV.get(key) = 0.0; + + ++it; + } + + long int x_start = OldU.size(0)*1.55f/domain.getHigh(0); + long int y_start = OldU.size(1)*1.55f/domain.getHigh(1); + long int z_start = OldU.size(2)*1.55f/domain.getHigh(2); + + long int x_stop = OldU.size(0)*1.85f/domain.getHigh(0); + long int y_stop = OldU.size(1)*1.85f/domain.getHigh(1); + long int z_stop = OldU.size(2)*1.85f/domain.getHigh(2); + + grid_key_dx<3> start({x_start,y_start,z_start}); + grid_key_dx<3> stop ({x_stop,y_stop,z_stop}); + auto it_init = OldU.getSubDomainIterator(start,stop); + + while (it_init.isNext()) + { + auto key = it_init.get(); + + OldU.get(key) = 0.5 + (((double)std::rand())/RAND_MAX -0.5)/10.0; + OldV.get(key) = 0.25 + (((double)std::rand())/RAND_MAX -0.5)/20.0; + + ++it_init; + } +} + + +//! \cond [vectorization] \endcond + +void step(grid_dist_id<3, double, aggregate<double>> & OldU, + grid_dist_id<3, double, aggregate<double>> & OldV, + grid_dist_id<3, double, aggregate<double>> & NewU, + grid_dist_id<3, double, aggregate<double>> & NewV, + grid_key_dx<3> (& star_stencil_3D)[7], + Vc::double_v uFactor, Vc::double_v vFactor, double deltaT, double F, double K) +{ +#ifndef FORTRAN_UPDATE + + //!
\cond [cpp_update] \endcond + + WHILE_M(OldU,star_stencil_3D) + auto & U_old = GET_GRID_M(OldU); + auto & V_old = GET_GRID_M(OldV); + + auto & U_new = GET_GRID_M(NewU); + auto & V_new = GET_GRID_M(NewV); + ITERATE_3D_M(Vc::double_v::Size) + + // center point + auto Cp = it.getStencil<0>(); + + // plus,minus X,Y,Z + auto mx = it.getStencil<1>(); + auto px = it.getStencil<2>(); + auto my = it.getStencil<3>(); + auto py = it.getStencil<4>(); + auto mz = it.getStencil<5>(); + auto pz = it.getStencil<6>(); + + // + Vc::double_v u_c(&U_old.get<0>(Cp),Vc::Unaligned); + Vc::double_v u_mz(&U_old.get<0>(mz),Vc::Unaligned); + Vc::double_v u_pz(&U_old.get<0>(pz),Vc::Unaligned); + Vc::double_v u_my(&U_old.get<0>(my),Vc::Unaligned); + Vc::double_v u_py(&U_old.get<0>(py),Vc::Unaligned); + Vc::double_v u_mx(&U_old.get<0>(mx),Vc::Unaligned); + Vc::double_v u_px(&U_old.get<0>(px),Vc::Unaligned); + + + Vc::double_v v_c(&V_old.get<0>(Cp),Vc::Unaligned); + Vc::double_v v_mz(&V_old.get<0>(mz),Vc::Unaligned); + Vc::double_v v_pz(&V_old.get<0>(pz),Vc::Unaligned); + Vc::double_v v_my(&V_old.get<0>(my),Vc::Unaligned); + Vc::double_v v_py(&V_old.get<0>(py),Vc::Unaligned); + Vc::double_v v_mx(&V_old.get<0>(mx),Vc::Unaligned); + Vc::double_v v_px(&V_old.get<0>(px),Vc::Unaligned); + + Vc::double_v out1 = u_c + uFactor * (u_mz + u_pz + + u_my + u_py + + u_mx + u_px + + - 6.0 * u_c) + + - deltaT * u_c * v_c * v_c + - deltaT * F * (u_c - 1.0); + + Vc::double_v out2 = v_c + vFactor * (v_mz + v_pz + + v_my + v_py + + v_mx + v_px + + - 6.0 * v_c ) + + deltaT * u_c * v_c * v_c + + - deltaT * (F+K) * v_c; + + out1.store(&U_new.get<0>(Cp),Vc::Unaligned); + out2.store(&V_new.get<0>(Cp),Vc::Unaligned); + END_LOOP_M + + //! \cond [cpp_update] \endcond + +#else + + //! 
\cond [fort_update] \endcond + + double uFactor_s = uFactor[0]; + double vFactor_s = vFactor[0]; + + auto & ginfo = OldU.getLocalGridsInfo(); + + for (size_t i = 0 ; i < OldU.getN_loc_grid() ; i++) + { + auto & U_old = OldU.get_loc_grid(i); + auto & V_old = OldV.get_loc_grid(i); + + auto & U_new = NewU.get_loc_grid(i); + auto & V_new = NewV.get_loc_grid(i); + + int lo[3] = {(int)ginfo.get(i).Dbox.getLow(0),(int)ginfo.get(i).Dbox.getLow(1),(int)ginfo.get(i).Dbox.getLow(2)}; + int hi[3] = {(int)ginfo.get(i).Dbox.getHigh(0),(int)ginfo.get(i).Dbox.getHigh(1),(int)ginfo.get(i).Dbox.getHigh(2)}; + + int ulo[3] = {0,0,0}; + int uhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)}; + int nulo[3] = {0,0,0}; + int nuhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)}; + + int vlo[3] = {0,0,0}; + int vhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)}; + int nvlo[3] = {0,0,0}; + int nvhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)}; + + update_new(lo,hi, + (double *)U_old.getPointer(),ulo,uhi, + (double *)V_old.getPointer(),vlo,vhi, + (double *)U_new.getPointer(),nulo,nuhi, + (double *)V_new.getPointer(),nvlo,nvhi, + &deltaT, &uFactor_s, &vFactor_s,&F,&K); + } + + //! \cond [fort_update] \endcond + +#endif +} + +//!
\cond [vectorization] \endcond + +int main(int argc, char* argv[]) +{ + openfpm_init(&argc,&argv); + + // domain + Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + // grid size + size_t sz[3] = {256,256,256}; + + // Define periodicity of the grid + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + // Ghost in grid unit + Ghost<3,long int> g(1); + + // deltaT + double deltaT = 1; + + // Diffusion constant for specie U + double du = 2*1e-5; + + // Diffusion constant for specie V + double dv = 1*1e-5; + + // Number of timesteps + size_t timeSteps = 5000; + + // K and F (Physical constant in the equation) + double K = 0.053; + double F = 0.014; + + //! \cond [init lib] \endcond + + /*! + * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * Here we create 2 distributed grid in 3D Old and New splitting U and V in two different fields. + * In particular because we want that all the grids are distributed across processors in the same + * way we pass the decomposition of the first grid. + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp init grid + * + */ + + //! \cond [init grid] \endcond + + grid_dist_id<3, double, aggregate<double>> OldU(sz,domain,g,bc); + grid_dist_id<3, double, aggregate<double>> OldV(OldU.getDecomposition(),sz,g); + + // New grid with the decomposition of the old grid + grid_dist_id<3, double, aggregate<double>> NewU(OldU.getDecomposition(),sz,g); + grid_dist_id<3, double, aggregate<double>> NewV(OldV.getDecomposition(),sz,g); + + // spacing of the grid on x, y and z + + double spacing[3] = {OldU.spacing(0),OldU.spacing(1),OldU.spacing(2)}; + + init(OldU,OldV,NewU,NewV,domain); + + //!
\cond [init grid] \endcond + + // sync the ghost + size_t count = 0; + OldU.template ghost_get<0>(); + OldV.template ghost_get<0>(); + + // because we assume that spacing[x] == spacing[y] we use formula 2 + // and we calculate the prefactor of Eq 2 + Vc::double_v uFactor = deltaT * du/(spacing[x]*spacing[x]); + Vc::double_v vFactor = deltaT * dv/(spacing[x]*spacing[x]); + + timer tot_sim; + tot_sim.start(); + + static grid_key_dx<3> star_stencil_3D[7] = {{0,0,0}, + {0,0,-1}, + {0,0,1}, + {0,-1,0}, + {0,1,0}, + {-1,0,0}, + {1,0,0}}; + + for (size_t i = 0; i < timeSteps; ++i) + { + if (i % 300 == 0) + std::cout << "STEP: " << i << std::endl; + + /*! + * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * Alternate New and Old field to run one step, switch between old and new if the iteration + * is even or odd. The function step is nothing else than the implementation of Gray-Scott + * 3D in the previous example but in a more optimized way. + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp alternate + * + * In this function we show two methods to optimize this function. + * + * * We can use the macro **WHILE_M** passing the stencil definition, **ITERATE_3D_M** to define the loop, + * **END_LOOP_M** to close the loop, and use the + * function **getStencil<0>()** to retrieve the stencil points. Additionally we can use Vc::double_v instead + * of double to vectorize the code. This method has the advantage of keeping all the + * code in C++. + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp cpp_update + * + * * Another possibility is to use FORTRAN. Because FORTRAN has better + * support for multi dimensional arrays, another possibility is to process each local grid using + * FORTRAN; this also gives us the opportunity to show hybrid code. We can switch between + * one and the other method by commenting + * and uncommenting the line #define FORTRAN_UPDATE in the code.
+ * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp fort_update + * + * \include Grid/3_gray_scott_3d_vectorization/update_new.f90 + * + */ + + //! \cond [alternate] \endcond + + if (i % 2 == 0) + { + step(OldU,OldV,NewU,NewV,star_stencil_3D,uFactor,vFactor,deltaT,F,K); + + NewU.ghost_get<0>(); + NewV.ghost_get<0>(); + } + else + { + step(NewU,NewV,OldU,OldV,star_stencil_3D,uFactor,vFactor,deltaT,F,K); + + OldU.ghost_get<0>(); + OldV.ghost_get<0>(); + } + + //! \cond [alternate] \endcond + + /*! + * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * Instead of using the function **write** we use the function **save** to save on HDF5 + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp save hdf5 + * + */ + + //! \cond [save hdf5] \endcond + + // Every 2000 time steps we output the configuration on hdf5 + if (i % 2000 == 0) + { + OldU.save("output_u_" + std::to_string(count)); + OldV.save("output_v_" + std::to_string(count)); + count++; + } + + //! \cond [save hdf5] \endcond + } + + tot_sim.stop(); + + if (create_vcluster().rank() == 0) + {std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;} + + // We write the final configuration + OldV.write("final"); + + //! \cond [time stepping] \endcond + + /*! + * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * ## Finalize ## + * + * Deinitialize the library + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp finalize + * + */ + + //! \cond [finalize] \endcond + + openfpm_finalize(); + + //! \cond [finalize] \endcond + + /*!
+ * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization + * + * # Full code # {#code} + * + * \include Grid/3_gray_scott_3d_vectorization/main.cpp + * + */ +} + + diff --git a/example/Grid/3_gray_scott_3d_vectorization/update_new.f90 b/example/Grid/3_gray_scott_3d_vectorization/update_new.f90 new file mode 100644 index 0000000000000000000000000000000000000000..5728a019ea7043dd6bc324287a77b7b48ada4079 --- /dev/null +++ b/example/Grid/3_gray_scott_3d_vectorization/update_new.f90 @@ -0,0 +1,48 @@ +subroutine update_new ( & + lo, hi, & + u, ulo, uhi, & + v, vlo, vhi, & + u_new, nulo, nuhi, & + v_new, nvlo, nvhi, & + dt, uFactor, vFactor,F,Kf) bind(C, name="update_new") + + implicit none + + integer, intent(in) :: lo(3), hi(3) + integer, intent(in) :: ulo(3), uhi(3) + integer, intent(in) :: vlo(3), vhi(3) + integer, intent(in) :: nulo(3), nuhi(3), nvlo(3), nvhi(3) + real*8, intent(in) :: u (ulo(1):uhi(1),ulo(2):uhi(2),ulo(3):uhi(3)) + real*8, intent(in) :: v (vlo(1):vhi(1),vlo(2):vhi(2),vlo(3):vhi(3)) + real*8, intent(inout) :: u_new( nulo(1): nuhi(1), nulo(2): nuhi(2), nulo(3): nuhi(3)) + real*8, intent(inout) :: v_new( nvlo(1): nvhi(1), nvlo(2): nvhi(2), nvlo(3): nvhi(3)) + real*8, intent(in) :: dt, F, Kf, uFactor, vFactor + + ! local variables + integer i,j,k + + ! 
x-fluxes + do k = lo(3), hi(3) + do j = lo(2), hi(2) + do i = lo(1), hi(1) + u_new(i,j,k) = u(i,j,k) + uFactor * ( u(i+1,j,k) + u(i-1,j,k) + & + u(i,j+1,k) + u(i,j-1,k) + & + u(i,j,k-1) + u(i,j,k+1) - & + 6.0*u(i,j,k) ) - & + dt * u(i,j,k)*v(i,j,k)*v(i,j,k) - & + dt * F * (u(i,j,k) - 1.0) + + + v_new(i,j,k) = v(i,j,k) + vFactor * ( v(i+1,j,k) + v(i-1,j,k) + & + v(i,j+1,k) + v(i,j-1,k) + & + v(i,j,k-1) + v(i,j,k+1) - & + 6.0*v(i,j,k) ) + & + dt * u(i,j,k)*v(i,j,k)*v(i,j,k) - & + dt * (F+Kf) * v(i,j,k) + end do + end do + end do + + +end subroutine update_new + diff --git a/example/Numerics/PS-CMA-ES/Makefile b/example/Numerics/PS-CMA-ES/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ea9ed12dd4edc5b71386367cc308ef374cb5d3fa --- /dev/null +++ b/example/Numerics/PS-CMA-ES/Makefile @@ -0,0 +1,24 @@ +include ../../example.mk + +CC=mpic++ + +LDIR = + +OBJ = main.o + +%.o: %.cpp + $(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) + +ps_cma_es: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) + +all: ps_cma_es + +run: all + mpirun -np 2 ./ps_cma_es + +.PHONY: clean all run + +clean: + rm -f *.o *~ core ps_cma_es + diff --git a/example/Numerics/PS-CMA-ES/f15_cec_const.hpp b/example/Numerics/PS-CMA-ES/f15_cec_const.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a0b94ab621cb893d09d7a51963a0adbb633ab7af --- /dev/null +++ b/example/Numerics/PS-CMA-ES/f15_cec_const.hpp @@ -0,0 +1,26 @@ +#ifndef F15_CEC_CONST +#define F15_CEC_CONST + +#include "Eigen/Dense" + +const double f15_const[10][100] = 
{3.3253000e+000,-1.2835000e+000,1.8984000e+000,-4.0950000e-001,8.8100000e-002,2.7580000e+000,9.7760000e-001,-1.8090000e+000,-2.4957000e+000,2.7367000e+000,-2.8961000e+000,-2.9413000e+000,3.2682000e+000,3.6495000e+000,5.0310000e-001,-4.2187000e+000,-7.8880000e-001,3.8384000e+000,1.3397000e+000,2.9312000e+000,-3.2869000e+000,3.6213000e+000,3.4834000e+000,-4.2260000e+000,-1.0784000e+000,-4.4730000e-001,2.7601000e+000,4.6200000e-002,4.3606000e+000,-2.0347000e+000,-3.5089000e+000,-2.2028000e+000,-1.6626000e+000,-1.7293000e+000,1.8300000e-002,-4.0478000e+000,-1.4034000e+000,-1.7866000e+000,-2.4212000e+000,-7.4340000e-001,-1.5438000e+000,-3.0272000e+000,2.6055000e+000,-3.1007000e+000,2.8010000e+000,1.4532000e+000,2.9887000e+000,1.2490000e-001,3.0169000e+000,-1.3140000e+000,3.4259000e+000,2.4992000e+000,9.7940000e-001,-1.8358000e+000,-3.8253000e+000,2.7079000e+000,1.9604000e+000,1.3197000e+000,-2.4128000e+000,-4.7550000e-001,2.1681000e+000,3.2344000e+000,3.3590000e+000,-1.4400000e-002,-1.8732000e+000,-1.9496000e+000,4.7110000e-001,8.2110000e-001,-1.9980000e-001,3.7350000e+000,-3.3720000e-001,-1.7267000e+000,-6.5950000e-001,-3.0531000e+000,-4.1052000e+000,2.3991000e+000,-3.2670000e-001,1.3279000e+000,3.7907000e+000,-1.8800000e+000,2.9056000e+000,1.8643000e+000,-5.2800000e-001,-1.9298000e+000,-8.5830000e-001,6.0570000e-001,-1.4152000e+000,4.0411000e+000,2.1519000e+000,3.6699000e+000,-1.3395000e+000,1.5661000e+000,-2.4511000e+000,-2.3292000e+000,1.2750000e-001,-3.0202000e+000,-1.0900000e+000,-3.6965000e+000,-1.1674000e+000,1.5488000e+000\ 
+,-2.2465000e+000,3.9382000e+000,-7.9990000e-001,-4.2205000e+000,-2.9393000e+000,-4.3433000e+000,1.6348000e+000,3.1011000e+000,-9.2690000e-001,-2.5328000e+000,1.1839000e+000,-2.9094000e+000,2.8157000e+000,-2.2581000e+000,-1.2020000e-001,4.1264000e+000,-3.5397000e+000,9.9270000e-001,-3.5492000e+000,3.5247000e+000,1.3420000e-001,-3.6413000e+000,1.8572000e+000,-3.8112000e+000,-5.1770000e-001,-4.3274000e+000,-3.9499000e+000,1.6129000e+000,-3.6689000e+000,3.7438000e+000,-2.7323000e+000,-3.7832000e+000,-8.9120000e-001,2.7046000e+000,4.3176000e+000,1.6330000e+000,-7.3310000e-001,-1.7864000e+000,7.8770000e-001,-7.4840000e-001,-4.2517000e+000,1.9060000e+000,-2.8498000e+000,-1.5533000e+000,4.6900000e-001,-2.4370000e+000,-1.4181000e+000,2.5617000e+000,2.5139000e+000,1.4288000e+000,-4.4704000e+000,1.7249000e+000,-3.3660000e+000,-3.3114000e+000,3.7927000e+000,4.3542000e+000,-4.1577000e+000,-2.0797000e+000,-4.1467000e+000,9.2950000e-001,-3.0638000e+000,3.2160000e-001,-3.3978000e+000,-1.0110000e+000,2.9571000e+000,-2.8415000e+000,3.9917000e+000,-8.3070000e-001,-2.4358000e+000,-4.4600000e-001,-4.3041000e+000,-1.6925000e+000,2.2654000e+000,3.4818000e+000,4.1474000e+000,-2.9950000e-001,-3.9534000e+000,4.3495000e+000,-1.9537000e+000,4.3235000e+000,1.8070000e+000,1.8991000e+000,4.3600000e-001,-2.7083000e+000,-2.8008000e+000,-3.8717000e+000,-2.8596000e+000,3.3523000e+000,1.0176000e+000,3.0750000e-001,1.0646000e+000,-2.1500000e+000,-2.8102000e+000,-4.4798000e+000,5.4910000e-001,-3.5628000e+000,-2.9116000e+000,4.2015000e+000,-4.0054000e+000,-1.7860000e+000\ 
+,1.7378000e+000,-4.4943000e+000,3.1910000e-001,-3.2414000e+000,1.2388000e+000,2.4878000e+000,-4.1218000e+000,-2.6083000e+000,-2.8100000e-001,3.4336000e+000,-9.1790000e-001,1.8598000e+000,3.0641000e+000,-3.6210000e-001,5.5760000e-001,-2.4441000e+000,-2.2366000e+000,-5.2660000e-001,4.3657000e+000,1.9230000e+000,4.1786000e+000,-4.0957000e+000,-3.1427000e+000,3.2389000e+000,1.5794000e+000,-4.2122000e+000,3.6131000e+000,-3.7048000e+000,-1.1209000e+000,-1.3074000e+000,3.3170000e-001,-7.8830000e-001,-4.1121000e+000,3.4491000e+000,8.0040000e-001,4.6400000e-001,-2.3190000e+000,-1.5837000e+000,2.6929000e+000,-1.0506000e+000,2.1719000e+000,-1.4665000e+000,2.9197000e+000,1.1862000e+000,2.7782000e+000,-1.8744000e+000,3.7281000e+000,-1.9880000e+000,1.7553000e+000,-2.2000000e-003,1.8093000e+000,-1.8750000e-001,1.0918000e+000,1.4909000e+000,-3.6830000e-001,-2.6889000e+000,3.4836000e+000,-3.8560000e+000,2.1800000e+000,2.3080000e-001,-3.9911000e+000,1.7939000e+000,2.1553000e+000,-6.7330000e-001,3.1100000e-002,3.1810000e-001,1.1508000e+000,4.0126000e+000,3.3660000e-001,1.6810000e-001,-2.7874000e+000,-2.8053000e+000,2.0216000e+000,-1.2160000e+000,-1.0253000e+000,-4.3512000e+000,1.0470000e+000,-4.2517000e+000,1.1389000e+000,1.9587000e+000,-2.1917000e+000,-3.0772000e+000,1.7597000e+000,-1.8843000e+000,6.5440000e-001,-3.1522000e+000,-2.9200000e+000,3.8666000e+000,-3.8922000e+000,8.7910000e-001,-3.5768000e+000,-8.5390000e-001,2.5625000e+000,4.2585000e+000,-4.4818000e+000,-3.8622000e+000,3.3085000e+000,-4.3596000e+000,8.1780000e-001,9.1200000e-001\ 
+,-1.5504000e+000,-4.3339000e+000,8.9780000e-001,-1.3839000e+000,4.2338000e+000,2.6283000e+000,-8.5900000e-001,-1.8942000e+000,1.7407000e+000,-1.2537000e+000,1.3161000e+000,-3.1738000e+000,2.8559000e+000,5.5400000e-002,4.4044000e+000,-9.9420000e-001,-4.2987000e+000,3.1463000e+000,2.7438000e+000,5.7790000e-001,-1.4390000e-001,1.2750000e-001,6.6570000e-001,-4.2310000e+000,-9.9960000e-001,-5.8970000e-001,-4.5240000e-001,1.2225000e+000,-2.2496000e+000,-8.3210000e-001,3.0274000e+000,-2.5083000e+000,-2.9410000e-001,4.0227000e+000,5.5750000e-001,-1.7082000e+000,-3.4266000e+000,-1.3186000e+000,-4.4435000e+000,3.8150000e+000,-2.4026000e+000,-1.9805000e+000,-3.9941000e+000,1.7521000e+000,-2.4486000e+000,-3.5640000e+000,2.8962000e+000,-4.0694000e+000,-2.1251000e+000,-6.6470000e-001,8.6770000e-001,1.6697000e+000,-3.1888000e+000,2.2569000e+000,-4.0996000e+000,-2.2851000e+000,3.3932000e+000,-2.1120000e+000,-4.2880000e-001,-2.5799000e+000,1.4335000e+000,1.8559000e+000,3.8649000e+000,-2.8086000e+000,-4.2600000e+000,3.8483000e+000,-1.1864000e+000,3.9864000e+000,3.1897000e+000,-3.5196000e+000,5.1700000e-002,-1.6459000e+000,6.2250000e-001,1.6034000e+000,-2.5510000e-001,2.3951000e+000,-2.9564000e+000,3.4064000e+000,-2.6920000e+000,-2.0972000e+000,-4.2272000e+000,-4.2909000e+000,-1.9785000e+000,2.6433000e+000,-2.4360000e+000,-3.6061000e+000,-3.3946000e+000,1.2882000e+000,1.3625000e+000,-2.2262000e+000,-5.2730000e-001,2.1000000e-002,-2.2951000e+000,3.5702000e+000,2.3550000e+000,-3.5396000e+000,4.3945000e+000,-7.5000000e-003,1.5150000e+000,-1.8038000e+000\ 
+,-2.7358000e+000,4.8530000e-001,2.8932000e+000,1.7750000e-001,1.5936000e+000,2.3591000e+000,3.4123000e+000,-3.1076000e+000,3.8696000e+000,2.3145000e+000,-3.7018000e+000,-4.3772000e+000,-2.1313000e+000,-1.1746000e+000,2.2956000e+000,1.3502000e+000,3.2284000e+000,1.6749000e+000,-4.3424000e+000,-1.0206000e+000,-4.3462000e+000,-1.7030000e+000,1.6973000e+000,-1.5694000e+000,-3.0295000e+000,4.1132000e+000,-2.1307000e+000,-3.9905000e+000,-4.0260000e-001,-1.6620000e+000,-2.4780000e-001,-3.2789000e+000,-1.6622000e+000,-1.2509000e+000,3.8842000e+000,-1.5414000e+000,3.8323000e+000,3.8892000e+000,9.1310000e-001,1.0501000e+000,9.8150000e-001,-2.0173000e+000,-1.4322000e+000,-2.4151000e+000,1.0046000e+000,-3.6765000e+000,-1.1788000e+000,2.7020000e-001,-2.8760000e-001,-9.5390000e-001,2.1957000e+000,-1.2470000e-001,-4.1052000e+000,3.5400000e+000,-4.3878000e+000,2.9056000e+000,4.2666000e+000,3.9671000e+000,3.5829000e+000,3.9816000e+000,-1.4905000e+000,-1.0256000e+000,2.7029000e+000,2.6330000e-001,-1.1798000e+000,2.3091000e+000,-2.5930000e-001,-7.0060000e-001,4.3161000e+000,3.7458000e+000,4.0370000e-001,-2.0975000e+000,4.3694000e+000,2.4717000e+000,4.1478000e+000,-3.5657000e+000,-1.0330000e-001,3.7534000e+000,1.9894000e+000,3.4790000e+000,-1.8541000e+000,3.8470000e+000,1.1231000e+000,4.4799000e+000,-3.9805000e+000,1.8823000e+000,-1.0147000e+000,4.1805000e+000,4.3017000e+000,1.4542000e+000,2.0347000e+000,2.7745000e+000,-1.6727000e+000,3.5495000e+000,-1.1870000e-001,-3.1237000e+000,-3.2825000e+000,-2.3430000e-001,1.4084000e+000,2.5355000e+000\ 
+,-1.8717000e+000,1.8285000e+000,-3.7085000e+000,2.1660000e+000,2.7980000e-001,-2.5635000e+000,-2.1321000e+000,-2.9868000e+000,-2.5391000e+000,-1.3354000e+000,-2.7000000e-001,-3.0712000e+000,-1.7479000e+000,4.2000000e-002,-3.8396000e+000,-3.3518000e+000,3.2361000e+000,3.9031000e+000,-3.8211000e+000,-9.0720000e-001,-4.2347000e+000,-3.2265000e+000,-1.3196000e+000,-4.0690000e-001,2.2933000e+000,-1.8828000e+000,-1.7421000e+000,-1.3624000e+000,2.2034000e+000,-1.5554000e+000,3.9148000e+000,3.0060000e+000,-2.7808000e+000,3.1430000e+000,-4.3288000e+000,3.1607000e+000,2.9885000e+000,3.4770000e+000,-3.2540000e+000,3.5659000e+000,-2.3289000e+000,-6.3320000e-001,-1.3463000e+000,1.4149000e+000,-3.1376000e+000,2.8234000e+000,-1.7904000e+000,1.4993000e+000,-2.8887000e+000,2.1925000e+000,-2.6683000e+000,-4.2745000e+000,3.9368000e+000,3.6760000e+000,3.2610000e-001,1.3481000e+000,-2.0259000e+000,-3.5200000e-001,4.5020000e-001,1.2222000e+000,-1.7436000e+000,3.0571000e+000,1.5054000e+000,5.9900000e-001,-3.7241000e+000,3.8567000e+000,-1.6305000e+000,-1.7861000e+000,-4.2061000e+000,3.1688000e+000,-2.5142000e+000,1.4365000e+000,-4.2584000e+000,-2.9095000e+000,-2.7201000e+000,-3.9780000e+000,-3.8197000e+000,3.2116000e+000,-2.0274000e+000,3.3790000e+000,2.0176000e+000,3.7376000e+000,-1.7626000e+000,3.1218000e+000,1.6559000e+000,3.3415000e+000,1.5242000e+000,1.7250000e+000,2.0883000e+000,3.4093000e+000,-1.7778000e+000,-2.4508000e+000,-1.7771000e+000,2.9953000e+000,-9.4000000e-003,-1.5103000e+000,4.3057000e+000,-1.2379000e+000,2.5305000e+000,-5.9280000e-001\ 
+,-3.9243000e+000,-2.7541000e+000,-2.9730000e-001,1.3850000e-001,-1.8557000e+000,-2.6559000e+000,-3.6582000e+000,-7.3010000e-001,-1.1095000e+000,3.3570000e+000,3.0578000e+000,-9.7650000e-001,3.6632000e+000,4.0256000e+000,-3.2321000e+000,-1.0683000e+000,-2.4430000e-001,-2.6259000e+000,-3.6503000e+000,-2.2150000e-001,3.5501000e+000,1.5428000e+000,-2.3871000e+000,3.3152000e+000,-3.1084000e+000,-6.1890000e-001,1.6179000e+000,-4.4262000e+000,3.7020000e-001,2.6440000e+000,-2.1320000e-001,-3.0420000e+000,3.9154000e+000,1.1186000e+000,-4.2283000e+000,-6.9900000e-002,-2.6670000e+000,-2.9021000e+000,8.3800000e-002,5.8690000e-001,4.4932000e+000,-2.5687000e+000,2.3839000e+000,-5.0720000e-001,-2.3617000e+000,-2.2267000e+000,-1.8803000e+000,4.0205000e+000,-4.3917000e+000,-1.3007000e+000,2.2299000e+000,-3.8293000e+000,-2.6600000e-002,-4.1760000e-001,3.4822000e+000,-4.3770000e+000,1.7739000e+000,-3.0710000e+000,-1.9767000e+000,2.6253000e+000,4.2554000e+000,-2.3100000e-002,9.2870000e-001,1.5838000e+000,-3.7755000e+000,-1.7040000e-001,-3.5334000e+000,1.5598000e+000,1.9987000e+000,8.3790000e-001,-4.3789000e+000,-9.0710000e-001,-2.3975000e+000,-3.0816000e+000,-2.6495000e+000,-1.1391000e+000,1.1763000e+000,-4.3940000e-001,-2.1198000e+000,-1.2030000e-001,3.7726000e+000,2.3534000e+000,-3.5806000e+000,4.4012000e+000,-2.5145000e+000,-3.7623000e+000,-2.4466000e+000,-1.0585000e+000,-3.6637000e+000,-3.1811000e+000,-2.3190000e-001,-3.0277000e+000,4.3686000e+000,-3.8050000e-001,5.2390000e-001,-3.1020000e+000,3.6013000e+000,-3.2767000e+000,8.7490000e-001,3.7456000e+000\ 
+,-4.4362000e+000,-1.0142000e+000,2.6180000e-001,2.5649000e+000,9.5110000e-001,-1.2742000e+000,-2.0990000e+000,2.0800000e-001,-2.2216000e+000,1.9331000e+000,-3.0628000e+000,2.5846000e+000,-3.4871000e+000,-1.4314000e+000,2.9744000e+000,-1.6853000e+000,1.7319000e+000,1.1900000e-002,-2.2333000e+000,-3.6649000e+000,6.9610000e-001,-1.3300000e+000,-4.0811000e+000,2.3000000e-001,1.0150000e+000,2.4349000e+000,2.6735000e+000,-4.0300000e+000,6.6320000e-001,-4.3627000e+000,3.9411000e+000,-2.8084000e+000,-1.2894000e+000,3.2805000e+000,8.7000000e-002,7.1320000e-001,1.8993000e+000,2.5630000e+000,-3.2035000e+000,1.8894000e+000,-1.0149000e+000,4.3570000e+000,2.6677000e+000,-3.4265000e+000,-2.6617000e+000,1.4306000e+000,-3.2000000e-001,3.4067000e+000,-6.8620000e-001,-2.6620000e-001,-2.2974000e+000,6.6110000e-001,-1.1390000e-001,-2.7930000e-001,-4.0534000e+000,-2.9895000e+000,2.4600000e+000,4.0680000e+000,6.1130000e-001,-2.1785000e+000,3.5749000e+000,4.2679000e+000,7.2050000e-001,-3.4930000e-001,-2.4683000e+000,2.8913000e+000,-2.7926000e+000,-4.4259000e+000,1.6203000e+000,-7.0780000e-001,6.6900000e-002,3.3265000e+000,-8.9650000e-001,3.8144000e+000,1.8644000e+000,-3.8740000e+000,-4.4329000e+000,-2.7533000e+000,1.5199000e+000,2.4951000e+000,-4.0784000e+000,-1.5782000e+000,2.4547000e+000,1.3106000e+000,-1.6517000e+000,-2.7385000e+000,-1.8217000e+000,2.5841000e+000,3.5956000e+000,-4.4051000e+000,1.9856000e+000,-1.0218000e+000,1.1335000e+000,3.2501000e+000,-1.1250000e-001,-3.3356000e+000,7.3370000e-001,8.1000000e-001,-3.4267000e+000,-1.7573000e+000\ 
+,-1.4198000e+000,3.6078000e+000,2.0771000e+000,2.7252000e+000,-3.3609000e+000,-2.0665000e+000,-3.8042000e+000,-3.9882000e+000,1.9212000e+000,2.7170000e+000,-3.6992000e+000,3.0584000e+000,3.1365000e+000,2.5994000e+000,-2.2861000e+000,3.2691000e+000,-4.3166000e+000,1.5112000e+000,-3.4301000e+000,-3.1168000e+000,-4.8610000e-001,-2.1520000e-001,-4.4595000e+000,-4.4585000e+000,2.7490000e+000,-3.6245000e+000,-4.0940000e+000,3.0953000e+000,1.8911000e+000,3.3585000e+000,1.4740000e-001,5.5140000e-001,-4.1774000e+000,2.6014000e+000,-1.8385000e+000,-3.6295000e+000,-2.1696000e+000,-8.1170000e-001,2.7015000e+000,-3.0258000e+000,-3.0333000e+000,-8.4910000e-001,-2.6945000e+000,2.5817000e+000,-3.9572000e+000,1.5827000e+000,4.4099000e+000,-1.1827000e+000,2.0778000e+000,-4.1023000e+000,-8.0790000e-001,-4.3809000e+000,-3.5968000e+000,3.3155000e+000,-2.2298000e+000,3.5023000e+000,-1.4810000e-001,1.9668000e+000,-2.9158000e+000,2.4857000e+000,4.1099000e+000,-4.1160000e+000,-1.6791000e+000,-3.2401000e+000,1.3775000e+000,9.0580000e-001,3.7376000e+000,2.2668000e+000,3.8236000e+000,-9.9970000e-001,1.4778000e+000,1.6980000e-001,-1.4464000e+000,7.0470000e-001,-2.8071000e+000,-8.7970000e-001,-1.8638000e+000,1.7548000e+000,-1.9996000e+000,1.5675000e+000,2.9245000e+000,-1.1090000e-001,-2.0504000e+000,-1.2071000e+000,-2.4366000e+000,-3.8301000e+000,4.1004000e+000,-4.1967000e+000,-9.3300000e-002,-1.1239000e+000,2.9965000e+000,9.2280000e-001,-1.4416000e+000,-9.5000000e-003,-3.3145000e+000,3.4467000e+000,-9.8670000e-001,2.5468000e+000,3.5220000e-001,3.4812000e+000\ 
+,-2.4369000e+000,-2.3015000e+000,1.1539000e+000,1.5121000e+000,-2.4170000e-001,2.0860000e-001,-1.4078000e+000,3.2117000e+000,1.6180000e-001,-2.6028000e+000,-5.2800000e-001,-1.7132000e+000,-4.4610000e-001,-3.6180000e+000,2.4411000e+000,1.5120000e+000,6.9000000e-002,-1.1301000e+000,-1.9017000e+000,-2.7420000e+000,-2.6130000e+000,-3.4728000e+000,2.7522000e+000,4.2066000e+000,-6.1710000e-001,-2.9867000e+000,3.4816000e+000,3.3065000e+000,-3.1555000e+000,2.4976000e+000,1.4455000e+000,-1.9139000e+000,-2.4110000e+000,-3.8611000e+000,4.4870000e+000,3.3597000e+000,1.9467000e+000,-3.0049000e+000,2.6107000e+000,3.7599000e+000,-2.3608000e+000,-4.4512000e+000,-2.7817000e+000,3.6587000e+000,-4.3400000e-002,1.3095000e+000,-4.2113000e+000,1.0998000e+000,1.5087000e+000,1.8103000e+000,2.7342000e+000,1.6980000e-001,4.1891000e+000,-4.2409000e+000,4.1945000e+000,-1.4423000e+000,2.0603000e+000,3.5687000e+000,2.2500000e-001,2.9241000e+000,3.2211000e+000,2.0296000e+000,2.3792000e+000,2.8299000e+000,1.1196000e+000,3.5512000e+000,-3.7236000e+000,4.1712000e+000,-7.0130000e-001,-3.1732000e+000,3.7700000e-002,4.3992000e+000,-1.0487000e+000,-3.2646000e+000,4.1234000e+000,-1.2268000e+000,3.0004000e+000,2.2522000e+000,-1.4002000e+000,2.0008000e+000,2.7492000e+000,3.2190000e-001,2.3040000e-001,1.9087000e+000,6.6720000e-001,9.4710000e-001,-1.3234000e+000,-1.1531000e+000,1.6404000e+000,3.7113000e+000,1.1623000e+000,2.8554000e+000,-2.2389000e+000,-1.4650000e+000,-3.6592000e+000,-7.3410000e-001,-2.8758000e+000,2.2515000e+000,1.5780000e+000,-3.7634000e+000}; + +const double f15_cec_bias = 120; + +const double job_lambda[10] = {1.0,1.0,10.0,10.0,5.0/60.0,5.0/60.0,5.0/32.0,5.0/32.0,5.0/100.0,5.0/100.0}; + +Eigen::VectorXd f15_o[10]; + +double f15_max[10]; +double bias[10] = {0,100,200,300,400,500,600,700,800,900}; + +#endif diff --git a/example/Numerics/PS-CMA-ES/f15_cec_fun.hpp b/example/Numerics/PS-CMA-ES/f15_cec_fun.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..5cd0578b9374ead42c1864cb892fe95fa38b5b2e
--- /dev/null
+++ b/example/Numerics/PS-CMA-ES/f15_cec_fun.hpp
@@ -0,0 +1,189 @@
+/*
+ * f15_cec_fun.hpp
+ *
+ *  Created on: Jan 14, 2018
+ *      Author: i-bird
+ */
+
+#ifndef EXAMPLE_NUMERICS_PS_CMA_ES_F15_CEC_FUN_HPP_
+#define EXAMPLE_NUMERICS_PS_CMA_ES_F15_CEC_FUN_HPP_
+
+#include "f15_cec_const.hpp"
+#include <limits>
+#include <math.h>
+
+/*! \brief Evaluate one of the basic functions composed by hybrid_composition
+ *
+ * The basic function is selected by funcnr
+ *
+ * * funcnr < 2          Rastrigin
+ * * 2 <= funcnr < 4     Weierstrass
+ * * 4 <= funcnr < 6     Griewank
+ * * 6 <= funcnr < 8     Ackley
+ * * 8 <= funcnr <= 10   Sphere
+ *
+ * \tparam dim number of components of vars used by the component-wise functions
+ *
+ * \param funcnr id of the basic function
+ * \param vars point where the function is evaluated (it must have at least dim components)
+ * \param res value of the function. It is set to NaN when funcnr is bigger than 10,
+ *            so that an invalid id is never silently turned into an indeterminate value
+ *
+ */
+template<unsigned int dim>
+void Job15(int funcnr,Eigen::VectorXd & vars,double & res)
+{
+	if (funcnr < 2)
+	{
+		// rastrigin
+		double sum = 10.0 * dim;
+		for (unsigned int i = 0 ; i < dim ; i++)
+		{
+			sum += vars(i)*vars(i);
+			sum -= 10.0*cos(2*M_PI*vars(i));
+		}
+
+		res = sum;
+	}
+	else if (funcnr < 4)
+	{
+		// weierstrass
+		const int Kmax = 20;
+		const double a_c = 0.5;
+		const double b_c = 3.0;
+
+		double sum1 = 0.0;
+		double sum2 = 0.0;
+
+		for (unsigned int i = 0 ; i < dim ; i++)
+		{
+			// a_k = a_c^j and b_k = b_c^j are accumulated while j grows
+			double a_k = 1.0;
+			double b_k = 1.0;
+			for (int j = 0 ; j <= Kmax ; j++, a_k *= a_c, b_k *= b_c)
+			{sum1 = sum1 + a_k * cos((M_PI)*2.0 * b_k * (vars(i)+0.5));}
+		}
+
+		// this term does not depend on the point
+		double a_k = 1.0;
+		double b_k = 1.0;
+		for (int j = 0 ; j <= Kmax ; j++, a_k *= a_c, b_k *= b_c)
+		{sum2 = sum2 + a_k * cos((M_PI)*2.0 * b_k * (0.5));}
+
+		res = sum1 - sum2*dim;
+	}
+	else if (funcnr < 6)
+	{
+		// griewank
+		double prod = 1;
+		double sum = 0.0;
+		for (unsigned int i = 0 ; i < dim ; i++)
+		{
+			sum = sum + (vars(i)*vars(i))/4000.0;
+			prod = prod * cos(vars(i)/(sqrt(double(i+1))));
+		}
+
+		res = sum-prod+1;
+	}
+	else if (funcnr < 8)
+	{
+		// ackley
+		double e1 = 0.0;
+		double e2 = 0.0;
+		for (unsigned int i = 0 ; i < dim ; i++)
+		{
+			e1 = e1 + vars(i)*vars(i);
+			e2 = e2 + cos(2.0*M_PI*vars(i));
+		}
+
+		res = exp(1.0) + 20.0 - 20*exp(-0.2*sqrt(e1/dim));
+		res = res - exp(e2/dim);
+	}
+	else if (funcnr <= 10)
+	{
+		// sphere (it uses all the components of vars)
+		res = vars.squaredNorm();
+	}
+	else
+	{
+		// unknown function id
+		res = std::numeric_limits<double>::quiet_NaN();
+	}
+}
+
+/*! \brief Evaluate the hybrid composition function in the point vars
+ *
+ * The ten basic functions are evaluated on the point shifted by f15_o[i] and
+ * scaled by job_lambda[i], normalized by f15_max[i] and summed with weights
+ * that decay with the distance of the point from f15_o[i].
+ *
+ * f15_o and f15_max are filled by prepare_f15<dim>(), that for this reason has
+ * to be called before this function.
+ *
+ * \tparam dim dimensionality of the function
+ *
+ * \param vars point where the function is evaluated
+ *
+ * \return the value of the function, +infinity if one of the first dim
+ *         components of vars is outside [-5.0,5.0]
+ *
+ */
+template<unsigned int dim>
+double hybrid_composition(Eigen::VectorXd & vars)
+{
+	// number of basic functions that are composed
+	constexpr int n_fun = 10;
+
+	// outside the search domain nothing has to be evaluated
+	for (unsigned int i = 0 ; i < dim ; i++)
+	{
+		if (vars(i) < -5.0 || vars(i) > 5.0)
+		{return std::numeric_limits<double>::infinity();}
+	}
+
+	Eigen::VectorXd job_z[n_fun];
+	double job_w[n_fun];
+
+	// get the raw weights
+	double wMax = - std::numeric_limits<double>::max();
+	for (int i = 0 ; i < n_fun ; i++)
+	{
+		//Shift the Input
+		job_z[i] = vars - f15_o[i];
+
+		job_w[i] = exp(-1.0 * job_z[i].squaredNorm() / (2.0 * dim));
+
+		if (wMax < job_w[i])
+		{wMax = job_w[i];}
+	}
+
+	// Modify the weights: all the weights except the biggest one are
+	// multiplied by 1 - wMax^10
+	double wMaxPow = 1.0;
+	for (int i = 0 ; i < 10 ; i++)
+	{wMaxPow *= wMax;}
+	const double w1mMaxPow = 1.0 - wMaxPow;
+
+	double wSum = 0.0;
+	for (int i = 0 ; i < n_fun ; i++)
+	{
+		if (job_w[i] != wMax)
+		{job_w[i] = job_w[i] * w1mMaxPow;}
+
+		wSum = wSum + job_w[i];
+	}
+
+	// Normalize the weights
+	for (int i = 0 ; i < n_fun ; i++)
+	{job_w[i] /= wSum;}
+
+	double sumF = 0.0;
+	for (int i = 0 ; i < n_fun ; i++)
+	{
+		job_z[i] = job_z[i] / job_lambda[i];
+
+		//calling the basic functions
+		double t_res;
+		Job15<dim>(i,job_z[i],t_res);
+
+		sumF = sumF + job_w[i] * (2000.0*t_res/f15_max[i] + bias[i]);
+	}
+
+	// f15_cec_bias is the constant offset of the function (120)
+	return sumF + f15_cec_bias;
+}
+
+/*! \brief Fill f15_o and f15_max for the dimensionality dim
+ *
+ * For each basic function j the shift f15_o[j] is loaded from f15_const[j] and
+ * f15_max[j] is set to the absolute value of the basic function evaluated in
+ * the point that has all the components equal to 5.0 / job_lambda[j]
+ *
+ * \tparam dim dimensionality of the function
+ *
+ */
+template<unsigned int dim>
+void prepare_f15()
+{
+	// number of basic functions that are composed
+	constexpr int n_fun = 10;
+
+	for (int j = 0 ; j < n_fun ; j++)
+	{
+		Eigen::VectorXd fmp(dim);
+		f15_o[j].resize(dim);
+
+		// load f15_o and build the point where the maximum is estimated
+		for (unsigned int i = 0 ; i < dim ; i++)
+		{
+			f15_o[j](i) = f15_const[j][i];
+			fmp(i) = 5.0 / job_lambda[j];
+		}
+
+		double result;
+		Job15<dim>(j,fmp,result);
+
+		f15_max[j] = fabs(result);
+	}
+}
+
+#endif /* EXAMPLE_NUMERICS_PS_CMA_ES_F15_CEC_FUN_HPP_ */
diff --git a/example/Numerics/PS-CMA-ES/main.cpp b/example/Numerics/PS-CMA-ES/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b5fca8efe1f748fd8460cc31007f0b0d72fe939
--- /dev/null
+++ 
b/example/Numerics/PS-CMA-ES/main.cpp @@ -0,0 +1,1169 @@ +/*! + * + * \page PS_CMA_ES Particle swarm CMA-ES Evolution strategy + * + * + * [TOC] + * + * # Optimization {#Opti_cma_es} + * + * + * In this example we show how to code PS-CMA-ES. This is just a simple variation of the + * CMA-ES, where you have multiple CMA-ES running. The best solution across them is + * used to produce a drift velocity toward that point. + * + * ## Introduction {#ps_cme_es} + * + * In this example we try to find the global optimum of a function. In particular we are + * using the function F15 from the CEC 2005 benchmark test, to validate that PS-CMA-ES works. + * This example contains multiple files: + * + * * f15_cec_const.hpp definitions of constants for the F15 function + * * f15_cec_fun.hpp the function itself + * + * The function is quite complicated and for reference please refer to the function + * F15 "Hybrid Composition" in the CEC 2005 test. The function can be called with + * hybrid_composition<dim>(x) where dim is the dimensionality and x is the point + * where the function is evaluated. The dimensionality can go from 1 to 50. + * + * Considering a function \f$ f \f$ from \f$ \mathbb{R}^{dim} \f$ to \f$ \mathbb{R} \f$, + * the algorithm uses a set of particles to find in parallel the global optimum of the function. + * Rather than trying to find the global optimum by + * sampling points randomly in the space, the algorithm uses a set of particles, each of them having a Gaussian + * sampling distribution \f$ e^{\sigma \cdot x^tCx} \f$ with C a \f$ dim \cdot dim \f$ matrix. + * At each step, for each particle p, **lambda** points are sampled using the sampling + * distribution centered on the particle position. The covariance matrix and sigma are subsequently + * adjusted to favor sampling around the best sampled points. 
In order to do this the algorithm + * needs the eigenvalue decomposition \f$ C = B D^{2} B^{t} \f$ where \f$ D \f$ is the diagonal + * matrix of the square roots of the eigenvalues and \f$ B \f$ is the matrix of the eigenvectors. In order to reduce or increase + * the sampling area, sigma is used instead. The algorithm uses the vector **path_s** to + * detect stagnation of the particle movement, and uses **path_c** (a transformed version of **path_s**) + * to refine the sampling covariance matrix from the fact that the particle is "moving" toward that + * direction. PS-CMA-ES is just a variation in which every **N_pso** CMA-ES steps the CMA-ES + * sampling distribution and position are biased toward the best point found across all independent + * CMA-ES. + * + * Explaining the CMA-ES algorithm in detail is beyond the scope of this tutorial example. + * We will briefly go through the main steps. For a full reference of the CMA-ES + * algorithm please refer to <a href="https://arxiv.org/abs/1604.00772">this paper</a>. + * For PS-CMA-ES refer to <a href="http://mosaic.mpi-cbg.de/docs/Mueller2009a.pdf">this paper</a>. + * + * + * ## Inclusions {#inclusions_and_constants} + * + * In this example we use a set of particles so we will use **vector_dist**, and we will use + * Eigen dense matrices. Because the standard dense matrices are not compatible with + * the vector_dist we will use **EMatrix**, which is a simple wrapper around Eigen::Matrix + * but compatible with vector_dist. Because EMatrix objects are compatible with all the + * Eigen value functions we can use them in all Eigen functions. For the CMA-ES algorithm + * we also need the eigenvalue/eigenvector decomposition, and Jacobi rotations for the + * particle-swarm part. + * + * \snippet Numerics/PS-CMA-ES/main.cpp ps_cma_es_inclusion + * + * PS-CMA-ES requires several properties to be stored on the particles; some have already been + * explained. Here we explain the others. 
+ * + * * **Zeta** contains the lambda sampled points (before applying the covariance matrix transformation) + * so it contains points sampled from a Gaussian of sigma=1 centered at zero + * + * * **ord** Contains the sequence that lists the lambda generated points in order from the best to + * the worst + * + * * **stop** A flag that indicates that the CMA-ES reached some stop criterion + * + * * **fithist** It contains historical information about the particles, used to penalize them in case they + * go out of the boundary. It is taken 1:1 from cmaes.m (production version) + * <a href="https://www.lri.fr/~hansen/cmaes_inmatlab.html">this paper</a> (or Google it) + * + * * **weight** Same concept as fithist: other information used to penalize particles going out of + * the boundary + * + * * **validfit** Same concept as fithist: other information used to penalize particles going out of + * the boundary + * + * * **xold** It contains the previous position of the particle, used in several calculations + * + * * **last_restart** When the CMA-ES sigma becomes very small the CMA-ES has converged. At this point + * we can do two things: one is to stop the CMA-ES, the other is to restart it to explore + * the space better. In case it restarts, this parameter indicates at which iteration the + * last restart happened. + * + * * **iniphase** Same concept as fithist: other information used to penalize particles going out of + * the boundary + * + * * **xmean_st** This contains the new position of the particle; it will be stored as the particle position + * at the end of the CMA-ES step + * + * * **meanz_st** This contains the new position of the particle in a space where we do not apply the + * covariance transformation. (In practice it is a weighted sum of the Zeta sample points) + * + * \snippet Numerics/PS-CMA-ES/main.cpp def_part_set + * + * ## Parameters {#ps_cma_par} + * + * CMA-ES and further PS-CMA-ES require some parameters in order to work. 
Refer to the + * papers above for a full explanation; here is a short one + * + * * **dim** Dimensionality of the test function + * + * * **lambda** number of sample points taken at each iteration by CMA-ES; + * it is suggested to use \f$ 4+floor(3*log(dim)) \f$ + * + * * **mu** only the mu best points are considered to adapt the covariance matrix + * + * * **psoWeight** How much the pso step biases the particle positions + * + * * **N_pso** Number of CMA-ES steps before doing a PSO step (200 gives the CMA-ES the possibility + * to explore the neighborhood and narrow down at least a funnel) + * + * * **stopTolX** stop criterion for CMA-ES. When the sampling area is small enough + * stop + * + * * **stopTolUpX** stop criterion if the sampling area becomes too big + * + * * **restart_cma** If the CMA-ES reaches a stop criterion, reinitialize and restart + * + * * **hist_size** size of the array fithist (default should be mainly fine) + * + */ + +//! [ps_cma_es_inclusion] + +#define EIGEN_USE_LAPACKE +#include "Vector/vector_dist.hpp" +#include "DMatrix/EMatrix.hpp" +#include <Eigen/Eigenvalues> +#include <Eigen/Jacobi> +#include <limits> +#include "Vector/vector_dist.hpp" +#include <f15_cec_fun.hpp> +#include <boost/math/special_functions/sign.hpp> + +//! [ps_cma_es_inclusion] + +//! [parameters] + +// PARAMETERS +constexpr int dim = 10; +// when you set dim set also lambda to 4+std::floor(3*log(dim)) +constexpr int lambda = 7; +constexpr int mu = lambda/2; +double psoWeight = 0.7; +// number of CMA-ES steps before a PSO step +int N_pso = 200; +double stopTolX = 2e-11; +double stopTolUpX = 2000.0; +int restart_cma = 1; +size_t max_fun_eval = 30000000; +constexpr int hist_size = 21; + +// Convenient global variables (their value is set later) +double mu_eff = 1.0; +double cs = 1.0; +double cc = 1.0; +double ccov = 1.0; +double chiN; +double d_amps = 1.0; +double stop_fitness = 1.0; +int eigeneval = 0; +double t_c = 0.1; +double b = 0.1; + +//! [parameters] + +//! 
[def_part_set]
+
+//////////// definitions of the particle set
+
+// Each constant is the position, inside the aggregate of particle_type,
+// of the property with the same name
+constexpr int sigma = 0;
+constexpr int Cov_m = 1;
+constexpr int B = 2;
+constexpr int D = 3;
+constexpr int Zeta = 4;
+constexpr int path_s = 5;
+constexpr int path_c = 6;
+constexpr int ord = 7;
+constexpr int stop = 8;
+constexpr int fithist = 9;
+constexpr int weight = 10;
+constexpr int validfit = 11;
+constexpr int xold = 12;
+constexpr int last_restart = 13;
+constexpr int iniphase = 14;
+constexpr int xmean_st = 15;
+constexpr int meanz_st = 16;
+
+typedef vector_dist<dim,double, aggregate<double,                                       // sigma
+										 Eigen::MatrixXd,                              // Cov_m
+										 Eigen::MatrixXd,                              // B
+										 Eigen::DiagonalMatrix<double,Eigen::Dynamic>, // D
+										 Eigen::VectorXd[lambda],                      // Zeta
+										 Eigen::VectorXd,                              // path_s
+										 Eigen::VectorXd,                              // path_c
+										 int[lambda],                                  // ord
+										 int,                                          // stop
+										 double [hist_size],                           // fithist
+										 double [dim],                                 // weight
+										 double,                                       // validfit
+										 Eigen::VectorXd,                              // xold
+										 int,                                          // last_restart
+										 bool,                                         // iniphase
+										 Eigen::VectorXd,                              // xmean_st
+										 Eigen::VectorXd> > particle_type;             // meanz_st
+
+//! [def_part_set]
+
+
+/*! \brief Generate a normally distributed random number with the Box-Muller transform
+ *
+ * Every transform produces two numbers: the first one is returned immediately,
+ * the second one is cached (per thread) and returned by the next call.
+ *
+ * \param mu mean of the distribution
+ * \param sigma standard deviation of the distribution
+ *
+ * \return the random number
+ *
+ */
+double generateGaussianNoise(double mu, double sigma)
+{
+	static const double epsilon = std::numeric_limits<double>::min();
+	static const double two_pi = 2.0*3.14159265358979323846;
+
+	// second number produced by the last transform
+	thread_local double z1;
+	// true when z1 has just been produced and must be returned by the next call
+	thread_local bool generate = false;
+	generate = !generate;
+
+	if (!generate)
+	{return z1 * sigma + mu;}
+
+	// u2 is the argument of the logarithm, so it is the uniform number that
+	// must be strictly positive, otherwise log(0) produces a non-finite sample
+	double u1, u2;
+	do
+	{
+		u1 = rand() * (1.0 / RAND_MAX);
+		u2 = rand() * (1.0 / RAND_MAX);
+	}
+	while ( u2 <= epsilon );
+
+	const double radius = sqrt(-2.0 * log(u2));
+	const double z0 = radius * cos(two_pi * u1);
+	z1 = radius * sin(two_pi * u1);
+
+	return z0 * sigma + mu;
+}
+
+/*! \brief Generate a vector of dim independent samples of a Gaussian with mean 0 and sigma 1
+ *
+ * \tparam dim size of the vector
+ *
+ * \return the vector of samples
+ *
+ */
+template<unsigned int dim>
+Eigen::VectorXd generateGaussianVector()
+{
+	Eigen::VectorXd tmp(dim);
+
+	for (unsigned int i = 0 ; i < dim ; i++)
+	{tmp(i) = generateGaussianNoise(0,1);}
+
+	return tmp;
+}
+
+/*! \brief Copy an array of dim elements into the first dim components of ev
+ *
+ * \param f source array
+ * \param ev destination vector (it is not resized, it must have at least dim components)
+ *
+ */
+template<unsigned int dim>
+void fill_vector(double (& f)[dim], Eigen::VectorXd & ev)
+{
+	for (unsigned int i = 0 ; i < dim ; i++)
+	{ev(i) = f[i];}
+}
+
+/*! \brief Fill all the components of ev reading them from f
+ *
+ * \param f source buffer, ev.size() elements are read from it
+ * \param ev destination vector (it is not resized)
+ *
+ */
+void fill_vector(const double * f, Eigen::VectorXd & ev)
+{
+	// ev.size() is signed, convert it once to avoid a signed/unsigned comparison
+	const size_t n = static_cast<size_t>(ev.size());
+
+	for (size_t i = 0 ; i < n ; i++)
+	{ev(i) = f[i];}
+}
+
+struct fun_index +{ + double f; + int id; + + bool operator<(const fun_index & tmp) + { + return f < tmp.f; + } +}; + +double wm[mu]; + +void init_weight() +{ + for (size_t i = 0 ; i < mu ; i++) + {wm[i] = log(double(mu)+1.0) - log(double(i)+1.0);} + + double tot = 0.0; + + for (size_t i = 0 ; i < mu ; i++) + {tot += wm[i];} + + double sum = 0.0; + double sum2 = 0.0; + + for (size_t i = 0 ; i < mu ; i++) + { + wm[i] /= tot; + sum += wm[i]; + sum2 += wm[i]*wm[i]; + } + + // also set mu_eff + mu_eff=sum*sum/sum2; + +} + +double weight_sample(int i) +{ + return wm[i]; +} + + +void create_rotmat(Eigen::VectorXd & S,Eigen::VectorXd & T, Eigen::MatrixXd & R) +{ + Eigen::VectorXd S_work(dim); + Eigen::VectorXd T_work(dim); + Eigen::VectorXd S_sup(dim); + Eigen::VectorXd T_sup(dim); + + Eigen::MatrixXd R_tar(dim,dim); + Eigen::MatrixXd R_tmp(dim,dim); + Eigen::MatrixXd R_sup(dim,dim); + double G_S,G_C; + Eigen::MatrixXd S_tmp(2,2); + Eigen::MatrixXd T_tmp(2,2); + int p,q,i; + + S_work = S; + T_work = T; + + R.setIdentity(); + R_tar = R; + R_tmp = R; + + for (p = dim - 2; p >= 0 ; p -= 1) + { + + for (q = dim - 1 ; q >= p+1 ; q-= 1) + { + T_tmp(0) = T_work(p); + T_tmp(1) = T_work(q); + S_tmp(0) = S_work(p); + S_tmp(1) = S_work(q); + + // Perform Givens Rotation on start vector + + Eigen::JacobiRotation<double> G; + double z; + G.makeGivens(S_tmp(0), S_tmp(1),&z); + + // Check direction of rotation + double sign = 1.0; + if (z < 0.0) + {sign = -1.0;} + + // Build a Rotation Matrix out of G_C and G_S + R_tmp.setIdentity(); + R_tmp(p,p) = sign*G.c(); + R_tmp(q,q) = sign*G.c(); + R_tmp(p,q) = sign*-G.s(); + R_tmp(q,p) = sign*G.s(); + + // Rotate start vector and update R + // S_work = R_tmp*S_work + + S_work = R_tmp*S_work; + // R = R_tmp*R + R = R_tmp*R; + + // Perform Givens Rotation on target vector + + G.makeGivens(T_tmp(0), T_tmp(1),&z); + + sign = 1.0; + if (z < 0.0) + {sign = -1.0;} + + R_tmp.setIdentity(); + R_tmp(p,p) = sign*G.c(); + R_tmp(q,q) = sign*G.c(); + 
R_tmp(p,q) = sign*-G.s(); + R_tmp(q,p) = sign*G.s(); + + // Rotate target vector and update R_tar + + T_work = R_tmp*T_work; + R_tar = R_tmp*R_tar; + } + } + + R = R_tar.transpose()*R; + + // Check the rotation + + Eigen::VectorXd Check(dim); + Check = R*S; +} + +void updatePso(openfpm::vector<double> & best_sol, + double sigma, + Eigen::VectorXd & xmean, + Eigen::VectorXd & xold, + Eigen::MatrixXd & B, + Eigen::DiagonalMatrix<double,Eigen::Dynamic> & D, + Eigen::MatrixXd & C_pso) +{ + Eigen::VectorXd best_sol_ei(dim); + + double bias_weight = psoWeight; + fill_vector(&best_sol.get(0),best_sol_ei); + Eigen::VectorXd gb_vec = best_sol_ei-xmean; + double gb_vec_length = sqrt(gb_vec.transpose() * gb_vec); + Eigen::VectorXd b_main = B.col(dim-1); + Eigen::VectorXd bias(dim); + bias.setZero(); + + // Rotation Matrix + Eigen::MatrixXd R(dim,dim); + + if (gb_vec_length > 0.0) + { + if(sigma < gb_vec_length) + { + if(sigma/gb_vec_length <= t_c*gb_vec_length) + {bias = 0.5*gb_vec;} + else + {bias = sigma*gb_vec/gb_vec_length;} + } + else + {bias.setZero();} + } + + xmean = xmean + bias; + + if (psoWeight < 1.0) + { + Eigen::MatrixXd B_rot(dim,dim); + Eigen::DiagonalMatrix<double,Eigen::Dynamic> D_square(dim); + + Eigen::VectorXd gb_vec_old = best_sol_ei - xold; + create_rotmat(b_main,gb_vec_old,R); + for (size_t i = 0 ; i < dim ; i++) + {B_rot.col(i) = R*B.col(i);} + + for (size_t i = 0 ; i < dim ; i++) + {D_square.diagonal()[i] = D.diagonal()[i] * D.diagonal()[i];} + C_pso = B_rot * D_square * B_rot.transpose(); + + Eigen::MatrixXd trUp = C_pso.triangularView<Eigen::Upper>(); + Eigen::MatrixXd trDw = C_pso.triangularView<Eigen::StrictlyUpper>(); + C_pso = trUp + trDw.transpose(); + } +} + +void broadcast_best_solution(particle_type & vd, + openfpm::vector<double> & best_sol, + double & best, + double best_sample, + openfpm::vector<double> & best_sample_sol) +{ + best_sol.resize(dim); + auto & v_cl = create_vcluster(); + + double best_old = best_sample; + 
v_cl.min(best_sample); + v_cl.execute(); + + // The old solution remain the best + if (best < best_sample) + {return;} + + best = best_sample; + + size_t rank; + if (best_old == best_sample) + { + rank = v_cl.getProcessUnitID(); + + // we own the minimum and we decide who broad cast + v_cl.min(rank); + v_cl.execute(); + + if (rank == v_cl.getProcessUnitID()) + { + for (size_t i = 0 ; i < dim ; i++) + {best_sol.get(i) = best_sample_sol.get(i);} + } + } + else + { + rank = std::numeric_limits<size_t>::max(); + + // we do not own decide who broad cast + v_cl.min(rank); + v_cl.execute(); + } + + // now we broad cast the best solution across processors + + v_cl.Bcast(best_sol,rank); + v_cl.execute(); +} + +void cmaes_myprctile(openfpm::vector<fun_index> & f_obj, double (& perc)[2], double (& res)[2]) +{ + double sar[lambda]; + double availablepercentiles[lambda]; + int idx[hist_size]; + int i,k; + + for (size_t i = 0 ; i < lambda ; i++) + { + availablepercentiles[i] = 0.0; + sar[i] = f_obj.get(i).f; + } + std::sort(&sar[0],&sar[lambda]); + + for (size_t i = 0 ; i < 2 ; i++) + { + if (perc[i] <= (100.0*0.5/lambda)) + {res[i] = sar[0];} + else if (perc[i] >= (100.0*(lambda-0.5)/lambda) ) + {res[i] = sar[lambda-1];} + else + { + for (size_t j = 0 ; j < lambda ; j++) + {availablepercentiles[j] = 100.0 * ((double(j)+1.0)-0.5) / lambda;} + + for (k = 0 ; k < lambda ; k++) + {if(availablepercentiles[k] >= perc[i]) {break;}} + k-=1; + + res[i] = sar[k] + (sar[k+1]-sar[k]) * (perc[i] + -availablepercentiles[k]) / (availablepercentiles[k+1] - availablepercentiles[k]); + } + } +} + +double maxval(double (& buf)[hist_size], bool (& mask)[hist_size]) +{ + double max = 0.0; + for (size_t i = 0 ; i < hist_size ; i++) + { + if (buf[i] > max && mask[i] == true) + {max = buf[i];} + } + + return max; +} + +double minval(double (& buf)[hist_size], bool (& mask)[hist_size]) +{ + double min = std::numeric_limits<double>::max(); + for (size_t i = 0 ; i < hist_size ; i++) + { + if (buf[i] < 
min && mask[i] == true) + {min = buf[i];} + } + + return min; +} + +void cmaes_intobounds(Eigen::VectorXd & x, Eigen::VectorXd & xout,bool (& idx)[dim], bool & idx_any) +{ + idx_any = false; + for (size_t i = 0; i < dim ; i++) + { + if(x(i) < -5.0) + { + xout(i) = -5.0; + idx[i] = true; + idx_any = true; + } + else if (x(i) > 5.0) + { + xout(i) = 5.0; + idx[i] = true; + idx_any = true; + } + else + { + xout(i) = x(i); + idx[i] = false; + } + } +} + +void cmaes_handlebounds(openfpm::vector<fun_index> & f_obj, + double sigma, + double & validfit, + Eigen::VectorXd (& arxvalid)[lambda], + Eigen::VectorXd (& arx)[lambda], + Eigen::MatrixXd & C, + Eigen::VectorXd & xmean, + Eigen::VectorXd & xold, + double (& weight)[dim], + double (& fithist)[hist_size], + bool & iniphase, + double & validfitval, + double mu_eff, + int step, + int last_restart) +{ + double val[2]; + double value; + double diag[dim]; + double meandiag; + int i,k,maxI; + bool mask[hist_size]; + bool idx[dim]; + Eigen::VectorXd tx(dim); + int dfitidx[hist_size]; + double dfitsort[hist_size]; + double prct[2] = {25.0,75.0}; + bool idx_any; + + for (size_t i = 0 ; i < hist_size ; i++) + { + dfitsort[i] = 0.0; + dfitidx[i] = 0; + + if (fithist[i] > 0.0) + {mask[i] = true;} + else + {mask[i] = false;} + } + + for (size_t i = 0 ; i < dim ; i++) + {diag[i] = C(i,i);} + + maxI = 0; + + meandiag = C.trace()/dim; + + cmaes_myprctile(f_obj, prct, val); + value = (val[1] - val[0]) / dim / meandiag / (sigma*sigma); + + if (value >= std::numeric_limits<double>::max()) + { + auto & v_cl = create_vcluster(); + std::cout << "Process " << v_cl.rank() << " warning: Non-finite fitness range" << std::endl; + value = maxval(fithist,mask); + } + else if(value == 0.0) + { + value = minval(fithist,mask); + } + else if (validfit == 0.0) + { + for (size_t i = 0 ; i < hist_size ; i++) + {fithist[i] = -1.0;} + validfit = 1; + } + + for (size_t i = 0; i < hist_size ; i++) + { + if(fithist[i] < 0.0) + { + fithist[i] = value; + maxI = 
i; + break; + } + else if(i == hist_size-1) + { + for (size_t k = 0 ; k < hist_size-1 ; k++) + {fithist[k] = fithist[k+1];} + fithist[i] = value; + maxI = i; + } + } + + cmaes_intobounds(xmean,tx,idx,idx_any); + + if (iniphase) + { + if (idx_any) + { + if(maxI == 0) + {value = fithist[0];} + else + { + openfpm::vector<fun_index> fitsort(maxI+1); + for (size_t i = 0 ; i <= maxI; i++) + { + fitsort.get(i).f = fithist[i]; + fitsort.get(i).id = i; + } + + fitsort.sort(); + for (size_t k = 0; k <= maxI ; k++) + {fitsort.get(k).f = fithist[fitsort.get(k).id];} + + if ((maxI+1) % 2 == 0) + {value = (fitsort.get(maxI/2).f+fitsort.get(maxI/2+1).f)/2.0;} + else + {value = fitsort.get(maxI/2).f;} + } + for (size_t i = 0 ; i < dim ; i++) + { + diag[i] = diag[i]/meandiag; + weight[i] = 2.0002 * value / diag[i]; + } + if (validfitval == 1.0 && step-last_restart > 2) + { + iniphase = false; + } + } + } + + if(idx_any) + { + tx = xmean - tx; + for(size_t i = 0 ; i < dim ; i++) + { + idx[i] = (idx[i] && (fabs(tx(i)) > 3.0*std::max(1.0,sqrt(dim)/mu_eff) * sigma * sqrt(diag[i]))); + idx[i] = (idx[i] && (std::copysign(1.0,tx(i)) == std::copysign(1.0,(xmean(i)-xold(i)))) ); + } + for (size_t i = 0 ; i < dim ; i++) + { + if (idx[i] == true) + { + weight[i] = pow(1.2,(std::max(1.0,mu_eff/10.0/dim)))*weight[i]; + } + } + } + double arpenalty[lambda]; + for (size_t i = 0 ; i < lambda ; i++) + { + arpenalty[i] = 0.0; + for (size_t j = 0 ; j < dim ; j++) + { + arpenalty[i] += weight[j] * (arxvalid[i](j) - arx[i](j))*(arxvalid[i](j) - arx[i](j)); + } + f_obj.get(i).f += arpenalty[i]; + } +// fitness%sel = fitness%raw + bnd%arpenalty; +} + +double adjust_sigma(double sigma, Eigen::MatrixXd & C) +{ + for (size_t i = 0 ; i < dim ; i++) + { + if (sigma*sqrt(C(i,i)) > 5.0) + {sigma = 5.0/sqrt(C(i,i));} + } + + return sigma; +} + + +void cma_step(particle_type & vd, int step, double & best, + int & best_i, openfpm::vector<double> & best_sol, + size_t & fun_eval) +{ + size_t fe = 0; + 
Eigen::VectorXd xmean(dim); + Eigen::VectorXd mean_z(dim); + Eigen::VectorXd arxvalid[lambda]; + Eigen::VectorXd arx[lambda]; + + for (size_t i = 0 ; i < lambda ; i++) + { + arx[i].resize(dim); + arxvalid[i].resize(dim); + } + + double best_sample = std::numeric_limits<double>::max(); + openfpm::vector<double> best_sample_sol(dim); + + openfpm::vector<fun_index> f_obj(lambda); + + int counteval = step*lambda; + + auto it = vd.getDomainIterator(); + while (it.isNext()) + { + auto p = it.get(); + + if (vd.getProp<stop>(p) == true) + {++it;continue;} + + Eigen::VectorXd (& arz)[lambda] = vd.getProp<Zeta>(p); + + // fill the mean vector; + + fill_vector(vd.getPos(p),xmean); + + for (size_t j = 0 ; j < lambda ; j++) + { + vd.getProp<Zeta>(p)[j] = generateGaussianVector<dim>(); + arx[j] = xmean + vd.getProp<sigma>(p)*vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<Zeta>(p)[j]; + + // sample point has to be inside -5.0 and 5.0 + for (size_t i = 0 ; i < dim ; i++) + { + if (arx[j](i) < -5.0) + {arxvalid[j](i) = -5.0;} + else if (arx[j](i) > 5.0) + {arxvalid[j](i) = 5.0;} + else + {arxvalid[j](i) = arx[j](i);} + } + + f_obj.get(j).f = hybrid_composition<dim>(arxvalid[j]); + f_obj.get(j).id = j; + fe++; + + // Get the best ever + if (f_obj.get(j).f < best_sample) + { + best_sample = f_obj.get(j).f; + + // Copy the new mean as position of the particle + for (size_t i = 0 ; i < dim ; i++) + {best_sample_sol.get(i) = arxvalid[j](i);} + } + } + + // Add penalities for out of bound points + cmaes_handlebounds(f_obj,vd.getProp<sigma>(p), + vd.getProp<validfit>(p),arxvalid, + arx,vd.getProp<Cov_m>(p), + xmean,vd.getProp<xold>(p),vd.getProp<weight>(p), + vd.getProp<fithist>(p),vd.getProp<iniphase>(p), + vd.getProp<validfit>(p),mu_eff, + step,vd.getProp<last_restart>(p)); + + f_obj.sort(); + + for (size_t j = 0 ; j < lambda ; j++) + {vd.getProp<ord>(p)[j] = f_obj.get(j).id;} + + vd.getProp<xold>(p) = xmean; + + // Calculate weighted mean + + xmean.setZero(); + mean_z.setZero(); + for 
(size_t j = 0 ; j < mu ; j++) + { + xmean += weight_sample(j)*arx[vd.getProp<ord>(p)[j]]; + mean_z += weight_sample(j)*vd.getProp<Zeta>(p)[vd.getProp<ord>(p)[j]]; + } + + vd.getProp<xmean_st>(p) = xmean; + vd.getProp<meanz_st>(p) = mean_z; + + ++it; + } + + // Find the best point across processors + broadcast_best_solution(vd,best_sol,best,best_sample,best_sample_sol); + + // bool calculate B and D + bool calc_bd = counteval - eigeneval > lambda/(ccov)/dim/10; + if (calc_bd == true) + {eigeneval = counteval;} + + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + if (vd.getProp<stop>(p) == true) + {++it2;continue;} + + xmean = vd.getProp<xmean_st>(p); + mean_z = vd.getProp<meanz_st>(p); + + vd.getProp<path_s>(p) = vd.getProp<path_s>(p)*(1.0 - cs) + sqrt(cs*(2.0-cs)*mu_eff)*vd.getProp<B>(p)*mean_z; + + double hsig = vd.getProp<path_s>(p).norm()/sqrt(1.0-pow((1.0-cs),(2.0*double((step-vd.getProp<last_restart>(p))))))/chiN < 1.4 + 2.0/(dim+1); + + vd.getProp<path_c>(p) = (1-cc)*vd.getProp<path_c>(p) + hsig * sqrt(cc*(2-cc)*mu_eff)*(vd.getProp<B>(p)*vd.getProp<D>(p)*mean_z); + + if (step % N_pso == 0) + { + Eigen::MatrixXd C_pso(dim,dim); + updatePso(best_sol,vd.getProp<sigma>(p),xmean,vd.getProp<xold>(p),vd.getProp<B>(p),vd.getProp<D>(p),C_pso); + + // Adapt covariance matrix C + vd.getProp<Cov_m>(p) = (1.0-ccov+(1.0-hsig)*ccov*cc*(2.0-cc)/mu_eff)*vd.getProp<Cov_m>(p) + + ccov*(1.0/mu_eff)*(vd.getProp<path_c>(p)*vd.getProp<path_c>(p).transpose()); + + for (size_t i = 0 ; i < mu ; i++) + {vd.getProp<Cov_m>(p) += ccov*(1.0-1.0/mu_eff)*(vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<Zeta>(p)[vd.getProp<ord>(p)[i]])*weight_sample(i)* + (vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<Zeta>(p)[vd.getProp<ord>(p)[i]]).transpose(); + } + + vd.getProp<Cov_m>(p) = psoWeight*vd.getProp<Cov_m>(p) + (1.0 - psoWeight)*C_pso; + } + else + { + // Adapt covariance matrix C + vd.getProp<Cov_m>(p) = 
(1.0-ccov+(1.0-hsig)*ccov*cc*(2.0-cc)/mu_eff)*vd.getProp<Cov_m>(p) + + ccov*(1.0/mu_eff)*(vd.getProp<path_c>(p)*vd.getProp<path_c>(p).transpose()); + + for (size_t i = 0 ; i < mu ; i++) + {vd.getProp<Cov_m>(p) += ccov*(1.0-1.0/mu_eff)*(vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<Zeta>(p)[vd.getProp<ord>(p)[i]])*weight_sample(i)* + (vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<Zeta>(p)[vd.getProp<ord>(p)[i]]).transpose(); + } + } + + // Numeric error + + double smaller = std::numeric_limits<double>::max(); + for (size_t i = 0 ; i < dim ; i++) + { + if (vd.getProp<sigma>(p)*sqrt(vd.getProp<D>(p).diagonal()[i]) > 5.0) + { + if (smaller > 5.0/sqrt(vd.getProp<D>(p).diagonal()[i])) + {smaller = 5.0/sqrt(vd.getProp<D>(p).diagonal()[i]);} + } + } + if (smaller != std::numeric_limits<double>::max()) + {vd.getProp<sigma>(p) = smaller;} + + //Adapt step-size sigma + vd.getProp<sigma>(p) = vd.getProp<sigma>(p)*exp((cs/d_amps)*(vd.getProp<path_s>(p).norm()/chiN - 1)); + + // Update B and D from C + + if (calc_bd) + { + Eigen::MatrixXd trUp = vd.getProp<Cov_m>(p).triangularView<Eigen::Upper>(); + Eigen::MatrixXd trDw = vd.getProp<Cov_m>(p).triangularView<Eigen::StrictlyUpper>(); + vd.getProp<Cov_m>(p) = trUp + trDw.transpose(); + + // Eigen decomposition + Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eig_solver; + + eig_solver.compute(vd.getProp<Cov_m>(p)); + + for (size_t i = 0 ; i < eig_solver.eigenvalues().size() ; i++) + {vd.getProp<D>(p).diagonal()[i] = sqrt(eig_solver.eigenvalues()[i]);} + vd.getProp<B>(p) = eig_solver.eigenvectors(); + + // Make first component always positive + for (size_t i = 0 ; i < dim ; i++) + { + if (vd.getProp<B>(p)(0,i) < 0) + {vd.getProp<B>(p).col(i) = - vd.getProp<B>(p).col(i);} + } + + Eigen::MatrixXd tmp = vd.getProp<B>(p).transpose(); + } + + // Copy the new mean as position of the particle + for (size_t i = 0 ; i < dim ; i++) + {vd.getPos(p)[i] = xmean(i);} + + vd.getProp<sigma>(p) = 
adjust_sigma(vd.getProp<sigma>(p),vd.getProp<Cov_m>(p)); + + // Stop conditions + bool stop_tol = true; + bool stop_tolX = true; + for (size_t i = 0 ; i < dim ; i++) + { + stop_tol &= (vd.getProp<sigma>(p)*std::max(fabs(vd.getProp<path_c>(p)(i)),sqrt(vd.getProp<Cov_m>(p)(i,i)))) < stopTolX; + stop_tolX &= vd.getProp<sigma>(p)*sqrt(vd.getProp<D>(p).diagonal()[i]) > stopTolUpX; + } + + vd.getProp<stop>(p) = stop_tol | stop_tolX; + + // Escape flat fitness, or better terminate? + if (f_obj.get(0).f == f_obj.get(std::ceil(0.7*lambda)).f ) + { + vd.getProp<sigma>(p) = vd.getProp<sigma>(p)*exp(0.2+cs/d_amps); + std::cout << "warning: flat fitness, consider reformulating the objective"; + + // Stop it + vd.getProp<stop>(p) = true; + } + + if (vd.getProp<stop>(p) == true) + {std::cout << "Stopped" << std::endl;} + + if (restart_cma && vd.getProp<stop>(p) == true) + { + std::cout << "------- Restart #" << std::endl; + + std::cout << "---------------------------------" << std::endl; + std::cout << "Best: " << best << " " << fun_eval << std::endl; + std::cout << "---------------------------------" << std::endl; + + vd.getProp<last_restart>(p) = step; + vd.getProp<xold>(p).setZero(); + + for (size_t i = 0 ; i < vd.getProp<D>(p).diagonal().size() ; i++) + {vd.getProp<D>(p).diagonal()[i] = 1.0;} + vd.getProp<B>(p).resize(dim,dim); + vd.getProp<B>(p).setIdentity(); + vd.getProp<Cov_m>(p) = vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<D>(p)*vd.getProp<B>(p); + vd.getProp<path_s>(p).resize(dim); + vd.getProp<path_s>(p).setZero(dim); + vd.getProp<path_c>(p).resize(dim); + vd.getProp<path_c>(p).setZero(dim); + vd.getProp<stop>(p) = false; + vd.getProp<iniphase>(p) = true; + vd.getProp<last_restart>(p) = 0; + vd.getProp<sigma>(p) = 2.0; + + // a different point in space + for (size_t i = 0 ; i < dim ; i++) + { + // we define x, assign a random position between 0.0 and 1.0 + vd.getPos(p)[i] = 10.0*(double)rand() / RAND_MAX - 5.0; + } + + // Initialize the bound history + + for (size_t 
i = 0 ; i < hist_size ; i++) + {vd.getProp<fithist>(p)[i] = -1.0;} + vd.getProp<fithist>(p)[0] = 1.0; + vd.getProp<validfit>(p) = 0.0; + } + + ++it2; + } + + auto & v_cl = create_vcluster(); + v_cl.sum(fe); + v_cl.execute(); + + fun_eval += fe; +} + + + +int main(int argc, char* argv[]) +{ + // initialize the library + openfpm_init(&argc,&argv); + + auto & v_cl = create_vcluster(); + + // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y + Box<dim,double> domain; + + for (size_t i = 0 ; i < dim ; i++) + { + domain.setLow(i,0.0); + domain.setHigh(i,1.0); + } + + // Here we define the boundary conditions of our problem + size_t bc[dim]; + for (size_t i = 0 ; i < dim ; i++) + {bc[i] = NON_PERIODIC;}; + + prepare_f15<dim>(); + + // extended boundary around the domain, and the processor domain + Ghost<dim,double> g(0.0); + + particle_type vd(16,domain,bc,g); + + // Initialize constants + + stop_fitness = 1e-10; + size_t stopeval = 1e3*dim*dim; + + // Strategy parameter setting: Selection + init_weight(); + + // Strategy parameter setting: Adaptation + cc = 4.0 / (dim+4.0); + cs = (mu_eff+2.0) / (double(dim)+mu_eff+3.0); + ccov = (1.0/mu_eff) * 2.0/((dim+1.41)*(dim+1.41)) + + (1.0 - 1.0/mu_eff)* std::min(1.0,(2.0*mu_eff-1.0)/((dim+2.0)*(dim+2.0) + mu_eff)); + d_amps = 1 + 2*std::max(0.0, sqrt((mu_eff-1.0)/(dim+1))-1) + cs; + + chiN = sqrt(dim)*(1.0-1.0/(4.0*dim)+1.0/(21.0*dim*dim)); + + //! 
\cond [assign position] \endcond + + + // initialize the srand + int seed = 24756*v_cl.rank()*v_cl.rank() + time(NULL); + srand(seed); + + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + for (size_t i = 0 ; i < dim ; i++) + { + // we define x, assign a random position between 0.0 and 1.0 + vd.getPos(p)[i] = 10.0*(double)rand() / RAND_MAX - 5.0; + } + + vd.getProp<sigma>(p) = 2.0; + + // Initialize the covariant Matrix,B and D to identity + + vd.getProp<D>(p).resize(dim); + for (size_t i = 0 ; i < vd.getProp<D>(p).diagonal().size() ; i++) + {vd.getProp<D>(p).diagonal()[i] = 1.0;} + vd.getProp<B>(p).resize(dim,dim); + vd.getProp<B>(p).setIdentity(); + vd.getProp<Cov_m>(p) = vd.getProp<B>(p)*vd.getProp<D>(p)*vd.getProp<D>(p)*vd.getProp<B>(p); + vd.getProp<path_s>(p).resize(dim); + vd.getProp<path_s>(p).setZero(dim); + vd.getProp<path_c>(p).resize(dim); + vd.getProp<path_c>(p).setZero(dim); + vd.getProp<stop>(p) = false; + vd.getProp<iniphase>(p) = true; + vd.getProp<last_restart>(p) = 0; + + // Initialize the bound history + + for (size_t i = 0 ; i < hist_size ; i++) + {vd.getProp<fithist>(p)[i] = -1.0;} + vd.getProp<fithist>(p)[0] = 1.0; + vd.getProp<validfit>(p) = 0.0; + + // next particle + ++it; + } + + if (v_cl.rank() == 0) + {std::cout << "Starting PS-CMA-ES" << std::endl;} + + double best = 0.0; + int best_i = 0; + + best = std::numeric_limits<double>::max(); + openfpm::vector<double> best_sol(dim); + // now do several iteration + + int stop_cond = 0; + size_t fun_eval = 0; + int i = 0; + while (fun_eval < max_fun_eval && best > 120.000001) + { + // sample offspring + cma_step(vd,i+1,best,best_i,best_sol,fun_eval); + + i++; + } + + if (v_cl.rank() == 0) + { + std::cout << "Best solution: " << best << " with " << fun_eval << std::endl; + std::cout << "at: " << std::endl; + + for (size_t i = 0 ; i < best_sol.size() ; i++) + { + std::cout << best_sol.get(i) << " "; + } + } + + openfpm_finalize(); + + //! 
\cond [finalize] \endcond + + /*! + * \page Vector_0_simple Vector 0 simple + * + * ## Full code ## {#code_e0_sim} + * + * \include Vector/0_simple/main.cpp + * + */ +} diff --git a/example/Numerics/Vortex_in_cell/Makefile b/example/Numerics/Vortex_in_cell/Makefile index 4767e9df8811a61d521b119b0d5e6016f3ddce76..9d1ae5f955fffa142fcb4032b4eddf53109dcd71 100644 --- a/example/Numerics/Vortex_in_cell/Makefile +++ b/example/Numerics/Vortex_in_cell/Makefile @@ -1,4 +1,4 @@ -include ../../../example.mk +include ../../example.mk CC=mpic++ diff --git a/example/Numerics/Vortex_in_cell/main_vic_petsc.cpp b/example/Numerics/Vortex_in_cell/main_vic_petsc.cpp index f9e1de78f5761012771a8a61c92facc075fbfd6f..2283fa4ac712af07030fc7f994226dc12f7ce55d 100644 --- a/example/Numerics/Vortex_in_cell/main_vic_petsc.cpp +++ b/example/Numerics/Vortex_in_cell/main_vic_petsc.cpp @@ -877,9 +877,9 @@ template<typename grid> void calc_rhs(grid & g_vort, grid & g_vel, grid & g_dwp) // calculate several pre-factors for the stencil finite // difference - float fac1 = 2.0f*nu/(g_vort.spacing(0)*g_vort.spacing(0)); - float fac2 = 2.0f*nu/(g_vort.spacing(1)*g_vort.spacing(1)); - float fac3 = 2.0f*nu/(g_vort.spacing(2)*g_vort.spacing(2)); + float fac1 = 1.0f*nu/(g_vort.spacing(0)*g_vort.spacing(0)); + float fac2 = 1.0f*nu/(g_vort.spacing(1)*g_vort.spacing(1)); + float fac3 = 1.0f*nu/(g_vort.spacing(2)*g_vort.spacing(2)); float fac4 = 0.5f/(g_vort.spacing(0)); float fac5 = 0.5f/(g_vort.spacing(1)); diff --git a/example/VCluster/0_simple/main.cpp b/example/VCluster/0_simple/main.cpp index af5dc17f0c0ae06a7bc4132c801df05261c667be..c0bab78450f7f099747519c88d7dd9ee65a9fb39 100644 --- a/example/VCluster/0_simple/main.cpp +++ b/example/VCluster/0_simple/main.cpp @@ -7,6 +7,7 @@ * * \subpage VCluster_0_simple * \subpage VCluster_1_semantic + * \subpage VCluster_2_serial_and_parallel * */ diff --git a/example/Vector/7_SPH_dlb_opt/main.cpp b/example/Vector/7_SPH_dlb_opt/main.cpp index 
d576c1a1d0f69647fc4ebe816c8c3585d6e0e43c..dff03e4625269cbb3f4d372fc76f765dc798dd8f 100644 --- a/example/Vector/7_SPH_dlb_opt/main.cpp +++ b/example/Vector/7_SPH_dlb_opt/main.cpp @@ -1118,6 +1118,7 @@ int main(int argc, char* argv[]) { vd.deleteGhost(); vd.write_frame("Geometry",write,VTK_WRITER | FORMAT_BINARY); + vd.getDecomposition().write("dec" + std::to_string(write)); vd.ghost_get<type,rho,Pressure,velocity>(SKIP_LABELLING); write++; diff --git a/images/vector.cpp b/images/vector.cpp index 7b9e3361a137e05c2d546d1f5ccb87f160a7f11b..9414d2e6af4fba9846154c1212699ef32c544a36 100644 --- a/images/vector.cpp +++ b/images/vector.cpp @@ -34,6 +34,11 @@ public: #endif + static inline bool noPointers() + { + return true; + } + }; int main(int argc, char* argv[]) @@ -58,7 +63,7 @@ int main(int argc, char* argv[]) size_t bc[2]={PERIODIC,PERIODIC}; Ghost<2,float> g(0.01); - vector_dist<2,float, Particle<float>, CartDecomposition<2,float> > vd(4096,domain,bc,g); + vector_dist<2,float, Particle<float> > vd(4096,domain,bc,g); auto it = vd.getIterator(); diff --git a/openfpm_data b/openfpm_data index f7ca1bc2fe8eeb9c6e3bdf34898341ffa91f8c1c..6c2a5911ac16f93ab0ae1e7ac14723c952aa5c16 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit f7ca1bc2fe8eeb9c6e3bdf34898341ffa91f8c1c +Subproject commit 6c2a5911ac16f93ab0ae1e7ac14723c952aa5c16 diff --git a/openfpm_devices b/openfpm_devices index 2da3b22b477d8b94b60fb9eb5f1a4daacb6857b5..46e4994c5dff879a71e6ae090c50b2f23235d435 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit 2da3b22b477d8b94b60fb9eb5f1a4daacb6857b5 +Subproject commit 46e4994c5dff879a71e6ae090c50b2f23235d435 diff --git a/openfpm_io b/openfpm_io index fac23ddd992dc17d82904bd5083f5235416c2255..89411e76fbed1ab098d2bfec13e7759aca51a14d 160000 --- a/openfpm_io +++ b/openfpm_io @@ -1 +1 @@ -Subproject commit fac23ddd992dc17d82904bd5083f5235416c2255 +Subproject commit 89411e76fbed1ab098d2bfec13e7759aca51a14d diff --git 
a/openfpm_numerics b/openfpm_numerics index 4e569e3bcec0ac24ebd0b2a30a1b7bf9b602497d..d9df4f304c897432f85291233218b036c8857523 160000 --- a/openfpm_numerics +++ b/openfpm_numerics @@ -1 +1 @@ -Subproject commit 4e569e3bcec0ac24ebd0b2a30a1b7bf9b602497d +Subproject commit d9df4f304c897432f85291233218b036c8857523 diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc index afd54c557ca81549f429a12fb10d2e8989dde33f..e356c1f644c459d47927b845e8cb1a8d06b43103 100644 --- a/openfpm_pdata.doc +++ b/openfpm_pdata.doc @@ -38,7 +38,7 @@ PROJECT_NAME = "OpenFPM_pdata" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.0.0 +PROJECT_NUMBER = 1.1.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/openfpm_vcluster b/openfpm_vcluster index a99918127f5835c31d2df4e9020efdeb46d07d66..dba6676c2638e01dc3b9277dbbd80ea3f46ea6d0 160000 --- a/openfpm_vcluster +++ b/openfpm_vcluster @@ -1 +1 @@ -Subproject commit a99918127f5835c31d2df4e9020efdeb46d07d66 +Subproject commit dba6676c2638e01dc3b9277dbbd80ea3f46ea6d0 diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp index 8b91795fab6c39f9fd97465a915150c2d3b94e95..806cc50eb1ead1e5d8ec2597f1ec35760aec1b0b 100755 --- a/src/Decomposition/CartDecomposition.hpp +++ b/src/Decomposition/CartDecomposition.hpp @@ -41,6 +41,50 @@ #define CARTDEC_ERROR 2000lu +/*! 
\brief It spread the sub-sub-domain on a regular cartesian grid of size dim + * + * \warning this function only guarantee that the division on each direction is + * 2^n with some n and does not guarantee that the number of + * sub-sub-domain is preserved + * + * \param div number of division on each direction as output + * \param n_sub number of sub-domain + * \param dim_r dimension reduction + * + */ +template<unsigned int dim> static void nsub_to_div2(size_t (& div)[dim], size_t n_sub, size_t dim_r) +{ + for (size_t i = 0; i < dim; i++) + { + if (i < dim_r) + {div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim_r));} + else + {div[i] = 1;} + } +} + +/*! \brief It spread the sub-sub-domain on a regular cartesian grid of size dim + * + * \warning each of the first dim_r directions gets floor(n_sub^(1/dim_r)) divisions + * (no rounding to 2^n as in nsub_to_div2), so the number of + * sub-sub-domain is in general not preserved + * + * \param div number of division on each direction as output + * \param n_sub number of sub-domain + * \param dim_r number of leading directions that are divided, the others get 1 division + * + */ +template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n_sub, size_t dim_r) +{ + for (size_t i = 0; i < dim; i++) + { + if (i < dim_r) + {div[i] = std::floor(pow(n_sub, 1.0 / dim_r) + 1e-9);} // pow() can land just below an exact root (64^(1/3) -> 3.9999999999999996), the epsilon keeps floor() on it + else + {div[i] = 1;} + } +} + #define COMPUTE_SKIN_SUB 1 /** @@ -352,13 +396,13 @@ public: // calculate the sub-divisions size_t div[dim]; for (size_t i = 0; i < dim; i++) - div[i] = (size_t) ((bound.getHigh(i) - bound.getLow(i)) / cd.getCellBox().getP2()[i]); + {div[i] = (size_t) ((bound.getHigh(i) - bound.getLow(i)) / cd.getCellBox().getP2()[i]);} // Initialize the geo_cell structure ie_ghost<dim,T>::Initialize_geo_cell(bound,div); // Initialize shift vectors - ie_ghost<dim,T>::generateShiftVectors(domain); + ie_ghost<dim,T>::generateShiftVectors(domain,bc); } } @@ -512,9 +556,7 @@ public: \endverbatim * - * - * - * \param ghost margins for each dimensions (p1 negative part) (p2 positive part) + * 
ghost margins for each dimensions (p1 negative part) (p2 positive part) * * \verbatim @@ -798,6 +840,9 @@ public: cart.box_nn_processor = box_nn_processor; cart.fine_s = fine_s; cart.gr = gr; + cart.gr_dist = gr_dist; + cart.dist = dist; + cart.commCostSet = commCostSet; cart.cd = cd; cart.domain = domain; for (size_t i = 0 ; i < dim ; i++) @@ -830,11 +875,17 @@ public: box_nn_processor = cart.box_nn_processor; fine_s = cart.fine_s; gr = cart.gr; + gr_dist = cart.gr_dist; + dist = cart.dist; + commCostSet = cart.commCostSet; cd = cart.cd; domain = cart.domain; for (size_t i = 0 ; i < dim ; i++) - {spacing[i] = cart.spacing[i];}; + { + spacing[i] = cart.spacing[i]; + magn[i] = cart.magn[i]; + }; ghost = cart.ghost; @@ -863,10 +914,16 @@ public: box_nn_processor.swap(cart.box_nn_processor); fine_s.swap(cart.fine_s); gr = cart.gr; + gr_dist = cart.gr_dist; + dist = cart.dist; + commCostSet = cart.commCostSet; cd = cart.cd; domain = cart.domain; for (size_t i = 0 ; i < dim ; i++) - {spacing[i] = cart.spacing[i];}; + { + spacing[i] = cart.spacing[i]; + magn[i] = cart.magn[i]; + }; ghost = cart.ghost; @@ -1030,6 +1087,77 @@ public: } } + /*! \brief Set the best parameters for the decomposition + * + * It based on number of processors and dimensionality find a "good" parameter setting + * + * \param domain_ domain to decompose + * \param bc boundary conditions + * \param ghost Ghost size + * \param sec_dist Distribution grid. The distribution grid help in reducing the underlying + * distribution problem simplifying decomposition problem. 
This is done in order to + * reduce the load/balancing dynamic load balancing problem + * + * \param dec_gran number of sub-sub-domain for each processor + * + */ + void setGoodParameters(::Box<dim,T> domain_, + const size_t (& bc)[dim], + const Ghost<dim,T> & ghost, + size_t dec_gran, + const grid_sm<dim,void> & sec_dist = grid_sm<dim,void>()) + { + size_t div[dim]; + + // Create a valid decomposition of the space + // Get the number of processor and calculate the number of sub-domain + // for decomposition + size_t n_proc = v_cl.getProcessingUnits(); + size_t n_sub = n_proc * dec_gran; + + // Calculate the maximum number (before merging) of sub-domain on + // each dimension + + nsub_to_div2(div,n_sub,dim); + +/* for (size_t i = 0; i < dim; i++) + { + div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim)); + }*/ + + if (dim > 3) + { + long int dim_r = dim-1; + do + { + // Check for adjustment + size_t tot_size = 1; + for (size_t i = 0 ; i < dim ; i++) + {tot_size *= div[i];} + + // the granularity is too coarse increase the divisions + if (tot_size / n_proc > 0.75*dec_gran ) + {break;} + + nsub_to_div(div,n_sub,dim_r); + + dim_r--; + } while(dim_r > 0); + } + + setParameters(div,domain_,bc,ghost,sec_dist); + } + + /*! \brief return the parameters of the decomposition + * + * \param div_ number of divisions in each dimension + * + */ + void getParameters(size_t (& div_)[dim]) + { + for (size_t i = 0 ; i < dim ; i++) + {div_[i] = this->gr.size(i);} + } /*! 
\brief Set the parameter of the decomposition * @@ -1042,7 +1170,11 @@ public: * reduce the load/balancing dynamic load balancing problem * */ - void setParameters(const size_t (& div_)[dim], ::Box<dim,T> domain_, const size_t (& bc)[dim] ,const Ghost<dim,T> & ghost, const grid_sm<dim,void> & sec_dist = grid_sm<dim,void>()) + void setParameters(const size_t (& div_)[dim], + ::Box<dim,T> domain_, + const size_t (& bc)[dim], + const Ghost<dim,T> & ghost, + const grid_sm<dim,void> & sec_dist = grid_sm<dim,void>()) { // set the boundary conditions for (size_t i = 0 ; i < dim ; i++) @@ -1096,7 +1228,7 @@ public: reset(); if (commCostSet == false) - computeCommunicationAndMigrationCosts(1); + {computeCommunicationAndMigrationCosts(1);} dist.decompose(); @@ -1118,7 +1250,7 @@ public: reset(); if (commCostSet == false) - computeCommunicationAndMigrationCosts(ts); + {computeCommunicationAndMigrationCosts(ts);} dist.refine(); @@ -1140,7 +1272,7 @@ public: reset(); if (commCostSet == false) - computeCommunicationAndMigrationCosts(ts); + {computeCommunicationAndMigrationCosts(ts);} dist.redecompose(); diff --git a/src/Decomposition/Distribution/DistParMetisDistribution.hpp b/src/Decomposition/Distribution/DistParMetisDistribution.hpp index f96a345c3b647eda74736ba4cfecc5735bd67053..d20e71fc302af7c14eacdc7324bd56087e14ef2c 100644 --- a/src/Decomposition/Distribution/DistParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/DistParMetisDistribution.hpp @@ -272,9 +272,7 @@ public: /*! 
\brief return number of moved vertices in all iterations so far * - * \param id vertex id - * - * \return vector with x, y, z + * \return number of moved vertices * */ size_t getMaxMovedV() diff --git a/src/Decomposition/Distribution/Distribution_unit_tests.hpp b/src/Decomposition/Distribution/Distribution_unit_tests.hpp index 37a9944f9a97d5d8276acaff2be74952dadb96cc..1c527d5b6ee2bde0f96a192aa31991513cfe22f9 100644 --- a/src/Decomposition/Distribution/Distribution_unit_tests.hpp +++ b/src/Decomposition/Distribution/Distribution_unit_tests.hpp @@ -422,11 +422,6 @@ BOOST_AUTO_TEST_CASE( Space_distribution_test) //! [refine with dist_parmetis the decomposition] } -void print_test_v(std::string test, size_t sz) -{ - if (create_vcluster().getProcessUnitID() == 0) - std::cout << test << " " << sz << "\n"; -} BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Decomposition/Distribution/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp index 9ee2435da83384ec736f57779d4e21ea8bd5b9c9..cacf912a91b66389ab380a7259ac08162fefdba2 100644 --- a/src/Decomposition/Distribution/ParMetisDistribution.hpp +++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp @@ -304,6 +304,7 @@ public: * */ ParMetisDistribution(ParMetisDistribution<dim,T> && pm) + :v_cl(pm.v_cl) { this->operator=(pm); } @@ -638,6 +639,7 @@ public: verticesGotWeights = dist.verticesGotWeights; sub_sub_owner = dist.sub_sub_owner; m2g = dist.m2g; + parmetis_graph = dist.parmetis_graph; return *this; } @@ -655,6 +657,7 @@ public: verticesGotWeights = dist.verticesGotWeights; sub_sub_owner.swap(dist.sub_sub_owner); m2g.swap(dist.m2g); + parmetis_graph = dist.parmetis_graph; return *this; } diff --git a/src/Decomposition/Distribution/SpaceDistribution.hpp b/src/Decomposition/Distribution/SpaceDistribution.hpp index 2aa6afc52a297e75f3aab6726b054170e0f62dae..26343e14b2f32cd423e86cab88d8ac743a18586d 100644 --- a/src/Decomposition/Distribution/SpaceDistribution.hpp +++ 
b/src/Decomposition/Distribution/SpaceDistribution.hpp @@ -10,6 +10,7 @@ #include "util/mathutil.hpp" #include "NN/CellList/CellDecomposer.hpp" +#include "Grid/grid_key_dx_iterator_hilbert.hpp" /*! \brief Class that distribute sub-sub-domains across processors using an hilbert curve * to divide the space @@ -63,6 +64,7 @@ public: * */ SpaceDistribution(SpaceDistribution<dim,T> && pm) + :v_cl(pm.v_cl) { this->operator=(pm); } diff --git a/src/Decomposition/Distribution/parmetis_util.hpp b/src/Decomposition/Distribution/parmetis_util.hpp index c555326f4a20ef04b67f0e57b04b8bf93696df91..c34bc34a5e65797b548e1b2cd22073deecf6ee36 100755 --- a/src/Decomposition/Distribution/parmetis_util.hpp +++ b/src/Decomposition/Distribution/parmetis_util.hpp @@ -526,10 +526,11 @@ public: */ const Parmetis<Graph> & operator=(const Parmetis<Graph> & pm) { - comm = pm.comm; - v_cl = pm.v_cl; + MPI_Comm_dup(pm.comm, &comm); p_id = pm.p_id; nc = pm.nc; + n_dec = pm.n_dec; + dist_tol = pm.dist_tol; setDefaultParameters(pm.Mg.wgtflag[0] == 3); @@ -545,10 +546,12 @@ public: */ const Parmetis<Graph> & operator=(Parmetis<Graph> && pm) { - comm = pm.comm; - v_cl = pm.v_cl; + // TODO Move into VCluster + MPI_Comm_dup(pm.comm, &comm); p_id = pm.p_id; nc = pm.nc; + n_dec = pm.n_dec; + dist_tol = pm.dist_tol; setDefaultParameters(pm.Mg.wgtflag[0] == 3); diff --git a/src/Decomposition/Domain_NN_calculator_cart.hpp b/src/Decomposition/Domain_NN_calculator_cart.hpp index 082c53d80a41423d0fb144693a23e035bedcfc67..8f795bbc04b61cc73b38d79edf20173b08d74429 100644 --- a/src/Decomposition/Domain_NN_calculator_cart.hpp +++ b/src/Decomposition/Domain_NN_calculator_cart.hpp @@ -121,7 +121,7 @@ class domain_nn_calculator_cart // +2 is padding for (size_t j = 0 ; j < dim ; j++) - sz[j] = proc_box.getHigh(j) - proc_box.getLow(j) + 2 + 1; + {sz[j] = proc_box.getHigh(j) - proc_box.getLow(j) + 2 + 1;} gs.setDimensions(sz); @@ -130,7 +130,7 @@ class domain_nn_calculator_cart g.setMemory(); for (size_t i = 0 ; i < dim ; 
i++) - one.set_d(i,1); + {one.set_d(i,1);} // Calculate the csr neighborhood openfpm::vector<std::pair<grid_key_dx<dim>,grid_key_dx<dim>>> csr; @@ -191,7 +191,7 @@ class domain_nn_calculator_cart sub_keys.last().NN_subsub.resize(g.template get<0>(key).size()); for (size_t i = 0 ; i < g.template get<0>(key).size() ; i++) - sub_keys.last().NN_subsub.get(i) = g.template get<0>(key).get(i) - one; + {sub_keys.last().NN_subsub.get(i) = g.template get<0>(key).get(i) - one;} } ++it; diff --git a/src/Decomposition/ORB.hpp b/src/Decomposition/ORB.hpp index d8efe63aabc831ab17bede98c4bd5b7bf847eecd..e9422298e8402de9d23c2433946798b959dd4d5a 100755 --- a/src/Decomposition/ORB.hpp +++ b/src/Decomposition/ORB.hpp @@ -8,7 +8,6 @@ #ifndef ORB_HPP_ #define ORB_HPP_ -#include "data_type/scalar.hpp" #include "util/mathutil.hpp" /*! \brief this class is a functor for "for_each" algorithm @@ -77,7 +76,7 @@ struct do_when_dim_gr_i<dim,i,ORB,typename boost::enable_if< boost::mpl::bool_<( * */ -template<typename T> class ORB_node : public scalar<T> +template<typename T> class ORB_node : public aggregate<T> { public: diff --git a/src/Decomposition/common.hpp b/src/Decomposition/common.hpp index f9cdfeb61108b4d7c52edd10bbe14544b7c6399a..5a920679a1f1ea284d3e5a2d0f77577c71957196 100755 --- a/src/Decomposition/common.hpp +++ b/src/Decomposition/common.hpp @@ -83,6 +83,12 @@ struct Box_sub //! see ie_ghost follow sector explanation comb<dim> cmb; + + //! Constructor reset cmb + Box_sub() + { + cmb.zero(); + } }; //! Particular case for local internal ghost boxes diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp index 2847d8c747287fcffcef0cfc5dca99c90a2eec7e..7863bdb8092a97084ff4926b7b9b56e18bac26ab 100755 --- a/src/Decomposition/ie_ghost.hpp +++ b/src/Decomposition/ie_ghost.hpp @@ -10,6 +10,8 @@ #include "common.hpp" #include "nn_processor.hpp" +#include "Decomposition/shift_vect_converter.hpp" + /*! 
\brief structure that store and compute the internal and external local ghost box * @@ -38,7 +40,7 @@ class ie_ghost openfpm::vector<p_box<dim,T> > vb_int; //! Cell-list that store the geometrical information of the internal ghost boxes - CellList<dim,T,Mem_fast,shift<dim,T>> geo_cell; + CellList<dim,T,Mem_fast<>,shift<dim,T>> geo_cell; //! shift vectors openfpm::vector<Point<dim,T>> shifts; @@ -49,6 +51,8 @@ class ie_ghost //! Temporal buffers to return temporal information openfpm::vector<size_t> ids; + //! shift converter + shift_vect_converter<dim,T> sc_convert; /*! \brief Given a local sub-domain i, it give the id of such sub-domain in the sent list * for the processor p_id @@ -153,35 +157,9 @@ protected: * \param domain box that describe the domain * */ - void generateShiftVectors(const Box<dim,T> & domain) + void generateShiftVectors(const Box<dim,T> & domain, size_t (& bc)[dim]) { - shifts.resize(openfpm::math::pow(3,dim)); - - HyperCube<dim> hyp; - - for (long int i = dim-1 ; i >= 0 ; i--) - { - std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i); - - for (size_t j = 0 ; j < cmbs.size() ; j++) - { - for (size_t k = 0 ; k < dim ; k++) - { - switch (cmbs[j][k]) - { - case 1: - shifts.get(cmbs[j].lin()).template get<0>()[k] = -(domain.getHigh(k) - domain.getLow(k)); - break; - case 0: - shifts.get(cmbs[j].lin()).template get<0>()[k] = 0; - break; - case -1: - shifts.get(cmbs[j].lin()).template get<0>()[k] = (domain.getHigh(k) - domain.getLow(k)); - break; - } - } - } - } + sc_convert.generateShiftVectors(domain,bc,shifts); } /*! \brief Initialize the geo cell list structure @@ -196,7 +174,7 @@ protected: void Initialize_geo_cell(const Box<dim,T> & domain, const size_t (&div)[dim]) { // Initialize the geo_cell structure - geo_cell.Initialize(domain,div); + geo_cell.Initialize(domain,div,0); } /*! 
\brief Create the box_nn_processor_int (bx part) structure @@ -371,7 +349,7 @@ protected: b_int.lc_proc = lc_proc; // fill the shift id - b_int.shift_id = nn_p_box_pos.get(k).lin(); + b_int.shift_id = convertShift(nn_p_box_pos.get(k)); // // Updating @@ -531,6 +509,20 @@ public: return shifts; } + /*! It return the converted shift vector + * + * In high dimensions the number of shifts vectors explode exponentially, so we are + * expecting that some of the boundary is non periodic to reduce the numbers of shift + * vectors + * + * \return the shift vectors + * + */ + size_t convertShift(const comb<dim> & cmb) + { + return sc_convert.linId(cmb); + } + /*! \brief Get the number of Internal ghost boxes for one processor * * \param id near processor list id (the id go from 0 to getNNProcessor()) diff --git a/src/Decomposition/ie_loc_ghost.hpp b/src/Decomposition/ie_loc_ghost.hpp index 727b1dca1d063db2be6ff4b4b88d8e0405320493..64f50caee5cef345cd40717191f42c4be980a5c3 100755 --- a/src/Decomposition/ie_loc_ghost.hpp +++ b/src/Decomposition/ie_loc_ghost.hpp @@ -159,7 +159,7 @@ class ie_loc_ghost // that must be adjusted, each of this boxes define a shift in case of periodic boundary condition for (long int i = dim-1 ; i >= 0 ; i--) { - std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i); + std::vector<comb<dim>> cmbs = hyp.getCombinations_R_bc(i,bc); for (size_t j = 0 ; j < cmbs.size() ; j++) { diff --git a/src/Decomposition/nn_processor.hpp b/src/Decomposition/nn_processor.hpp index aac751e7f06f1e1d62fd1151e5a8fc6dc816c291..42e56002484ca5651f5987cec503e1e508ec35a7 100755 --- a/src/Decomposition/nn_processor.hpp +++ b/src/Decomposition/nn_processor.hpp @@ -178,7 +178,7 @@ class nn_prcs // that must be adjusted, each of this boxes define a shift in case of periodic boundary condition for (long int i = dim-1 ; i >= 0 ; i--) { - std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i); + std::vector<comb<dim>> cmbs = hyp.getCombinations_R_bc(i,bc); for (size_t j = 0 ; j < 
cmbs.size() ; j++) { diff --git a/src/Decomposition/shift_vect_converter.hpp b/src/Decomposition/shift_vect_converter.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d77d35d9a521847720caeec70ecdfa45e48169c1 --- /dev/null +++ b/src/Decomposition/shift_vect_converter.hpp @@ -0,0 +1,196 @@ +/* + * shift_vect_converter.hpp + * + * Created on: Feb 8, 2018 + * Author: i-bird + */ + +#ifndef SRC_DECOMPOSITION_SHIFT_VECT_CONVERTER_HPP_ +#define SRC_DECOMPOSITION_SHIFT_VECT_CONVERTER_HPP_ + +#include "Space/Shape/HyperCube.hpp" + +/*! \brief in case of high dimensions shift vector converter + * + * In case of high-dimensions the number of shift vectors explode, this class + * handle such case + * + */ +template<unsigned int dim, typename T> +class shift_vect_converter +{ + //! Indicate which indexes are non_periodic + size_t red_shift_v[dim]; + + // indexes + size_t tmp[dim]; + + // Dimension + int dim_r = 0; + + /*! \brief Here we generare the shift vectors for the low dimension case + * + * \param domain box that describe the domain + * + */ + void generateShiftVectors_ld(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts) + { + shifts.resize(openfpm::math::pow(3,dim)); + + HyperCube<dim> hyp; + + for (long int i = dim-1 ; i >= 0 ; i--) + { + std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i); + + for (size_t j = 0 ; j < cmbs.size() ; j++) + { + for (size_t k = 0 ; k < dim ; k++) + { + switch (cmbs[j][k]) + { + case 1: + shifts.get(cmbs[j].lin()).template get<0>()[k] = -(domain.getHigh(k) - domain.getLow(k)); + break; + case 0: + shifts.get(cmbs[j].lin()).template get<0>()[k] = 0; + break; + case -1: + shifts.get(cmbs[j].lin()).template get<0>()[k] = (domain.getHigh(k) - domain.getLow(k)); + break; + } + } + } + } + } + + /*! 
\brief Here we generare the shift vectors for the high dimension case + * + * \param domain box that describe the domain + * + */ + void generateShiftVectors_hd(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts) + { + // get the indexes of the free degree of freedom + for (size_t i = 0 ; i < dim ; i++) + { + if (bc[i] == PERIODIC) + { + red_shift_v[dim_r] = i; + dim_r++; + } + } + + HyperCube<dim> hyp; + + // precalculate the nuber of shift vectors + size_t nsv = 0; + for (long int i = dim-1 ; i >= 0 ; i--) + {nsv += hyp.getCombinations_R_bc(i,bc).size();} + shifts.resize(nsv+1); + + for (long int i = dim-1 ; i >= 0 ; i--) + { + std::vector<comb<dim>> cmbs = hyp.getCombinations_R_bc(i,bc); + + for (size_t j = 0 ; j < cmbs.size() ; j++) + { + size_t lin_cmb = linId_hd(cmbs[j]); + + for (size_t k = 0 ; k < dim ; k++) + { + switch (cmbs[j][k]) + { + case 1: + shifts.get(lin_cmb).template get<0>()[k] = -(domain.getHigh(k) - domain.getLow(k)); + break; + case 0: + shifts.get(lin_cmb).template get<0>()[k] = 0; + break; + case -1: + shifts.get(lin_cmb).template get<0>()[k] = (domain.getHigh(k) - domain.getLow(k)); + break; + } + } + } + } + } + +public: + + /*! \brief Here we generare the shift vectors for the low dimension case + * + * \param domain box that describe the domain + * + */ + void generateShiftVectors(const Box<dim,T> & domain, size_t (& bc)[dim], openfpm::vector<Point<dim,T>> & shifts) + { + if (dim < 10) + {generateShiftVectors_ld(domain,bc,shifts);} + else + {generateShiftVectors_hd(domain,bc,shifts);} + } + + /*! \brief Initialize + * + * \param bc boundary conditions + * + */ + void Initialize(size_t (& bc)[dim]) + { + // get the indexes of the free degree of freedom + for (size_t i = 0 ; i < dim ; i++) + { + if (bc[i] == PERIODIC) + { + red_shift_v[dim] = i; + dim_r++; + } + } + } + + /*! 
\brief linearize the combination in case of high dimension + * + * \param cmb combination + * + */ + size_t linId_hd(const comb<dim> & cmb) + { + size_t cul = 1; + size_t lin = 0; + for (long int i = 0 ; i < dim_r ; i++) + { + lin += cul*(cmb.c[red_shift_v[i]] + 1); + cul *= 3; + } + + return lin; + } + + /*! \brief linearize the combination in case of low dimensions + * + * \param cmb combination + * + */ + inline size_t linId_ld(const comb<dim> & cmb) + { + return cmb.lin(); + } + + /*! \brief linearize the combination in case of high dimensions + * + * \param cmb combination + * + */ + inline size_t linId(const comb<dim> & cmb) + { + if (dim < 10) + {return linId_ld(cmb);} + + return linId_hd(cmb); + } + +}; + + +#endif /* SRC_DECOMPOSITION_SHIFT_VECT_CONVERTER_HPP_ */ diff --git a/src/Decomposition/CartDecomposition_unit_test.hpp b/src/Decomposition/tests/CartDecomposition_unit_test.cpp similarity index 80% rename from src/Decomposition/CartDecomposition_unit_test.hpp rename to src/Decomposition/tests/CartDecomposition_unit_test.cpp index 7e7026bff80b1fb198729b70b995fc3fd009e842..e66bbbf31c7d3727faa8b154e052bb1bf2547852 100755 --- a/src/Decomposition/CartDecomposition_unit_test.hpp +++ b/src/Decomposition/tests/CartDecomposition_unit_test.cpp @@ -1,7 +1,7 @@ -#ifndef CARTDECOMPOSITION_UNIT_TEST_HPP -#define CARTDECOMPOSITION_UNIT_TEST_HPP +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> -#include "CartDecomposition.hpp" +#include "Decomposition/CartDecomposition.hpp" #include "util/mathutil.hpp" BOOST_AUTO_TEST_SUITE (CartDecomposition_test) @@ -414,6 +414,135 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test_dist_grid) BOOST_REQUIRE_EQUAL(val,true); } +BOOST_AUTO_TEST_CASE( CartDecomposition_nsub_algo_functions_test) +{ + size_t n_sub = 64*2; + size_t div[3]; + + nsub_to_div2<3>(div,n_sub,3); + + BOOST_REQUIRE_EQUAL(div[0],8ul); + BOOST_REQUIRE_EQUAL(div[1],8ul); + BOOST_REQUIRE_EQUAL(div[2],8ul); + + nsub_to_div2<3>(div,n_sub,2); + 
+ BOOST_REQUIRE_EQUAL(div[0],16ul); + BOOST_REQUIRE_EQUAL(div[1],16ul); + BOOST_REQUIRE_EQUAL(div[2],1ul); + + nsub_to_div2<3>(div,n_sub,1); + + BOOST_REQUIRE_EQUAL(div[0],128ul); + BOOST_REQUIRE_EQUAL(div[1],1ul); + BOOST_REQUIRE_EQUAL(div[2],1ul); + + n_sub = 64*3; + nsub_to_div<3>(div,n_sub,3); + + BOOST_REQUIRE_EQUAL(div[0],5ul); + BOOST_REQUIRE_EQUAL(div[1],5ul); + BOOST_REQUIRE_EQUAL(div[2],5ul); + + nsub_to_div<3>(div,n_sub,2); + + BOOST_REQUIRE_EQUAL(div[0],13ul); + BOOST_REQUIRE_EQUAL(div[1],13ul); + BOOST_REQUIRE_EQUAL(div[2],1ul); + + nsub_to_div<3>(div,n_sub,1); + + BOOST_REQUIRE_EQUAL(div[0],192ul); + BOOST_REQUIRE_EQUAL(div[1],1ul); + BOOST_REQUIRE_EQUAL(div[2],1ul); + + // Test high dimension cart decomposition subdivision + + Box<50,double> domain; + size_t bc[50]; + Ghost<50,double> ghost(0.01); + + for(size_t i = 0 ; i < 50 ; i++) + { + domain.setLow(i,0.0); + domain.setHigh(i,1.0); + bc[i] = NON_PERIODIC; + } + + CartDecomposition<50,double> dec(create_vcluster()); + + dec.setGoodParameters(domain,bc,ghost,64); + + size_t div2[50]; + dec.getParameters(div2); + + auto & v_cl = create_vcluster(); + if (v_cl.size() == 1) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 6) + {BOOST_REQUIRE_EQUAL(div2[i],2ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } + + if (v_cl.size() == 2) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 7) + {BOOST_REQUIRE_EQUAL(div2[i],2ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } + + if (v_cl.size() == 3) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 2) + {BOOST_REQUIRE_EQUAL(div2[i],13ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } + + if (v_cl.size() == 4) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 8) + {BOOST_REQUIRE_EQUAL(div2[i],2ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } + + if (v_cl.size() == 5) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 8) + {BOOST_REQUIRE_EQUAL(div2[i],2ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } 
+ + if (v_cl.size() == 6) + { + for (size_t i = 0 ; i < 50 ; i++) + { + if (i < 3) + {BOOST_REQUIRE_EQUAL(div2[i],7ul);} + else + {BOOST_REQUIRE_EQUAL(div2[i],1ul);} + } + } +} + BOOST_AUTO_TEST_SUITE_END() -#endif diff --git a/src/Decomposition/tests/shift_vect_converter_tests.cpp b/src/Decomposition/tests/shift_vect_converter_tests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..260fc7289e04b2e28136cecc350f694ad9b622da --- /dev/null +++ b/src/Decomposition/tests/shift_vect_converter_tests.cpp @@ -0,0 +1,123 @@ +/* + * shift_vect_converter_tests.cpp + * + * Created on: Feb 8, 2018 + * Author: i-bird + */ + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include "Space/Shape/Box.hpp" + +#include "Vector/map_vector.hpp" +#include "Decomposition/shift_vect_converter.hpp" + +BOOST_AUTO_TEST_SUITE( shift_vect_converter_tests_suite ) + +BOOST_AUTO_TEST_CASE( shift_vect_converter_tests_use ) +{ + { + Box<3,double> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + shift_vect_converter<3,double> svc; + size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; + + openfpm::vector<Point<3,double>> sv; + + svc.generateShiftVectors(domain,bc,sv); + + BOOST_REQUIRE_EQUAL(sv.size(),27ul); + + // We test that the cominations generate the correct shift vectors + comb<3> cmb1({-1,-1,1}); + comb<3> cmb2({-1,0,1}); + comb<3> cmb3({0,0,1}); + + size_t i = svc.linId(cmb1); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[0],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[1],1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[2],1.0); + + i = svc.linId(cmb2); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[0],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[1],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[2],1.0); + + i = svc.linId(cmb3); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[0],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[1],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[2],0.0); + + } + + { + openfpm::vector<Point<50,double>> sv; + Box<50,double> domain; + size_t bc[50]; + + for (size_t i = 0 ; i < 50 ; i++) + { + 
domain.setLow(i,0.0); + domain.setHigh(i,1.0); + bc[i] = NON_PERIODIC; + } + + bc[5] = PERIODIC; + bc[17] = PERIODIC; + bc[23] = PERIODIC; + + shift_vect_converter<50,double> svc; + + svc.generateShiftVectors(domain,bc,sv); + + BOOST_REQUIRE_EQUAL(sv.size(),27ul); + + // We test that the cominations generate the correct shift vectors + comb<50> cmb1; + comb<50> cmb2; + comb<50> cmb3; + + cmb1.c[5] = 1; + cmb1.c[17] = -1; + cmb1.c[23] = -1; + + cmb2.c[5] = 1; + cmb2.c[17] = 0; + cmb2.c[23] = -1; + + cmb3.c[5] = 1; + cmb3.c[17] = 0; + cmb3.c[23] = 0; + + size_t i = svc.linId(cmb1); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[5],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[6],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[17],1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[23],1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[24],0.0); + + i = svc.linId(cmb2); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[5],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[6],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[17],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[23],1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[24],0.0); + + i = svc.linId(cmb3); + + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[5],-1.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[6],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[17],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[23],0.0); + BOOST_REQUIRE_EQUAL(sv.get<0>(i)[24],0.0); + + } +} + + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/src/Grid/Iterators/grid_dist_id_iterator.hpp b/src/Grid/Iterators/grid_dist_id_iterator.hpp index 6cfc5f54a3b08bc9cf95d037c5a764920ec91ac9..60cb2e0c4d3573cc86a0ce95b8995a09225b93c8 100644 --- a/src/Grid/Iterators/grid_dist_id_iterator.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterator.hpp @@ -48,7 +48,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> size_t g_c; //! List of the grids we are going to iterate - const openfpm::vector<device_grid> & gList; + openfpm::vector<device_grid> & gList; //! 
Extension of each grid: domain and ghost + domain const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext; @@ -59,13 +59,17 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> //! stop point (is the grid size) grid_key_dx<dim> stop; + // device grid pointer + device_grid * dg; + /*! \brief from g_c increment g_c until you find a valid grid * */ void selectValidGrid() { // When the grid has size 0 potentially all the other informations are garbage - while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++; + while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) + {g_c++;} // get the next grid iterator if (g_c < gList.size()) @@ -83,7 +87,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> * \param stop end point * */ - grid_dist_iterator(const openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, const grid_key_dx<dim> & stop) + grid_dist_iterator(openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, const grid_key_dx<dim> & stop) :g_c(0),gList(gk),gdb_ext(gdb_ext),stop(stop) { // Initialize the current iterator @@ -100,7 +104,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> * \param stencil_pnt stencil points * */ - grid_dist_iterator(const openfpm::vector<device_grid> & gk, + grid_dist_iterator(openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, const grid_key_dx<dim> & stop, const grid_key_dx<dim> (& stencil_pnt)[stencil::nsp]) @@ -151,7 +155,7 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> // If there are no other grid stop if (g_c >= gList.size()) - return false; + {return false;} return true; } @@ -240,6 +244,18 @@ class grid_dist_iterator<dim,device_grid,FREE,stencil> { return grid_dist_lin_dx(g_c,a_it.template getStencil<id>()); } + + /*! 
\brief Return the stencil point offset + * + * \tparam id + * + * \return linearized distributed key + * + */ + template<unsigned int id> inline grid_dist_g_dx<device_grid> getStencilGrid() + { + return grid_dist_g_dx<device_grid>(dg,a_it.template getStencil<id>()); + } }; diff --git a/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp b/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp index a1730e5dcdcd263219752c93a7462af1f64547f7..1b2533880d8a588e57b39a534443dc1ee73718ca 100644 --- a/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp @@ -249,6 +249,79 @@ class grid_dist_iterator_sub { return stop; } + + /*! \brief Return the number of local grids + * + * + */ + inline size_t N_loc_grid() + { + return gList.size(); + } + + /*! \brief Return the component j of the starting point (P1) of the domain part + * for the local grid i + * + * \param i local grid + * \param j dimension + * + * + */ + inline size_t loc_grid_info_start(size_t i,size_t j) + { + return gdb_ext.get(i).DBox.getLow(i); + } + + /*! 
\brief Return the component j of the stop point (P2) of the domain part + * for the local grid i + * + * \param i local grid + * \param j dimension + * + * + */ + inline size_t loc_grid_info_size(size_t i,size_t j) + { + return gdb_ext.get(i).GDBox.getHigh(i); + } }; + +//////// MACRO in 3D + +#define WHILE_M(grid,stencil) auto & ginfo = grid.getLocalGridsInfo();\ + for (size_t s = 0 ; s < grid.getN_loc_grid() ; s++)\ + {\ + auto it = grid.get_loc_grid_iterator_stencil(s,stencil);\ +\ + int lo[3] = {(int)ginfo.get(s).Dbox.getLow(0),(int)ginfo.get(s).Dbox.getLow(1),(int)ginfo.get(s).Dbox.getLow(2)};\ + int hi[3] = {(int)ginfo.get(s).Dbox.getHigh(0),(int)ginfo.get(s).Dbox.getHigh(1),(int)ginfo.get(s).Dbox.getHigh(2)};\ +\ + int uhi[3] = {(int)ginfo.get(s).GDbox.getHigh(0),(int)ginfo.get(s).GDbox.getHigh(1),(int)ginfo.get(s).GDbox.getHigh(2)};\ +\ + int sx = uhi[0]+1;\ + int sxsy = (uhi[0]+1)*(uhi[1]+1); + +#define ITERATE_3D_M(n_pt) int i = lo[2];\ + for ( ; i <= hi[2] ; i+=1)\ + {\ + int j = lo[1];\ + for ( ; j <= hi[1] ; j+=1)\ + {\ + int k = lo[0];\ + for ( ; k <= hi[0] ; k+=n_pt)\ + { + + +#define GET_GRID_M(grid) grid.get_loc_grid(s); + + +#define END_LOOP_M(n_pt) it.private_sum<n_pt>();\ + }\ + it.private_adjust( - k + sx + lo[0]);\ + }\ + it.private_adjust(- j*sx + sxsy + lo[1]*sx);\ + }\ + } + #endif /* SRC_GRID_GRID_DIST_ID_ITERATOR_SUB_HPP_ */ diff --git a/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp b/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp index bab53f28483e171d92533524b29b1e1d00c84501..0c62f40e589ca8ca8555a16be9d72ff83ffc4440 100644 --- a/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp @@ -46,7 +46,7 @@ void Test2D_sub(const Box<2,float> & domain, long int k) Ghost<2,float> g(0.01 / factor); // Distributed grid with id decomposition - grid_dist_id<2, float, scalar<float>> g_dist(sz,domain,g); + grid_dist_id<2, float, aggregate<float>> g_dist(sz,domain,g); 
// check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -320,7 +320,7 @@ void Test3D_stencil(const Box<3,float> & domain, long int k) Ghost<3,long int> g(1); // Distributed grid with id decomposition - grid_dist_id<3, float, aggregate<long int>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<long int, long int, double>, CartDecomposition<3,float>> g_dist(sz,domain,g); // fill the grid with values @@ -382,6 +382,97 @@ void Test3D_stencil(const Box<3,float> & domain, long int k) } } +void Test3D_fast_vect(const Box<3,float> & domain, long int k) +{ + grid_key_dx<3> star_stencil_3D[7] = {{0,0,0}, + {0,0,-1}, + {0,0,1}, + {0,-1,0}, + {0,1,0}, + {-1,0,0}, + {1,0,0}}; + + { + Vcluster & v_cl = create_vcluster(); + + if ( v_cl.getProcessingUnits() > 32 ) + return; + + long int big_step = k / 30; + big_step = (big_step == 0)?1:big_step; + long int small_step = 21; + + print_test( "Testing grid 3D fast stencil k<=",k); + + // 3D test + for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) + { + BOOST_TEST_CHECKPOINT( "Testing grid skin iterator from decomposition k<=" << k ); + + // grid size + size_t sz[3]; + sz[0] = k; + sz[1] = k; + sz[2] = k; + + if (k <= 9) + continue; + + Ghost<3,long int> g(1); + + // Distributed grid with id decomposition + grid_dist_id<3, float, aggregate<long int>, CartDecomposition<3,float>> g_dist(sz,domain,g); + + // fill the grid with values + + auto it = g_dist.getDomainGhostIterator(); + + while (it.isNext()) + { + auto p = it.get(); + auto gkey = it.getGKey(p); + + g_dist.template get<0>(p) = gkey.get(0)*gkey.get(0) + gkey.get(1)*gkey.get(1) + gkey.get(2)*gkey.get(2); + + ++it; + } + + g_dist.ghost_get<0>(); + + size_t ret = true; + + WHILE_M(g_dist,star_stencil_3D) + auto & gstl = GET_GRID_M(g_dist); + ITERATE_3D_M(1) + // center point + auto Cp = it.getStencil<0>(); + + // plus,minus X,Y,Z + auto mx = it.getStencil<1>(); + auto px = 
it.getStencil<2>(); + auto my = it.getStencil<3>(); + auto py = it.getStencil<4>(); + auto mz = it.getStencil<5>(); + auto pz = it.getStencil<6>(); + + long int sum = -6*gstl.template get<0>(Cp) + + gstl.template get<0>(mx) + + gstl.template get<0>(px) + + gstl.template get<0>(my) + + gstl.template get<0>(py) + + gstl.template get<0>(mz) + + gstl.template get<0>(pz); + + ret &= (sum == 6); + + END_LOOP_M(1) + + BOOST_REQUIRE_EQUAL(ret,true); + } + + } +} + // Test decomposition grid iterator void Test3D_decskinit(const Box<3,float> & domain, long int k) @@ -517,6 +608,16 @@ BOOST_AUTO_TEST_CASE( grid_dist_it_iterators_skin_test ) Test3D_decskinit(domain3,k); } +BOOST_AUTO_TEST_CASE( grid_dist_it_iterators_3D_fast ) +{ + // Domain + Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0}); + + size_t k = 128*128*128*create_vcluster().getProcessingUnits(); + k = std::pow(k, 1/3.); + Test3D_fast_vect(domain3,k); +} + BOOST_AUTO_TEST_SUITE_END() #endif /* SRC_GRID_ITERATORS_GRID_DIST_ID_ITERATORS_UNIT_TESTS_HPP_ */ diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 49cb6b5866a30a328df89163530d324dac12db2d..fab6c7e6d30c14bc0de2f34144a1137bb96c43d2 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -74,6 +74,9 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi //! Ghost expansion Ghost<dim,St> ghost; + //! Ghost expansion + Ghost<dim,long int> ghost_int; + //! Local grids mutable openfpm::vector<device_grid> loc_grid; @@ -192,6 +195,53 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi return flp; } + /*! \brief this function is for optimization of the ghost size + * + * Because the decomposition work in continuum and discrete ghost is + * converted in continuum, in some case continuum ghost because of + * rounding-off error can produce ghost bigger than the discrete selected + * one. 
This function adjust for this round-off error + * + * \param sub_domain the sub-domain + * \param sub_domain_other the other sub-domain + * \param ib internal ghost box to adjust + * + */ + void set_for_adjustment(const Box<dim,long int> & sub_domain, + const Box<dim,St> & sub_domain_other, + const comb<dim> & cmb, + Box<dim,long int> & ib, + Ghost<dim,long int> & g) + { + if (g.isInvalidGhost() == true) + {return;} + + // Convert from SpaceBox<dim,St> to SpaceBox<dim,long int> + Box<dim,long int> sub_domain_other_exp = cd_sm.convertDomainSpaceIntoGridUnits(sub_domain_other,dec.periodicity()); + + // translate sub_domain_other based on cmb + for (size_t i = 0 ; i < dim ; i++) + { + if (cmb.c[i] == 1) + { + sub_domain_other_exp.setLow(i,sub_domain_other_exp.getLow(i) - ginfo.size(i)); + sub_domain_other_exp.setHigh(i,sub_domain_other_exp.getHigh(i) - ginfo.size(i)); + } + else if (cmb.c[i] == -1) + { + sub_domain_other_exp.setLow(i,sub_domain_other_exp.getLow(i) + ginfo.size(i)); + sub_domain_other_exp.setHigh(i,sub_domain_other_exp.getHigh(i) + ginfo.size(i)); + } + } + + sub_domain_other_exp.enlarge(g); + if (sub_domain_other_exp.Intersect(sub_domain,ib) == false) + { + for (size_t i = 0 ; i < dim ; i++) + {ib.setHigh(i,ib.getLow(i) - 1);} + } + } + /*! 
\brief Create per-processor internal ghost boxes list in grid units and g_id_to_external_ghost_box * */ @@ -221,6 +271,21 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi if (ib.isValid() == false) continue; + size_t sub_id = dec.getProcessorIGhostSub(i,j); + size_t r_sub = dec.getProcessorIGhostSSub(i,j); + + auto & n_box = dec.getNearSubdomains(dec.IDtoProc(i)); + + Box<dim,long int> sub = gdb_ext.get(sub_id).Dbox; + sub += gdb_ext.get(sub_id).origin; + + set_for_adjustment(sub, + n_box.get(r_sub),dec.getProcessorIGhostPos(i,j), + ib,ghost_int); + + if (ib.isValid() == false) + continue; + // save the box and the sub-domain id (it is calculated as the linearization of P1) ::Box<dim,size_t> cvt = ib; @@ -337,6 +402,19 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi ::Box<dim,St> ib_dom = dec.getLocalIGhostBox(i,j); ::Box<dim,long int> ib = cd_sm.convertDomainSpaceIntoGridUnits(ib_dom,dec.periodicity()); + // Check if ib is valid if not it mean that the internal ghost does not contain information so skip it + if (ib.isValid() == false) + continue; + + size_t sub_id = i; + size_t r_sub = dec.getLocalIGhostSub(i,j); + + Box<dim,long int> sub = gdb_ext.get(sub_id).Dbox; + sub += gdb_ext.get(sub_id).origin; + + set_for_adjustment(sub,dec.getSubDomain(r_sub), + dec.getLocalIGhostPos(i,j),ib,ghost_int); + // Check if ib is valid if not it mean that the internal ghost does not contain information so skip it if (ib.isValid() == false) continue; @@ -524,18 +602,6 @@ class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,devi protected: - /*! \brief Get the point where it start the origin of the grid of the sub-domain i - * - * \param i sub-domain - * - * \return the point - * - */ - Point<dim,St> getOffset(size_t i) - { - return pmul(Point<dim,St>(gdb_ext.get(i).origin), cd_sm.getCellBox().getP2()); - } - /*! 
\brief Given a local sub-domain i with a local grid Domain + ghost return the part of the local grid that is domain * * \param i sub-domain @@ -609,6 +675,18 @@ public: return domain; } + /*! \brief Get the point where it start the origin of the grid of the sub-domain i + * + * \param i sub-domain + * + * \return the point + * + */ + Point<dim,St> getOffset(size_t i) + { + return pmul(Point<dim,St>(gdb_ext.get(i).origin), cd_sm.getCellBox().getP2()) + getDomain().getP1(); + } + /*! \brief Get the spacing of the grid in direction i * * \param i dimension @@ -653,8 +731,11 @@ public: * \param ext extension of the grid (must be positive on every direction) * */ - template<typename H> grid_dist_id(const grid_dist_id<dim,St,H,typename Decomposition::base_type,Memory,grid_cpu<dim,H>> & g, const Ghost<dim,long int> & gh, Box<dim,size_t> ext) - :dec(create_vcluster()),v_cl(create_vcluster()) + template<typename H> + grid_dist_id(const grid_dist_id<dim,St,H,typename Decomposition::base_type,Memory,grid_cpu<dim,H>> & g, + const Ghost<dim,long int> & gh, + Box<dim,size_t> ext) + :ghost_int(gh),dec(create_vcluster()),v_cl(create_vcluster()) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -701,8 +782,11 @@ public: * \param ghost Ghost part * */ - grid_dist_id(const Decomposition & dec, const size_t (& g_sz)[dim], const Ghost<dim,St> & ghost) - :domain(dec.getDomain()),ghost(ghost),dec(dec),v_cl(create_vcluster()),ginfo(g_sz),ginfo_v(g_sz) + grid_dist_id(const Decomposition & dec, + const size_t (& g_sz)[dim], + const Ghost<dim,St> & ghost) + :domain(dec.getDomain()),ghost(ghost),ghost_int(INVALID_GHOST),dec(dec),v_cl(create_vcluster()), + ginfo(g_sz),ginfo_v(g_sz) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -719,8 +803,10 @@ public: * \param ghost Ghost part * */ - grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim], const Ghost<dim,St> & ghost) - :domain(dec.getDomain()),ghost(ghost),dec(dec),ginfo(g_sz),ginfo_v(g_sz),v_cl(create_vcluster()) 
+ grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim], + const Ghost<dim,St> & ghost) + :domain(dec.getDomain()),ghost(ghost),dec(dec),ginfo(g_sz), + ginfo_v(g_sz),v_cl(create_vcluster()),ghost_int(INVALID_GHOST) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -739,8 +825,10 @@ public: * \warning In very rare case the ghost part can be one point bigger than the one specified * */ - grid_dist_id(const Decomposition & dec, const size_t (& g_sz)[dim], const Ghost<dim,long int> & g) - :domain(dec.getDomain()),dec(create_vcluster()),v_cl(create_vcluster()),ginfo(g_sz),ginfo_v(g_sz) + grid_dist_id(const Decomposition & dec, const size_t (& g_sz)[dim], + const Ghost<dim,long int> & g) + :domain(dec.getDomain()),ghost_int(g),dec(create_vcluster()),v_cl(create_vcluster()), + ginfo(g_sz),ginfo_v(g_sz) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -764,8 +852,10 @@ public: * \warning In very rare case the ghost part can be one point bigger than the one specified * */ - grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim], const Ghost<dim,long int> & g) - :domain(dec.getDomain()),dec(dec),v_cl(create_vcluster()),ginfo(g_sz),ginfo_v(g_sz) + grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim], + const Ghost<dim,long int> & g) + :domain(dec.getDomain()),dec(dec),v_cl(create_vcluster()),ginfo(g_sz), + ginfo_v(g_sz),ghost_int(g) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -787,7 +877,8 @@ public: * \warning In very rare case the ghost part can be one point bigger than the one specified * */ - grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,St> & g) + grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, + const Ghost<dim,St> & g) :grid_dist_id(g_sz,domain,g,create_non_periodic<dim>()) { } @@ -816,8 +907,10 @@ public: * \warning In very rare case the ghost part can be one point bigger than the one specified * */ - grid_dist_id(const size_t (& g_sz)[dim],const 
Box<dim,St> & domain, const Ghost<dim,St> & g, const periodicity<dim> & p) - :domain(domain),ghost(g),dec(create_vcluster()),v_cl(create_vcluster()),ginfo(g_sz),ginfo_v(g_sz) + grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, + const Ghost<dim,St> & g, const periodicity<dim> & p) + :domain(domain),ghost(g),ghost_int(INVALID_GHOST),dec(create_vcluster()),v_cl(create_vcluster()), + ginfo(g_sz),ginfo_v(g_sz) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -838,8 +931,10 @@ public: * \warning In very rare case the ghost part can be one point bigger than the one specified * */ - grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,long int> & g, const periodicity<dim> & p) - :domain(domain),dec(create_vcluster()),v_cl(create_vcluster()),ginfo(g_sz),ginfo_v(g_sz) + grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, + const Ghost<dim,long int> & g, const periodicity<dim> & p) + :domain(domain),ghost_int(g),dec(create_vcluster()),v_cl(create_vcluster()),ginfo(g_sz), + ginfo_v(g_sz) { #ifdef SE_CLASS2 check_new(this,8,GRID_DIST_EVENT,4); @@ -1199,7 +1294,7 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type + template <unsigned int p = 0>inline auto get(const grid_dist_key_dx<dim> & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type { #ifdef SE_CLASS2 check_valid(this,8); @@ -1215,7 +1310,7 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type + template <unsigned int p = 0>inline auto get(const grid_dist_key_dx<dim> & v1) -> typename 
std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type { #ifdef SE_CLASS2 check_valid(this,8); @@ -1231,7 +1326,39 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto get(const grid_dist_lin_dx & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type + template <unsigned int p = 0>inline auto get(grid_dist_g_dx<device_grid> & v1) const -> typename std::add_lvalue_reference<decltype(v1.getSub()->template get<p>(v1.getKey()))>::type + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + return v1.getSub()->template get<p>(v1.getKey()); + } + + /*! \brief Get the reference of the selected element + * + * \tparam p property to get (is an integer) + * \param v1 grid_key that identify the element in the grid + * + * \return the selected element + * + */ + template <unsigned int p = 0>inline auto get(grid_dist_g_dx<device_grid> & v1) -> typename std::add_lvalue_reference<decltype(v1.getSub()->template get<p>(v1.getKey()))>::type + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + return v1.getSub()->template get<p>(v1.getKey()); + } + + /*! 
\brief Get the reference of the selected element + * + * \tparam p property to get (is an integer) + * \param v1 grid_key that identify the element in the grid + * + * \return the selected element + * + */ + template <unsigned int p = 0>inline auto get(const grid_dist_lin_dx & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type { #ifdef SE_CLASS2 check_valid(this,8); @@ -1247,7 +1374,7 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto get(const grid_dist_lin_dx & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type + template <unsigned int p = 0>inline auto get(const grid_dist_lin_dx & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type { #ifdef SE_CLASS2 check_valid(this,8); @@ -1263,7 +1390,7 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto getProp(const grid_dist_key_dx<dim> & v1) const -> decltype(this->template get<p>(v1)) + template <unsigned int p = 0>inline auto getProp(const grid_dist_key_dx<dim> & v1) const -> decltype(this->template get<p>(v1)) { return this->template get<p>(v1); } @@ -1276,7 +1403,7 @@ public: * \return the selected element * */ - template <unsigned int p>inline auto getProp(const grid_dist_key_dx<dim> & v1) -> decltype(this->template get<p>(v1)) + template <unsigned int p = 0>inline auto getProp(const grid_dist_key_dx<dim> & v1) -> decltype(this->template get<p>(v1)) { return this->template get<p>(v1); } @@ -1366,6 +1493,7 @@ public: g_id_to_internal_ghost_box); } + /*! 
\brief Copy the give grid into this grid * * It copy the first grid into the given grid (No ghost) @@ -1373,21 +1501,52 @@ public: * \warning the Decomposition must be ensured to be the same, otherwise crashes can happen, if you want to copy the grid independently from the decomposition please use the operator equal * * \param g Grid to copy + * \param use_memcpy use memcpy function if possible * * \return itself * */ - grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & copy(grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g) + grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & copy(grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g, bool use_memcpy = true) { - auto it = this->getDomainIterator(); + if (T::noPointers() == true && use_memcpy) + { + for (size_t i = 0 ; i < this->getN_loc_grid() ; i++) + { + auto & gs_src = this->get_loc_grid(i).getGrid(); + + long int start = gs_src.LinId(gdb_ext.get(i).Dbox.getKP1()); + long int stop = gs_src.LinId(gdb_ext.get(i).Dbox.getKP2()); + + if (stop < start) {continue;} + + void * dst = static_cast<void *>(static_cast<char *>(this->get_loc_grid(i).getPointer()) + start*sizeof(T)); + void * src = static_cast<void *>(static_cast<char *>(g.get_loc_grid(i).getPointer()) + start*sizeof(T)); - while (it.isNext()) + memcpy(dst,src,sizeof(T) * (stop + 1 - start)); + } + } + else { - auto key = it.get(); + grid_key_dx<dim> cnt[1]; + cnt[0].zero(); + + for (size_t i = 0 ; i < this->getN_loc_grid() ; i++) + { + auto & dst = this->get_loc_grid(i); + auto & src = g.get_loc_grid(i); + + auto it = this->get_loc_grid_iterator_stencil(i,cnt); - this->loc_grid.get(key.getSub()).get_o(key.getKey()) = g.loc_grid.get(key.getSub()).get_o(key.getKey()); + while (it.isNext()) + { + // center point + auto Cp = it.template getStencil<0>(); - ++it; + dst.get_o(Cp) = src.get_o(Cp); + + ++it; + } + } } return *this; @@ -1510,6 +1669,36 @@ public: return loc_grid.get(i); } + /*! 
\brief Get the i sub-domain grid + * + * \param i sub-domain + * + * \return local grid + * + */ + grid_key_dx_iterator_sub<dim,no_stencil> get_loc_grid_iterator(size_t i) + { + return grid_key_dx_iterator_sub<dim,no_stencil>(loc_grid.get(i).getGrid(), + gdb_ext.get(i).Dbox.getKP1(), + gdb_ext.get(i).Dbox.getKP2()); + } + + /*! \brief Get the i sub-domain grid + * + * \param i sub-domain + * + * \return local grid + * + */ + template<unsigned int Np> + grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>> get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np]) + { + return grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>>(loc_grid.get(i).getGrid(), + gdb_ext.get(i).Dbox.getKP1(), + gdb_ext.get(i).Dbox.getKP2(), + stencil_pnt); + } + /*! \brief Return the number of local grid * * \return the number of local grid @@ -1614,6 +1803,26 @@ public: map(); } + /*! \brief Get the internal local ghost box + * + * \return the internal local ghost box + * + */ + const openfpm::vector<i_lbox_grid<dim>> & get_loc_ig_box() + { + return this->loc_ig_box; + } + + /*! \brief Get the internal ghost box + * + * \return the internal local ghost box + * + */ + const openfpm::vector<i_lbox_grid<dim>> & get_ig_box() + { + return this->ig_box; + } + //! 
Define friend classes //\cond friend grid_dist_id<dim,St,T,typename Decomposition::extended_type,Memory,device_grid>; diff --git a/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp index fd63918ace57d8f4a9f4f65bda40651e83fec643..75a1878eae3cff13d368cedbc7b4797236c7e62a 100644 --- a/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp +++ b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp @@ -38,7 +38,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_save_test ) Ghost<2,float> g(ghost_part); // Distributed grid with id decomposition - grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); + grid_dist_id<2, float, aggregate<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); // get the decomposition auto & dec = g_dist.getDecomposition(); @@ -71,12 +71,12 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_save_test ) size_t sum = 0; for (size_t i = 0; i < count_total.size(); i++) - sum += count_total.get(i); + {sum += count_total.get(i);} timer t; t.start(); // Save the grid - g_dist.save("grid_dist_id.h5"); + g_dist.save("grid_dist_id.h5" + std::to_string(v_cl.getProcessingUnits())); t.stop(); } @@ -106,24 +106,15 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) Ghost<2,float> g(ghost_part); // Distributed grid with id decomposition - grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); - - g_dist.getDecomposition().write("Before_load_grid_decomposition"); - g_dist.write("Before_Loaded_grid"); - - timer t; - t.start(); - // Save the grid - g_dist.load("grid_dist_id.h5"); - t.stop(); + grid_dist_id<2, float, aggregate<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); - g_dist.write("Loaded_grid"); - g_dist.getDecomposition().write("Loaded_grid_decomposition"); + g_dist.load("grid_dist_id.h5" + std::to_string(v_cl.getProcessingUnits())); auto it = g_dist.getDomainIterator(); size_t count = 0; + bool match = true; while (it.isNext()) { //key 
@@ -134,7 +125,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) auto keyg = g_dist.getGKey(key); - BOOST_REQUIRE_EQUAL(g_dist.template get<0>(key), keyg.get(0)); + match &= g_dist.template get<0>(key) == keyg.get(0); ++it; count++; @@ -150,6 +141,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) sum += count_total.get(i); BOOST_REQUIRE_EQUAL(sum, (size_t)k*k); + BOOST_REQUIRE_EQUAL(match,true); } diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index b41d03de4c39d92d105726aab9c122c203b3199b..2b894424811880276482f5aefdd99f3fd4643185 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -9,7 +9,7 @@ #define SRC_GRID_GRID_DIST_ID_COMM_HPP_ #include "Vector/vector_dist_ofb.hpp" -#include "data_type/scalar.hpp" +#include "Grid/copy_grid_fast.hpp" /*! \brief Unpack selector * @@ -88,7 +88,12 @@ struct grid_unpack_selector_with_prp<true,T,device_grid,Memory> while (sub2.isNext()) { - object_s_di_op<op,decltype(gs.get_o(it_src.get())),decltype(gd.get_o(sub2.get())),OBJ_ENCAP,prp...>(gs.get_o(it_src.get()),gd.get_o(sub2.get())); + object_s_di_op<op, + decltype(gs.get_o(it_src.get())), + decltype(gd.get_o(sub2.get())), + OBJ_ENCAP,prp...> + (gs.get_o(it_src.get()), + gd.get_o(sub2.get())); ++sub2; ++it_src; @@ -213,6 +218,9 @@ class grid_dist_id_comm openfpm::vector<device_grid> & loc_grid, std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box) { + grid_key_dx<dim> cnt[1]; + cnt[0].zero(); + //! 
For all the sub-domains for (size_t i = 0 ; i < loc_ig_box.size() ; i++) { @@ -239,30 +247,39 @@ class grid_dist_id_comm if (bx_dst.isValid() == false) continue; - grid_key_dx_iterator_sub<dim> sub_src(loc_grid.get(i).getGrid(),bx_src.getKP1(),bx_src.getKP2()); - grid_key_dx_iterator_sub<dim> sub_dst(loc_grid.get(sub_id_dst).getGrid(),bx_dst.getKP1(),bx_dst.getKP2()); + const auto & gs = loc_grid.get(i); + auto & gd = loc_grid.get(sub_id_dst); #ifdef SE_CLASS1 if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i) - std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n"; + {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";} - if (sub_src.getVolume() != sub_dst.getVolume()) - std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination does not match in size" << "\n"; + if (bx_src.getVolumeKey() != bx_dst.getVolumeKey()) + {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination does not match in size" << "\n";} -#endif + auto bxs = gs.getGrid().getBoxKey(); + auto bxd = gd.getGrid().getBoxKey(); - const auto & gs = loc_grid.get(i); - auto & gd = loc_grid.get(sub_id_dst); + if (bxs.isContained(bx_src) == false) + {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " the source box is out of bound of the local grid" << "\n";} - while (sub_src.isNext()) - { - // Option 1 - gd.set(sub_dst.get(),gs,sub_src.get()); + if (bxd.isContained(bx_dst) == false) + {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " the destination box is out of bound of the local grid" << "\n";} - ++sub_src; - ++sub_dst; - } +#endif + + typedef typename std::remove_reference<decltype(gd)>::type grid_cp; + typedef typename std::remove_reference<decltype(loc_grid.get(i).getGrid())>::type grid_info_cp; + + copy_grid_fast<!is_contiguos<prp...>::type::value || has_pack_gen<typename device_grid::value_type>::value, + dim, + 
grid_cp, + grid_info_cp>::copy(loc_grid.get(i).getGrid(), + loc_grid.get(sub_id_dst).getGrid(), + bx_src, + bx_dst, + gs,gd,cnt); } } } @@ -341,6 +358,187 @@ class grid_dist_id_comm } } + /*! \brief this function create send and receive asynchronously to receive ghosts part + * + * \param ig_box internal ghost box + * \param eg_box external ghost box + * + */ + template<int... prp> + void send_and_receive_ghost(ExtPreAlloc<Memory> ** prAlloc_prp, + ExtPreAlloc<Memory> ** prRecv_prp, + const openfpm::vector<ip_box_grid<dim>> & ig_box, + const openfpm::vector<ep_box_grid<dim>> & eg_box, + const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, + openfpm::vector<device_grid> & loc_grid, + size_t & req) + { + // Sending property object + typedef object<typename object_creator<typename T::type,prp...>::type> prp_object; + + // Create a packing request vector + for ( size_t i = 0 ; i < ig_box.size() ; i++ ) + { + // for each ghost box + for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + { + // And linked sub-domain + size_t sub_id = ig_box.get(i).bid.get(j).sub; + // Internal ghost box + Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box; + + if (g_ig_box.isValid() == false) + continue; + + g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); + + // Pack a size_t for the internal ghost id + Packer<size_t,HeapMemory>::packRequest(req); + + // Create a sub grid iterator spanning the internal ghost layer + + grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2()); + // and pack the internal ghost grid + Packer<device_grid,HeapMemory>::template packRequest<prp...>(loc_grid.get(sub_id),sub_it,req); + } + } + + // resize the property buffer memory + g_send_prp_mem.resize(req); + + // Create an object of preallocated memory for properties + (*prAlloc_prp) = new ExtPreAlloc<Memory>(req,g_send_prp_mem); + (*prAlloc_prp)->incRef(); + + // Pack information + Pack_stat sts; + + // Pack the 
information for each processor and send it + for ( size_t i = 0 ; i < ig_box.size() ; i++ ) + { + + sts.mark(); + void * pointer = (*prAlloc_prp)->getPointerEnd(); + + // for each ghost box + for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + { + // we pack only if it is valid + if (ig_box.get(i).bid.get(j).box.isValid() == false) + continue; + + // And linked sub-domain + size_t sub_id = ig_box.get(i).bid.get(j).sub; + // Internal ghost box + Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box; + g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); + // Ghost box global id + size_t g_id = ig_box.get(i).bid.get(j).g_id; + + // Pack a size_t for the internal ghost id + Packer<size_t,HeapMemory>::pack(**prAlloc_prp,g_id,sts); + + // Create a sub grid iterator spanning the internal ghost layer + grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2()); + // and pack the internal ghost grid + Packer<device_grid,HeapMemory>::template pack<prp...>(**prAlloc_prp,loc_grid.get(sub_id),sub_it,sts); + } + // send the request + + void * pointer2 = (*prAlloc_prp)->getPointerEnd(); + + v_cl.send(ig_box.get(i).prc,0,pointer,(char *)pointer2 - (char *)pointer); + } + + // Calculate the total information to receive from each processors + std::vector<size_t> prp_recv; + + //! Receive the information from each processors + for ( size_t i = 0 ; i < eg_box.size() ; i++ ) + { + prp_recv.push_back(0); + + // for each external ghost box + for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++) + { + // External ghost box + Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box; + prp_recv[prp_recv.size()-1] += g_eg_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t); + } + } + + size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv); + + //! 
Resize the receiving buffer + g_recv_prp_mem.resize(tot_recv); + + // Create an object of preallocated memory for properties + (*prRecv_prp) = new ExtPreAlloc<Memory>(tot_recv,g_recv_prp_mem); + (*prRecv_prp)->incRef(); + + // queue the receives + for ( size_t i = 0 ; i < eg_box.size() ; i++ ) + { + (*prRecv_prp)->allocate(prp_recv[i]); + v_cl.recv(eg_box.get(i).prc,0,(*prRecv_prp)->getPointer(),prp_recv[i]); + } + } + + /*! \brief Process the received data + * + * \param eg_box external ghost box + * + */ + template<int... prp> + void process_received(ExtPreAlloc<Memory> * prRecv_prp, + const openfpm::vector<ep_box_grid<dim>> & eg_box, + openfpm::vector<device_grid> & loc_grid, + std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box) + { + Unpack_stat ps; + + // Unpack the object + for ( size_t i = 0 ; i < eg_box.size() ; i++ ) + { + // for each external ghost box + for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++) + { + // Unpack the ghost box global-id + + size_t g_id; + Unpacker<size_t,HeapMemory>::unpack(*prRecv_prp,g_id,ps); + + size_t l_id = 0; + // convert the global id into local id + auto key = g_id_to_external_ghost_box.find(g_id); + if (key != g_id_to_external_ghost_box.end()) // FOUND + l_id = key->second; + else + { + // NOT FOUND + + // It must be always found, if not it mean that the processor has no-idea of + // what is stored and conseguently do not know how to unpack, print a critical error + // and return + + std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n"; + + return; + } + + // Get the external ghost box associated with the packed information + Box<dim,size_t> box = eg_box.get(i).bid.get(l_id).l_e_box; + size_t sub_id = eg_box.get(i).bid.get(l_id).sub; + + // sub-grid where to unpack + grid_key_dx_iterator_sub<dim> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2()); + + // Unpack + Unpacker<device_grid,HeapMemory>::template 
unpack<prp...>(*prRecv_prp,sub2,loc_grid.get(sub_id),ps); + } + } + } + public: /*! \brief Reconstruct the local grids @@ -370,34 +568,19 @@ public: while (it.isNext()) { - //auto key = it.get(); - - //if (g.template get<0>(key) != 1) - //std::cout << "WRONG???????" << std::endl; - ++it; count++; } SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k); - //device_grid gr_send(sz); - //gr_send.setMemory(); - - //std::cout << "B: (" << b.getLow(0) << "; " << b.getLow(1) << "); (" << b.getHigh(0) << "; " << b.getHigh(1) << "); " << "G: (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl; - - // Set the dimensions of the local grid - //g.resize(l_res); - Point<dim,St> p; for (size_t n = 0; n < dim; n++) - p.get(n) = g.getGrid().getBox().getHigh(n); - - //std::cout << "G after resize: (" << g.getGrid().getBox().getLow(0) << "; " << g.getGrid().getBox().getLow(1) << "); (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl; + {p.get(n) = g.getGrid().getBox().getHigh(n);} Point<dim,St> point; for (size_t n = 0; n < dim; n++) - point.get(n) = (b.getHigh(n) + b.getLow(n))/2; + {point.get(n) = (b.getHigh(n) + b.getLow(n))/2;} for (size_t j = 0; j < gdb_ext.size(); j++) { @@ -422,7 +605,6 @@ public: std::string str = key.to_string(); grid_key_dx<dim> key2 = key - start; - //std::cout << "Key: " << str << std::endl; loc_grid.get(j).get_o(key) = g.get_o(key2); count2++; @@ -432,7 +614,6 @@ public: } } } - //std::cout << "Count after: " << count2 << std::endl; } /*! 
\brief Label intersection grids for mappings @@ -641,115 +822,13 @@ public: openfpm::vector<device_grid> & loc_grid, std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box) { - // Sending property object - typedef object<typename object_creator<typename T::type,prp...>::type> prp_object; - size_t req = 0; - // Create a packing request vector - for ( size_t i = 0 ; i < ig_box.size() ; i++ ) - { - // for each ghost box - for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) - { - // And linked sub-domain - size_t sub_id = ig_box.get(i).bid.get(j).sub; - // Internal ghost box - Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box; - - if (g_ig_box.isValid() == false) - continue; - - g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); - - // Pack a size_t for the internal ghost id - Packer<size_t,HeapMemory>::packRequest(req); - // Create a sub grid iterator spanning the internal ghost layer - grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2()); - // and pack the internal ghost grid - Packer<device_grid,HeapMemory>::template packRequest<prp...>(loc_grid.get(sub_id),sub_it,req); - } - } - - // resize the property buffer memory - g_send_prp_mem.resize(req); + ExtPreAlloc<Memory> * prRecv_prp = NULL; + ExtPreAlloc<Memory> * prAlloc_prp = NULL; - // Create an object of preallocated memory for properties - ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem)); - - prAlloc_prp.incRef(); - - // Pack information - Pack_stat sts; - - // Pack the information for each processor and send it - for ( size_t i = 0 ; i < ig_box.size() ; i++ ) - { - - sts.mark(); - void * pointer = prAlloc_prp.getPointerEnd(); - - // for each ghost box - for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) - { - // we pack only if it is valid - if (ig_box.get(i).bid.get(j).box.isValid() == false) - continue; - - // And linked sub-domain - size_t sub_id = ig_box.get(i).bid.get(j).sub; 
- // Internal ghost box - Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box; - g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); - // Ghost box global id - size_t g_id = ig_box.get(i).bid.get(j).g_id; - - // Pack a size_t for the internal ghost id - Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts); - // Create a sub grid iterator spanning the internal ghost layer - grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_ig_box.getKP1(),g_ig_box.getKP2()); - // and pack the internal ghost grid - Packer<device_grid,HeapMemory>::template pack<prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts); - } - // send the request - - void * pointer2 = prAlloc_prp.getPointerEnd(); - - v_cl.send(ig_box.get(i).prc,0,pointer,(char *)pointer2 - (char *)pointer); - } - - // Calculate the total information to receive from each processors - std::vector<size_t> prp_recv; - - //! Receive the information from each processors - for ( size_t i = 0 ; i < eg_box.size() ; i++ ) - { - prp_recv.push_back(0); - - // for each external ghost box - for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++) - { - // External ghost box - Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box; - prp_recv[prp_recv.size()-1] += g_eg_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t); - } - } - - size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv); - - //! 
Resize the receiving buffer - g_recv_prp_mem.resize(tot_recv); - - // Create an object of preallocated memory for properties - ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(tot_recv,g_recv_prp_mem)); - prRecv_prp.incRef(); - - // queue the receives - for ( size_t i = 0 ; i < eg_box.size() ; i++ ) - { - prRecv_prp.allocate(prp_recv[i]); - v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]); - } + if (v_cl.getProcessingUnits() != 1) + {send_and_receive_ghost<prp...>(&prAlloc_prp,&prRecv_prp, ig_box,eg_box,gdb_ext,loc_grid,req);} // Before wait for the communication to complete we sync the local ghost // in order to overlap with communication @@ -759,48 +838,8 @@ public: // wait to receive communication v_cl.execute(); - Unpack_stat ps; - - // Unpack the object - for ( size_t i = 0 ; i < eg_box.size() ; i++ ) - { - // for each external ghost box - for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++) - { - // Unpack the ghost box global-id - - size_t g_id; - Unpacker<size_t,HeapMemory>::unpack(prRecv_prp,g_id,ps); - - size_t l_id = 0; - // convert the global id into local id - auto key = g_id_to_external_ghost_box.find(g_id); - if (key != g_id_to_external_ghost_box.end()) // FOUND - l_id = key->second; - else - { - // NOT FOUND - - // It must be always found, if not it mean that the processor has no-idea of - // what is stored and conseguently do not know how to unpack, print a critical error - // and return - - std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n"; - - return; - } - - // Get the external ghost box associated with the packed information - Box<dim,size_t> box = eg_box.get(i).bid.get(l_id).l_e_box; - size_t sub_id = eg_box.get(i).bid.get(l_id).sub; - - // sub-grid where to unpack - grid_key_dx_iterator_sub<dim> sub2(loc_grid.get(sub_id).getGrid(),box.getKP1(),box.getKP2()); - - // Unpack - Unpacker<device_grid,HeapMemory>::template 
unpack<prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps); - } - } + if (v_cl.getProcessingUnits() != 1) + {process_received<prp...>(prRecv_prp,eg_box,loc_grid,g_id_to_external_ghost_box);} } /*! \brief It merge the information in the ghost with the @@ -849,6 +888,7 @@ public: // Pack a size_t for the internal ghost id Packer<size_t,HeapMemory>::packRequest(req); + // Create a sub grid iterator spanning the internal ghost layer grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2()); // and pack the internal ghost grid @@ -891,6 +931,7 @@ public: // Pack a size_t for the internal ghost id Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts); + // Create a sub grid iterator spanning the internal ghost layer grid_key_dx_iterator_sub<dim> sub_it(loc_grid.get(sub_id).getGrid(),g_eg_box.getKP1(),g_eg_box.getKP2()); // and pack the internal ghost grid diff --git a/src/Grid/grid_dist_id_unit_test.cpp b/src/Grid/grid_dist_id_unit_test.cpp index 3bc6ff21359e76240c6e6be94b0b59789c37b2dd..187552054cca9eac1465124d0b5df61cb31f5d4e 100644 --- a/src/Grid/grid_dist_id_unit_test.cpp +++ b/src/Grid/grid_dist_id_unit_test.cpp @@ -1,22 +1,14 @@ -#ifndef GRID_DIST_UNIT_TEST_HPP -#define GRID_DIST_UNIT_TEST_HPP - #define BOOST_TEST_DYN_LINK #include <boost/test/unit_test.hpp> #include "Point_test.hpp" #include "grid_dist_id.hpp" -#include "data_type/scalar.hpp" #include "data_type/aggregate.hpp" +extern void print_test_v(std::string test, size_t sz); BOOST_AUTO_TEST_SUITE( grid_dist_id_test ) -void print_test(std::string test, size_t sz) -{ - if (create_vcluster().getProcessUnitID() == 0) - std::cout << test << " " << sz << "\n"; -} BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter3D_test) { @@ -39,7 +31,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter3D_test) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 3D grid converter k<=",k); + print_test_v( "Testing 3D grid 
converter k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -56,7 +48,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter3D_test) Ghost<3,float> g(0.01); // Distributed grid with id decomposition - grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); // get the decomposition auto & dec = g_dist.getDecomposition(); @@ -127,7 +119,7 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter_test) Ghost<2,float> g(0.01); // Distributed grid with id decomposition - grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); + grid_dist_id<2, float, aggregate<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); // get the decomposition auto & dec = g_dist.getDecomposition(); @@ -167,7 +159,7 @@ void Test2D(const Box<2,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 2D grid k<=",k); + print_test_v( "Testing 2D grid k<=",k); // 2D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -187,7 +179,7 @@ void Test2D(const Box<2,float> & domain, long int k) Ghost<2,float> g(0.01 / factor); // Distributed grid with id decomposition - grid_dist_id<2, float, scalar<float>> g_dist(sz,domain,g); + grid_dist_id<2, float, aggregate<float>> g_dist(sz,domain,g); // check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -288,7 +280,7 @@ void Test1D(const Box<1,float> & domain, long int k) if (v_cl.getProcessingUnits() > 48) return; - print_test( "Testing 1D grid k<=",k); + print_test_v( "Testing 1D grid k<=",k); // 1D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -307,7 +299,7 @@ void Test1D(const Box<1,float> & domain, long int k) Ghost<1,float> g(0.01 / factor); // Distributed grid with id decomposition - grid_dist_id<1, 
float, scalar<float>> g_dist(sz,domain,g); + grid_dist_id<1, float, aggregate<float>> g_dist(sz,domain,g); // check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -405,7 +397,7 @@ void Test3D_sub(const Box<3,float> & domain, long int k) if (create_vcluster().getProcessingUnits() > 32) return; - print_test( "Testing 3D grid sub k<=",k); + print_test_v( "Testing 3D grid sub k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -425,7 +417,7 @@ void Test3D_sub(const Box<3,float> & domain, long int k) Ghost<3,float> g(0.01 / factor); // Distributed grid with id decomposition - grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); // check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -514,7 +506,7 @@ void Test3D(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 3D grid k<=",k); + print_test_v( "Testing 3D grid k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -534,7 +526,7 @@ void Test3D(const Box<3,float> & domain, long int k) Ghost<3,float> g(0.01 / factor); // Distributed grid with id decomposition - grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); // check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -627,7 +619,7 @@ void Test3D_gg(const Box<3,float> & domain, long int k, long int gk) if (create_vcluster().getProcessingUnits() > 32) return; - print_test( "Testing 3D grid k<=",k); + print_test_v( "Testing 3D grid k<=",k); // 3D test for ( ; k > 64 ; k /= 2 ) @@ -644,7 +636,7 @@ void Test3D_gg(const Box<3,float> & domain, 
long int k, long int gk) Ghost<3,long int> g(gk); // Distributed grid with id decomposition - grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); // check the consistency of the decomposition bool val = g_dist.getDecomposition().check_consistency(); @@ -678,7 +670,7 @@ void Test3D_domain(const Box<3,float> & domain, long int k, const periodicity<3> big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 3D grid shift domain k<=",k); + print_test_v( "Testing 3D grid shift domain k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -783,7 +775,7 @@ void Test2D_complex(const Box<2,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 2D complex grid k<=",k); + print_test_v( "Testing 2D complex grid k<=",k); // 2D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -947,7 +939,7 @@ void Test3D_complex(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing 3D grid complex k<=",k); + print_test_v( "Testing 3D grid complex k<=",k); // 2D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -1113,7 +1105,7 @@ void Test3D_dup(const Box<3,float> & domain, long int k) if ( v_cl.getProcessingUnits() > 32 ) return; - print_test( "Testing 3D duplicate topology complex k<=",k); + print_test_v( "Testing 3D duplicate topology complex k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -1216,7 +1208,7 @@ void Test3D_periodic(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing grid periodic k<=",k); + print_test_v( "Testing grid periodic k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -1373,7 
+1365,7 @@ void Test3D_periodic_put(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing grid periodic put k<=",k); + print_test_v( "Testing grid periodic put k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -1493,7 +1485,7 @@ void Test_grid_copy(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing grid copy k<=",k); + print_test_v( "Testing grid copy k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -1590,6 +1582,89 @@ void Test_grid_copy(const Box<3,float> & domain, long int k) } } +void Test_ghost_correction(Box<3,double> & domain, long int k, long int g_) +{ + size_t sz[3] = {(size_t)k,(size_t)k,(size_t)k}; + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + Ghost<3,long int> g(g_); + + grid_dist_id<3, double, aggregate<double>> grid(sz,domain,g,bc); + + auto itg = grid.getDomainGhostIterator(); + + while (itg.isNext()) + { + auto key = itg.get(); + + grid.template get<0>(key) = 0.0; + + ++itg; + } + + // Fill everything with 5 + + auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + auto gkey = it.getGKey(key); + + if (gkey.get(0) == -4 && gkey.get(1) == 20 && gkey.get(2) == -4) + { + grid.template get<0>(key) = 20.0; + } + else + { + grid.template get<0>(key) = 5.0; + } + + ++it; + } + + grid.ghost_get<0>(); + auto it2 = grid.getDomainGhostIterator(); + + bool is_inside = true; + + while (it2.isNext()) + { + auto key = it2.get(); + auto gkey = it2.getGKey(key); + + if (grid.template get<0>(key) == 5.0) + { + // Here we check that the point is with in one stencil point + // from one sub-domain + + bool is_inside_point = false; + for (size_t i = 0 ; i < grid.getN_loc_grid() ; i++) + { + Box<3,long int> bx = grid.getLocalGridsInfo().get(i).Dbox; + bx += grid.getLocalGridsInfo().get(i).origin; + + 
bx.enlarge(g); + + if (bx.isInside(gkey.toPoint()) == true) + { + is_inside_point |= true; + } + } + + is_inside &= is_inside_point; + } + + ++it2; + } + + + grid.getDecomposition().write("dec_set_for_adj"); + grid.write("dec_for_adj"); + + BOOST_REQUIRE_EQUAL(is_inside,true); +} + #include "grid_dist_id_unit_test_ext_dom.hpp" #include "grid_dist_id_unit_test_unb_ghost.hpp" @@ -1756,6 +1831,146 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_periodic_put_test ) Test3D_periodic_put(domain3,k); } +BOOST_AUTO_TEST_CASE ( grid_ghost_correction ) +{ + Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5}); + + long int k = 128; + + Test_ghost_correction(domain,k,1); + Test_ghost_correction(domain,k,2); + Test_ghost_correction(domain,k,3); + Test_ghost_correction(domain,k,4); + + k = 64; + + Test_ghost_correction(domain,k,1); + Test_ghost_correction(domain,k,2); + Test_ghost_correction(domain,k,3); + Test_ghost_correction(domain,k,4); + + k = 32; + + Test_ghost_correction(domain,k,1); + Test_ghost_correction(domain,k,2); + Test_ghost_correction(domain,k,3); + Test_ghost_correction(domain,k,4); + + k = 16; + + Test_ghost_correction(domain,k,1); + Test_ghost_correction(domain,k,2); + Test_ghost_correction(domain,k,3); + Test_ghost_correction(domain,k,4); +} + +BOOST_AUTO_TEST_CASE ( grid_basic_functions ) +{ + auto & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() != 1) + {return;} + + size_t sz[2] = {(size_t)8,(size_t)8}; + periodicity<2> bc = {PERIODIC,PERIODIC}; + + Ghost<2,long int> g(1); + Box<2,double> domain({-1.0,-1.0},{1.0,1.0}); + + grid_dist_id<2, double, aggregate<double>> grid(sz,domain,g,bc); + + BOOST_REQUIRE_EQUAL(grid.getOffset(0)[0],-1.25); + BOOST_REQUIRE_EQUAL(grid.getOffset(0)[1],-1.25); +} + +BOOST_AUTO_TEST_CASE ( grid_overflow_round_off_error ) +{ + size_t numGridPoint = 100; + const double domainSize = 20851.7; + double domainLength = sqrt(domainSize); + + Box<2,double> domain({0.0,0.0},{domainLength,domainLength}); + + size_t sz[2] = 
{numGridPoint,numGridPoint}; + + periodicity<2> bc = {PERIODIC,PERIODIC}; + + Ghost<2,double> g(3.0*(domain.getHigh(0) - domain.getLow(0))/numGridPoint + 0.001); + + grid_dist_id<2, double, aggregate<double, double, double, double, double>> grid(sz,domain,g,bc); + + auto & gs = grid.getGridInfo(); + + auto it = grid.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + auto gp = it.getGKey(p); + + grid.get<0>(p) = gs.LinId(gp); + + ++it; + } + + grid.ghost_get<0>(); + + // Now we check + + auto it2 = grid.getDomainIterator(); + + bool match = true; + + while (it2.isNext()) + { + auto p = it2.get(); + auto gp = it.getGKey(p); + + if (gs.LinId(gp) != grid.get<0>(p)) + {match = false;} + + // look around + + auto px = p.move(0,1); + auto gpx = it.getGKey(px); + auto mx = p.move(0,-1); + auto gmx = it.getGKey(mx); + + auto py = p.move(1,1); + auto gpy = it.getGKey(py); + auto my = p.move(1,-1); + auto gmy = it.getGKey(my); + + gpx.set_d(0,gpx.get(0) % gs.size(0)); + gpx.set_d(1,gpx.get(1) % gs.size(1)); + + if (grid.template get<0>(px) != gs.LinId(gpx)) + {match = false;} + + gmx.set_d(0,(gmx.get(0) + gs.size(0)) % gs.size(0)); + gmx.set_d(1,(gmx.get(1) + gs.size(1)) % gs.size(1)); + + if (grid.template get<0>(mx) != gs.LinId(gmx)) + {match = false;} + + gpy.set_d(0,gpy.get(0) % gs.size(0)); + gpy.set_d(1,gpy.get(1) % gs.size(1)); + + if (grid.template get<0>(py) != gs.LinId(gpy)) + {match = false;} + + gmy.set_d(0,(gmy.get(0) + gs.size(0)) % gs.size(0)); + gmy.set_d(1,(gmy.get(1) + gs.size(1)) % gs.size(1)); + + if (grid.template get<0>(my) != gs.LinId(gmy)) + {match = false;} + + ++it2; + } + + BOOST_REQUIRE_EQUAL(match,true); +} + + BOOST_AUTO_TEST_SUITE_END() -#endif diff --git a/src/Grid/grid_dist_id_unit_test_ext_dom.hpp b/src/Grid/grid_dist_id_unit_test_ext_dom.hpp index 3a8baa9e22fe0e79c4baafc5cd01c6e41ff79f01..179e9304d6533b677eb28996f5c6504fa871d960 100644 --- a/src/Grid/grid_dist_id_unit_test_ext_dom.hpp +++ 
b/src/Grid/grid_dist_id_unit_test_ext_dom.hpp @@ -23,7 +23,7 @@ void Test3D_extended_grid(const Box<3,float> & domain, long int k) if ( v_cl.getProcessingUnits() > 32 ) return; - print_test( "Testing 3D extended grid k<=",k); + print_test_v( "Testing 3D extended grid k<=",k); // factor float factor = pow(create_vcluster().getProcessingUnits()/2.0f,1.0f/3.0f); diff --git a/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp b/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp index 51fdbb517a09dbcb08e82b84f23e95e2a2677ae5..d51d25f27b30a5bc511d6c18429e827f3d5b6a33 100644 --- a/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp +++ b/src/Grid/grid_dist_id_unit_test_unb_ghost.hpp @@ -17,7 +17,7 @@ void Test3D_unb_ghost(const Box<3,float> & domain, long int k) if (create_vcluster().getProcessingUnits() > 48) return; - print_test( "Testing 3D grid unbound ghost k<=",k); + print_test_v( "Testing 3D grid unbound ghost k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) @@ -34,7 +34,7 @@ void Test3D_unb_ghost(const Box<3,float> & domain, long int k) Ghost<3,float> g(0.49); // Distributed grid with id decomposition - grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); + grid_dist_id<3, float, aggregate<float>, CartDecomposition<3,float>> g_dist(sz,domain,g); g_dist.getDecomposition().write("no_bound_decomposition"); @@ -132,7 +132,7 @@ void Test3D_unb_ghost_periodic(const Box<3,float> & domain, long int k) big_step = (big_step == 0)?1:big_step; long int small_step = 21; - print_test( "Testing grid periodic unbound ghost k<=",k); + print_test_v( "Testing grid periodic unbound ghost k<=",k); // 3D test for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step ) diff --git a/src/Grid/grid_dist_key.hpp b/src/Grid/grid_dist_key.hpp index 420a281af57106a3529e0a8973b2dc4862c495cc..08b254a78aa9e926a2f3465490d789f449b02b72 100644 --- a/src/Grid/grid_dist_key.hpp +++ b/src/Grid/grid_dist_key.hpp @@ -248,4 +248,102 @@ public: } }; +/*! 
\brief Distributed linearized key + * + * instead of having the sub-subdomain index it store directly a pointer to the grid + * + */ +template<typename device_grid> +class grid_dist_g_dx +{ + //! grid list counter + device_grid * dg; + + //! Local grid iterator + size_t key; + +public: + + /*! \brief return the sub-domain grid + * + * + */ + inline device_grid * getSub() + { + return dg; + } + + + /*! \brief Get the key + * + * \return the local key + * + */ + inline size_t getKey() const + { + return key; + } + + + /*! \brief Get the reference key + * + * \return the local key + * + */ + inline size_t & getKeyRef() + { + return key; + } + + /* \brief Check if two key are the same + * + * \param key_t key to check + * + * \return true if the two key are equal + * + */ + + inline bool operator==(const grid_dist_g_dx & key_t) + { + if (dg != key_t.dg) + return false; + + // Check the two key index by index + + return getKey() == key_t.getKey(); + } + + + /*! \brief Constructor + * + * \param dg array of local grid + * \param key actual position linearized + * + */ + inline grid_dist_g_dx(device_grid * dg, size_t key) + :dg(dg),key(key) + { + } + + //! Constructor + inline grid_dist_g_dx(){} + + /*! 
\brief convert the key to string + * + * \return a string representing the position + * + */ + std::string to_string() + { + std::stringstream str; + + str << "sub_domain=" << dg << " "; + str << "lin_id=" << key << " "; + + str << "\n"; + + return str.str(); + } +}; + #endif diff --git a/src/Grid/grid_dist_util.hpp b/src/Grid/grid_dist_util.hpp index 77e2383cbdef22ef28ffee634e15d109f3731329..0d38b73a7eceb01b58777da73179323cfd1b8ab9 100644 --- a/src/Grid/grid_dist_util.hpp +++ b/src/Grid/grid_dist_util.hpp @@ -80,6 +80,15 @@ template<int dim, typename Decomposition> inline void create_gdb_ext(openfpm::ve SpaceBox<Decomposition::dims, typename Decomposition::stype> sp = dec.getSubDomain(i); SpaceBox<Decomposition::dims, typename Decomposition::stype> sp_g = dec.getSubDomainWithGhost(i); + // Because of round off we expand for safety the ghost area + // std::nextafter return the next bigger or smaller representable floating + // point number + for (size_t i = 0 ; i < Decomposition::dims ; i++) + { + sp_g.setLow(i,std::nextafter(sp_g.getLow(i),sp_g.getLow(i) - 1.0)); + sp_g.setHigh(i,std::nextafter(sp_g.getHigh(i),sp_g.getHigh(i) + 1.0)); + } + // Convert from SpaceBox<dim,St> to SpaceBox<dim,long int> SpaceBox<Decomposition::dims,long int> sp_t = cd_sm.convertDomainSpaceIntoGridUnits(sp,dec.periodicity()); SpaceBox<Decomposition::dims,long int> sp_tg = cd_sm.convertDomainSpaceIntoGridUnits(sp_g,dec.periodicity()); @@ -124,7 +133,7 @@ template<int dim, typename Decomposition> inline void create_gdb_ext(openfpm::ve // fill the spacing for (size_t i = 0 ; i < dim ; i++) - spacing[i] = cd_sm.getCellBox().getP2()[i]; + {spacing[i] = cd_sm.getCellBox().getP2()[i];} } /*! 
\brief it store a box, its unique id and the sub-domain from where it come from diff --git a/src/Grid/staggered_dist_grid_util.hpp b/src/Grid/staggered_dist_grid_util.hpp index 7a7a8d9643aed988a064bb6ee46a5f8138974339..3b2261c2f8df0eaaeff4625ada743c3dd492dfe3 100644 --- a/src/Grid/staggered_dist_grid_util.hpp +++ b/src/Grid/staggered_dist_grid_util.hpp @@ -147,7 +147,11 @@ struct extends<T[N1][N2][N3]> return N1 * N2 * N3; } - //! number of indexes + /*! number of indexes + * + * \return 3 + * + */ static inline size_t dim() { return 3; @@ -164,7 +168,11 @@ struct extends<T[N1][N2][N3][N4]> return N1 * N2 * N3 * N4; } - //! number of indexes + /*! number of indexes + * + * \return 4 + * + */ static inline size_t dim() { return 4; @@ -181,7 +189,11 @@ struct extends<T[N1][N2][N3][N4][N5]> return N1 * N2 * N3 * N4 * N5; } - //! number of indexes + /*! number of indexes + * + * \return 5 + * + */ static inline size_t dim() { return 5; @@ -198,7 +210,11 @@ struct extends<T[N1][N2][N3][N4][N5][N6]> return N1 * N2 * N3 * N4 * N5 * N6; } - //! number of indexes + /*! number of indexes + * + * \return 6 + * + */ static inline size_t dim() { return 6; @@ -215,7 +231,11 @@ struct extends<T[N1][N2][N3][N4][N5][N6][N7]> return N1 * N2 * N3 * N4 * N5 * N6 * N7; } - //! number of indexes + /*! number of indexes + * + * \return 7 + * + */ static inline size_t dim() { return 7; @@ -226,13 +246,21 @@ struct extends<T[N1][N2][N3][N4][N5][N6][N7]> template<typename T,size_t N1,size_t N2,size_t N3,size_t N4,size_t N5, size_t N6, size_t N7, size_t N8> struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8]> { - //! number of elements + /*! number of elements + * + * \return the number of elements as N1*N2*N3*......... + * + */ static inline size_t mul() { return N1 * N2 * N3 * N4 * N5 * N6 * N7 * N8; } - //! number of indexes + /*! 
number of indexes + * + * \return 8 + * + */ static inline size_t dim() { return 8; @@ -243,13 +271,21 @@ struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8]> template<typename T,size_t N1,size_t N2,size_t N3,size_t N4,size_t N5, size_t N6, size_t N7, size_t N8, size_t N9> struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8][N9]> { - //! number of elements + /*! number of elements + * + * \return the number of elements as N1*N2*N3*......... + * + */ static inline size_t mul() { return N1 * N2 * N3 * N4 * N5 * N6 * N7 * N8 * N9; } - //! number of indexes + /*! number of indexes + * + * \return 9 + * + */ static inline size_t dim() { return 9; @@ -260,13 +296,21 @@ struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8][N9]> template<typename T,size_t N1,size_t N2,size_t N3,size_t N4,size_t N5, size_t N6, size_t N7, size_t N8, size_t N9, size_t N10> struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8][N9][N10]> { - //! number of elements + /*! number of elements + * + * \return the number of elements as N1*N2*N3*......... + * + */ static inline size_t mul() { return N1 * N2 * N3 * N4 * N5 * N6 * N7 * N8 * N9 * N10; } - //! number of indexes + /*! number of indexes + * + * \return 10 + * + */ static inline size_t dim() { return 10; @@ -286,7 +330,7 @@ struct extends<T[N1][N2][N3][N4][N5][N6][N7][N8][N9][N10]> template<typename T> struct write_stag { - /*! \brieg write the staggered grid + /*! \brief write the staggered grid * * \tparam p_val property we are going to write * \tparam sg staggered grid type @@ -332,7 +376,7 @@ struct write_stag template<typename T,size_t N1> struct write_stag<T[N1]> { - /*! \brieg write the staggered grid + /*! \brief write the staggered grid * * \tparam p_val property we are going to write * \tparam sg staggered grid type @@ -374,7 +418,7 @@ struct write_stag<T[N1]> template<typename T,size_t N1,size_t N2> struct write_stag<T[N1][N2]> { - /*! \brieg write the staggered grid + /*! 
\brief write the staggered grid * * \tparam p_val property we are going to write * \tparam sg staggered grid type @@ -431,15 +475,26 @@ struct write_stag<T[N1][N2]> template<unsigned int dim, typename v, bool has_pM = has_posMask<v>::value> class stag_set_position { + //! vector containing the position of the properties in the cells (staggered properties are staggered) + // within the cell openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value]; public: + /*! \brief Constructor + * + * \param vector of the staggered position (It is going to be filled by this class) + * + */ stag_set_position( openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value]) :pos_prp(pos_prp) {} - //! It call the copy function for each property + /*! It calculate the staggered position for every property + * + * \param t property + * + */ template<typename T> void operator()(T& t) const { @@ -519,15 +574,28 @@ template<unsigned int dim, typename v> class stag_set_position<dim,v,false> { private: + + //! vector containing the position of the properties in the cells (staggered properties are staggered) + // within the cell openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value]; public: + + /*! \brief Constructor + * + * \param vector of the staggered position (It is going to be filled by this class) + * + */ stag_set_position( openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value]) :pos_prp(pos_prp) {} - //! It call the copy function for each property + /*! 
It calculate the staggered position for every property + * + * \param t property + * + */ template<typename T> void operator()(T& t) const { diff --git a/src/Makefile.am b/src/Makefile.am index db1aa5ff1ce329b58d4e2da454afd188d22b5328..67fab895782135f5a3c7be00862b33322a84f95c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,18 +1,18 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) $(PETSC_LIB) $(PARMETIS_LIB) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(BOOST_CHRONO_LIB) $(BOOST_TIMER_LIB) $(BOOST_SYSTEM_LIB) $(LIBIFCORE) noinst_PROGRAMS = pdata -pdata_SOURCES = main.cpp pdata_performance.cpp Grid/grid_dist_id_unit_test.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp +pdata_SOURCES = main.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Grid/grid_dist_id_unit_test.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp pdata_CXXFLAGS = $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs pdata_CFLAGS = $(CUDA_CFLAGS) pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis -nobase_include_HEADERS = 
Decomposition/CartDecomposition.hpp Decomposition/CartDecomposition_ext.hpp Decomposition/common.hpp Decomposition/Decomposition.hpp Decomposition/ie_ghost.hpp \ +nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/shift_vect_converter.hpp Decomposition/CartDecomposition_ext.hpp Decomposition/common.hpp Decomposition/Decomposition.hpp Decomposition/ie_ghost.hpp \ Decomposition/Domain_NN_calculator_cart.hpp Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/ORB.hpp \ Graph/CartesianGraphFactory.hpp \ Grid/grid_dist_id.hpp Grid/grid_dist_id_comm.hpp Grid/Iterators/grid_dist_id_iterator_util.hpp Grid/Iterators/grid_dist_id_iterator_dec.hpp Grid/Iterators/grid_dist_id_iterator_dec_skin.hpp Grid/grid_dist_util.hpp Grid/Iterators/grid_dist_id_iterator_sub.hpp Grid/Iterators/grid_dist_id_iterator.hpp Grid/grid_dist_key.hpp Grid/staggered_dist_grid.hpp Grid/staggered_dist_grid_util.hpp Grid/staggered_dist_grid_copy.hpp \ Vector/se_class3_vector.hpp Vector/vector_dist_multiphase_functions.hpp Vector/vector_dist_comm.hpp Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/Iterators/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \ config/config.h \ example.mk \ - Decomposition/Distribution/metis_util.hpp Decomposition/Distribution/SpaceDistribution.hpp Decomposition/Distribution/parmetis_dist_util.hpp Decomposition/Distribution/parmetis_util.hpp Decomposition/Distribution/MetisDistribution.hpp Decomposition/Distribution/ParMetisDistribution.hpp Decomposition/Distribution/DistParMetisDistribution.hpp Decomposition/dec_optimizer.hpp SubdomainGraphNodes.hpp \ + Decomposition/Distribution/metis_util.hpp Decomposition/Distribution/SpaceDistribution.hpp Decomposition/Distribution/parmetis_dist_util.hpp Decomposition/Distribution/parmetis_util.hpp Decomposition/Distribution/MetisDistribution.hpp Decomposition/Distribution/ParMetisDistribution.hpp Decomposition/Distribution/DistParMetisDistribution.hpp 
Decomposition/dec_optimizer.hpp SubdomainGraphNodes.hpp \ Graph/ids.hpp Graph/dist_map_graph.hpp Graph/DistGraphFactory.hpp \ DLB/DLB.hpp DLB/LB_Model.hpp diff --git a/src/SubdomainGraphNodes.hpp b/src/SubdomainGraphNodes.hpp index 63be7b721bcebe37f18e3512ce5e51cf6f5d1b0d..39b19fa4f1058bd9c136e8194b5ceb3319715a5e 100755 --- a/src/SubdomainGraphNodes.hpp +++ b/src/SubdomainGraphNodes.hpp @@ -214,6 +214,11 @@ struct nm_part_v boost::fusion::at_c<1>(data) = p.template get<1>(); } + static inline bool noPointers() + { + return true; + } + }; /*! \brief Reduced edge graph node @@ -239,6 +244,11 @@ struct nm_part_e { static const std::string name[]; }; + + static inline bool noPointers() + { + return true; + } }; diff --git a/src/Vector/performance/vector_dist_performance_common.hpp b/src/Vector/performance/vector_dist_performance_common.hpp index d7988c2c9bdd037f226f0fa2111170ce35c527f7..e6e0d261e4562271b72c57fd160787f0df0a305c 100644 --- a/src/Vector/performance/vector_dist_performance_common.hpp +++ b/src/Vector/performance/vector_dist_performance_common.hpp @@ -8,6 +8,7 @@ #ifndef SRC_VECTOR_PERFORMANCE_VECTOR_DIST_PERFORMANCE_COMMON_HPP_ #define SRC_VECTOR_PERFORMANCE_VECTOR_DIST_PERFORMANCE_COMMON_HPP_ +#include "Vector/vector_dist.hpp" /*! 
\brief Calculate and put particles' forces * diff --git a/src/Vector/performance/vector_dist_performance_util.hpp b/src/Vector/performance/vector_dist_performance_util.hpp index 86a22b82b4c4b59334242fd5ea8500de53658dd9..8f0a07ec9212c7d8f4efb0f4f4dd055564e4d2a5 100644 --- a/src/Vector/performance/vector_dist_performance_util.hpp +++ b/src/Vector/performance/vector_dist_performance_util.hpp @@ -70,17 +70,7 @@ static inline void addchartarea(std::string & chart_area, int lvl) } -void addUpdtateTime(GoogleChart & cg) -{ - time_t t = time(0); // get time now - struct tm * now = localtime( & t ); - - std::stringstream str; - - str << "<h3>Updated: " << now->tm_mday << "/" << now->tm_mon + 1 << "/" << now->tm_year+1900 << " " << now->tm_hour << ":" << now->tm_min << ":" << now->tm_sec << std::endl; - - cg.addHTML(str.str()); -} +void addUpdtateTime(GoogleChart & cg); /*! \brief Standard deviation * @@ -103,15 +93,6 @@ static inline void standard_deviation(openfpm::vector<double> measures, double & dev = sqrt(dev / (measures.size() - 1)); } -/*! \brief Print out only ones (no matter how many processors involved) - * - * \param test, sz Data to print out - */ -void print_test_v(std::string test) -{ - if (create_vcluster().getProcessUnitID() == 0) - std::cout << test << "\n"; -} /*! 
\brief Benchmark particles' forces time @@ -294,7 +275,7 @@ template<unsigned int dim, typename v_dist> void move_particles(v_dist & vd, dou * * */ -void StandardPerformanceGraph(std::string file_mean, +extern void StandardPerformanceGraph(std::string file_mean, std::string file_var, std::string file_mean_save, std::string file_var_save, @@ -306,101 +287,7 @@ void StandardPerformanceGraph(std::string file_mean, openfpm::vector<std::string> & gnames, std::string x_string, std::string y_string, - bool use_log) -{ - openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_mean; - openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_dev; - y_ref_mean.load(file_mean); - y_ref_dev.load(file_var); - - // warning level - openfpm::vector<int> warning_vlevel; - - // Calculation time graphs data - - openfpm::vector<size_t> x; - openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2; - openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2_dev; - openfpm::vector<std::string> yn2; - - if (names.size() == 0) - return; - - for (size_t i = 0 ; i < names.size() ; i++) - yn2.add(names.get(i)); - - for (size_t i = 0; i < xp.size() ; i++) - x.add(xp.get(i)); - - yp_mean.save(file_mean_save); - yp_dev.save(file_var_save); - - if (y_ref_mean.size() != 0 && yp_mean.size() != 0 && yp_mean.get(0).size() != 0) - { - // We reconstruct y and yn - - y2.clear(); - yn2.clear(); - - for (size_t i = 0 ; i < yp_mean.get(0).get(0).size() ; i++) - { - yn2.add(names.get(i)); - yn2.add("interval"); - yn2.add("interval"); - } - - y2.resize(yp_mean.size()); - for (size_t r = 0; r < yp_mean.size(); r++) - { - int warning_level = -1; - - y2.get(r).resize(yp_mean.get(r).size()); - for (size_t k = 0; k < yp_mean.get(r).size(); k++) - { - - // Number of graph points - for (size_t g = 0 ; g < yp_mean.get(r).get(k).size() ; g++) - { - // Time for construction hilbert and random - y2.get(r).get(k).add(yp_mean.get(r).get(k).get(g)); - 
y2.get(r).get(k).add(y_ref_mean.get(r).get(k).get(g) - 3.0*y_ref_dev.get(r).get(k).get(g)); - y2.get(r).get(k).add(y_ref_mean.get(r).get(k).get(g) + 3.0*y_ref_dev.get(r).get(k).get(g)); + bool use_log); - warning_set(warning_level,yp_mean.get(r).get(k).get(g),y_ref_mean.get(r).get(k).get(g),y_ref_dev.get(r).get(k).get(g)); - } - } - - warning_vlevel.add(warning_level); - } - } - else - { - return; - } - - // Calculation time graphs report - - // Google charts options - GCoptions options2; - - options2.yAxis = std::string(y_string); - options2.xAxis = std::string(x_string); - options2.lineWidth = 4; - - for (size_t i = 0; i < y2.size() ; i++) - { - std::string chart_area; - if (warning_vlevel.size() != 0) - addchartarea(chart_area,warning_vlevel.get(i)); - - if (use_log == true) - {options2.more = GC_Y_LOG + "," + GC_ZOOM + chart_area;} - else - {options2.more = GC_ZOOM + chart_area;} - - options2.title = gnames.get(i); - cg.AddLinesGraph(x,y2.get(i),yn2,options2); - } -} #endif /* SRC_VECTOR_VECTOR_DIST_PERFORMANCE_UTIL_HPP_ */ diff --git a/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp b/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp similarity index 85% rename from src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp rename to src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp index 66ece4bd4d9292da3fbf4f1a34da868dbb08597b..a843c42ef5d4e9c32b5b212f911ad18aae8da084 100644 --- a/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp +++ b/src/Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp @@ -4,16 +4,15 @@ * Created on: Jun 12, 2016 * Author: Yaroslav Zaluzhnyi */ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> -#ifndef SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ -#define SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ - -#include "vector_dist.hpp" +#include "Vector/vector_dist.hpp" #include "Packer_Unpacker/Pack_selector.hpp" #include "Packer_Unpacker/Packer.hpp" #include 
"Packer_Unpacker/Unpacker.hpp" #include "Vector/performance/vector_dist_performance_util.hpp" - +#include "NN/CellList/CellList_util.hpp" #include "hdf5.h" @@ -46,7 +45,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_hdf5_save_test ) // ghost Ghost<dim,float> ghost(1.0/(Ng-2)); - vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost); + vector_dist<dim,float, aggregate<float[dim]> > vd(0,box,bc,ghost); // Put particles @@ -85,7 +84,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_hdf5_save_test ) // Save the vector vd.save("vector_dist.h5"); - vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(0,box,bc,ghost); + vector_dist<dim,float, aggregate<float[dim]> > vd2(0,box,bc,ghost); vd2.load("vector_dist.h5"); @@ -143,7 +142,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_hdf5_load_test ) // ghost Ghost<dim,float> ghost(1.0/(Ng-2)); - vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost); + vector_dist<dim,float, aggregate<float[dim]> > vd(0,box,bc,ghost); // Load the vector vd.load("test_data/vector_dist_24.h5"); @@ -177,5 +176,3 @@ BOOST_AUTO_TEST_CASE( vector_dist_hdf5_load_test ) BOOST_AUTO_TEST_SUITE_END() - -#endif /* SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ */ diff --git a/src/Vector/vector_dist_MP_unit_tests.hpp b/src/Vector/tests/vector_dist_MP_unit_tests.cpp similarity index 98% rename from src/Vector/vector_dist_MP_unit_tests.hpp rename to src/Vector/tests/vector_dist_MP_unit_tests.cpp index 807b7a259f435db2839f345097e2e343d38542f8..381da62217d93ea2605ed84157085f203d4de964 100644 --- a/src/Vector/vector_dist_MP_unit_tests.hpp +++ b/src/Vector/tests/vector_dist_MP_unit_tests.cpp @@ -5,10 +5,12 @@ * Author: i-bird */ -#ifndef SRC_VECTOR_VECTOR_DIST_MP_UNIT_TESTS_HPP_ -#define SRC_VECTOR_VECTOR_DIST_MP_UNIT_TESTS_HPP_ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> #include "Vector/vector_dist_multiphase_functions.hpp" +#include 
"VCluster/VCluster.hpp" +#include "Vector/vector_dist.hpp" BOOST_AUTO_TEST_SUITE( vector_dist_multiphase_test ) @@ -426,4 +428,3 @@ BOOST_AUTO_TEST_CASE( vector_dist_multiphase_cell_list_sym_test ) BOOST_AUTO_TEST_SUITE_END() -#endif /* SRC_VECTOR_VECTOR_DIST_MP_UNIT_TESTS_HPP_ */ diff --git a/src/Vector/vector_dist_NN_tests.hpp b/src/Vector/tests/vector_dist_NN_tests.cpp similarity index 88% rename from src/Vector/vector_dist_NN_tests.hpp rename to src/Vector/tests/vector_dist_NN_tests.cpp index 0b38a0e7a24c816185213c6d389bb74ebf06efc1..9c6c79f1a849a3ef7ed4a642b53ba9fa878fe535 100644 --- a/src/Vector/vector_dist_NN_tests.hpp +++ b/src/Vector/tests/vector_dist_NN_tests.cpp @@ -5,11 +5,16 @@ * Author: i-bird */ -#ifndef SRC_VECTOR_VECTOR_DIST_NN_TESTS_HPP_ -#define SRC_VECTOR_VECTOR_DIST_NN_TESTS_HPP_ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include "VCluster/VCluster.hpp" +#include "Vector/vector_dist.hpp" -BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) +extern void print_test_v(std::string test, size_t sz); + +template<typename VerletList> +void test_full_nn(long int k) { Vcluster & v_cl = create_vcluster(); @@ -22,16 +27,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) std::default_random_engine eg; std::uniform_real_distribution<float> ud(0.0f, 1.0f); -#ifdef TEST_COVERAGE_MODE - long int k = 50 * v_cl.getProcessingUnits(); -#else - long int k = 750 * v_cl.getProcessingUnits(); -#endif - long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D full NN search k=",k); + print_test_v("Testing 3D full NN search k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D full NN search k=" << k ); Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); @@ -134,14 +133,14 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) /////////////////////////////////// - auto NNv = vd.getVerlet(r_cut*1.0001); + auto NNv = vd.template getVerlet<VerletList>(r_cut*1.0001); it = vd.getDomainIterator(); while (it.isNext()) { Point<3,float> xp = 
vd.getPos(it.get()); - auto Np = NNv.getNNIterator<NO_CHECK>(it.get().getKey()); + auto Np = NNv.template getNNIterator<NO_CHECK>(it.get().getKey()); list_idx2.get(it.get().getKey()).clear(); @@ -185,7 +184,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) while (it.isNext()) { Point<3,float> xp = vd.getPos(it.get()); - auto Np = NNv.getNNIterator<NO_CHECK>(it.get().getKey()); + auto Np = NNv.template getNNIterator<NO_CHECK>(it.get().getKey()); list_idx2.get(it.get().getKey()).clear(); @@ -221,6 +220,24 @@ BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) } } +BOOST_AUTO_TEST_CASE( vector_dist_full_NN ) +{ + auto & v_cl = create_vcluster(); + +#ifdef TEST_COVERAGE_MODE + long int k = 50 * v_cl.getProcessingUnits(); +#else + long int k = 750 * v_cl.getProcessingUnits(); +#endif + + test_full_nn<VERLET_MEMFAST(3,float)>(k); + + k /= 2; + test_full_nn<VERLET_MEMBAL(3,float)>(k); + k /= 2; + test_full_nn<VERLET_MEMMW(3,float)>(k); +} + BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration ) { Vcluster & v_cl = create_vcluster(); @@ -236,7 +253,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration ) long int k = 750 * v_cl.getProcessingUnits(); - print_test("Testing 3D particle cell iterator=",k); + print_test_v("Testing 3D particle cell iterator=",k); BOOST_TEST_CHECKPOINT( "Testing 3D full NN search k=" << k ); Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); @@ -302,4 +319,3 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_iteration ) BOOST_REQUIRE_EQUAL((long int)count,k); } -#endif /* SRC_VECTOR_VECTOR_DIST_NN_TESTS_HPP_ */ diff --git a/src/Vector/vector_dist_cell_list_tests.hpp b/src/Vector/tests/vector_dist_cell_list_tests.cpp similarity index 83% rename from src/Vector/vector_dist_cell_list_tests.hpp rename to src/Vector/tests/vector_dist_cell_list_tests.cpp index 0962a3cd5f2439fb252c01aceefe21eaeeb195a8..e8875868c2af684cb2ac093fd1c2bd91b7e790ef 100644 --- a/src/Vector/vector_dist_cell_list_tests.hpp +++ b/src/Vector/tests/vector_dist_cell_list_tests.cpp @@ -6,14 +6,18 
@@ */ #include "config.h" +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include "Point_test.hpp" +#include "Vector/performance/vector_dist_performance_common.hpp" +#include "Vector/vector_dist.hpp" -#ifndef SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ -#define SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ - +extern void print_test_v(std::string test, size_t sz); +extern long int decrement(long int k, long int step); ///////////////////////// test hilb /////////////////////////////// -BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) +void test_reorder_sfc(reorder_opt opt) { Vcluster & v_cl = create_vcluster(); @@ -35,19 +39,19 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test_v( "Testing 2D vector with hilbert curve reordering k<=",k); + print_test_v( "Testing 2D vector with sfc curve reordering k<=",k); // 2D test for ( ; k >= 2 ; k-= decrement(k,big_step) ) { - BOOST_TEST_CHECKPOINT( "Testing 2D vector with hilbert curve reordering k=" << k ); + BOOST_TEST_CHECKPOINT( "Testing 2D vector with sfc curve reordering k=" << k ); Box<2,float> box({0.0,0.0},{1.0,1.0}); // Boundary conditions size_t bc[2]={NON_PERIODIC,NON_PERIODIC}; - vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box,bc,Ghost<2,float>(0.01)); + vector_dist<2,float, Point_test<float> > vd(k,box,bc,Ghost<2,float>(0.01)); auto it = vd.getIterator(); @@ -73,7 +77,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) int32_t m = 6; //Reorder a vector - vd.reorder(m); + vd.reorder(m,opt); // Create second cell list auto NN2 = vd.getCellList(0.01,true); @@ -89,6 +93,12 @@ BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) } } +BOOST_AUTO_TEST_CASE( vector_dist_reorder_2d_test ) +{ + test_reorder_sfc(reorder_opt::HILBERT); + test_reorder_sfc(reorder_opt::LINEAR); +} + BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) { Vcluster & v_cl = create_vcluster(); @@ 
-122,7 +132,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs hilb celllist) k<="); - vector_dist_test::print_test_v(str,k); + print_test_v(str,k); //For different number of particles for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) @@ -143,9 +153,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) for (size_t i = 0; i < dim; i++) bc[i] = PERIODIC; - vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + vector_dist<dim,float, aggregate<float[dim]> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); - vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part)); + vector_dist<dim,float, aggregate<float[dim]> > vd2(k_int,box,bc,Ghost<dim,float>(ghost_part)); // Initialize dist vectors vd_initialize_double<dim>(vd, vd2, v_cl, k_int); @@ -180,7 +190,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_hilb_forces_test ) vect_dist_key_dx key = it_v2.get(); for (size_t i = 0; i < dim; i++) - avg.get(i) += fabs(vd.getProp<0>(key)[i]); + {avg.get(i) += fabs(vd.getProp<0>(key)[i]);} ++count; ++it_v2; @@ -246,7 +256,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test ) std::string str("Testing " + std::to_string(dim) + "D vector's forces (random vs reorder) k<="); - vector_dist_test::print_test_v(str,k); + print_test_v(str,k); //For different number of particles for (size_t k_int = k ; k_int >= cl_k_min ; k_int/=2 ) @@ -267,7 +277,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cl_random_vs_reorder_forces_test ) for (size_t i = 0; i < dim; i++) bc[i] = PERIODIC; - vector_dist<dim,float, aggregate<float[dim], float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); + vector_dist<dim,float, aggregate<float[dim], float[dim]> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part)); // 
Initialize vd vd_initialize<dim,decltype(vd)>(vd, v_cl, k_int); @@ -360,7 +370,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_cell_list ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -569,7 +579,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_cell_list ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric crs cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric crs cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric crs cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -776,7 +786,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_cell_list ) BOOST_REQUIRE_EQUAL(ret,true); } -BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) +template<typename VerletList> +void test_vd_symmetric_verlet_list() { Vcluster & v_cl = create_vcluster(); @@ -796,7 +807,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric verlet-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -839,9 +850,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) // Fill some properties randomly - vd.getPropWrite<0>(key) = 0; - vd.getPropWrite<1>(key) = 0; - vd.getPropWrite<2>(key) = key.getKey() + start; + vd.template getPropWrite<0>(key) = 0; + vd.template getPropWrite<1>(key) = 0; + vd.template getPropWrite<2>(key) = key.getKey() + start; ++it; } @@ -849,9 +860,9 @@ BOOST_AUTO_TEST_CASE( 
vector_dist_symmetric_verlet_list ) vd.map(); // sync the ghost - vd.ghost_get<0,2>(); + vd.template ghost_get<0,2>(); - auto NN = vd.getVerlet(r_cut); + auto NN = vd.template getVerlet<VerletList>(r_cut); auto p_it = vd.getDomainIterator(); while (p_it.isNext()) @@ -883,10 +894,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) if (distance < r_cut ) { - vd.getPropWrite<0>(p)++; - vd.getPropWrite<3>(p).add(); - vd.getPropWrite<3>(p).last().xq = xq; - vd.getPropWrite<3>(p).last().id = vd.getPropRead<2>(q); + vd.template getPropWrite<0>(p)++; + vd.template getPropWrite<3>(p).add(); + vd.template getPropWrite<3>(p).last().xq = xq; + vd.template getPropWrite<3>(p).last().id = vd.template getPropRead<2>(q); } ++Np; @@ -897,7 +908,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) // We now try symmetric Cell-list - auto NN2 = vd.getVerletSym(r_cut); + auto NN2 = vd.template getVerletSym<VerletList>(r_cut); auto p_it2 = vd.getDomainIterator(); @@ -907,7 +918,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) Point<3,float> xp = vd.getPosRead(p); - auto Np = NN2.getNNIterator<NO_CHECK>(p.getKey()); + auto Np = NN2.template getNNIterator<NO_CHECK>(p.getKey()); while (Np.isNext()) { @@ -930,16 +941,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) if (distance < r_cut ) { - vd.getPropWrite<1>(p)++; - vd.getPropWrite<1>(q)++; + vd.template getPropWrite<1>(p)++; + vd.template getPropWrite<1>(q)++; - vd.getPropWrite<4>(p).add(); - vd.getPropWrite<4>(q).add(); + vd.template getPropWrite<4>(p).add(); + vd.template getPropWrite<4>(q).add(); - vd.getPropWrite<4>(p).last().xq = xq; - vd.getPropWrite<4>(q).last().xq = xp; - vd.getPropWrite<4>(p).last().id = vd.getPropRead<2>(q); - vd.getPropWrite<4>(q).last().id = vd.getPropRead<2>(p); + vd.template getPropWrite<4>(p).last().xq = xq; + vd.template getPropWrite<4>(q).last().xq = xp; + vd.template getPropWrite<4>(p).last().id = vd.template getPropRead<2>(q); + vd.template 
getPropWrite<4>(q).last().id = vd.template getPropRead<2>(p); } ++Np; @@ -948,8 +959,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) ++p_it2; } - vd.ghost_put<add_,1>(); - vd.ghost_put<merge_,4>(); + vd.template ghost_put<add_,1>(); + vd.template ghost_put<merge_,4>(); auto p_it3 = vd.getDomainIterator(); @@ -958,15 +969,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) { auto p = p_it3.get(); - ret &= vd.getPropRead<1>(p) == vd.getPropRead<0>(p); + ret &= vd.template getPropRead<1>(p) == vd.template getPropRead<0>(p); - vd.getPropWrite<3>(p).sort(); - vd.getPropWrite<4>(p).sort(); + vd.template getPropWrite<3>(p).sort(); + vd.template getPropWrite<4>(p).sort(); - ret &= vd.getPropRead<3>(p).size() == vd.getPropRead<4>(p).size(); + ret &= vd.template getPropRead<3>(p).size() == vd.template getPropRead<4>(p).size(); - for (size_t i = 0 ; i < vd.getPropRead<3>(p).size() ; i++) - ret &= vd.getPropRead<3>(p).get(i).id == vd.getPropRead<4>(p).get(i).id; + for (size_t i = 0 ; i < vd.template getPropRead<3>(p).size() ; i++) + ret &= vd.template getPropRead<3>(p).get(i).id == vd.template getPropRead<4>(p).get(i).id; if (ret == false) break; @@ -977,7 +988,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) BOOST_REQUIRE_EQUAL(ret,true); } -BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) +BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list ) +{ + test_vd_symmetric_verlet_list<VERLET_MEMFAST(3,float)>(); + test_vd_symmetric_verlet_list<VERLET_MEMBAL(3,float)>(); + test_vd_symmetric_verlet_list<VERLET_MEMMW(3,float)>(); +} + +template<typename VerletList> +void vector_sym_verlet_list_nb() { Vcluster & v_cl = create_vcluster(); @@ -997,7 +1016,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list no bottom k=",k); + print_test_v("Testing 3D periodic vector symmetric 
cell-list no bottom k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list no bottom k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -1051,13 +1070,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) // Fill some properties randomly - vd.getPropWrite<0>(key) = 0; - vd.getPropWrite<1>(key) = 0; - vd.getPropWrite<2>(key) = key.getKey() + start; + vd.template getPropWrite<0>(key) = 0; + vd.template getPropWrite<1>(key) = 0; + vd.template getPropWrite<2>(key) = key.getKey() + start; - vd2.getPropWrite<0>(key) = 0; - vd2.getPropWrite<1>(key) = 0; - vd2.getPropWrite<2>(key) = key.getKey() + start; + vd2.template getPropWrite<0>(key) = 0; + vd2.template getPropWrite<1>(key) = 0; + vd2.template getPropWrite<2>(key) = key.getKey() + start; ++it; } @@ -1066,10 +1085,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) vd2.map(); // sync the ghost - vd.ghost_get<0,2>(); - vd2.ghost_get<0,2>(); + vd.template ghost_get<0,2>(); + vd2.template ghost_get<0,2>(); - auto NN = vd.getVerlet(r_cut); + auto NN = vd.template getVerlet<VerletList>(r_cut); auto p_it = vd.getDomainIterator(); while (p_it.isNext()) @@ -1101,10 +1120,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) if (distance < r_cut ) { - vd.getPropWrite<0>(p)++; - vd.getPropWrite<3>(p).add(); - vd.getPropWrite<3>(p).last().xq = xq; - vd.getPropWrite<3>(p).last().id = vd.getPropRead<2>(q); + vd.template getPropWrite<0>(p)++; + vd.template getPropWrite<3>(p).add(); + vd.template getPropWrite<3>(p).last().xq = xq; + vd.template getPropWrite<3>(p).last().id = vd.template getPropRead<2>(q); } ++Np; @@ -1115,7 +1134,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) // We now try symmetric Cell-list - auto NN2 = vd2.getVerletSym(r_cut); + auto NN2 = vd2.template getVerletSym<VerletList>(r_cut); auto p_it2 = vd2.getDomainIterator(); @@ -1125,7 +1144,7 @@ BOOST_AUTO_TEST_CASE( 
vector_dist_symmetric_verlet_list_no_bottom ) Point<3,float> xp = vd2.getPosRead(p); - auto Np = NN2.getNNIterator<NO_CHECK>(p.getKey()); + auto Np = NN2.template getNNIterator<NO_CHECK>(p.getKey()); while (Np.isNext()) { @@ -1148,16 +1167,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) if (distance < r_cut ) { - vd2.getPropWrite<1>(p)++; - vd2.getPropWrite<1>(q)++; + vd2.template getPropWrite<1>(p)++; + vd2.template getPropWrite<1>(q)++; - vd2.getPropWrite<4>(p).add(); - vd2.getPropWrite<4>(q).add(); + vd2.template getPropWrite<4>(p).add(); + vd2.template getPropWrite<4>(q).add(); - vd2.getPropWrite<4>(p).last().xq = xq; - vd2.getPropWrite<4>(q).last().xq = xp; - vd2.getPropWrite<4>(p).last().id = vd2.getPropRead<2>(q); - vd2.getPropWrite<4>(q).last().id = vd2.getPropRead<2>(p); + vd2.template getPropWrite<4>(p).last().xq = xq; + vd2.template getPropWrite<4>(q).last().xq = xp; + vd2.template getPropWrite<4>(p).last().id = vd2.template getPropRead<2>(q); + vd2.template getPropWrite<4>(q).last().id = vd2.template getPropRead<2>(p); } ++Np; @@ -1167,8 +1186,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) ++p_it2; } - vd2.ghost_put<add_,1>(); - vd2.ghost_put<merge_,4>(); + vd2.template ghost_put<add_,1>(); + vd2.template ghost_put<merge_,4>(); #ifdef SE_CLASS3 vd2.getDomainIterator(); @@ -1181,16 +1200,15 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) { auto p = p_it3.get(); - ret &= vd2.getPropRead<1>(p) == vd.getPropRead<0>(p); - + ret &= vd2.template getPropRead<1>(p) == vd.template getPropRead<0>(p); - vd.getPropWrite<3>(p).sort(); - vd2.getPropWrite<4>(p).sort(); + vd.template getPropWrite<3>(p).sort(); + vd2.template getPropWrite<4>(p).sort(); - ret &= vd.getPropRead<3>(p).size() == vd2.getPropRead<4>(p).size(); + ret &= vd.template getPropRead<3>(p).size() == vd2.template getPropRead<4>(p).size(); - for (size_t i = 0 ; i < vd.getPropRead<3>(p).size() ; i++) - ret &= 
vd.getPropRead<3>(p).get(i).id == vd2.getPropRead<4>(p).get(i).id; + for (size_t i = 0 ; i < vd.template getPropRead<3>(p).size() ; i++) + ret &= vd.template getPropRead<3>(p).get(i).id == vd2.template getPropRead<4>(p).get(i).id; if (ret == false) break; @@ -1202,7 +1220,18 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) } } -template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > & vd, +BOOST_AUTO_TEST_CASE( vector_dist_symmetric_verlet_list_no_bottom ) +{ + vector_sym_verlet_list_nb<VERLET_MEMFAST(3,float)>(); + vector_sym_verlet_list_nb<VERLET_MEMBAL(3,float)>(); + vector_sym_verlet_list_nb<VERLET_MEMMW(3,float)>(); + + vector_sym_verlet_list_nb<VERLET_MEMFAST_INT(3,float)>(); + vector_sym_verlet_list_nb<VERLET_MEMBAL_INT(3,float)>(); + vector_sym_verlet_list_nb<VERLET_MEMMW_INT(3,float)>(); +} + +template<typename VerletList, typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > & vd, vector_dist<3,float, part_prop > & vd2, std::default_random_engine & eg, std::uniform_real_distribution<float> & ud, @@ -1243,7 +1272,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > vd.template ghost_get<0,2>(); vd2.template ghost_get<0,2>(); - auto NN = vd.getVerlet(r_cut); + auto NN = vd.template getVerlet<VerletList>(r_cut); auto p_it = vd.getDomainIterator(); while (p_it.isNext()) @@ -1289,7 +1318,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > // We now try symmetric Verlet-list Crs scheme - auto NN2 = vd2.getVerletCrs(r_cut); + auto NN2 = vd2.template getVerletCrs<VerletList>(r_cut); // Because iterating across particles in the CSR scheme require a Cell-list auto p_it2 = vd2.getParticleIteratorCRS_Cell(NN2.getInternalCellList()); @@ -1378,7 +1407,7 @@ template<typename part_prop> void test_crs_full(vector_dist<3,float, part_prop > BOOST_REQUIRE_EQUAL(ret,true); } - +template<typename VerletList> void test_csr_verlet_list() { Vcluster 
& v_cl = create_vcluster(); @@ -1399,7 +1428,7 @@ void test_csr_verlet_list() long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -1435,9 +1464,10 @@ void test_csr_verlet_list() vector_dist<3,float, part_prop > vd2(k,box,bc,ghost2,BIND_DEC_TO_GHOST); size_t start = vd.init_size_accum(k); - test_crs_full(vd,vd2,eg,ud,start,r_cut); + test_crs_full<VerletList>(vd,vd2,eg,ud,start,r_cut); } +template<typename VerletList> void test_csr_verlet_list_override() { Vcluster & v_cl = create_vcluster(); @@ -1458,7 +1488,7 @@ void test_csr_verlet_list_override() long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -1508,20 +1538,25 @@ void test_csr_verlet_list_override() vector_dist<3,float, part_prop > vd2(k,box,bc,ghost2,BIND_DEC_TO_GHOST,gdist2_d); size_t start = vd.init_size_accum(k); - test_crs_full(vd,vd2,eg,ud,start,r_cut); + test_crs_full<VerletList>(vd,vd2,eg,ud,start,r_cut); } BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list ) { - test_csr_verlet_list(); + test_csr_verlet_list<VERLET_MEMFAST(3,float)>(); + test_csr_verlet_list<VERLET_MEMBAL(3,float)>(); + test_csr_verlet_list<VERLET_MEMMW(3,float)>(); } BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_dec_override ) { - test_csr_verlet_list_override(); + test_csr_verlet_list_override<VERLET_MEMFAST(3,float)>(); + test_csr_verlet_list_override<VERLET_MEMBAL(3,float)>(); + test_csr_verlet_list_override<VERLET_MEMMW(3,float)>(); } -BOOST_AUTO_TEST_CASE( 
vector_dist_symmetric_crs_verlet_list_partit ) +template <typename VerletList> +void test_vd_symmetric_crs_verlet() { Vcluster & v_cl = create_vcluster(); @@ -1543,7 +1578,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -1590,7 +1625,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit ) // We now try symmetric Verlet-list Crs scheme - auto NN2 = vd.getVerletCrs(r_cut); + auto NN2 = vd.template getVerletCrs<VerletList>(r_cut); // Because iterating across particles in the CSR scheme require a Cell-list auto p_it2 = vd.getParticleIteratorCRS_Cell(NN2.getInternalCellList()); @@ -1613,6 +1648,13 @@ BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit ) BOOST_REQUIRE_EQUAL(ret,true); } +BOOST_AUTO_TEST_CASE( vector_dist_symmetric_crs_verlet_list_partit ) +{ + test_vd_symmetric_crs_verlet<VERLET_MEMFAST(3,float)>(); + test_vd_symmetric_crs_verlet<VERLET_MEMBAL(3,float)>(); + test_vd_symmetric_crs_verlet<VERLET_MEMMW(3,float)>(); +} + BOOST_AUTO_TEST_CASE( vector_dist_checking_unloaded_processors ) { Vcluster & v_cl = create_vcluster(); @@ -1633,7 +1675,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_checking_unloaded_processors ) long int big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list (unload processors) k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list (unload processors) k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list (unload processors) k=" << k ); Box<3,float> box({0,0,0},{L,L,L}); @@ -1722,7 +1764,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_list_multi_type ) long int 
big_step = k / 4; big_step = (big_step == 0)?1:big_step; - print_test("Testing 3D periodic vector symmetric cell-list k=",k); + print_test_v("Testing 3D periodic vector symmetric cell-list k=",k); BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k ); Box<3,float> box({-L,-L,-L},{L,L,L}); @@ -1812,4 +1854,3 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_list_multi_type ) BOOST_REQUIRE_EQUAL(ret,true); } -#endif /* SRC_VECTOR_VECTOR_DIST_CELL_LIST_TESTS_HPP_ */ diff --git a/src/Vector/vector_dist_complex_prp_unit_test.hpp b/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp similarity index 96% rename from src/Vector/vector_dist_complex_prp_unit_test.hpp rename to src/Vector/tests/vector_dist_complex_prp_unit_test.cpp index bc90cf2f4f55cdedcc169212feeb360b439be0fb..8d2956ba1f77fa9e2c2731480b892264094dedad 100644 --- a/src/Vector/vector_dist_complex_prp_unit_test.hpp +++ b/src/Vector/tests/vector_dist_complex_prp_unit_test.cpp @@ -4,10 +4,15 @@ * Created on: Sep 18, 2016 * Author: i-bird */ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> -#ifndef SRC_VECTOR_VECTOR_DIST_COMPLEX_PRP_UNIT_TEST_HPP_ -#define SRC_VECTOR_VECTOR_DIST_COMPLEX_PRP_UNIT_TEST_HPP_ +#include "Vector/vector_dist.hpp" +#include "Vector/performance/vector_dist_performance_util.hpp" +#include "vector_dist_util_unit_tests.hpp" +extern void print_test_v(std::string test, size_t sz); +extern long int decrement(long int k, long int step); BOOST_AUTO_TEST_CASE( vector_dist_periodic_complex_prp_test_use_3d ) { @@ -232,4 +237,3 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_complex_prp_test_use_3d ) } -#endif /* SRC_VECTOR_VECTOR_DIST_COMPLEX_PRP_UNIT_TEST_HPP_ */ diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/tests/vector_dist_unit_test.cpp similarity index 93% rename from src/Vector/vector_dist_unit_test.hpp rename to src/Vector/tests/vector_dist_unit_test.cpp index 
6c154e19025930bcc4ec187766115a0e6fdd7622..d5fc12cf011333cc439ce3cfd88e7b863a5ef8b7 100644 --- a/src/Vector/vector_dist_unit_test.hpp +++ b/src/Vector/tests/vector_dist_unit_test.cpp @@ -5,23 +5,59 @@ * Author: Pietro Incardona */ -#ifndef VECTOR_DIST_UNIT_TEST_HPP_ -#define VECTOR_DIST_UNIT_TEST_HPP_ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> #include "config.h" #include <random> #include "Vector/vector_dist.hpp" #include "data_type/aggregate.hpp" +#include "vector_dist_util_unit_tests.hpp" +#include "Point_test.hpp" #include "Vector/performance/vector_dist_performance_common.hpp" +/*! \brief Print a string about the test + * + * \param test string to print + * \param sz size + * + */ +void print_test_v(std::string test, size_t sz) +{ + if (create_vcluster().getProcessUnitID() == 0) + std::cout << test << " " << sz << "\n"; +} + +/*! \brief Get next testing step decrementing the size + * + * \param k actual size + * \param step + * + * \return the next step + * + */ +long int decrement(long int k, long int step) +{ + if (k <= 32) + { + return 1; + } + else if (k - 2*step+1 <= 0) + { + return k - 32; + } + else + return step; +} + /*! \brief Count the total number of particles * * \param vd distributed vector * \param bc boundary conditions * */ -template<unsigned int dim> size_t total_n_part_lc(vector_dist<dim,float, Point_test<float>, CartDecomposition<dim,float> > & vd, size_t (& bc)[dim]) +template<unsigned int dim, template <typename> class layout> size_t total_n_part_lc(vector_dist<dim,float, Point_test<float>,typename layout<Point_test<float>>::type, layout, CartDecomposition<dim,float> > & vd, size_t (& bc)[dim]) { Vcluster & v_cl = vd.getVC(); auto it2 = vd.getDomainIterator(); @@ -50,48 +86,6 @@ template<unsigned int dim> size_t total_n_part_lc(vector_dist<dim,float, Point_t return cnt; } -/*! 
\brief Count local and non local - * - * \param vd distributed vector - * \param it iterator - * \param bc boundary conditions - * \param box domain box - * \param dom_ext domain + ghost box - * \param l_cnt local particles counter - * \param nl_cnt non local particles counter - * \param n_out out of domain + ghost particles counter - * - */ -template<unsigned int dim,typename vector_dist> inline void count_local_n_local(vector_dist & vd, vector_dist_iterator & it, size_t (& bc)[dim] , Box<dim,float> & box, Box<dim,float> & dom_ext, size_t & l_cnt, size_t & nl_cnt, size_t & n_out) -{ - const CartDecomposition<dim,float> & ct = vd.getDecomposition(); - - while (it.isNext()) - { - auto key = it.get(); - // Check if it is in the domain - if (box.isInsideNP(vd.getPos(key)) == true) - { - // Check if local - if (ct.isLocalBC(vd.getPos(key),bc) == true) - l_cnt++; - else - nl_cnt++; - } - else - { - nl_cnt++; - } - - Point<dim,float> xp = vd.getPos(key); - - // Check that all particles are inside the Domain + Ghost part - if (dom_ext.isInside(xp) == false) - n_out++; - - ++it; - } -} BOOST_AUTO_TEST_SUITE( vector_dist_test ) @@ -101,6 +95,7 @@ void print_test(std::string test, size_t sz) std::cout << test << " " << sz << "\n"; } +template<typename vector> void Test2D_ghost(Box<2,float> & box) { // Communication object @@ -150,7 +145,7 @@ void Test2D_ghost(Box<2,float> & box) size_t bc[2]={NON_PERIODIC,NON_PERIODIC}; // Vector of particles - vector_dist<2,float, Point_test<float> > vd(g_info.size(),box,bc,g); + vector vd(g_info.size(),box,bc,g); // size_t size_t cobj = 0; @@ -194,16 +189,16 @@ void Test2D_ghost(Box<2,float> & box) auto key = v_it2.get(); // fill with the processor ID where these particle live - vd.getProp<p::s>(key) = vd.getPos(key)[0] + vd.getPos(key)[1] * 16.0f; - vd.getProp<p::v>(key)[0] = v_cl.getProcessUnitID(); - vd.getProp<p::v>(key)[1] = v_cl.getProcessUnitID(); - vd.getProp<p::v>(key)[2] = v_cl.getProcessUnitID(); + vd.template getProp<p::s>(key) 
= vd.getPos(key)[0] + vd.getPos(key)[1] * 16.0f; + vd.template getProp<p::v>(key)[0] = v_cl.getProcessUnitID(); + vd.template getProp<p::v>(key)[1] = v_cl.getProcessUnitID(); + vd.template getProp<p::v>(key)[2] = v_cl.getProcessUnitID(); ++v_it2; } // do a ghost get - vd.ghost_get<p::s,p::v>(); + vd.template ghost_get<p::s,p::v>(); //! [Redistribute the particles and sync the ghost properties] @@ -224,7 +219,7 @@ void Test2D_ghost(Box<2,float> & box) auto key = g_it.get(); // Check the received data - BOOST_REQUIRE_EQUAL(vd.getPos(key)[0] + vd.getPos(key)[1] * 16.0f,vd.getProp<p::s>(key)); + BOOST_REQUIRE_EQUAL(vd.getPos(key)[0] + vd.getPos(key)[1] * 16.0f,vd.template getProp<p::s>(key)); bool is_in = false; size_t b = 0; @@ -247,7 +242,7 @@ void Test2D_ghost(Box<2,float> & box) BOOST_REQUIRE_EQUAL(is_in,true); // Check that the particle come from the correct processor - BOOST_REQUIRE_EQUAL(vd.getProp<p::v>(key)[0],dec.getEGhostBoxProcessor(lb)); + BOOST_REQUIRE_EQUAL(vd.template getProp<p::v>(key)[0],dec.getEGhostBoxProcessor(lb)); n_part++; ++g_it; @@ -271,33 +266,28 @@ void Test2D_ghost(Box<2,float> & box) BOOST_AUTO_TEST_CASE( vector_dist_ghost ) { + typedef vector_dist<2,float, Point_test<float>> vector; + Box<2,float> box({0.0,0.0},{1.0,1.0}); - Test2D_ghost(box); + Test2D_ghost<vector>(box); Box<2,float> box2({-1.0,-1.0},{2.5,2.5}); - Test2D_ghost(box2); + Test2D_ghost<vector>(box2); } -void print_test_v(std::string test, size_t sz) +BOOST_AUTO_TEST_CASE( vector_dist_ghost_inte ) { - if (create_vcluster().getProcessUnitID() == 0) - std::cout << test << " " << sz << "\n"; -} + typedef vector_dist<2,float, Point_test<float>,memory_traits_inte<Point_test<float>>::type,memory_traits_inte> vector; -long int decrement(long int k, long int step) -{ - if (k <= 32) - { - return 1; - } - else if (k - 2*step+1 <= 0) - { - return k - 32; - } - else - return step; + Box<2,float> box({0.0,0.0},{1.0,1.0}); + Test2D_ghost<vector>(box); + + Box<2,float> 
box2({-1.0,-1.0},{2.5,2.5}); + Test2D_ghost<vector>(box2); } + + BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d ) { Vcluster & v_cl = create_vcluster(); @@ -741,7 +731,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_3d ) } } -BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk ) +void test_random_walk(size_t opt) { Vcluster & v_cl = create_vcluster(); @@ -775,7 +765,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk ) Ghost<3,float> ghost(0.01 / factor); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(k,box,bc,ghost); auto it = vd.getIterator(); @@ -809,7 +799,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk ) ++it; } - vd.map(); + vd.map(opt); vd.ghost_get<0>(); @@ -821,6 +811,16 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk ) } } +BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk ) +{ + test_random_walk(NONE); +} + +BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk_local_map ) +{ + test_random_walk(MAP_LOCAL); +} + BOOST_AUTO_TEST_CASE( vector_dist_periodic_map ) { Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); @@ -835,7 +835,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_periodic_map ) Ghost<3,float> ghost(0.05 / factor); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(1,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(1,box,bc,ghost); // put particles al 1.0, check that they go to 0.0 @@ -886,7 +886,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_not_periodic_map ) Ghost<3,float> ghost(0.05 / factor); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(1,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(1,box,bc,ghost); // put particles al 1.0, check that they go to 0.0 @@ -941,7 +941,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_out_of_bound_policy ) Ghost<3,float> 
ghost(0.05 / factor); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(100,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(100,box,bc,ghost); // put particles at out of the boundary, they must be detected and and killed @@ -1025,7 +1025,7 @@ void Test_interacting(Box<3,float> & box) Ghost<3,float> ghost(r_cut); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(k,box,bc,ghost); auto it = vd.getIterator(); @@ -1165,7 +1165,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_grid_iterator ) Ghost<3,float> ghost(1.0/(Ng-2)); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(0,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(0,box,bc,ghost); // Put particles on a grid creating a Grid iterator auto it = vd.getGridIterator(sz); @@ -1239,7 +1239,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test ) Ghost<3,float> ghost(third_dist*1.1); // Distributed vector - vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(0,box,bc,ghost); + vector_dist<3,float, Point_test<float> > vd(0,box,bc,ghost); // Put particles on a grid creating a Grid iterator auto it = vd.getGridIterator(sz); @@ -1284,7 +1284,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test ) // Create a verlet list for each particle - VerletList<3,float,FAST,shift<3,float>> verlet = vd.getVerlet(third_dist); + VerletList<3,float,Mem_fast<>,shift<3,float>> verlet = vd.getVerlet(third_dist); bool correct = true; @@ -1868,10 +1868,6 @@ BOOST_AUTO_TEST_CASE( vector_of_vector_dist ) } -#include "vector_dist_cell_list_tests.hpp" -#include "vector_dist_NN_tests.hpp" -#include "vector_dist_complex_prp_unit_test.hpp" BOOST_AUTO_TEST_SUITE_END() -#endif /* VECTOR_DIST_UNIT_TEST_HPP_ */ diff --git a/src/Vector/tests/vector_dist_util_unit_tests.hpp 
b/src/Vector/tests/vector_dist_util_unit_tests.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f830e13fc558c0ef4da49e80995079c3c783f730 --- /dev/null +++ b/src/Vector/tests/vector_dist_util_unit_tests.hpp @@ -0,0 +1,57 @@ +/* + * vector_dist_util_unit_tests.hpp + * + * Created on: Feb 14, 2018 + * Author: i-bird + */ + +#ifndef SRC_VECTOR_TESTS_VECTOR_DIST_UTIL_UNIT_TESTS_HPP_ +#define SRC_VECTOR_TESTS_VECTOR_DIST_UTIL_UNIT_TESTS_HPP_ + + +/*! \brief Count local and non local + * + * \param vd distributed vector + * \param it iterator + * \param bc boundary conditions + * \param box domain box + * \param dom_ext domain + ghost box + * \param l_cnt local particles counter + * \param nl_cnt non local particles counter + * \param n_out out of domain + ghost particles counter + * + */ +template<unsigned int dim,typename vector_dist> inline void count_local_n_local(vector_dist & vd, vector_dist_iterator & it, size_t (& bc)[dim] , Box<dim,float> & box, Box<dim,float> & dom_ext, size_t & l_cnt, size_t & nl_cnt, size_t & n_out) +{ + const CartDecomposition<dim,float> & ct = vd.getDecomposition(); + + while (it.isNext()) + { + auto key = it.get(); + // Check if it is in the domain + if (box.isInsideNP(vd.getPos(key)) == true) + { + // Check if local + if (ct.isLocalBC(vd.getPos(key),bc) == true) + l_cnt++; + else + nl_cnt++; + } + else + { + nl_cnt++; + } + + Point<dim,float> xp = vd.getPos(key); + + // Check that all particles are inside the Domain + Ghost part + if (dom_ext.isInside(xp) == false) + n_out++; + + ++it; + } +} + + + +#endif /* SRC_VECTOR_TESTS_VECTOR_DIST_UTIL_UNIT_TESTS_HPP_ */ diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 77e6c99dbea89183de465bd4cd5fe94c7a366962..a96fa1628bd671a21ef12c77655bf80ec4399e6d 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -35,6 +35,8 @@ #include "NN/CellList/ParticleIt_Cells.hpp" #include "NN/CellList/ProcKeys.hpp" +#define DEC_GRAN(gr) 
((size_t)gr << 32) + #define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error"); #ifdef SE_CLASS3 @@ -51,8 +53,6 @@ #define NO_ID false #define ID true -#define DEC_GRAN(gr) ((size_t)gr << 32) - // Perform a ghost get or a ghost put #define GET 1 #define PUT 2 @@ -61,8 +61,13 @@ #define NO_GHOST 0 #define WITH_GHOST 2 +#define GCL_NON_SYMMETRIC 0 +#define GCL_SYMMETRIC 1 +#define GCL_HILBERT 2 + + //! General function t get a cell-list -template<unsigned int dim, typename St, typename CellL, typename Vector> +template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl> struct gcl { /*! \brief Get the Cell list based on the type @@ -81,8 +86,8 @@ struct gcl }; //! General function t get a cell-list -template<unsigned int dim, typename St, typename Vector, typename Mem_type> -struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, St> >,Vector> +template<unsigned int dim, typename St, typename CellL, typename Vector> +struct gcl<dim,St,CellL,Vector,GCL_HILBERT> { /*! \brief Get the Cell list based on the type * @@ -93,20 +98,53 @@ struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, S * \return the constructed cell-list * */ - static inline CellList_gen<dim, St, Process_keys_hilb, Mem_type, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) + static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) { return vd.getCellList_hilb(r_cut,g); } }; -#define CELL_MEMFAST(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_fast, shift<dim, St> > -#define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal, shift<dim, St> > -#define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw, shift<dim, St> > +//! General function t get a cell-list +template<unsigned int dim, typename St, typename CellL, typename Vector> +struct gcl<dim,St,CellL,Vector,GCL_SYMMETRIC> +{ + /*! 
\brief Get the Cell list based on the type + * + * \param vd Distributed vector + * \param r_cut Cut-off radius + * \param g Ghost + * + * \return the constructed cell-list + * + */ + static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) + { + return vd.getCellListSym(r_cut); + } +}; + +#define CELL_MEMFAST(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> > +#define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal<>, shift<dim, St> > +#define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw<>, shift<dim, St> > -#define CELL_MEMFAST_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_fast, shift<dim, St> > -#define CELL_MEMBAL_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_bal, shift<dim, St> > -#define CELL_MEMMW_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_mw, shift<dim, St> > +#define CELL_MEMFAST_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> > +#define CELL_MEMBAL_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_bal<>, shift<dim, St> > +#define CELL_MEMMW_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_mw<>, shift<dim, St> > +#define VERLET_MEMFAST(dim,St) VerletList<dim,St,Mem_fast<>,shift<dim,St> > +#define VERLET_MEMBAL(dim,St) VerletList<dim,St,Mem_bal<>,shift<dim,St> > +#define VERLET_MEMMW(dim,St) VerletList<dim,St,Mem_mw<>,shift<dim,St> > + +#define VERLET_MEMFAST_INT(dim,St) VerletList<dim,St,Mem_fast<unsigned int>,shift<dim,St> > +#define VERLET_MEMBAL_INT(dim,St) VerletList<dim,St,Mem_bal<unsigned int>,shift<dim,St> > +#define VERLET_MEMMW_INT(dim,St) VerletList<dim,St,Mem_mw<unsigned int>,shift<dim,St> > + +enum reorder_opt +{ + NO_REORDER = 0, + HILBERT = 1, + LINEAR = 2 +}; /*! 
\brief Distributed vector * @@ -133,13 +171,19 @@ struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, S * */ -template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> -class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory> +template<unsigned int dim, + typename St, + typename prop, + typename layout = typename memory_traits_lin<prop>::type, + template <typename> class layout_base = memory_traits_lin, + typename Decomposition = CartDecomposition<dim,St>, + typename Memory = HeapMemory> +class vector_dist : public vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> { public: //! Self type - typedef vector_dist<dim,St,prop,Decomposition,Memory> self; + typedef vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> self; //! property object typedef prop value_type; @@ -151,11 +195,11 @@ private: //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<Point<dim, St>> v_pos; + openfpm::vector<Point<dim, St>,Memory> v_pos; //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<prop> v_prp; + openfpm::vector<prop,Memory,layout,layout_base> v_prp; //! Virtual cluster Vcluster & v_cl; @@ -231,6 +275,51 @@ private: } } + /*! 
\brief Reorder based on hilbert space filling curve + * + * \param v_pos_dest reordered vector of position + * \param v_prp_dest reordered vector of properties + * \param m order of the space filling curve + * \param cell_list cell-list + * + */ + template<typename CellL, typename sfc_it> + void reorder_sfc(openfpm::vector<Point<dim,St>> & v_pos_dest, + openfpm::vector<prop> & v_prp_dest, + sfc_it & h_it, + CellL & cell_list) + { + v_pos_dest.resize(v_pos.size()); + v_prp_dest.resize(v_prp.size()); + + //Index for v_pos_dest + size_t count = 0; + + grid_key_dx<dim> ksum; + + for (size_t i = 0; i < dim ; i++) + {ksum.set_d(i,cell_list.getPadding(i));} + + while (h_it.isNext()) + { + auto key = h_it.get(); + key += ksum; + + size_t lin = cell_list.getGrid().LinId(key); + + // for each particle in the Cell "lin" + for (size_t i = 0; i < cell_list.getNelements(lin); i++) + { + //reorder + auto v = cell_list.get(lin,i); + v_pos_dest.get(count) = v_pos.get(v); + v_prp_dest.get(count) = v_prp.get(v); + + count++; + } + ++h_it; + } + } public: @@ -247,9 +336,9 @@ public: * \return itself * */ - vector_dist<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist<dim,St,prop,Decomposition,Memory> & v) + vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> & operator=(const vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> & v) { - static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory>>(v)); + static_cast<vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory>>(v)); g_m = v.g_m; v_pos = v.v_pos; @@ -271,9 +360,9 @@ public: * \return itself * */ - vector_dist<dim,St,prop,Decomposition,Memory> & operator=(vector_dist<dim,St,prop,Decomposition,Memory> && v) + vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> & 
operator=(vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> && v) { - static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory> >(v)); + static_cast<vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> >(v)); g_m = v.g_m; v_pos.swap(v.v_pos); @@ -294,8 +383,8 @@ public: * \param v vector to copy * */ - vector_dist(const vector_dist<dim,St,prop,Decomposition,Memory> & v) - :vector_dist_comm<dim,St,prop,Decomposition,Memory>(v.getDecomposition()),v_cl(v.v_cl) SE_CLASS3_VDIST_CONSTRUCTOR + vector_dist(const vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> & v) + :vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory>(v.getDecomposition()),v_cl(v.v_cl) SE_CLASS3_VDIST_CONSTRUCTOR { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -309,7 +398,7 @@ public: * \param v vector to copy * */ - vector_dist(vector_dist<dim,St,prop,Decomposition,Memory> && v) noexcept + vector_dist(vector_dist<dim,St,prop,layout,layout_base,Decomposition,Memory> && v) noexcept :v_cl(v.v_cl) SE_CLASS3_VDIST_CONSTRUCTOR { #ifdef SE_CLASS2 @@ -330,7 +419,7 @@ public: * */ vector_dist(const Decomposition & dec, size_t np) : - vector_dist_comm<dim,St,prop,Decomposition,Memory>(dec), v_cl(create_vcluster()) SE_CLASS3_VDIST_CONSTRUCTOR + vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory>(dec), v_cl(create_vcluster()) SE_CLASS3_VDIST_CONSTRUCTOR { #ifdef SE_CLASS2 check_new(this,8,VECTOR_DIST_EVENT,4); @@ -874,7 +963,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList<dim, St, Mem_fast, shift<dim, St> > > CellL getCellListSym(St r_cut) + template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St> > > CellL getCellListSym(St r_cut) { #ifdef SE_CLASS1 if (!(opt & BIND_DEC_TO_GHOST)) @@ 
-918,7 +1007,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast, shift<dim, St> > > + template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> > > CellL getCellList(St r_cut, bool no_se3 = false) { #ifdef SE_CLASS3 @@ -945,7 +1034,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast, shift<dim, St> > > + template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> > > CellL getCellList_hilb(St r_cut) { #ifdef SE_CLASS3 @@ -995,7 +1084,7 @@ public: } else { - CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost()); + CellL cli_tmp = gcl<dim,St,CellL,self,GCL_NON_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost()); cell_list.swap(cli_tmp); } @@ -1008,7 +1097,7 @@ public: * \param cell_list Cell list to update * */ - template<typename CellL = CellList<dim, St, Mem_fast, shift<dim, St> > > void updateCellListSym(CellL & cell_list) + template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St> > > void updateCellListSym(CellL & cell_list) { #ifdef SE_CLASS3 se3.getNN(); @@ -1032,7 +1121,7 @@ public: } else { - CellL cli_tmp = gcl<dim,St,CellL,self>::get(*this,r_cut,getDecomposition().getGhost()); + CellL cli_tmp = gcl<dim,St,CellL,self,GCL_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost()); cell_list.swap(cli_tmp); } @@ -1054,7 +1143,7 @@ public: * \return the CellList * */ - template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast, shift<dim, St> > > + template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge, bool no_se3 = false) { #ifdef SE_CLASS3 @@ -1073,7 +1162,8 @@ public: // Processor bounding box cl_param_calculate(pbox, div, r_cut, enlarge); - cell_list.Initialize(pbox, div, g_m); + 
cell_list.Initialize(pbox, div); + cell_list.set_gm(g_m); cell_list.set_ndec(getDecomposition().get_ndec()); updateCellList(cell_list,no_se3); @@ -1096,7 +1186,7 @@ public: * \return The Cell-list * */ - template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge) + template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge) { #ifdef SE_CLASS3 se3.getNN(); @@ -1113,7 +1203,8 @@ public: // Processor bounding box cl_param_calculate(pbox,div, r_cut, enlarge); - cell_list.Initialize(pbox, div, g_m); + cell_list.Initialize(pbox, div); + cell_list.set_gm(g_m); cell_list.set_ndec(getDecomposition().get_ndec()); updateCellList(cell_list); @@ -1128,13 +1219,14 @@ public: * \return the verlet list * */ - VerletList<dim,St,FAST,shift<dim,St> > getVerletSym(St r_cut) + template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >> + VerletL getVerletSym(St r_cut) { #ifdef SE_CLASS3 se3.getNN(); #endif - VerletList<dim,St,FAST,shift<dim,St>> ver; + VerletL ver; // Processor bounding box Box<dim, St> pbox = getDecomposition().getProcessorBounds(); @@ -1153,7 +1245,8 @@ public: * \return the verlet list * */ - VerletList<dim,St,FAST,shift<dim,St> > getVerletCrs(St r_cut) + template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >> + VerletL getVerletCrs(St r_cut) { #ifdef SE_CLASS1 if (!(opt & BIND_DEC_TO_GHOST)) @@ -1167,7 +1260,7 @@ public: se3.getNN(); #endif - VerletList<dim,St,FAST,shift<dim,St>> ver; + VerletL ver; // Processor bounding box Box<dim, St> pbox = getDecomposition().getProcessorBounds(); @@ -1205,13 +1298,14 @@ public: * \return a VerletList object * */ - VerletList<dim,St,FAST,shift<dim,St> > getVerlet(St r_cut) + template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >> + VerletL getVerlet(St r_cut) { #ifdef 
SE_CLASS3 se3.getNN(); #endif - VerletList<dim,St,FAST,shift<dim,St>> ver; + VerletL ver; // get the processor bounding box Box<dim, St> bt = getDecomposition().getProcessorBounds(); @@ -1238,7 +1332,7 @@ public: * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC or VL_CRS_SYMMETRIC * */ - void updateVerlet(VerletList<dim,St,FAST,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC) + template<typename Mem_type> void updateVerlet(VerletList<dim,St,Mem_type,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC) { #ifdef SE_CLASS3 se3.getNN(); @@ -1256,9 +1350,9 @@ public: ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); else { - VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp; - ver_tmp = getVerlet(r_cut); + ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut); ver.swap(ver); } } @@ -1297,9 +1391,9 @@ public: } else { - VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp; - ver_tmp = getVerletCrs(r_cut); + ver_tmp = getVerletCrs<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut); ver.swap(ver_tmp); } } @@ -1315,9 +1409,9 @@ public: ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt); else { - VerletList<dim,St,FAST,shift<dim,St> > ver_tmp; + VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp; - ver_tmp = getVerlet(r_cut); + ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut); ver.swap(ver_tmp); } } @@ -1331,9 +1425,10 @@ public: * \param m an order of a hilbert curve * */ - template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast,shift<dim,St> > > void reorder (int32_t m) + template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > > + void reorder (int32_t m, reorder_opt opt = reorder_opt::HILBERT) { - reorder(m,getDecomposition().getGhost()); + reorder<CellL>(m,getDecomposition().getGhost(),opt); } @@ -1349,7 +1444,8 @@ public: * 
\param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged * */ - template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge) + template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > > + void reorder(int32_t m, const Ghost<dim,St> & enlarge, reorder_opt opt = reorder_opt::HILBERT) { // reset the ghost part v_pos.resize(g_m); @@ -1373,7 +1469,8 @@ public: div[i] = 1 << m; } - cell_list.Initialize(pbox,div,g_m); + cell_list.Initialize(pbox,div); + cell_list.set_gm(g_m); // for each particle add the particle to the cell list @@ -1396,38 +1493,24 @@ public: openfpm::vector<Point<dim,St>> v_pos_dest; openfpm::vector<prop> v_prp_dest; - v_pos_dest.resize(v_pos.size()); - v_prp_dest.resize(v_prp.size()); - - //hilberts curve iterator - grid_key_dx_iterator_hilbert<dim> h_it(m); - - //Index for v_pos_dest - size_t count = 0; - - grid_key_dx<dim> ksum; - - for (size_t i = 0; i < dim ; i++) - ksum.set_d(i,cell_list.getPadding(i)); - - while (h_it.isNext()) + if (opt == reorder_opt::HILBERT) { - auto key = h_it.get(); - key += ksum; + grid_key_dx_iterator_hilbert<dim> h_it(m); - size_t lin = cell_list.getGrid().LinId(key); - - // for each particle in the Cell "lin" - for (size_t i = 0; i < cell_list.getNelements(lin); i++) - { - //reorder - auto v = cell_list.get(lin,i); - v_pos_dest.get(count) = v_pos.get(v); - v_prp_dest.get(count) = v_prp.get(v); + reorder_sfc<CellL,grid_key_dx_iterator_hilbert<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list); + } + else if (opt == reorder_opt::LINEAR) + { + grid_sm<dim,void> gs(div); + grid_key_dx_iterator<dim> h_it(gs); - count++; - } - ++h_it; + reorder_sfc<CellL,grid_key_dx_iterator<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list); + } + else + { + // We do nothing, we second swap nullify the first + v_pos.swap(v_pos_dest); + v_prp.swap(v_prp_dest); } 
v_pos.swap(v_pos_dest); @@ -1614,7 +1697,7 @@ public: */ inline Decomposition & getDecomposition() { - return vector_dist_comm<dim,St,prop,Decomposition,Memory>::getDecomposition(); + return vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory>::getDecomposition(); } /*! \brief Get the decomposition @@ -1624,7 +1707,7 @@ public: */ inline const Decomposition & getDecomposition() const { - return vector_dist_comm<dim,St,prop,Decomposition,Memory>::getDecomposition(); + return vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory>::getDecomposition(); } /*! \brief It move all the particles that does not belong to the local processor to the respective processor @@ -1639,9 +1722,17 @@ public: * * */ - template<unsigned int ... prp> void map_list() + template<unsigned int ... prp> void map_list(size_t opt = NONE) { - this->template map_list_<prp...>(v_pos,v_prp,g_m); +#ifdef SE_CLASS3 + se3.map_pre(); +#endif + + this->template map_list_<prp...>(v_pos,v_prp,g_m,opt); + +#ifdef SE_CLASS3 + se3.map_post(); +#endif } @@ -1655,13 +1746,13 @@ public: * * */ - template<typename obp = KillParticle> void map() + template<typename obp = KillParticle> void map(size_t opt = NONE) { #ifdef SE_CLASS3 se3.map_pre(); #endif - this->template map_<obp>(v_pos,v_prp,g_m); + this->template map_<obp>(v_pos,v_prp,g_m,opt); #ifdef SE_CLASS3 se3.map_post(); @@ -2134,7 +2225,7 @@ public: * \return Particle iterator * */ - template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::local_index_t> getParticleIteratorCRS(vrl & NN) + template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::loc_index> getParticleIteratorCRS(vrl & NN) { #ifdef SE_CLASS1 if (!(opt & BIND_DEC_TO_GHOST)) @@ -2145,7 +2236,7 @@ public: #endif // First we check that - return openfpm::vector_key_iterator_seq<typename vrl::local_index_t>(NN.getParticleSeq()); + return openfpm::vector_key_iterator_seq<typename 
vrl::Mem_type_type::loc_index>(NN.getParticleSeq()); } /*! \brief Return from which cell we have to start in case of CRS interation diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp index 758f51c070f79820ee183fc4cc42f760f0df9d34..1be8d771c5d5ffba3f68e90592f209ce18f1ea1b 100644 --- a/src/Vector/vector_dist_comm.hpp +++ b/src/Vector/vector_dist_comm.hpp @@ -17,6 +17,8 @@ #define BIND_DEC_TO_GHOST 1 +#define MAP_LOCAL 2 + /*! \brief compute the communication options from the ghost_get/put options * * @@ -42,7 +44,13 @@ inline static size_t compute_options(size_t opt) * */ -template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory> +template<unsigned int dim, + typename St, + typename prop, + typename layout, + template <typename> class layout_base, + typename Decomposition = CartDecomposition<dim,St>, + typename Memory = HeapMemory> class vector_dist_comm { //! Number of units for each sub-domain @@ -262,7 +270,8 @@ class vector_dist_comm * \param v_prp vector of particles properties * */ - void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp) + void local_ghost_from_opart(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp) { // get the shift vectors const openfpm::vector<Point<dim, St>> & shifts = dec.getShiftVectors(); @@ -289,7 +298,9 @@ class vector_dist_comm * \param g_m ghost marker * */ - void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t g_m) + void local_ghost_from_dec(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + size_t g_m) { o_part_loc.clear(); @@ -310,7 +321,7 @@ class vector_dist_comm { if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true) { - size_t lin_id = box_cmb.get(i).lin(); + size_t 
lin_id = dec.convertShift(box_cmb.get(i)); o_part_loc.add(); o_part_loc.template get<0>(o_part_loc.size()-1) = key; @@ -390,7 +401,10 @@ class vector_dist_comm * \param opt options * */ - void add_loc_particles_bc(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp ,size_t & g_m, size_t opt) + void add_loc_particles_bc(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp , + size_t & g_m, + size_t opt) { // Create the shift boxes createShiftBox(); @@ -403,9 +417,9 @@ class vector_dist_comm else { if (opt & SKIP_LABELLING) - local_ghost_from_opart(v_pos,v_prp); + {local_ghost_from_opart(v_pos,v_prp);} else - local_ghost_from_dec(v_pos,v_prp,g_m); + {local_ghost_from_dec(v_pos,v_prp,g_m);} } } @@ -430,7 +444,7 @@ class vector_dist_comm // Buffer must retained and survive the destruction of the // vector if (hsmem.get(i).ref() == 0) - hsmem.get(i).incRef(); + {hsmem.get(i).incRef();} // Set the memory for retain the send buffer g_pos_send.get(i).setMemory(hsmem.get(i)); @@ -532,7 +546,9 @@ class vector_dist_comm * \param g_send_prp Send buffer to fill * */ - template<typename send_vector, typename prp_object, int ... prp> void fill_send_ghost_prp_buf(openfpm::vector<prop> & v_prp, openfpm::vector<send_vector> & g_send_prp) + template<typename send_vector, typename prp_object, int ... 
prp> + void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + openfpm::vector<send_vector> & g_send_prp) { // create a number of send buffers equal to the near processors g_send_prp.resize(g_opart.size()); @@ -559,7 +575,7 @@ class vector_dist_comm for (size_t j = 0; j < g_opart.get(i).size(); j++) { // source object type - typedef encapc<1, prop, typename openfpm::vector<prop>::layout_type> encap_src; + typedef encapc<1, prop, typename openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base>::layout_type> encap_src; // destination object type typedef encapc<1, prp_object, typename openfpm::vector<prp_object>::layout_type> encap_dst; @@ -578,7 +594,11 @@ class vector_dist_comm * \param m_prp sending buffer for properties * */ - void fill_send_map_buf(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos, openfpm::vector<openfpm::vector<prop>> & m_prp) + void fill_send_map_buf(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + openfpm::vector<size_t> & prc_sz_r, + openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos, + openfpm::vector<openfpm::vector<prop>> & m_prp) { m_prp.resize(prc_sz_r.size()); m_pos.resize(prc_sz_r.size()); @@ -621,7 +641,12 @@ class vector_dist_comm * \param m_prp sending buffer for properties * */ - template<typename prp_object,int ... prp> void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos, openfpm::vector<openfpm::vector<prp_object>> & m_prp) + template<typename prp_object,int ... 
prp> + void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + openfpm::vector<size_t> & prc_sz_r, + openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos, + openfpm::vector<openfpm::vector<prp_object>> & m_prp) { m_prp.resize(prc_sz_r.size()); m_pos.resize(prc_sz_r.size()); @@ -658,7 +683,10 @@ class vector_dist_comm * \param prc_sz For each processor the number of particles to send * */ - template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<aggregate<size_t,size_t,size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz) + template<typename obp> + void labelParticleProcessor(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<aggregate<size_t,size_t,size_t>> & lbl_p, + openfpm::vector<size_t> & prc_sz) { // reset lbl_p lbl_p.clear(); @@ -680,9 +708,9 @@ class vector_dist_comm // Check if the particle is inside the domain if (dec.getDomain().isInside(v_pos.get(key)) == true) - p_id = dec.processorIDBC(v_pos.get(key)); + {p_id = dec.processorIDBC(v_pos.get(key));} else - p_id = obp::out(key, v_cl.getProcessUnitID()); + {p_id = obp::out(key, v_cl.getProcessUnitID());} // Particle to move if (p_id != v_cl.getProcessUnitID()) @@ -720,7 +748,10 @@ class vector_dist_comm * \param g_m ghost marker * */ - void labelParticlesGhost(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, openfpm::vector<size_t> & prc, size_t & g_m) + void labelParticlesGhost(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + openfpm::vector<size_t> & prc, + size_t & g_m) { // Buffer that contain for each processor the id of the particle to send g_opart.clear(); @@ -784,7 +815,7 @@ class vector_dist_comm static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr) { // cast the pointer - 
vector_dist_comm<dim, St, prop, Decomposition, Memory> * vd = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory> *>(ptr); + vector_dist_comm<dim, St, prop,layout,layout_base, Decomposition, Memory> * vd = static_cast<vector_dist_comm<dim, St, prop, layout, layout_base, Decomposition, Memory> *>(ptr); vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits()); vd->recv_mem_gm.get(i).resize(msg_i); @@ -799,7 +830,7 @@ public: * \param v vector to copy * */ - vector_dist_comm(const vector_dist_comm<dim,St,prop,Decomposition,Memory> & v) + vector_dist_comm(const vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> & v) :v_cl(create_vcluster()),dec(create_vcluster()),lg_m(0) { this->operator=(v); @@ -901,27 +932,15 @@ public: cl_param_calculateSym<dim,St>(box,cd_sm,g,pad); for (size_t i = 0 ; i < dim ; i++) - div[i] = cd_sm.getDiv()[i] - 2*pad; + {div[i] = cd_sm.getDiv()[i] - 2*pad;} + + // Create the sub-domains + dec.setParameters(div, box, bc, g, gdist); } else { - // Create a valid decomposition of the space - // Get the number of processor and calculate the number of sub-domain - // for decomposition - size_t n_proc = v_cl.getProcessingUnits(); - size_t n_sub = n_proc * getDecompositionGranularity(); - - // Calculate the maximum number (before merging) of sub-domain on - // each dimension - - for (size_t i = 0; i < dim; i++) - { - div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim)); - } + dec.setGoodParameters(box, bc, g, getDecompositionGranularity(), gdist); } - - // Create the sub-domains - dec.setParameters(div, box, bc, g, gdist); dec.decompose(); } @@ -935,7 +954,11 @@ public: * \param g_m marker between real and ghost particles * */ - template<int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = WITH_POSITION) + template<int ... 
prp> inline + void ghost_get_(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + size_t & g_m, + size_t opt = WITH_POSITION) { // Sending property object typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; @@ -953,7 +976,7 @@ public: // Label all the particles if ((opt & SKIP_LABELLING) == false) - labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m); + {labelParticlesGhost(v_pos,v_prp,prc_g_opart,g_m);} // Send and receive ghost particle information { @@ -969,10 +992,10 @@ public: { size_t opt_ = compute_options(opt); op_ssend_gg_recv_merge opm(g_m); - v_cl.SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); + v_cl.SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_); } else - v_cl.SSendRecvP<send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte); + {v_cl.SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte);} // fill g_opart_sz g_opart_sz.resize(prc_g_opart.size()); @@ -1033,9 +1056,12 @@ public: * \param v_pos vector of particle positions * \param v_prp vector of particle properties * \param g_m ghost marker + * \param opt options * */ - template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + template<unsigned int ... 
prp> + void map_list_(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt = NONE) { typedef KillParticle obp; @@ -1065,6 +1091,17 @@ public: } } + // In case we have receive option + + if (opt & MAP_LOCAL) + { + // if the map is local we indicate that we receive only from the neighborhood processors + + prc_recv_map.clear(); + for (size_t i = 0 ; i < dec.getNNProcessors() ; i++) + {prc_recv_map.add(dec.IDtoProc(i));} + } + // Sending property object typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; @@ -1075,8 +1112,8 @@ public: fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_sz_r, m_pos, m_prp); - v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map); - v_cl.SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); + v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt); + v_cl.SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt); // mark the ghost part @@ -1096,7 +1133,10 @@ public: * \param g_m ghost marker * */ - template<typename obp = KillParticle> void map_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m) + template<typename obp = KillParticle> + void map_(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, + size_t & g_m, size_t opt = NONE) { // Processor communication size openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); @@ -1131,8 +1171,8 @@ public: fill_send_map_buf(v_pos,v_prp, prc_sz_r, m_pos, m_prp); - v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map); - v_cl.SSendRecv(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map); + v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt); + v_cl.SSendRecv(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt); // mark the ghost part @@ -1166,7 +1206,7 @@ public: * 
\return iteself * */ - vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory> & vc) + vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> & operator=(const vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> & vc) { dec = vc.dec; @@ -1180,7 +1220,7 @@ public: * \return itself * */ - vector_dist_comm<dim,St,prop,Decomposition,Memory> & operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory> && vc) + vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> & operator=(vector_dist_comm<dim,St,prop,layout,layout_base,Decomposition,Memory> && vc) { dec = vc.dec; @@ -1198,7 +1238,11 @@ public: * \param opt options * */ - template<template<typename,typename> class op, int ... prp> void ghost_put_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt) + template<template<typename,typename> class op, int ... prp> + void ghost_put_(openfpm::vector<Point<dim, St>> & v_pos, + openfpm::vector<prop> & v_prp, + size_t & g_m, + size_t opt) { // Sending property object typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object; @@ -1215,12 +1259,12 @@ public: size_t opt_ = compute_options(opt); op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); + v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_g_opart,g_opart_sz,opt_); } else { op_ssend_recv_merge<op> opm(g_opart); - v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); + v_cl.SSendRecvP_op<op_ssend_recv_merge<op>,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_recv_get,opm,prc_recv_put,recv_sz_put); } // process also the local 
replicated particles diff --git a/src/Vector/vector_dist_dlb_test.hpp b/src/Vector/vector_dist_dlb_test.hpp index e7d5817d46a118159c074b89456a800abe6a21c1..71788a95f5ced8664649077f376f64ba8d7bf937 100644 --- a/src/Vector/vector_dist_dlb_test.hpp +++ b/src/Vector/vector_dist_dlb_test.hpp @@ -8,125 +8,16 @@ #ifndef SRC_VECTOR_VECTOR_DIST_DLB_TEST_HPP_ #define SRC_VECTOR_VECTOR_DIST_DLB_TEST_HPP_ -BOOST_AUTO_TEST_SUITE( vector_dist_dlb_test ) - -template<typename vector_type> void test_dlb_vector() -{ - Vcluster & v_cl = create_vcluster(); - - if (v_cl.getProcessingUnits() > 8) - return; - - Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); - Ghost<3,float> g(0.1); - size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; - - vector_type vd(0,domain,bc,g,DEC_GRAN(2048)); - - // Only processor 0 initialy add particles on a corner of a domain - - if (v_cl.getProcessUnitID() == 0) - { - for(size_t i = 0 ; i < 50000 ; i++) - { - vd.add(); - - vd.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3; - vd.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3; - vd.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3; - } - } - - vd.map(); - vd.template ghost_get<>(); - - ModelSquare md; - md.factor = 10; - vd.addComputationCosts(md); - vd.getDecomposition().decompose(); - vd.map(); - - - vd.addComputationCosts(md); - - openfpm::vector<size_t> loads; - size_t load = vd.getDecomposition().getDistribution().getProcessorLoad(); - v_cl.allGather(load,loads); - v_cl.execute(); - - for (size_t i = 0 ; i < loads.size() ; i++) - { - float load_f = load; - float load_fc = loads.get(i); - - BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0); - } - - BOOST_REQUIRE(vd.size_local() != 0); - - Point<3,float> v({1.0,1.0,1.0}); - - for (size_t i = 0 ; i < 25 ; i++) - { - // move the particles by 0.1 - - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto p = it.get(); - - vd.getPos(p)[0] += v.get(0) * 0.09; - vd.getPos(p)[1] += v.get(1) * 0.09; - vd.getPos(p)[2] += v.get(2) * 0.09; - - ++it; - } - vd.map(); 
- - ModelSquare md; - vd.addComputationCosts(md); - vd.getDecomposition().redecompose(200); - vd.map(); - - BOOST_REQUIRE(vd.size_local() != 0); - - vd.template ghost_get<>(); - - vd.addComputationCosts(md); - - openfpm::vector<size_t> loads; - size_t load = vd.getDecomposition().getDistribution().getProcessorLoad(); - v_cl.allGather(load,loads); - v_cl.execute(); - - for (size_t i = 0 ; i < loads.size() ; i++) - { - float load_f = load; - float load_fc = loads.get(i); - - BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0); - } - } -} +#include "DLB/LB_Model.hpp" +#include "vector_dist.hpp" +BOOST_AUTO_TEST_SUITE( vector_dist_dlb_test ) -template<typename vector_type> void test_dlb_multi_phase_vector() +template<typename vector_type> +void mp_test_template(vector_type & vd0, vector_type & vd1, vector_type & vd2, vector_type & vd3) { Vcluster & v_cl = create_vcluster(); - if (v_cl.getProcessingUnits() > 8) - return; - - Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); - Ghost<3,float> g(0.1); - size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; - - vector_type vd0(0,domain,bc,g,DEC_GRAN(2048)); - vector_type vd1(0,domain,bc,g,DEC_GRAN(2048)); - vector_type vd2(0,domain,bc,g,DEC_GRAN(2048)); - vector_type vd3(0,domain,bc,g,DEC_GRAN(2048)); - // Only processor 0 initialy add particles on a corner of a domain if (v_cl.getProcessUnitID() == 0) @@ -320,6 +211,156 @@ template<typename vector_type> void test_dlb_multi_phase_vector() } } +template<typename vector_type> void test_dlb_vector() +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 8) + return; + + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + Ghost<3,float> g(0.1); + size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; + + vector_type vd(0,domain,bc,g,DEC_GRAN(2048)); + + // Only processor 0 initialy add particles on a corner of a domain + + if (v_cl.getProcessUnitID() == 0) + { + for(size_t i = 0 ; i < 50000 ; i++) + { + vd.add(); + + vd.getLastPos()[0] = ((float)rand())/RAND_MAX * 0.3; + 
vd.getLastPos()[1] = ((float)rand())/RAND_MAX * 0.3; + vd.getLastPos()[2] = ((float)rand())/RAND_MAX * 0.3; + } + } + + vd.map(); + vd.template ghost_get<>(); + + ModelSquare md; + md.factor = 10; + vd.addComputationCosts(md); + vd.getDecomposition().decompose(); + vd.map(); + + + vd.addComputationCosts(md); + + openfpm::vector<size_t> loads; + size_t load = vd.getDecomposition().getDistribution().getProcessorLoad(); + v_cl.allGather(load,loads); + v_cl.execute(); + + for (size_t i = 0 ; i < loads.size() ; i++) + { + float load_f = load; + float load_fc = loads.get(i); + + BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0); + } + + BOOST_REQUIRE(vd.size_local() != 0); + + Point<3,float> v({1.0,1.0,1.0}); + + for (size_t i = 0 ; i < 25 ; i++) + { + // move the particles by 0.1 + + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + vd.getPos(p)[0] += v.get(0) * 0.09; + vd.getPos(p)[1] += v.get(1) * 0.09; + vd.getPos(p)[2] += v.get(2) * 0.09; + + ++it; + } + vd.map(); + + ModelSquare md; + vd.addComputationCosts(md); + vd.getDecomposition().redecompose(200); + vd.map(); + + BOOST_REQUIRE(vd.size_local() != 0); + + vd.template ghost_get<>(); + + vd.addComputationCosts(md); + + openfpm::vector<size_t> loads; + size_t load = vd.getDecomposition().getDistribution().getProcessorLoad(); + v_cl.allGather(load,loads); + v_cl.execute(); + + for (size_t i = 0 ; i < loads.size() ; i++) + { + float load_f = load; + float load_fc = loads.get(i); + + BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0); + } + } +} + + +template<typename vector_type> void test_dlb_multi_phase_vector() +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 8) + return; + + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + Ghost<3,float> g(0.1); + size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; + + vector_type vd0(0,domain,bc,g,DEC_GRAN(2048)); + vector_type vd1(0,domain,bc,g,DEC_GRAN(2048)); + vector_type vd2(0,domain,bc,g,DEC_GRAN(2048)); + vector_type 
vd3(0,domain,bc,g,DEC_GRAN(2048)); + + mp_test_template(vd0,vd1,vd2,vd3); +} + + + +template<typename vector_type> void test_dlb_multi_phase_v_vector() +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessingUnits() > 8) + return; + + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + Ghost<3,float> g(0.1); + size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC}; + + openfpm::vector<vector_type> v_phases; + { + vector_type vd0(0,domain,bc,g,DEC_GRAN(2048)); + v_phases.add(vd0); + v_phases.add(vector_type(vd0.getDecomposition(),0)); + v_phases.add(vector_type(vd0.getDecomposition(),0)); + v_phases.add(vector_type(vd0.getDecomposition(),0)); + } + + auto & vd0 = v_phases.get(0); + auto & vd1 = v_phases.get(1); + auto & vd2 = v_phases.get(2); + auto & vd3 = v_phases.get(3); + + mp_test_template(vd0,vd1,vd2,vd3); +} + BOOST_AUTO_TEST_CASE( vector_dist_dlb_test_part ) { test_dlb_vector<vector_dist<3,float,aggregate<float>>>(); @@ -330,9 +371,19 @@ BOOST_AUTO_TEST_CASE( vector_dist_dlb_multi_phase_test_part ) test_dlb_multi_phase_vector<vector_dist<3,float,aggregate<float>>>(); } +BOOST_AUTO_TEST_CASE( vector_dist_dlb_multi_phase_v_test_part ) +{ + test_dlb_multi_phase_v_vector<vector_dist<3,float,aggregate<float>>>(); +} + BOOST_AUTO_TEST_CASE( vector_dist_dlb_metis_test_part ) { - test_dlb_vector<vector_dist<3,float,aggregate<float>,CartDecomposition<3,float,HeapMemory,MetisDistribution<3,float>>>>(); + test_dlb_vector<vector_dist<3, + float, + aggregate<float>, + memory_traits_lin<aggregate<float>>::type, + memory_traits_lin, + CartDecomposition<3,float,HeapMemory,MetisDistribution<3,float>>>>(); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/vector_dist_multiphase_functions.hpp b/src/Vector/vector_dist_multiphase_functions.hpp index 3d13593baa561a1cf212a7947c32211c49576eac..c9c9d235f97ccecf54cbd662072edbac4910256a 100644 --- a/src/Vector/vector_dist_multiphase_functions.hpp +++ b/src/Vector/vector_dist_multiphase_functions.hpp @@ -11,9 +11,11 @@ #include 
"NN/CellList/CellListM.hpp" #include "NN/VerletList/VerletListM.hpp" -template<typename Vector, typename CL, typename T> VerletList<Vector::dims,typename Vector::stype,FAST,shift<Vector::dims,typename Vector::stype>> createVerlet(Vector & v, Vector & v1, CL & cl, T r_cut) +template<typename Vector, typename CL, typename T> +VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> +createVerlet(Vector & v, Vector & v1, CL & cl, T r_cut) { - VerletList<Vector::dims,typename Vector::stype,FAST,shift<Vector::dims,typename Vector::stype>> ver; + VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> ver; ver.Initialize(cl,r_cut,v.getPosVector(),v1.getPosVector(),v.size_local()); @@ -72,9 +74,11 @@ template<unsigned int nbit, typename Vector, typename T> CellListM<Vector::dims, /////// Symmetric version -template<typename Vector,typename CL, typename T> VerletList<Vector::dims,typename Vector::stype,FAST,shift<Vector::dims,typename Vector::stype>> createVerletSym(Vector & v, Vector & v1, CL & cl, T r_cut) +template<typename Vector,typename CL, typename T> +VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> +createVerletSym(Vector & v, Vector & v1, CL & cl, T r_cut) { - VerletList<Vector::dims,typename Vector::stype,FAST,shift<Vector::dims,typename Vector::stype>> ver; + VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> ver; ver.Initialize(cl,r_cut,v.getPosVector(),v1.getPosVector(),v.size_local()); @@ -88,7 +92,7 @@ template<unsigned int sh_byte, typename Vector, typename Vector1 ,typename CL, t openfpm::vector<pos_v<Vector::dims,typename Vector::stype>> v_phases; for (size_t i = 0 ; i < phases.size() ; i++) - v_phases.add(pos_v<Vector::dims,typename Vector::stype>(phases.get(i).getPosVector())); + {v_phases.add(pos_v<Vector::dims,typename 
Vector::stype>(phases.get(i).getPosVector()));} ver.Initialize(cl,pp,r_cut,v.getPosVector(),v_phases,v.size_local(),VL_SYMMETRIC); diff --git a/src/Vector/vector_dist_ofb.hpp b/src/Vector/vector_dist_ofb.hpp index ec56d7bbbeec179e491e876c6cb64ec518fd6e2f..5e17e1ec52b56369d77f54dad6a38107dc3250c8 100644 --- a/src/Vector/vector_dist_ofb.hpp +++ b/src/Vector/vector_dist_ofb.hpp @@ -42,7 +42,7 @@ struct KillParticleWithWarning */ static size_t out(size_t pp_id, size_t p_id) { - std::cerr << "Warning: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected "; + std::cerr << "Warning: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected " << std::endl; return -1; } @@ -78,7 +78,7 @@ struct Error */ static size_t out(size_t pp_id, size_t p_id) { - std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected "; + std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected " << std::endl; exit(-1); diff --git a/src/gargabe.hpp b/src/gargabe.hpp index a82d59ed33d473deeee74dafadc2a1e9e0c9895a..9f9fbe2778e6b1f49b66e083d0f0e3ee5a404196 100644 --- a/src/gargabe.hpp +++ b/src/gargabe.hpp @@ -1159,4 +1159,67 @@ fix_ie_g_box(); ////////////////////// +/* Point<dim,long int> p; + p.get(0) = 0; + p.get(1) = 81; + p.get(2) = 79; + if (ib.isInside(p)) + { + int debug = 0; + debug++; + } + + for (size_t i = 0 ; i < dim ; i++) + { + if (sub_domain.getLow(i) == ib_dom.getLow(i) && + (sub_domain_other.getHigh(i) == sub_domain.getLow(i) || cmb.c[i] == 1)) + { + if (g.getHigh(i) != INVALID_GHOST && (ib.getHigh(i) - ib.getLow(i) + 1) > g.getHigh(i)) + { + ib.setHigh(i,ib.getLow(i) + g.getHigh(i) - 1); + } + } + + if (sub_domain.getHigh(i) == ib_dom.getHigh(i) && + (sub_domain_other.getLow(i) == sub_domain.getHigh(i) || cmb.c[i] == 1)) + { + if (g.getLow(i) != -INVALID_GHOST && (ib.getHigh(i) - ib.getLow(i) + 1) > abs(g.getLow(i))) + { + ib.setLow(i, g.getHigh(i) - g.getLow(i) + 1); + } + } + } + + // This 
is a special case because a domain intersect itself by + // periodicity + if (sub_domain == sub_domain_other) + { + for (size_t i = 0 ; i < dim ; i++) + { + if (sub_domain.getLow(i) == ib_dom.getLow(i) && + sub_domain.getLow(i) == domain.getLow(i) && + sub_domain_other.getHigh(i) == domain.getHigh(i) && + cmb.c[i] == 1) + { + if (g.getHigh(i) != INVALID_GHOST && (ib.getHigh(i) - ib.getLow(i) + 1) > g.getHigh(i)) + { + ib.setHigh(i,ib.getLow(i) + g.getHigh(i) - 1); + } + } + + if (sub_domain.getHigh(i) == ib_dom.getHigh(i) && + sub_domain.getHigh(i) == domain.getHigh(i) && + sub_domain_other.getLow(i) == sub_domain.getHigh(i) && + cmb.c[i] == -1) + { + if (g.getLow(i) != -INVALID_GHOST && (ib.getHigh(i) - ib.getLow(i) + 1) > abs(g.getLow(i))) + { + ib.setLow(i, g.getHigh(i) - g.getLow(i) + 1); + } + } + } + }*/ + +////////////////////////// + #endif /* GARGABE_HPP_ */ diff --git a/src/main.cpp b/src/main.cpp index 0c2e4403cf2ddb336370e7cd6fd4dd745775f114..64710443d94f49dced0147604d52275ea5872273 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -42,12 +42,9 @@ int main(int argc, char* argv[]) #include "unit_test_init_cleanup.hpp" #include "Graph/CartesianGraphFactory_unit_test.hpp" -#include "Decomposition/CartDecomposition_unit_test.hpp" #include "Decomposition/ORB_unit_test.hpp" #include "Decomposition/Distribution/metis_util_unit_test.hpp" #include "Decomposition/dec_optimizer_unit_test.hpp" -#include "Vector/vector_dist_unit_test.hpp" -#include "Vector/vector_dist_HDF5_chckpnt_restart_test.hpp" #include "Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp" #include "Decomposition/Distribution/Distribution_unit_tests.hpp" #include "Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp" @@ -56,7 +53,6 @@ int main(int argc, char* argv[]) #include "Graph/DistGraphFactory.hpp" #include "Decomposition/nn_processor_unit_test.hpp" #include "Grid/staggered_grid_dist_unit_test.hpp" -#include "Vector/vector_dist_MP_unit_tests.hpp" #include "Vector/se_class3_vector_unit_tests.hpp" 
#include "Vector/vector_dist_dlb_test.hpp" #include "Decomposition/Domain_NN_calculator_cart_unit_test.hpp"