From ce8eb6ed51e6906b27facdb6b9a4183f6654faea Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Wed, 20 Dec 2017 12:12:46 +0100 Subject: [PATCH] benchmark paper with C++ macro for fast stencil --- .../3_gray_scott_3d_vectorization/Makefile | 2 +- .../3_gray_scott_3d_vectorization/main.cpp | 65 ++++++++++++------- .../Iterators/grid_dist_id_iterator_sub.hpp | 35 ++++++++++ 3 files changed, 76 insertions(+), 26 deletions(-) diff --git a/example/Grid/3_gray_scott_3d_vectorization/Makefile b/example/Grid/3_gray_scott_3d_vectorization/Makefile index 68d37df8c..9488bea01 100644 --- a/example/Grid/3_gray_scott_3d_vectorization/Makefile +++ b/example/Grid/3_gray_scott_3d_vectorization/Makefile @@ -10,7 +10,7 @@ OBJ = main.o update_new.o mpif90 -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none -mavx -O3 -c -g -o $@ $< %.o: %.cpp - $(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/scratch/p_ppm/VCDEVEL/include + $(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/home/i-bird/VC/include gray_scott: $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) -L/home/i-bird/VC/lib -lVc diff --git a/example/Grid/3_gray_scott_3d_vectorization/main.cpp b/example/Grid/3_gray_scott_3d_vectorization/main.cpp index d99e431b0..db8f5ea04 100644 --- a/example/Grid/3_gray_scott_3d_vectorization/main.cpp +++ b/example/Grid/3_gray_scott_3d_vectorization/main.cpp @@ -36,7 +36,7 @@ //! \cond [constants] \endcond -#define FORTRAN_UPDATE +//#define FORTRAN_UPDATE constexpr int x = 0; constexpr int y = 1; @@ -111,18 +111,17 @@ void step(grid_dist_id<3, double, aggregate<double>> & OldU, Vc::double_v uFactor, Vc::double_v vFactor, double deltaT, double F, double K) { #ifndef FORTRAN_UPDATE - for (size_t i = 0 ; i < OldU.getN_loc_grid() ; i++) - { - auto & U_old = OldU.get_loc_grid(i); - auto & V_old = OldV.get_loc_grid(i); - auto & U_new = NewU.get_loc_grid(i); - auto & V_new = NewV.get_loc_grid(i); + //! \cond [cpp_update] \endcond - auto it = OldU.get_loc_grid_iterator_stencil(i,star_stencil_3D); + WHILE_M(OldU,star_stencil_3D) + auto & U_old = GET_GRID_M(OldU); + auto & V_old = GET_GRID_M(OldV); + + auto & U_new = GET_GRID_M(NewU); + auto & V_new = GET_GRID_M(NewV); + ITERATE_3D_M - while (it.isNext()) - { // center point auto Cp = it.getStencil<0>(); @@ -168,13 +167,14 @@ void step(grid_dist_id<3, double, aggregate<double>> & OldU, out1.store(&U_new.get<0>(Cp),Vc::Unaligned); out2.store(&V_new.get<0>(Cp),Vc::Unaligned); + END_LOOP_M + + //! \cond [cpp_update] \endcond - // Next point in the grid - it += Vc::double_v::Size; - } - } #else + //! \cond [fort_update] \endcond + double uFactor_s = uFactor[0]; double vFactor_s = vFactor[0]; @@ -209,6 +209,8 @@ void step(grid_dist_id<3, double, aggregate<double>> & OldU, &deltaT, &uFactor_s, &vFactor_s,&F,&K); } + //! \cond [fort_update] \endcond + #endif } @@ -306,19 +308,30 @@ int main(int argc, char* argv[]) * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization * * Alternate New and Old field to run one step, switch between old and new if the iteration - * is even or odd. The function step is nothing else than the the implementation of Gray-Scott - * 3D in the previous example. + * is even or odd. The function step is nothing else than the implementation of Gray-Scott + * 3D in the previous example but in a more optimized way. * * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp alternate * + * In this function we show two methods to optimize this function. * - * The function has two main changes. The first is that we iterate - * across local grids rather than a performance improvement is a convenient way - * in case we have a lot of grid in this case we moved from 3 grid Old and New - * to 4 grids. The second improvement is using Vc::double_v instead of double to - * vectorize the code. + * * We can use the macro **WHILE_M** passing the stencil definition, **ITERATE_3D** to define the loop, + * **END_LOOP** to close the loop, and use the function + * function **getStencil<0>()** to retrieve the stencil points. Additionaly we can use Vc::double_v instead + * of double to vectorize the code. This method give the advantage to keep all the + * code in C++. * - * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp vectorization + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp cpp_update + * + * * Another possibility is to use FORTRAN. Because FORTRAN has better + * support for multi dimensional array another possibility is to process each local grid using + * FORTRAN, this also give us the opportunity to show hybrid code. We can switch between + * one and the other method commenting + * and uncommeting the line #define FORTRAN_UPDATE in the code. + * + * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp fort_update + * + * \include Grid/3_gray_scott_3d_vectorization/update_new.f90 * */ @@ -352,10 +365,10 @@ int main(int argc, char* argv[]) //! \cond [save hdf5] \endcond - // Every 500 time step we output the configuration on hdf5 + // Every 2000 time step we output the configuration on hdf5 if (i % 2000 == 0) { -// OldU.save("output_u_" + std::to_string(count)); + OldU.save("output_u_" + std::to_string(count)); OldV.save("output_v_" + std::to_string(count)); count++; } @@ -364,7 +377,9 @@ int main(int argc, char* argv[]) } tot_sim.stop(); - std::cout << "Total simulation: " << tot_sim.getwct() << std::endl; + + if (create_vcluster().rank() == 0) + {std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;} // We frite the final configuration OldV.write("final"); diff --git a/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp b/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp index a1730e5dc..c249787f2 100644 --- a/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterator_sub.hpp @@ -249,6 +249,41 @@ class grid_dist_iterator_sub { return stop; } + + /*! \brief Return the number of local grids + * + * + */ + inline size_t N_loc_grid() + { + return gList.size(); + } + + /*! \brief Return the component j of the starting point (P1) of the domain part + * for the local grid i + * + * \param i local grid + * \param j dimension + * + * + */ + inline size_t loc_grid_info_start(size_t i,size_t j) + { + return gdb_ext.get(i).DBox.getLow(i); + } + + /*! \brief Return the component j of the stop point (P2) of the domain part + * for the local grid i + * + * \param i local grid + * \param j dimension + * + * + */ + inline size_t loc_grid_info_size(size_t i,size_t j) + { + return gdb_ext.get(i).GDBox.getHigh(i); + } }; #endif /* SRC_GRID_GRID_DIST_ID_ITERATOR_SUB_HPP_ */ -- GitLab