Commit 3d995585 authored by incardon's avatar incardon
Browse files

Fixing conflicts

parents 7bc57fc2 b152d3b7
# Change Log
All notable changes to this project will be documented in this file.
## [1.0.0] 13 September 2017
## [1.1.0] February 2018
### Added
- Interface for Multi-vector dynamic load balancing
- Increased performance for grid ghost get
- Introduced forms to increase the performance of the grid iterator in case of stencil code (see example 5_GrayScott)
- EMatrix: wrapped Eigen matrices compatible with vector_dist_id
- General tuning for high dimension vector_dist_id (up to 50 dimensions) + PS_CMA_ES (Particle-Swarm Covariant Matrix Adaptation Evolution Strategy) example in Numerics
- Added Discrete element Method example (8_DEM)
- Added serial_to_parallel example VCluster (2_serial_to_parallel). The example shows how to port a serial example into openfpm by gradually switching from
a serial section to a parallel section
- Introduced map(LOCAL) for fast communication in case we have small movement
### Fixed
- Installation/detection of PETSC
- CRITICAL-BUG scalar product in combination with vector product is broken (it returns 0)
- Fixing 2D IO in binary for vector
- Fixing 1D grid writer in ASCII mode
- Fixing Intel compilation of Linear algebra
## [1.0.0] 13 September 2017 (Codename: Vortex)
### Added
- Introduced getDomainIterator for Cell-list
......
......@@ -81,12 +81,8 @@ then
source /etc/profile
echo "$PATH"
module load eigen/3.2.0
module load suitesparse/4.2.1-gnu-multimkl
module load boost/1.60.0
module load gcc/5.3.0
module load openmpi/1.10.2-gnu
module unload bullxmpi
module load gcc/7.1.0
module load openmpi/3.0.0-gnu7.1
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/incard/PARMETIS/lib:/home/incard/METIS/lib:/home/incard/HDF5/lib"
......@@ -105,20 +101,27 @@ else
echo "Compiling general"
source ~/.bashrc
installation_dir=""
if [ x"$2" == x"sbalzarini-mac-15" ]; then
installation_dir="--prefix=/Users/jenkins/openfpm_install"
else
installation_dir="--prefix=/home/jenkins/openfpm_install"
fi
mkdir $HOME/$branch
if [ x"$4" == x"full" ]; then
./install -i $HOME/$branch -s -c "--prefix=/Users/jenkins/openfpm_install"
./install -i $HOME/$branch -s -c "$installation_dir"
mv $HOME/openfpm_vars $HOME/openfpm_vars_$branch
source $HOME/openfpm_vars_$branch
elif [ x"$3" == x"numerics" ]; then
branch=$(git ls-remote --heads origin | grep $(git rev-parse HEAD) | cut -d / -f 3)
./install -i $HOME/$branch -m -s -c "--prefix=/home/jenkins/openfpm_install"
./install -i $HOME/$branch -m -s -c "$installation_dir"
mv $HOME/openfpm_vars $HOME/openfpm_vars_$branch
source $HOME/openfpm_vars_$branch
make $3
else
./install -i $HOME/$branch -m -s -c "--prefix=/Users/jenkins/openfpm_install --no-recursion"
./install -i $HOME/$branch -m -s -c "$installation_dir --no-recursion"
mv $HOME/openfpm_vars $HOME/openfpm_vars_$branch
source $HOME/openfpm_vars_$branch
make $3
......
......@@ -18,6 +18,7 @@ if [ "$2" == "windows10" ]; then
branch=$(git ls-remote --heads origin | grep $(git rev-parse HEAD) | cut -d / -f 3)
git pull origin master
./install -i "/home/jenkins/$branch" < input_install
fi
......
......@@ -64,8 +64,7 @@ INCLUDES_PATH=" "
echo "$base" > install_dir
# Needed for build library
AC_PROG_RANLIB
AM_PROG_AR
LT_INIT
# Checks for programs.
AC_PROG_CXX
......@@ -139,14 +138,6 @@ fi
AX_LIB_HILBERT([],[echo "Cannot detect libhilbert, use the --with-libhilbert option if it is not installed in the default location"
exit 210])
##########
## Check for PETSC
AX_LIB_PETSC()
## Check for quadmath
have_quad_lib=no
have_quad_head=no
......@@ -245,6 +236,7 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
)
AC_LANG_POP([C++])
CXXFLAGS="$my_save_cflags"
AC_SUBST([AM_CXXFLAGS])
......@@ -377,6 +369,12 @@ AX_LAPACK([],[])
AX_SUITESPARSE([],[])
##########
## Check for PETSC
AX_LIB_PETSC()
###### Checking for EIGEN
AX_EIGEN([],[])
......
master
......@@ -17,7 +17,6 @@
#define SE_CLASS3
#define THROW_ON_ERROR
#include "Memleak_check.hpp"
#include "data_type/scalar.hpp"
#include "Grid/grid_dist_id.hpp"
#include "Decomposition/CartDecomposition.hpp"
#include "Point_test.hpp"
......@@ -72,7 +71,7 @@ int main(int argc, char* argv[])
// * g: ghost extension
//
//
grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>> * g_dist = new grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>>(sz,domain,g);
grid_dist_id<3, float, aggregate<float[3]>, CartDecomposition<3,float>> * g_dist = new grid_dist_id<3, float, aggregate<float[3]> >(sz,domain,g);
//
// ### WIKI 6 ###
......
......@@ -9,6 +9,7 @@
* \subpage Grid_2_solve_eq
* \subpage Grid_3_gs
* \subpage Grid_3_gs_3D
* \subpage Grid_3_gs_3D_vector
*
*/
......
......@@ -321,7 +321,7 @@ int main(int argc, char* argv[])
// visualization
if (i % 100 == 0)
{
Old.write("output",count);
Old.write_frame("output",count);
count++;
}
}
......
......@@ -7,7 +7,7 @@ LDIR =
OBJ = main.o
%.o: %.cpp
$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
$(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
gray_scott: $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
......
......@@ -6,6 +6,8 @@
*
* \page Grid_3_gs_3D Gray Scott in 3D
*
* [TOC]
*
* # Solving a gray scott-system in 3D # {#e3_gs_gray_scott}
*
* This example is just an extension of the 2D Gray scott example.
......@@ -17,9 +19,25 @@
* <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
* \endhtmlonly
*
* More or less this example is the adaptation of the previous example to 3D
* with the improvement of using stencil iterator.
*
* ## Stencil iterator {#e3_gs_grat_scott_si}
*
* A stencil iterator requires that you define a stencil,
*
* \snippet Grid/3_gray_scott_3d/main.cpp stencil def
*
* once it is defined it is
* possible to get and use a stencil iterator
*
* \snippet Grid/3_gray_scott_3d/main.cpp stencil get and use
*
* The rest of the example remains the same with the exception
* that the code has been extended to 3D.
*
* \see \ref Grid_2_solve_eq
*
* \snippet Grid/3_gray_scott/main.cpp constants
*
*/
......@@ -107,29 +125,13 @@ int main(int argc, char* argv[])
size_t timeSteps = 5000;
// K and F (Physical constant in the equation)
double K = 0.014;
double F = 0.053;
//! \cond [init lib] \endcond
/*!
* \page Grid_3_gs_3D Gray Scott in 3D
*
* Here we create 2 distributed grid in 2D Old and New. In particular because we want that
* the second grid is distributed across processors in the same way we pass the decomposition
* of the Old grid to the New one in the constructor with **Old.getDecomposition()**. Doing this,
* we force the two grid to have the same decomposition.
*
* \snippet Grid/3_gray_scott/main.cpp init grid
*
*/
//! \cond [init grid] \endcond
double K = 0.053;
double F = 0.014;
grid_dist_id<3, double, aggregate<double,double>> Old(sz,domain,g,bc);
// New grid with the decomposition of the old grid
grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g);
grid_dist_id<3, double, aggregate<double,double>> New(Old.getDecomposition(),sz,g);
// spacing of the grid on x and y
......@@ -149,46 +151,71 @@ int main(int argc, char* argv[])
timer tot_sim;
tot_sim.start();
//! \cond [stencil def] \endcond
static grid_key_dx<3> star_stencil_3D[7] = {{0,0,0},
{0,0,-1},
{0,0,1},
{0,-1,0},
{0,1,0},
{-1,0,0},
{1,0,0}};
//! \cond [stencil def] \endcond
for (size_t i = 0; i < timeSteps; ++i)
{
if (i % 300 == 0)
std::cout << "STEP: " << i << std::endl;
{std::cout << "STEP: " << i << std::endl;}
//! \cond [stencil get and use] \endcond
auto it = Old.getDomainIterator();
auto it = Old.getDomainIteratorStencil(star_stencil_3D);
while (it.isNext())
{
auto key = it.get();
// center point
auto Cp = it.getStencil<0>();
// plus,minus X,Y,Z
auto mx = it.getStencil<1>();
auto px = it.getStencil<2>();
auto my = it.getStencil<3>();
auto py = it.getStencil<4>();
auto mz = it.getStencil<5>();
auto pz = it.getStencil<6>();
// update based on Eq 2
New.get<U>(key) = Old.get<U>(key) + uFactor * (
Old.get<U>(key.move(x,1)) +
Old.get<U>(key.move(x,-1)) +
Old.get<U>(key.move(y,1)) +
Old.get<U>(key.move(y,-1)) +
Old.get<U>(key.move(z,1)) +
Old.get<U>(key.move(z,-1)) -
6.0*Old.get<U>(key)) +
- deltaT * Old.get<U>(key) * Old.get<V>(key) * Old.get<V>(key) +
- deltaT * F * (Old.get<U>(key) - 1.0);
New.get<U>(Cp) = Old.get<U>(Cp) + uFactor * (
Old.get<U>(mz) +
Old.get<U>(pz) +
Old.get<U>(my) +
Old.get<U>(py) +
Old.get<U>(mx) +
Old.get<U>(px) -
6.0*Old.get<U>(Cp)) +
- deltaT * Old.get<U>(Cp) * Old.get<V>(Cp) * Old.get<V>(Cp) +
- deltaT * F * (Old.get<U>(Cp) - 1.0);
// update based on Eq 2
New.get<V>(key) = Old.get<V>(key) + vFactor * (
Old.get<V>(key.move(x,1)) +
Old.get<V>(key.move(x,-1)) +
Old.get<V>(key.move(y,1)) +
Old.get<V>(key.move(y,-1)) +
Old.get<V>(key.move(z,1)) +
Old.get<V>(key.move(z,-1)) -
6*Old.get<V>(key)) +
deltaT * Old.get<U>(key) * Old.get<V>(key) * Old.get<V>(key) +
- deltaT * (F+K) * Old.get<V>(key);
New.get<V>(Cp) = Old.get<V>(Cp) + vFactor * (
Old.get<V>(mz) +
Old.get<V>(pz) +
Old.get<V>(my) +
Old.get<V>(py) +
Old.get<V>(mx) +
Old.get<V>(px) -
6*Old.get<V>(Cp)) +
deltaT * Old.get<U>(Cp) * Old.get<V>(Cp) * Old.get<V>(Cp) +
- deltaT * (F+K) * Old.get<V>(Cp);
// Next point in the grid
++it;
}
//! \cond [stencil get and use] \endcond
// Here we copy New into the old grid in preparation of the new step
// It would be better to alternate, but using this we can show the usage
// of the function copy. To note that copy work only on two grid of the same
......@@ -199,11 +226,11 @@ int main(int argc, char* argv[])
// After copy we synchronize again the ghost part U and V
Old.ghost_get<U,V>();
// Every 30 time step we output the configuration for
// Every 500 time step we output the configuration for
// visualization
if (i % 60 == 0)
if (i % 500 == 0)
{
Old.write_frame("output",count,VTK_WRITER | FORMAT_BINARY);
Old.save("output_" + std::to_string(count));
count++;
}
}
......@@ -229,4 +256,13 @@ int main(int argc, char* argv[])
openfpm_finalize();
//! \cond [finalize] \endcond
/*!
* \page Grid_3_gs_3D Gray Scott in 3D
*
* # Full code # {#code}
*
* \include Grid/3_gray_scott_3d/main.cpp
*
*/
}
# Build file for the vectorized 3D Gray-Scott example.
# NOTE(review): example.mk presumably defines INCLUDE_PATH, LIBS_PATH, LIBS and
# CFLAGS used by the rules below - confirm against the examples root.
include ../../example.mk
# C++ sources are compiled and linked through the MPI C++ wrapper
CC=mpic++
LDIR =
# main.o comes from main.cpp, update_new.o from the Fortran source update_new.f90
OBJ = main.o update_new.o
# Fortran objects: built with the MPI Fortran wrapper, AVX enabled, -O3 with debug info
%.o: %.f90
mpif90 -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none -mavx -O3 -c -g -o $@ $<
# C++ objects: C++11, AVX enabled. /where/is/vc/installation is a placeholder that
# must be replaced with the real Vc installation prefix when the Vc code path is used
%.o: %.cpp
$(CC) -O3 -mavx -g -c --std=c++11 -Wno-ignored-attributes -o $@ $< $(INCLUDE_PATH) -I/where/is/vc/installation/include
# Link the example executable from all objects
gray_scott: $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS) -L/where/is/vc/installation/lib # -lVc (Add -lVc if you use VCDevel)
# Default aggregate target
all: gray_scott
# Build, then launch the example on 4 MPI processes
run: all
mpirun -np 4 ./gray_scott
.PHONY: clean all run
# Remove objects, editor backup files, core dumps and the executable
clean:
rm -f *.o *~ core gray_scott
[pack]
files = main.cpp update_new.f90 Makefile
#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"
#include "timer.hpp"
#define FORTRAN_UPDATE
#ifndef FORTRAN_UPDATE
#include "Vc/Vc"
#endif
/*!
*
* \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
*
* # Solving a gray scott-system in 3D # {#e3_gs_gray_scott_vector}
*
* This example is just an improved version of the previous 3D Gray scott example.
* It can use the VCDevel library for vectorization in C++ or a Fortran multi-array code update. For the first case
* the library VCDevel must be installed. It can be installed using the command ./script/install_VCDEVEL.sh
* /where/are/the/dependencies/directory and changing the Makefile to include the VCDevel library
* as shown in the Makefile. By default this example uses the Fortran update because it does not require
* external libraries.
* In particular we make the following improvements: we separate U and V into two grids
* in order to vectorize. Every loop iteration now handles 4 doubles in case of AVX-256 and 2 doubles
* in case of SSE. We also avoid using the function copy and we alternate the use of the
* fields New and Old: if in the first iteration we read from Old and write to New, in
* the second iteration we read from New and write to Old. The last improvement is writing
* to HDF5 rather than VTK. VTK writers are convenient but slow. HDF5
* files can be saved with **save()**, reloaded with **load()**, and after loading can be written
* to VTK with **write**; this means that HDF5 files can easily be converted into VTK at a later time.
* Moreover, because HDF5 files can be saved on multiple processors and reloaded on a different
* number of processors, you can use this method to stitch VTK files together.
*
*
* In figure is the final solution of the problem
*
* \htmlonly
* <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
* \endhtmlonly
*
* \see \ref Grid_2_solve_eq
*
* \snippet Grid/3_gray_scott_3d_vectorization/main.cpp constants
*
*/
//! \cond [constants] \endcond
// Integer indices 0, 1, 2 named after the coordinate axes
// (presumably used to address the x, y and z dimensions of the 3D grid)
constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;
// Externally defined update kernel with C linkage. The accompanying Makefile
// builds update_new.o from update_new.f90, so this is presumably the Fortran
// implementation of the Gray-Scott update; every argument, scalars included,
// is passed by pointer.
// NOTE(review): judging by the names, lo/hi look like the index bounds of the
// region to update, u/v the input fields and flu/flv the output fields, each
// followed by the lower/upper index bounds of its array - confirm against
// update_new.f90 before relying on this.
extern "C" void update_new(const int* lo, const int* hi,
double* u, const int* ulo, const int* uhi,
double* v, const int* vlo, const int* vhi,
double* flu, const int* fulo, const int* fuhi,
double* flv, const int* fvlo, const int* fvhi,
const double * dt, const double * uFactor, const double * vFactor, const double * F,
const double * K);
//! \cond [constants] \endcond
/*! \brief Set the initial condition of the U and V fields
 *
 * Every domain point of the old fields is set to U = 1.0, V = 0.0 and every
 * domain point of the new fields is zeroed. Afterwards a box-shaped sub-region
 * of the old fields is overwritten with perturbed values:
 * U = 0.5 +/- 0.05 and V = 0.25 +/- 0.025, drawn with std::rand() (the
 * generator is not seeded here).
 *
 * The box spans, along each axis d, the grid indices from
 * size(d)*1.55/domain.getHigh(d) to size(d)*1.85/domain.getHigh(d).
 *
 * \param OldU field U read at the first time step
 * \param OldV field V read at the first time step
 * \param NewU field U written at the first time step
 * \param NewV field V written at the first time step
 * \param domain simulation domain, only its upper bounds are read
 *
 */
void init(grid_dist_id<3,double,aggregate<double> > & OldU,
          grid_dist_id<3,double,aggregate<double> > & OldV,
          grid_dist_id<3,double,aggregate<double> > & NewU,
          grid_dist_id<3,double,aggregate<double> > & NewV,
          Box<3,double> & domain)
{
	auto it = OldU.getDomainIterator();

	while (it.isNext())
	{
		// Get the local grid key
		auto key = it.get();

		// Old values U and V
		OldU.get(key) = 1.0;
		OldV.get(key) = 0.0;

		// New values U and V
		NewU.get(key) = 0.0;
		NewV.get(key) = 0.0;

		++it;
	}

	// Index bounds of the perturbed box. Each axis uses its own grid extent:
	// the z bounds were previously computed from size(1), which is only
	// correct when the grid has the same number of points in y and z.
	long int x_start = OldU.size(0)*1.55f/domain.getHigh(0);
	long int y_start = OldU.size(1)*1.55f/domain.getHigh(1);
	long int z_start = OldU.size(2)*1.55f/domain.getHigh(2);

	long int x_stop = OldU.size(0)*1.85f/domain.getHigh(0);
	long int y_stop = OldU.size(1)*1.85f/domain.getHigh(1);
	long int z_stop = OldU.size(2)*1.85f/domain.getHigh(2);

	grid_key_dx<3> start({x_start,y_start,z_start});
	grid_key_dx<3> stop ({x_stop,y_stop,z_stop});
	auto it_init = OldU.getSubDomainIterator(start,stop);

	while (it_init.isNext())
	{
		auto key = it_init.get();

		// Uniform noise around the base value: +/- 0.05 for U, +/- 0.025 for V
		OldU.get(key) = 0.5 + (((double)std::rand())/RAND_MAX -0.5)/10.0;
		OldV.get(key) = 0.25 + (((double)std::rand())/RAND_MAX -0.5)/20.0;

		++it_init;
	}
}
//! \cond [vectorization] \endcond
void step(grid_dist_id<3, double, aggregate<double>> & OldU,
grid_dist_id<3, double, aggregate<double>> & OldV,
grid_dist_id<3, double, aggregate<double>> & NewU,
grid_dist_id<3, double, aggregate<double>> & NewV,
grid_key_dx<3> (& star_stencil_3D)[7],
double uFactor_s, double vFactor_s, double deltaT, double F, double K)
{
#ifndef FORTRAN_UPDATE
//! \cond [cpp_update] \endcond
Vc::double uFactor = uFactor_s;
Vc::double vFactor = vFactor_s;
WHILE_M(OldU,star_stencil_3D)
auto & U_old = GET_GRID_M(OldU);
auto & V_old = GET_GRID_M(OldV);
auto & U_new = GET_GRID_M(NewU);
auto & V_new = GET_GRID_M(NewV);
ITERATE_3D_M(Vc::double_v::Size)
// center point
auto Cp = it.getStencil<0>();
// plus,minus X,Y,Z
auto mx = it.getStencil<1>();
auto px = it.getStencil<2>();
auto my = it.getStencil<3>();
auto py = it.getStencil<4>();
auto mz = it.getStencil<5>();
auto pz = it.getStencil<6>();
//
Vc::double_v u_c(&U_old.get<0>(Cp),Vc::Unaligned);
Vc::double_v u_mz(&U_old.get<0>(mz),Vc::Unaligned);
Vc::double_v u_pz(&U_old.get<0>(pz),Vc::Unaligned);
Vc::double_v u_my(&U_old.get<0>(my),Vc::Unaligned);
Vc::double_v u_py(&U_old.get<0>(py),Vc::Unaligned);
Vc::double_v u_mx(&U_old.get<0>(mx),Vc::Unaligned);
Vc::double_v u_px(&U_old.get<0>(px),Vc::Unaligned);
Vc::double_v v_c(&V_old.get<0>(Cp),Vc::Unaligned);
Vc::double_v v_mz(&V_old.get<0>(mz),Vc::Unaligned);
Vc::double_v v_pz(&V_old.get<0>(pz),Vc::Unaligned);
Vc::double_v v_my(&V_old.get<0>(my),Vc::Unaligned);
Vc::double_v v_py(&V_old.get<0>(py),Vc::Unaligned);
Vc::double_v v_mx(&V_old.get<0>(mx),Vc::Unaligned);
Vc::double_v v_px(&V_old.get<0>(px),Vc::Unaligned);
Vc::double_v out1 = u_c + uFactor * (u_mz + u_pz +
u_my + u_py +
u_mx + u_px +
- 6.0 * u_c) +
- deltaT * u_c * v_c * v_c
- deltaT * F * (u_c - 1.0);
Vc::double_v out2 = v_c + vFactor * (v_mz + v_pz +
v_my + v_py +
v_mx + v_px +
- 6.0 * v_c ) +
deltaT * u_c * v_c * v_c +
- deltaT * (F+K) * v_c;
out1.store(&U_new.get<0>(Cp),Vc::Unaligned);
out2.store(&V_new.get<0>(Cp),Vc::Unaligned);
END_LOOP_M(Vc::double_v::Size)
//! \cond [cpp_update] \endcond
#else