Commit 61e2d5e0 authored by incardon's avatar incardon
Browse files

Fixing Allocations Free of memory

parent e9155785
Pipeline #462 failed with stages
in 4 seconds
# Change Log
All notable changes to this project will be documented in this file.
## [2.0.0] December 2018
### Added
- Adding GPU support (see example 3_molecular_dynamic_gpu)
### Changed
- The type Vcluster now is templated and the standard Vcluster is Vcluster<>
## [1.1.0] February 2018
### Added
......
......@@ -388,6 +388,9 @@ int main(int argc, char* argv[])
++it;
}
vd.map();
vd.ghost_get<>();
//! \cond [vect grid] \endcond
/*!
......
......@@ -53,7 +53,7 @@ int main(int argc, char* argv[])
openfpm_init(&argc,&argv);
Vcluster & vcl = create_vcluster();
Vcluster<> & vcl = create_vcluster();
size_t sz[3] = {10,10,10};
Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
......
......@@ -272,7 +272,7 @@ int main(int argc, char* argv[])
openfpm::vector<openfpm::vector<double>> y;
openfpm_init(&argc,&argv);
Vcluster & v_cl = create_vcluster();
Vcluster<> & v_cl = create_vcluster();
// we will use it do place particles on a 10x10x10 Grid like
size_t sz[3] = {10,10,10};
......
include ../../example.mk
CC_SCOREP=scorep --nocompiler --cuda --mpp=mpi nvcc
CC=${CC_SCOREP}
LDIR =
OBJ = main.o
all: md_dyn
%.o: %.cu
$(CC) -O3 -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
md_dyn: $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH_NVCC) $(LIBS) -L/home/i-bird/MPI/lib -L/usr/local/cuda/lib64 -lcudart -lmpi -L/usr/local/cuda/extras/CUPTI/lib64 -lhdf5
run: all
mpirun -np 3 ./md_dyn && mpirun -np 3 ./md_dyn_expr && mpirun -np 3 ./md_dyn_vl;
.PHONY: clean all run
clean:
rm -f *.o *~ core md_dyn md_dyn_expr md_dyn_vl
/*! \page Vector_3_md Vector 3 molecular dynamic
*
* \subpage Vector_3_md_dyn
* \subpage Vector_3_md_vl
*
*/
#include "Vector/vector_dist.hpp"
#include "Plot/GoogleChart.hpp"
#include "Plot/util.hpp"
#include "timer.hpp"
typedef float real_number;
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list on GPU
*
* [TOC]
*
* # Molecular Dynamic with Lennard-Jones potential # {#e3_md}
*
* This example show a simple Lennard-Jones molecular dynamic simulation in a stable regime.
* Particle feel each other by the potential. In this example we are going to use GPU instead of CPU
*
* \f$ V(x_p,x_q) = 4( (\frac{\sigma}{r})^{12} - (\frac{\sigma}{r})^6 ) \f$
*
* ## Constants ##
*
* Here we define some useful constants
*
* \snippet Vector/3_molecular_dynamic/main.cpp constants
*
*/
//! \cond [constants] \endcond
constexpr int velocity = 0;
constexpr int force = 1;
constexpr int energy = 2;
//! \cond [constants] \endcond
/*!
*
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* ## Calculate forces ## {#e3_md_cf}
*
* In this function we calculate the forces between particles. It require the vector of particles
* Cell list and scaling factor for the Lennard-Jhones potential.
*
* \snippet Vector/3_molecular_dynamic/main.cpp calc forces
* \snippet Vector/3_molecular_dynamic/main.cpp calc forces2
*
*
* In the following we are going into the detail of this function
*
*/
//! \cond [calc forces] \endcond
template<typename vector_dist_type,typename NN_type>
__global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number r_cut2)
{
auto p = GET_PARTICLE(vd);
// Get the position xp of the particle
Point<3,real_number> xp = vd.getPos(p);
// Reset the force counter
vd.template getProp<force>(p)[0] = 0.0;
vd.template getProp<force>(p)[1] = 0.0;
vd.template getProp<force>(p)[2] = 0.0;
// Get an iterator over the neighborhood particles of p
auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
// For each neighborhood particle ...
while (Np.isNext())
{
// ... q
auto q = Np.get();
// if (p == q) skip this particle
if (q == p) {++Np; continue;};
// Get the position of p
Point<3,real_number> xq = vd.getPos(q);
// Get the distance between p and q
Point<3,real_number> r = xp - xq;
// take the norm of this vector
real_number rn = norm2(r);
if (rn > r_cut2)
{++Np; continue;};
// Calculate the force, using pow is slower
Point<3,real_number> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r;
// we sum the force produced by q on p
vd.template getProp<force>(p)[0] += f.get(0);
vd.template getProp<force>(p)[1] += f.get(1);
vd.template getProp<force>(p)[2] += f.get(2);
// Next neighborhood
++Np;
}
}
template<typename vector_dist_type>
__global__ void update_velocity_position(vector_dist_type vd, real_number dt)
{
auto p = GET_PARTICLE(vd);
// here we calculate v(tn + 0.5)
vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
// here we calculate x(tn + 1)
vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;
}
template<typename vector_dist_type>
__global__ void update_velocity(vector_dist_type vd, real_number dt)
{
auto p = GET_PARTICLE(vd);
// here we calculate v(tn + 1)
vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
}
template<typename vector_dist_type,typename NN_type>
__global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number shift, real_number r_cut2)
{
auto p = GET_PARTICLE(vd);
// Get the position of the particle p
Point<3,real_number> xp = vd.getPos(p);
// Get an iterator over the neighborhood of the particle p
auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
real_number E = 0;
// For each neighborhood of the particle p
while (Np.isNext())
{
// Neighborhood particle q
auto q = Np.get();
// if p == q skip this particle
if (q == p) {++Np; continue;};
// Get position of the particle q
Point<3,real_number> xq = vd.getPos(q);
// take the normalized direction
real_number rn = norm2(xp - xq);
if (rn > r_cut2)
{++Np;continue;}
// potential energy (using pow is slower)
E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift;
// Next neighborhood
++Np;
}
// Kinetic energy of the particle given by its actual speed
vd.template getProp<energy>(p) = E + (vd.template getProp<velocity>(p)[0]*vd.template getProp<velocity>(p)[0] +
vd.template getProp<velocity>(p)[1]*vd.template getProp<velocity>(p)[1] +
vd.template getProp<velocity>(p)[2]*vd.template getProp<velocity>(p)[2]) / 2;
}
template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
{
//! \cond [calc forces] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* This function in called several time and require neighborhood of each particle. In order to speed-up the
* Cell-list construction we can use updateCellList function to reuse the memory of the previous cell-list.
* updateCellList can be faster than createCellList
*
* \see \ref e1_part_celllist
*
* \snippet Vector/3_molecular_dynamic/main.cpp ucl
*
*/
//! \cond [ucl] \endcond
vd.updateCellList(NN);
//! \cond [ucl] \endcond
/*!
*
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* Get an iterator over the particles and get its position. For each particle p iterate in its neighborhood q
* and calculate the force based on the Lennard-Jhones potential given by
*
* \f$ F(x_p,x_q) = 24( \frac{2 \sigma^{12}}{r^{13}} - \frac{\sigma^6}{r^{7}}) \hat{r} \f$
*
* \see \ref e0_s_assign_pos
*
* \snippet Vector/3_molecular_dynamic/main.cpp force calc
*
*/
//! \cond [force calc] \endcond
// Get an iterator over particles
auto it2 = vd.getDomainIteratorGPU();
calc_force_gpu<<<it2.wthr,it2.thr>>>(vd.toKernel(),NN.toKernel(),sigma12,sigma6,r_cut2);
//! \cond [force calc] \endcond
//! \cond [calc forces2] \endcond
}
//! \cond [calc forces2] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* ## Calculate energy ## {#e3_md_ce}
*
* We also need a function to calculate energy, this function require the same parameter as calculate forces
*
* \snippet Vector/3_molecular_dynamic/main.cpp calc energy
* \snippet Vector/3_molecular_dynamic/main.cpp calc energy2
*
*/
//! \cond [calc energy] \endcond
template<typename CellList> real_number calc_energy(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
{
real_number rc = r_cut2;
real_number shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
//! \cond [calc energy] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* Reset the counter for the energy counter and
* update the cell list from the actual particle configuration
*
* \snippet Vector/3_molecular_dynamic/main.cpp up cell ene
*
* In the following we are going into the detail of this function
*
*/
//! \cond [up cell ene] \endcond
vd.updateCellList(NN);
//! \cond [up cell ene] \endcond
/*!
*
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* First we get an iterator over the particles and get its position. For each particle p iterate in its neighborhood q
* and calculate the energy based on the Lennard-Jhones potential given by
*
* \f$ V(x_p,x_q) = 4(\frac{1}{r^{12}} - \frac{1}{r^{6}}) \f$
*
* \see \ref e0_s_assign_pos
*
* \snippet Vector/3_molecular_dynamic/main.cpp energy calc comp
*
*/
//! \cond [energy calc comp] \endcond
auto it2 = vd.getDomainIteratorGPU();
particle_energy<<<it2.wthr,it2.thr>>>(vd.toKernel(),NN.toKernel(),sigma12,sigma6,shift,r_cut2);
//! \cond [energy calc comp] \endcond
//! \cond [calc energy2] \endcond
// Calculated energy
return reduce<energy>(vd);
}
//! \cond [calc energy2] \endcond
int main(int argc, char* argv[])
{
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* ## Initialization ## {#e3_md_init}
*
* After we defined the two main function calc forces and calc energy, we Initialize
* the library, we create a Box that define our domain, boundary conditions and ghost.
* Than we define important parameters of the simulation, time step integration,
* size of the box, and cut-off radius of the interaction. We also define 2 vectors
* x and y (they are like std::vector) used for statistic
*
* \see \ref e0_s_init
*
* \snippet Vector/3_molecular_dynamic/main.cpp constants run
*
*/
//! \cond [constants run] \endcond
openfpm_init(&argc,&argv);
real_number sigma = 0.01;
real_number r_cut = 3.0*sigma;
// we will use it do place particles on a 10x10x10 Grid like
size_t sz[3] = {100,100,100};
// domain
Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
// Boundary conditions
size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
// ghost, big enough to contain the interaction radius
Ghost<3,float> ghost(r_cut);
real_number dt = 0.00005;
real_number sigma12 = pow(sigma,12);
real_number sigma6 = pow(sigma,6);
openfpm::vector<real_number> x;
openfpm::vector<openfpm::vector<real_number>> y;
//! \cond [constants run] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* Than we define a distributed vector in 3D, containing 2 vectorial properties the
* first is the actual velocity of the particle the other is the force
*
* \see \ref e0_s_vector_inst
*
* \snippet Vector/3_molecular_dynamic/main.cpp vect create
*
*/
//! \cond [vect create] \endcond
vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > vd(0,box,bc,ghost);
//! \cond [vect create] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* ## Particles on a grid like position ## {#e3_md_gl}
*
* We define a grid iterator, to create particles on a grid like way. In the same cycle we also reset
* force and velocity
*
* \see \ref e1_cl_gr_it
*
* \snippet Vector/3_molecular_dynamic/main.cpp vect grid
*
*/
//! \cond [vect grid] \endcond
// We create the grid iterator
auto it = vd.getGridIterator(sz);
while (it.isNext())
{
// Create a new particle
vd.add();
// key contain (i,j,k) index of the grid
auto key = it.get();
// The index of the grid can be accessed with key.get(0) == i, key.get(1) == j ...
// We use getLastPos to set the position of the last particle added
vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);
// We use getLastProp to set the property value of the last particle we added
vd.template getLastProp<velocity>()[0] = 0.0;
vd.template getLastProp<velocity>()[1] = 0.0;
vd.template getLastProp<velocity>()[2] = 0.0;
vd.template getLastProp<force>()[0] = 0.0;
vd.template getLastProp<force>()[1] = 0.0;
vd.template getLastProp<force>()[2] = 0.0;
++it;
}
vd.hostToDevicePos();
vd.hostToDeviceProp<velocity,force>();
vd.map(RUN_ON_DEVICE);
vd.ghost_get<>(RUN_ON_DEVICE);
//! \cond [vect grid] \endcond
/*!
* \page Vector_3_md_dyn Vector 3 molecular dynamic with cell-list
*
* ## Molecular dynamic steps ## {#e3_md_vi}
*
* Here we do 10000 MD steps using verlet integrator
*
* The verlet integration stepping look like this
*
* \f[ \vec{v}(t_{n}+1/2) = \vec{v}_p(t_n) + \frac{1}{2} \delta t \vec{a}(t_n) \f]
* \f[ \vec{x}(t_{n}+1) = \vec{x}_p(t_n) + \delta t \vec{v}(t_n+1/2) \f]
*
* calculate the forces from \f$ \vec{a} (t_{n}) \f$ finally
*
* \f[ \vec{v}(t_{n+1}) = \vec{v}_p(t_n+1/2) + \frac{1}{2} \delta t \vec{a}(t_n+1) \f]
*
* The cell-list structure is required to calculate forces. As demonstration
* purpose instead of using the standard Cell-list with (getCellList) we use the CELL_MEMBAL
* type. The impact in performance of using the CELL_MEMBAL instead of CELL_MEMFAST is less
* than 1% on the other hand CELL_MEMFAST use 16 Megabyte of memory
*
* \see \ref e1_cls_types
*
* Inside this cycle we are using several features that has been explained before in particular
*
* \see \ref e0_s_assign_pos
*
* \see \ref e0_s_map
*
* \see \ref e0_s_reduce
*
* \see \ref e1_part_ghost
*
* \see \ref e0_s_vis_vtk
*
* \snippet Vector/3_molecular_dynamic/main.cpp md steps
*
*/
timer tsim;
tsim.start();
//! \cond [md steps] \endcond
// Get the Cell list structure
auto NN = vd.getCellListGPU(r_cut);
// The standard
// auto NN = vd.getCellList(r_cut);
// calculate forces
calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
unsigned long int f = 0;
// MD time stepping
for (size_t i = 0; i < 100 ; i++)
{
// Get the iterator
auto it3 = vd.getDomainIteratorGPU();
update_velocity_position<<<it3.wthr,it3.thr>>>(vd.toKernel(),dt);
// Because we moved the particles in space we have to map them and re-sync the ghost
vd.map(RUN_ON_DEVICE);
vd.template ghost_get<>(RUN_ON_DEVICE);
// calculate forces or a(tn + 1) Step 2
calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
// Integrate the velocity Step 3
auto it4 = vd.getDomainIteratorGPU();
update_velocity<<<it4.wthr,it4.thr>>>(vd.toKernel(),dt);
// After every iteration collect some statistic about the configuration
if (i % 100 == 0)
{
vd.deviceToHostPos();
vd.deviceToHostProp<0,1,2>();
// We write the particle position for visualization (Without ghost)
vd.deleteGhost();
vd.write_frame("particles_",f);
// we resync the ghost
vd.ghost_get<>(RUN_ON_DEVICE);
// We calculate the energy
real_number energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);
auto & vcl = create_vcluster();
vcl.sum(energy);<