Commit ceb2d51f authored by incardon's avatar incardon
Browse files

Fastest sparse grid stencil implementation

parent 6314ffe5
Pipeline #1877 failed with stages
in 15 seconds
......@@ -428,14 +428,14 @@ elif [ -d "$i_dir/HDF5/lib64" ]; then
hdf5_lib=$i_dir/HDF5/lib64
fi
echo "INCLUDE_PATH=$cuda_include_dirs -Wno-deprecated-declarations $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "LIBS_PATH=$openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib -L$hdf5_lib -L$i_dir/LIBHILBERT/lib $lin_alg_dir " >> example.mk
echo "INCLUDE_PATH=$cuda_include_dirs -Wno-deprecated-declarations $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/VCDEVEL/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "LIBS_PATH=$openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/VCDEVEL/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib -L$hdf5_lib -L$i_dir/LIBHILBERT/lib $lin_alg_dir " >> example.mk
if [ x"$gpu_support" == x"1" ]; then
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $(cat cuda_lib) $lin_alg_lib -ldl" >> example.mk
echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $(cat cuda_lib) $lin_alg_lib" >> example.mk
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib -ldl" >> example.mk
echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $(cat cuda_lib) $lin_alg_lib" >> example.mk
else
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $lin_alg_lib -ldl" >> example.mk
echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $lin_alg_lib -ldl" >> example.mk
echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lboost_program_options -lhdf5 -llibhilbert -lVc $lin_alg_lib" >> example.mk
fi
echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) "$(cat cuda_options)" -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk
cp example.mk src/example.mk
......
openfpm_data @ 5e00d1f0
Subproject commit 372e8b26afb369779f9c86ef67f6e7546120490f
Subproject commit 5e00d1f0f7fbbd4e4649c41284dc83d3df393222
......@@ -878,27 +878,29 @@ public:
/*! \brief Move down (to finer level) the key
*
* \param i level
* \param key of grid at level i
* \param lvl level
* \param key multi-resolution AMR key
*
*/
template<typename bg_key>
void moveLvlDw(int i, grid_dist_key_dx<dim,bg_key> & key)
grid_dist_key_dx<dim> moveDw(int lvl, const grid_dist_key_dx<dim> & key)
{
#ifdef SE_CLASS1
if (i >= this->getNLvl() - 1)
if (lvl >= getNLvl() - 1)
{std::cerr << __FILE__ << ":" << __LINE__ << " error: we are already at the last level, we cannot go one level down" << std::endl;}
#endif
auto & key_ref = key.getKeyRef();
size_t lvl = i;
grid_dist_key_dx<dim> out;
for (size_t j = 0 ; j < dim ; j++)
for (size_t i = 0 ; i < dim ; i++)
{
key_ref.set_d(j,(key_ref.get(j) << 1) + mv_off.get(i).get(key.getSub()).dw.get(j) );
out.getKeyRef().set_d(i,(key.getKeyRef().get(i) << 1) + mv_off.get(lvl).get(key.getSub()).dw.get(i) );
}
out.setSub(key.getSub());
return out;
}
/*! \brief From a distributed key it return a AMR key that contain also the grid level
......@@ -937,6 +939,33 @@ public:
key.setLvl(lvl-1);
}
/*! \brief Move up (to coarser level) the key
*
* \param lvl level
* \param key multi-resolution AMR key
*
*/
grid_dist_key_dx<dim> moveUp(int lvl, const grid_dist_key_dx<dim> & key)
{
#ifdef SE_CLASS1
if (lvl == 0)
{std::cerr << __FILE__ << ":" << __LINE__ << " error: we are already at the top level, we cannot go one level up" << std::endl;}
#endif
grid_dist_key_dx<dim> out;
for (size_t i = 0 ; i < dim ; i++)
{
out.getKeyRef().set_d(i,(key.getKeyRef().get(i) - mv_off.get(lvl).get(key.getSub()).up.get(i)) >> 1);
}
out.setSub(key.getSub());
return out;
}
/*! \brief Get the position on the grid in global coordinates
*
* \param v1 amr key
......
......@@ -155,7 +155,7 @@ private:
template<unsigned int p_sub> void fill_domain(Graph & graph,const Box<dim,size_t> & box, long int ids)
{
// Create a subgrid iterator
grid_key_dx_iterator_sub<dim,no_stencil,do_not_print_warning_on_adjustment<dim>> g_sub(gh,box.getKP1(),box.getKP2());
grid_key_dx_iterator_sub<dim,no_stencil,grid_sm<dim,void>,do_not_print_warning_on_adjustment<dim,grid_sm<dim,void>>> g_sub(gh,box.getKP1(),box.getKP2());
// iterate through all grid points
......@@ -218,7 +218,7 @@ private:
for (size_t d = 0 ; d < v_w.size() ; d++)
{
// Create a sub-grid iterator
grid_key_dx_iterator_sub_bc<dim,no_stencil,do_not_print_warning_on_adjustment<dim>> g_sub(gh,v_w.template get<wavefront<dim>::start>(d),v_w.template get<wavefront<dim>::stop>(d),bc);
grid_key_dx_iterator_sub_bc<dim,no_stencil,grid_sm<dim,void>,do_not_print_warning_on_adjustment<dim,grid_sm<dim,void>>> g_sub(gh,v_w.template get<wavefront<dim>::start>(d),v_w.template get<wavefront<dim>::stop>(d),bc);
// iterate through all grid points
......@@ -307,7 +307,7 @@ private:
// Create an iterator of the expanded wavefront
grid_key_dx<dim> start = grid_key_dx<dim>(v_w.template get<wavefront<dim>::start>(d)) + w_comb[d];
grid_key_dx<dim> stop = grid_key_dx<dim>(v_w.template get<wavefront<dim>::stop>(d)) + w_comb[d];
grid_key_dx_iterator_sub<dim,no_stencil,do_not_print_warning_on_adjustment<dim>> it(gh,start,stop);
grid_key_dx_iterator_sub<dim,no_stencil,grid_sm<dim,void>,do_not_print_warning_on_adjustment<dim,grid_sm<dim,void>>> it(gh,start,stop);
// for each sub-domain in the expanded wavefront
while (it.isNext())
......
......@@ -2490,6 +2490,65 @@ public:
dist.setDistTol(md.distributionTol());
}
/*! \brief apply a convolution using the stencil N
*
*
*/
template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, unsigned int N, typename lambda_f, typename ... ArgsT >
void conv(int (& stencil)[N][dim], grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
Box<dim,long int> inte;
Box<dim,long int> base;
for (int j = 0 ; j < dim ; j++)
{
base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j));
base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j));
}
Box<dim,long int> dom = gdb_ext.get(i).Dbox;
bool overlap = dom.Intersect(base,inte);
if (overlap == true)
{
loc_grid.get(i).template conv<prop_src,prop_dst,stencil_size>(stencil,inte.getKP1(),inte.getKP2(),func,args...);
}
}
}
/*! \brief apply a convolution using the stencil N
*
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, unsigned int N, typename lambda_f, typename ... ArgsT >
void conv2(int (& stencil)[N][dim], grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
Box<dim,long int> inte;
Box<dim,long int> base;
for (int j = 0 ; j < dim ; j++)
{
base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j));
base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j));
}
Box<dim,long int> dom = gdb_ext.get(i).Dbox;
bool overlap = dom.Intersect(base,inte);
if (overlap == true)
{
loc_grid.get(i).template conv2<prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size>(stencil,inte.getKP1(),inte.getKP2(),func,args...);
}
}
}
/*! \brief Write the distributed grid information
*
* * grid_X.vtk Output each local grids for each local processor X
......@@ -2501,7 +2560,7 @@ public:
* \return true if the write operation succeed
*
*/
bool write(std::string output, size_t opt = VTK_WRITER | FORMAT_ASCII )
bool write(std::string output, size_t opt = VTK_WRITER | FORMAT_BINARY )
{
#ifdef SE_CLASS2
check_valid(this,8);
......@@ -2598,9 +2657,9 @@ public:
*
*/
template<unsigned int Np>
grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>> get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np])
grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>,typename device_grid::linearizer_type> get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np])
{
return grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>>(loc_grid.get(i).getGrid(),
return grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>,typename device_grid::linearizer_type>(loc_grid.get(i).getGrid(),
gdb_ext.get(i).Dbox.getKP1(),
gdb_ext.get(i).Dbox.getKP2(),
stencil_pnt);
......@@ -2937,6 +2996,10 @@ public:
template<unsigned int dim, typename St, typename T>
using sgrid_dist_id = grid_dist_id<dim,St,T,CartDecomposition<dim,St>,HeapMemory,sgrid_cpu<dim,T,HeapMemory>>;
template<unsigned int dim, typename St, typename T>
using sgrid_dist_soa = grid_dist_id<dim,St,T,CartDecomposition<dim,St>,HeapMemory,sgrid_soa<dim,T,HeapMemory>>;
#ifdef __NVCC__
template<unsigned int dim, typename St, typename T>
using sgrid_dist_id_gpu = grid_dist_id<dim,St,T,CartDecomposition<dim,St,CudaMemory,memory_traits_inte>,CudaMemory,SparseGridGpu<dim,T>>;
......
......@@ -1276,6 +1276,8 @@ void Test3D_periodic(const Box<3,float> & domain, long int k)
}
}
// Test grid periodic
void Test3D_periodic_put(const Box<3,float> & domain, long int k)
......
......@@ -20,6 +20,111 @@ const int z = 2;
BOOST_AUTO_TEST_SUITE( sgrid_dist_id_test )
BOOST_AUTO_TEST_CASE (sgrid_dist_id_soa )
{
periodicity<3> bc = {PERIODIC, PERIODIC, PERIODIC};
// Domain
Box<3,double> domain({-0.3,-0.3,-0.3},{1.0,1.0,1.0});
// grid size
size_t sz[3];
sz[0] = 100;
sz[1] = 100;
sz[2] = 100;
// Ghost
Ghost<3,double> g(0.01);
sgrid_dist_soa<3,double,Point_test<float>> sg(sz,domain,g,bc);
// create a grid iterator over a bilion point
auto it = sg.getGridIterator();
while(it.isNext())
{
auto gkey = it.get();
auto key = it.get_dist();
size_t sx = gkey.get(0) - 512;
size_t sy = gkey.get(1) - 512;
size_t sz = gkey.get(2) - 512;
if (sx*sx + sy*sy + sz*sz < 128*128)
{
sg.template insert<0>(key) = 1.0;
}
++it;
}
bool match = true;
auto it2 = sg.getGridIterator();
while(it2.isNext())
{
auto gkey = it2.get();
auto key = it2.get_dist();
size_t sx = gkey.get(0) - 512;
size_t sy = gkey.get(1) - 512;
size_t sz = gkey.get(2) - 512;
if (sx*sx + sy*sy + sz*sz < 128*128)
{
match &= (sg.template get<0>(key) == 1.0);
}
++it2;
}
auto & gr = sg.getGridInfo();
auto it3 = sg.getDomainIterator();
while (it3.isNext())
{
auto key = it3.get();
auto gkey = it3.getGKey(key);
sg.template insert<0>(key) = gkey.get(0)*gkey.get(0) + gkey.get(1)*gkey.get(1) + gkey.get(2)*gkey.get(2);
++it3;
}
sg.ghost_get<0>();
// now we check the stencil
bool good = true;
auto it4 = sg.getDomainIterator();
while (it4.isNext())
{
auto key = it4.get();
auto gkey = it4.getGKey(key);
size_t sx = gkey.get(0) - 512;
size_t sy = gkey.get(1) - 512;
size_t sz = gkey.get(2) - 512;
double lap;
lap = sg.template get<0>(key.move(x,1)) + sg.template get<0>(key.move(x,-1)) +
sg.template get<0>(key.move(y,1)) + sg.template get<0>(key.move(y,-1)) +
sg.template get<0>(key.move(z,1)) + sg.template get<0>(key.move(z,-1)) -
6.0*sg.template get<0>(key);
good &= (lap == 6.0);
++it4;
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( sgrid_dist_id_basic_test_2D)
{
periodicity<2> bc = {NON_PERIODIC, NON_PERIODIC};
......@@ -247,4 +352,178 @@ BOOST_AUTO_TEST_CASE( sgrid_dist_id_basic_test)
}
BOOST_AUTO_TEST_CASE( sparse_grid_fast_stencil_vectorized_simplified_conv2)
{
constexpr int U = 0;
constexpr int V = 1;
constexpr int U_next = 2;
constexpr int V_next = 3;
constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;
Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
// grid size
size_t sz[3] = {32,32,32};
// Define periodicity of the grid
periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
// Ghost in grid unit
Ghost<3,long int> g(1);
// deltaT
double deltaT = 1;
// Diffusion constant for specie U
double du = 2*1e-5;
// Diffusion constant for specie V
double dv = 1*1e-5;
// Number of timesteps
size_t timeSteps = 5000;
// K and F (Physical constant in the equation)
double K = 0.053;
double F = 0.014;
sgrid_dist_soa<3, double, aggregate<double,double,double,double>> grid(sz,domain,g,bc);
auto it = grid.getGridIterator();
while (it.isNext())
{
// Get the local grid key
auto key = it.get_dist();
// Old values U and V
grid.template insert<U>(key) = 1.0;
grid.template insert<V>(key) = 0.0;
// Old values U and V
grid.template insert<U_next>(key) = 0.0;
grid.template insert<V_next>(key) = 0.0;
++it;
}
long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
long int z_start = grid.size(1)*1.55f/domain.getHigh(2);
long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
long int z_stop = grid.size(1)*1.85f/domain.getHigh(2);
grid_key_dx<3> start({x_start,y_start,z_start});
grid_key_dx<3> stop ({x_stop,y_stop,z_stop});
auto it_init = grid.getGridIterator(start,stop);
while (it_init.isNext())
{
auto key = it_init.get_dist();
grid.template insert<U>(key) = 0.5 + (((double)std::rand())/RAND_MAX -0.5)/10.0;
grid.template insert<V>(key) = 0.25 + (((double)std::rand())/RAND_MAX -0.5)/20.0;
++it_init;
}
// spacing of the grid on x and y
double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};
// sync the ghost
size_t count = 0;
grid.template ghost_get<U,V>();
// because we assume that spacing[x] == spacing[y] we use formula 2
// and we calculate the prefactor of Eq 2
double uFactor = deltaT * du/(spacing[x]*spacing[x]);
double vFactor = deltaT * dv/(spacing[x]*spacing[x]);
int stencil[6][3] = {{1,0,0},{-1,0,0},{0,-1,0},{0,1,0},{0,0,-1},{0,0,1}};
//! \cond [stencil get and use] \endcond
auto func = [uFactor,vFactor,deltaT,F,K](Vc::double_v & u_out,Vc::double_v & v_out,
Vc::double_v (& u)[7],Vc::double_v (& v)[7],
unsigned char * mask){
u_out = u[0] + uFactor *(u[1] + u[2] +
u[3] + u[4] +
u[5] + u[6] - 6.0*u[0]) - deltaT * u[0]*v[0]*v[0]
- deltaT * F * (u[0] - 1.0);
v_out = v[0] + vFactor *(v[1] + v[2] +
v[3] + v[4] +
v[5] + v[6] - 6.0*v[0]) + deltaT * u[0]*v[0]*v[0]
- deltaT * (F+K) * v[0];
};
grid.conv2<U,V,U_next,V_next,1>(stencil,{0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
grid.conv2<U,V,U_next,V_next,1>(stencil,{0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
bool match = true;
{
auto it = grid.getDomainIterator();
double max_U = 0.0;
double max_V = 0.0;
grid_dist_key_dx<3> k_max;
while (it.isNext())
{
// center point
auto Cp = it.get();
// plus,minus X,Y,Z
auto mx = Cp.move(0,-1);
auto px = Cp.move(0,+1);
auto my = Cp.move(1,-1);
auto py = Cp.move(1,1);
auto mz = Cp.move(2,-1);
auto pz = Cp.move(2,1);
// update based on Eq 2
if ( fabs(grid.get<U>(Cp) + uFactor * (
grid.get<U>(mz) +
grid.get<U>(pz) +
grid.get<U>(my) +
grid.get<U>(py) +
grid.get<U>(mx) +
grid.get<U>(px) -
6.0*grid.get<U>(Cp)) +
- deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) +
- deltaT * F * (grid.get<U>(Cp) - 1.0) - grid.get<U_next>(Cp)) > 0.000000001 )
{
match = false;
}
// update based on Eq 2
if ( fabs(grid.get<V>(Cp) + vFactor * (
grid.get<V>(mz) +
grid.get<V>(pz) +
grid.get<V>(my) +
grid.get<V>(py) +
grid.get<V>(mx) +
grid.get<V>(px) -
6*grid.get<V>(Cp)) +
deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) +
- deltaT * (F+K) * grid.get<V>(Cp) - grid.get<V_next>(Cp)) > 0.000000001 )
{
match = false;
}
++it;
}
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment