Commit 6414b522 authored by incardon's avatar incardon
Browse files

conv crossing with lamda compiling

parent ceb2d51f
Pipeline #1878 failed with stages
in 6 seconds
......@@ -167,6 +167,10 @@ install(FILES Grid/grid_dist_id.hpp
Grid/staggered_dist_grid_copy.hpp
DESTINATION openfpm_pdata/include/Grid/ )
install(FILES Grid/cuda/grid_dist_id_kernels.cuh
Grid/cuda/grid_dist_id_iterator_gpu.cuh
DESTINATION openfpm_pdata/include/Grid/cuda/ )
install(FILES Amr/grid_dist_amr_key_iterator.hpp
Amr/grid_dist_amr_key.hpp
Amr/grid_dist_amr.hpp
......
......@@ -66,64 +66,37 @@ __global__ void insert_remove_icell(vector_sparse_type vs, vector_sparse_type vs
vsi.flush_block_remove(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0);
}
#endif
template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory>
class domain_icell_calculator
template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory, typename cnt_type, typename ids_type, bool is_gpu>
struct CalculateInternalCells_impl
{
typedef unsigned int cnt_type;
typedef int ids_type;
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> icells;
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> dcells;
CellDecomposer_sm<dim,T,shift<dim,T>> cd;
public:
/*! \brief Calculate the subdomain that are in the skin part of the domain
*
\verbatim
+---+---+---+---+---+---+
| 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+
|28 | | 7 |
+---+ +---+
|27 | | 8 |
+---+ +---+
|26 | | 9 |
+---+ DOM1 +---+
|25 | |10 |
+---+ +---+
|24 | |11 |
+---+ +---+---+
|23 | |13 |12 |
+---+-----------+---+---+
|22 | |14 |
+---+ +---+
|21 | DOM2 |15 |
+---+---+---+---+---+
|20 |19 |18 | 17|16 |
+---+---+---+---+---+ <----- Domain end here
|
^ |
|_____________|
template<typename VCluster_type>
static void CalculateInternalCells(VCluster_type & v_cl,
openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> & ig_box,
openfpm::vector<SpaceBox<dim,T>,Memory,typename layout_base<SpaceBox<dim,T>>::type,layout_base> & domain,
Box<dim,T> & pbox,
T r_cut,
const Ghost<dim,T> & enlarge,
CellDecomposer_sm<dim,T,shift<dim,T>> & cd,
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> & icells,
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> & dcells)
{
}
};
\endverbatim
*
* It does it on GPU or CPU
*
*/
template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory, typename cnt_type, typename ids_type>
struct CalculateInternalCells_impl<dim,T,layout_base,Memory,cnt_type,ids_type,true>
{
template<typename VCluster_type>
void CalculateInternalCells(VCluster_type & v_cl,
openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> & ig_box,
openfpm::vector<SpaceBox<dim,T>,Memory,typename layout_base<SpaceBox<dim,T>>::type,layout_base> & domain,
Box<dim,T> & pbox,
T r_cut,
const Ghost<dim,T> & enlarge)
static void CalculateInternalCells(VCluster_type & v_cl,
openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> & ig_box,
openfpm::vector<SpaceBox<dim,T>,Memory,typename layout_base<SpaceBox<dim,T>>::type,layout_base> & domain,
Box<dim,T> & pbox,
T r_cut,
const Ghost<dim,T> & enlarge,
CellDecomposer_sm<dim,T,shift<dim,T>> & cd,
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> & icells,
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> & dcells)
{
#ifdef __NVCC__
......@@ -216,6 +189,71 @@ class domain_icell_calculator
vs.swapIndexVector(icells);
vsi.swapIndexVector(dcells);
#endif
}
};
#endif
template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory>
class domain_icell_calculator
{
typedef unsigned int cnt_type;
typedef int ids_type;
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> icells;
openfpm::vector<aggregate<ids_type>,Memory,typename layout_base<aggregate<ids_type>>::type,layout_base> dcells;
CellDecomposer_sm<dim,T,shift<dim,T>> cd;
public:
/*! \brief Calculate the subdomain that are in the skin part of the domain
*
\verbatim
+---+---+---+---+---+---+
| 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+
|28 | | 7 |
+---+ +---+
|27 | | 8 |
+---+ +---+
|26 | | 9 |
+---+ DOM1 +---+
|25 | |10 |
+---+ +---+
|24 | |11 |
+---+ +---+---+
|23 | |13 |12 |
+---+-----------+---+---+
|22 | |14 |
+---+ +---+
|21 | DOM2 |15 |
+---+---+---+---+---+
|20 |19 |18 | 17|16 |
+---+---+---+---+---+ <----- Domain end here
|
^ |
|_____________|
\endverbatim
*
* It does it on GPU or CPU
*
*/
template<typename VCluster_type>
void CalculateInternalCells(VCluster_type & v_cl,
openfpm::vector<Box<dim,T>,Memory,typename layout_base<Box<dim,T>>::type,layout_base> & ig_box,
openfpm::vector<SpaceBox<dim,T>,Memory,typename layout_base<SpaceBox<dim,T>>::type,layout_base> & domain,
Box<dim,T> & pbox,
T r_cut,
const Ghost<dim,T> & enlarge)
{
#ifdef __NVCC__
CalculateInternalCells_impl<dim,T,layout_base,Memory,cnt_type,ids_type,std::is_same<Memory,CudaMemory>::value>::CalculateInternalCells(v_cl,ig_box,domain,pbox,r_cut,enlarge,cd,icells,dcells);
#endif
}
......
......@@ -2411,8 +2411,6 @@ public:
grid_key_dx<dim> cnt[1];
cnt[0].zero();
size_t ele_id;
for (size_t i = 0 ; i < this->getN_loc_grid() ; i++)
{
auto & dst = this->get_loc_grid(i);
......@@ -2519,6 +2517,35 @@ public:
}
}
/*! \brief apply a convolution using the stencil N
*
*
*/
template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv_cross(grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
Box<dim,long int> inte;
Box<dim,long int> base;
for (int j = 0 ; j < dim ; j++)
{
base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j));
base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j));
}
Box<dim,long int> dom = gdb_ext.get(i).Dbox;
bool overlap = dom.Intersect(base,inte);
if (overlap == true)
{
loc_grid.get(i).template conv_cross<prop_src,prop_dst,stencil_size>(inte.getKP1(),inte.getKP2(),func,args...);
}
}
}
/*! \brief apply a convolution using the stencil N
*
*
......@@ -2548,6 +2575,34 @@ public:
}
}
/*! \brief apply a convolution using the stencil N
*
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv_cross2(grid_key_dx<3> start, grid_key_dx<3> stop , lambda_f func, ArgsT ... args)
{
for (int i = 0 ; i < loc_grid.size() ; i++)
{
Box<dim,long int> inte;
Box<dim,long int> base;
for (int j = 0 ; j < dim ; j++)
{
base.setLow(j,(long int)start.get(j) - (long int)gdb_ext.get(i).origin.get(j));
base.setHigh(j,(long int)stop.get(j) - (long int)gdb_ext.get(i).origin.get(j));
}
Box<dim,long int> dom = gdb_ext.get(i).Dbox;
bool overlap = dom.Intersect(base,inte);
if (overlap == true)
{
loc_grid.get(i).template conv_cross2<prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size>(inte.getKP1(),inte.getKP2(),func,args...);
}
}
}
/*! \brief Write the distributed grid information
*
......@@ -2657,7 +2712,8 @@ public:
*
*/
template<unsigned int Np>
grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>,typename device_grid::linearizer_type> get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np])
grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>,typename device_grid::linearizer_type>
get_loc_grid_iterator_stencil(size_t i,const grid_key_dx<dim> (& stencil_pnt)[Np])
{
return grid_key_dx_iterator_sub<dim,stencil_offset_compute<dim,Np>,typename device_grid::linearizer_type>(loc_grid.get(i).getGrid(),
gdb_ext.get(i).Dbox.getKP1(),
......
......@@ -526,4 +526,178 @@ BOOST_AUTO_TEST_CASE( sparse_grid_fast_stencil_vectorized_simplified_conv2)
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( sparse_grid_fast_stencil_vectorized_simplified_conv2_crossing)
{
constexpr int U = 0;
constexpr int V = 1;
constexpr int U_next = 2;
constexpr int V_next = 3;
constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;
Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
// grid size
size_t sz[3] = {32,32,32};
// Define periodicity of the grid
periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
// Ghost in grid unit
Ghost<3,long int> g(1);
// deltaT
double deltaT = 1;
// Diffusion constant for specie U
double du = 2*1e-5;
// Diffusion constant for specie V
double dv = 1*1e-5;
// Number of timesteps
size_t timeSteps = 5000;
// K and F (Physical constant in the equation)
double K = 0.053;
double F = 0.014;
sgrid_dist_soa<3, double, aggregate<double,double,double,double>> grid(sz,domain,g,bc);
auto it = grid.getGridIterator();
while (it.isNext())
{
// Get the local grid key
auto key = it.get_dist();
// Old values U and V
grid.template insert<U>(key) = 1.0;
grid.template insert<V>(key) = 0.0;
// Old values U and V
grid.template insert<U_next>(key) = 0.0;
grid.template insert<V_next>(key) = 0.0;
++it;
}
long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
long int z_start = grid.size(1)*1.55f/domain.getHigh(2);
long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
long int z_stop = grid.size(1)*1.85f/domain.getHigh(2);
grid_key_dx<3> start({x_start,y_start,z_start});
grid_key_dx<3> stop ({x_stop,y_stop,z_stop});
auto it_init = grid.getGridIterator(start,stop);
while (it_init.isNext())
{
auto key = it_init.get_dist();
grid.template insert<U>(key) = 0.5 + (((double)std::rand())/RAND_MAX -0.5)/10.0;
grid.template insert<V>(key) = 0.25 + (((double)std::rand())/RAND_MAX -0.5)/20.0;
++it_init;
}
// spacing of the grid on x and y
double spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};
// sync the ghost
size_t count = 0;
grid.template ghost_get<U,V>();
// because we assume that spacing[x] == spacing[y] we use formula 2
// and we calculate the prefactor of Eq 2
double uFactor = deltaT * du/(spacing[x]*spacing[x]);
double vFactor = deltaT * dv/(spacing[x]*spacing[x]);
//! \cond [stencil get and use] \endcond
auto func = [uFactor,vFactor,deltaT,F,K](Vc::double_v & u_out,Vc::double_v & v_out,
Vc::double_v & u,Vc::double_v & v,
cross_stencil_v & us,cross_stencil_v & vs,
unsigned char * mask){
u_out = u + uFactor *(us.xm + us.xp +
us.ym + us.yp +
us.zm + us.zp - 6.0*u) - deltaT * u*v*v
- deltaT * F * (u - 1.0);
v_out = v + vFactor *(vs.xm + vs.xp +
vs.ym + vs.yp +
vs.zm + vs.zp - 6.0*v) + deltaT * u*v*v
- deltaT * (F+K) * v;
};
grid.conv_cross2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
grid.conv_cross2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
bool match = true;
{
auto it = grid.getDomainIterator();
double max_U = 0.0;
double max_V = 0.0;
grid_dist_key_dx<3> k_max;
while (it.isNext())
{
// center point
auto Cp = it.get();
// plus,minus X,Y,Z
auto mx = Cp.move(0,-1);
auto px = Cp.move(0,+1);
auto my = Cp.move(1,-1);
auto py = Cp.move(1,1);
auto mz = Cp.move(2,-1);
auto pz = Cp.move(2,1);
// update based on Eq 2
if ( fabs(grid.get<U>(Cp) + uFactor * (
grid.get<U>(mz) +
grid.get<U>(pz) +
grid.get<U>(my) +
grid.get<U>(py) +
grid.get<U>(mx) +
grid.get<U>(px) -
6.0*grid.get<U>(Cp)) +
- deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) +
- deltaT * F * (grid.get<U>(Cp) - 1.0) - grid.get<U_next>(Cp)) > 0.000000001 )
{
match = false;
}
// update based on Eq 2
if ( fabs(grid.get<V>(Cp) + vFactor * (
grid.get<V>(mz) +
grid.get<V>(pz) +
grid.get<V>(my) +
grid.get<V>(py) +
grid.get<V>(mx) +
grid.get<V>(px) -
6*grid.get<V>(Cp)) +
deltaT * grid.get<U>(Cp) * grid.get<V>(Cp) * grid.get<V>(Cp) +
- deltaT * (F+K) * grid.get<V>(Cp) - grid.get<V_next>(Cp)) > 0.000000001 )
{
match = false;
}
++it;
}
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment