Commit 45013703 authored by incardon's avatar incardon

Fixing for GCC8

parent 6b5c7f13
......@@ -216,7 +216,7 @@ template<typename solver_type,typename lid_nn_3d> void lid_driven_cavity_3d()
#if __GNUC__ == 8
std::string file1 = std::string("test/") + s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC7.vtk";
std::string file1 = std::string("test/") + s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC8.vtk";
std::string file2 = s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk";
#elif __GNUC__ == 7
......
......@@ -150,7 +150,7 @@ struct pos_or_propR<vector,PROP_POS>
}
};
template<unsigned int prp ,int impl>
template<unsigned int prp ,bool is_sort, int impl>
struct vector_dist_op_compute_op
{
template<typename vector, typename expr>
......@@ -163,7 +163,7 @@ struct vector_dist_op_compute_op
};
template<unsigned int prp>
struct vector_dist_op_compute_op<prp,comp_host>
struct vector_dist_op_compute_op<prp,false,comp_host>
{
template<typename vector, typename expr>
static void compute_expr(vector & v,expr & v_exp)
......@@ -243,8 +243,46 @@ __global__ void compute_double_ker(vector vd, double d)
pos_or_propL_ker<vector,prp>::value(vd,p) = d;
}
/////////// SORTED VERSION //
/*! \brief CUDA kernel: evaluate a vector-valued expression per particle
 *         (sorted ordering) and store it component-wise into property prp
 *
 * \tparam prp destination property index
 * \tparam dim number of components to copy
 *
 * \param vd destination vector (kernel view)
 * \param NN cell-list/NN object used to map the thread to a particle index
 * \param v_exp expression to evaluate
 *
 */
template<unsigned int prp, unsigned int dim ,typename vector, typename NN_type, typename expr>
__global__ void compute_expr_ker_sort_vv(vector vd, NN_type NN, expr v_exp)
{
int p;
// map this CUDA thread to a particle index in sorted order
GET_PARTICLE_SORT(p,NN);
// copy the evaluated expression into property prp, one component at a time
unsigned int comp = 0;
while (comp < dim)
{
vd.template get<prp>(p)[comp] = v_exp.value(p).get(comp);
++comp;
}
}
/*! \brief CUDA kernel: evaluate an expression per particle (sorted ordering)
 *         and assign the whole result to property prp
 *
 * \param vd destination vector (kernel view)
 * \param NN cell-list/NN object used to map the thread to a particle index
 * \param v_exp expression to evaluate
 *
 */
template<unsigned int prp, typename vector, typename NN_type, typename expr>
__global__ void compute_expr_ker_sort_v(vector vd, NN_type NN, expr v_exp)
{
int p;
// derive the sorted-order particle index p for this thread from NN
// (assumes the macro also guards out-of-range threads -- TODO confirm)
GET_PARTICLE_SORT(p,NN);
vd.template get<prp>(p) = v_exp.value(p);
}
/*! \brief CUDA kernel: evaluate an expression per particle (sorted ordering)
 *         and write it through pos_or_propL_ker
 *
 * pos_or_propL_ker<vector,prp>::value() selects the destination lvalue
 * (presumably particle position vs. property prp depending on prp --
 * helper is defined elsewhere, verify there)
 *
 * \param vd destination vector (kernel view)
 * \param NN cell-list/NN object used to map the thread to a particle index
 * \param v_exp expression to evaluate
 *
 */
template<unsigned int prp, typename vector, typename expr, typename NN_type>
__global__ void compute_expr_ker_sort(vector vd, NN_type NN, expr v_exp)
{
int p;
// derive the sorted-order particle index p for this thread from NN
GET_PARTICLE_SORT(p,NN);
pos_or_propL_ker<vector,prp>::value(vd,p) = v_exp.value(p);
}
/////////////////////////////
template<unsigned int prp>
struct vector_dist_op_compute_op<prp,comp_dev>
struct vector_dist_op_compute_op<prp,false,comp_dev>
{
template<typename vector, typename expr>
static void compute_expr(vector & v,expr & v_exp)
......@@ -256,6 +294,26 @@ struct vector_dist_op_compute_op<prp,comp_dev>
compute_expr_ker<prp><<<ite.wthr,ite.thr>>>(v,v_exp);
}
template<typename vector, typename expr>
static void compute_expr_v(vector & v,expr & v_exp)
{
v_exp.init();
auto ite = v.getGPUIterator(256);
compute_expr_ker_v<prp><<<ite.wthr,ite.thr>>>(v,v_exp);
}
template<unsigned int dim, typename vector, typename expr>
static void compute_expr_vv(vector & v,expr & v_exp)
{
v_exp.init();
auto ite = v.getGPUIterator(256);
compute_expr_ker_vv<prp,dim><<<ite.wthr,ite.thr>>>(v,v_exp);
}
template<typename vector>
static void compute_const(vector & v,double d)
{
......@@ -265,6 +323,46 @@ struct vector_dist_op_compute_op<prp,comp_dev>
}
};
/*! \brief Specialization of vector_dist_op_compute_op for sorted device
 *         computation (is_sort == true, comp_dev)
 *
 * Launches the *_ker_sort kernels, which obtain the particle index via
 * GET_PARTICLE_SORT using the NN object taken from the expression itself
 * (v_exp.getNN()).
 *
 */
template<unsigned int prp>
struct vector_dist_op_compute_op<prp,true,comp_dev>
{
//! evaluate v_exp per particle and write through pos_or_propL_ker into prp
template<typename vector, typename expr>
static void compute_expr(vector & v,expr & v_exp)
{
v_exp.init();
// NOTE(review): this method sizes the launch with getDomainIteratorGPU(256)
// while compute_expr_v / compute_expr_vv below use getGPUIterator(256);
// confirm the asymmetry is intentional
auto ite = v.getDomainIteratorGPU(256);
auto NN = v_exp.getNN();
compute_expr_ker_sort<prp><<<ite.wthr,ite.thr>>>(v,*NN,v_exp);
}
//! evaluate v_exp per particle and assign the whole result to property prp
template<typename vector, typename expr>
static void compute_expr_v(vector & v,expr & v_exp)
{
v_exp.init();
auto ite = v.getGPUIterator(256);
auto NN = v_exp.getNN();
compute_expr_ker_sort_v<prp><<<ite.wthr,ite.thr>>>(v,*NN,v_exp);
}
//! evaluate v_exp per particle and copy dim components into property prp
template<unsigned int dim, typename vector, typename expr>
static void compute_expr_vv(vector & v,expr & v_exp)
{
v_exp.init();
auto ite = v.getGPUIterator(256);
auto NN = v_exp.getNN();
compute_expr_ker_sort_vv<prp,dim><<<ite.wthr,ite.thr>>>(v,*NN,v_exp);
}
};
#endif
......
......@@ -1138,7 +1138,6 @@ bool check_values_apply_kernel(vector & vd, Kernel & ker, NN_type & NN)
Point<3,float> xp = vd.getPos(p);
float base1 = vd.template getProp<A>(p);
float prp_x = vd.template getProp<VC>(p) * vd.template getProp<VB>(p) + norm(vd.template getProp<VB>(p));
// For each neighborhood particle
......@@ -1760,6 +1759,71 @@ void vector_dist_op_ap_ker_impl(vector & vd, vA_type & vA,
check_values_apply_kernel3_reduce<impl>(vd,ker,cl_host,p);
}
/*! \brief Exercise applyKernel_in_sort / applyKernel_in_gen_sort expressions
 *         and validate the results on the host
 *
 * For each expression: evaluate it on the device into the sorted layout,
 * merge the sorted result back into the unsorted vector (merge_sort<...>),
 * then run the corresponding host-side check_values_apply_kernel* check.
 *
 * \param vd particle vector under test
 * \param vA,vC scalar property expression wrappers (A, C)
 * \param vVA,vVB,vVC vector property expression wrappers (VA, VB, VC)
 * \param opt options flag
 *
 * NOTE(review): opt is accepted but never read in this body -- confirm it is
 * kept only for signature symmetry with vector_dist_op_ap_ker_impl
 */
template<typename vector,
typename vA_type,
typename vC_type,
typename vVA_type,
typename vVB_type,
typename vVC_type>
void vector_dist_op_ap_ker_impl_sort(vector & vd, vA_type & vA,
vC_type & vC,
vVA_type & vVA,
vVB_type & vVB,
vVC_type & vVC,
unsigned int opt)
{
// we apply an exponential kernel to calculate something
// device cell-list (sorted) used by the *_sort expressions, plus a host
// cell-list used by the validation routines
auto cl_gpu = vd.getCellListGPU(0.05);
auto cl = cl_gpu.toKernel();
auto cl_host = vd.template getCellListDev<comp_host>(0.05);
exp_kernel ker(0.2);
// scalar result: kernel applied to a vector expression, merged into A
vA = applyKernel_in_sort(vVC * vVB + norm(vVB),vd,cl,ker) + vC;
vd.template merge_sort<A>(cl_gpu);
check_values_apply_kernel<comp_dev>(vd,ker,cl_host);
// vector result merged into VA
vVA = applyKernel_in_sort(2.0*vVC + vVB ,vd,cl,ker) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel2<comp_dev>(vd,ker,cl_host);
// reduction variants currently disabled
/* vA = rsum(applyKernel_in_sort(vVC * vVB + norm(vVB),vd,cl,ker)) + vC;
vd.template merge_sort<A>(cl_gpu);
check_values_apply_kernel_reduce<comp_dev>(vd,ker,cl_host);
vVA = rsum(applyKernel_in_sort(2.0*vVC + vVB ,vd,cl,ker)) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel2_reduce<comp_dev>(vd,ker,cl_host);*/
// same expressions through the "gen" (generic) sorted apply-kernel path
vA = applyKernel_in_gen_sort(vVC * vVB + norm(vVB),vd,cl,ker) + vC;
vd.template merge_sort<A>(cl_gpu);
check_values_apply_kernel<comp_dev>(vd,ker,cl_host);
vVA = applyKernel_in_gen_sort(2.0*vVC + vVB ,vd,cl,ker) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel2<comp_dev>(vd,ker,cl_host);
// reduction variants of the generic path currently disabled
/* vA = rsum(applyKernel_in_gen_sort(vVC * vVB + norm(vVB),vd,cl,ker)) + vC;
vd.template merge_sort<A>(cl_gpu);
check_values_apply_kernel_reduce<comp_dev>(vd,ker,cl_host);
vVA = rsum(applyKernel_in_gen_sort(2.0*vVC + vVB ,vd,cl,ker)) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel2_reduce<comp_dev>(vd,ker,cl_host);*/
// Check it compile the code is the same
vVA = applyKernel_in_gen_sort(vVC,vd,cl,ker) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel3<comp_dev>(vd,ker,cl_host);
vVA = applyKernel_in_sort(vVC,vd,cl,ker) + vVC;
vd.template merge_sort<VA>(cl_gpu);
check_values_apply_kernel3<comp_dev>(vd,ker,cl_host);
// simulated-particle reduction path currently disabled
/* Point<2,float> p = rsum(applyKernel_in_sim_sort(vd,cl,ker)).get();
check_values_apply_kernel3_reduce<comp_dev>(vd,ker,cl_host,p);*/
}
template<unsigned int impl>
struct check_all_apply_ker
{
......@@ -1782,19 +1846,18 @@ struct check_all_apply_ker
}
};
template<>
struct check_all_apply_ker<comp_dev>
{
template<typename vector_type> static void check(vector_type & vd)
{
auto vdk = vd.toKernel();
auto vA = getV<A,comp_dev>(vd);
auto vC = getV<C,comp_dev>(vd);
auto vA = getV<A>(vd,vdk);
auto vC = getV<C>(vd,vdk);
auto vVA = getV<VA>(vd,vdk);
auto vVB = getV<VB>(vd,vdk);
auto vVC = getV<VC>(vd,vdk);
auto vVA = getV<VA,comp_dev>(vd);
auto vVB = getV<VB,comp_dev>(vd);
auto vVC = getV<VC,comp_dev>(vd);
// fill vd with some value
fill_values<comp_dev>(vd);
......@@ -1808,4 +1871,28 @@ struct check_all_apply_ker<comp_dev>
}
};
/*! \brief Driver for the sorted apply-kernel checks
 *
 * Builds sorted expression wrappers (getV_sort) for the scalar (A, C) and
 * vector (VA, VB, VC) properties, fills the vector with test values,
 * redistributes/ghost-synchronizes on the device, mirrors the data back to
 * the host, and runs vector_dist_op_ap_ker_impl_sort.
 *
 */
struct check_all_apply_ker_sort
{
//! run all sorted apply-kernel checks on vd
template<typename vector_type> static void check(vector_type & vd)
{
// sorted-layout expression wrappers for the properties under test
auto vA = getV_sort<A>(vd);
auto vC = getV_sort<C>(vd);
auto vVA = getV_sort<VA>(vd);
auto vVB = getV_sort<VB>(vd);
auto vVC = getV_sort<VC>(vd);
// fill vd with some value
fill_values<comp_dev>(vd);
// redistribute particles and fetch ghosts on the device, then mirror
// positions and properties 0..6 back to the host for the checks
vd.map(RUN_ON_DEVICE);
vd.template ghost_get<0,1,2,3,4,5,6>(RUN_ON_DEVICE);
vd.template deviceToHostProp<0,1,2,3,4,5,6>();
vd.deviceToHostPos();
vector_dist_op_ap_ker_impl_sort(vd,vA,vC,vVA,vVB,vVC,RUN_ON_DEVICE);
}
};
#endif /* VECTOR_DIST_OPERATORS_TESTS_UTIL_HPP_ */
......@@ -13,6 +13,7 @@
#include "Operators/Vector/vector_dist_operators.hpp"
#include "Operators/Vector/tests/vector_dist_operators_tests_util.hpp"
BOOST_AUTO_TEST_SUITE( vector_dist_operators_apply_kernel_test_cpu )
BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_test )
{
......@@ -32,4 +33,6 @@ BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_test )
check_all_apply_ker<comp_host>::check(vd);
}
BOOST_AUTO_TEST_SUITE_END()
......@@ -13,6 +13,8 @@
#include "Operators/Vector/vector_dist_operators.hpp"
#include "Operators/Vector/tests/vector_dist_operators_tests_util.hpp"
BOOST_AUTO_TEST_SUITE( vector_dist_operators_apply_kernel_test_gpu )
BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_host_gpu_test )
{
if (create_vcluster().getProcessingUnits() > 3)
......@@ -50,3 +52,23 @@ BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_gpu_test )
check_all_apply_ker<comp_dev>::check(vd);
}
//! GPU test: apply-kernel expressions through the sorted (getV_sort) path
BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_gpu_sort_test )
{
// this test is only meaningful up to 3 processing units
if (create_vcluster().getProcessingUnits() > 3)
return;
Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
// Boundary conditions
size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
// ghost
Ghost<3,float> ghost(0.05);
// 512 particles with 3 scalar + 3 vector properties, plus one spare float
vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost);
check_all_apply_ker_sort::check(vd);
}
BOOST_AUTO_TEST_SUITE_END()
......@@ -39,6 +39,12 @@ public:
typedef void vtype;
//! result for is sort
typedef boost::mpl::bool_<false> is_sort;
//! NN_type
typedef void NN_type;
//! vector expression from a constant point
vector_dist_expression(point p)
:p(p)
......
......@@ -32,10 +32,19 @@ public:\
typedef typename exp1::is_ker is_ker;\
\
typedef typename vector_result<typename exp1::vtype,void>::type vtype;\
\
typedef typename vector_is_sort_result<exp1::is_sort::value,false>::type is_sort;\
\
typedef typename nn_type_result<typename exp1::NN_type,void>::type NN_type;\
\
vector_dist_expression_op(const exp1 & o1)\
:o1(o1)\
{}\
\
inline NN_type * getNN()\
{\
return nn_type_result<typename exp1::NN_type,void>::getNN(o1);\
}\
\
const vtype & getVector()\
{\
......@@ -143,6 +152,15 @@ public:\
typedef std::integral_constant<bool,exp1::is_ker::value || exp1::is_ker::value> is_ker;\
\
typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype;\
\
typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort;\
\
typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type;\
\
inline NN_type * getNN()\
{\
return nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::getNN(o1,o2);\
}\
\
vtype & getVector()\
{\
......@@ -243,7 +261,7 @@ CREATE_VDIST_ARG2_FUNC(pmul,pmul,VECT_PMUL)
////////// Special function reduce /////////////////////////
template<typename val_type, bool is_scalar = is_Point<val_type>::type::value>
template<typename val_type, bool is_sort, bool is_scalar = is_Point<val_type>::type::value>
struct point_scalar_process
{
typedef aggregate<val_type> type;
......@@ -253,9 +271,12 @@ struct point_scalar_process
{
#ifdef __NVCC__
auto ite = ve.getGPUIterator(256);
// auto ite = ve.getGPUIterator(256);
compute_expr_ker_v<0><<<ite.wthr,ite.thr>>>(ve.toKernel(),o1);
// compute_expr_ker_v<0><<<ite.wthr,ite.thr>>>(ve.toKernel(),o1);
auto vek = ve.toKernel();
vector_dist_op_compute_op<0,is_sort,comp_dev>::compute_expr_v(vek,o1);
exp_tmp2.resize(sizeof(val_type));
......@@ -272,8 +293,8 @@ struct point_scalar_process
}
};
template<typename val_type>
struct point_scalar_process<val_type,true>
template<typename val_type, bool is_sort>
struct point_scalar_process<val_type,is_sort,true>
{
typedef val_type type;
......@@ -282,9 +303,12 @@ struct point_scalar_process<val_type,true>
{
#ifdef __NVCC__
auto ite = ve.getGPUIterator(256);
// auto ite = ve.getGPUIterator(256);
// compute_expr_ker_vv<0,val_type::dims><<<ite.wthr,ite.thr>>>(ve.toKernel(),o1);
compute_expr_ker_vv<0,val_type::dims><<<ite.wthr,ite.thr>>>(ve.toKernel(),o1);
auto vek = ve.toKernel();
vector_dist_op_compute_op<0,is_sort,comp_dev>::template compute_expr_vv<val_type::dims>(vek,o1);
exp_tmp2.resize(sizeof(val_type));
......@@ -340,6 +364,12 @@ public:
//! return the vector type on which this expression operate
typedef typename vector_result<typename exp1::vtype,void>::type vtype;
//! result for is sort
typedef typename vector_is_sort_result<exp1::is_sort::value,false>::type is_sort;
//! NN_type
typedef typename nn_type_result<typename exp1::NN_type,void>::type NN_type;
//! constructor from an epxression exp1 and a vector vd
vector_dist_expression_op(const exp1 & o1)
:o1(o1),val(0)
......@@ -357,7 +387,11 @@ public:
// we have to do it on GPU
openfpm::vector<typename point_scalar_process<val_type>::type,CudaMemory,typename memory_traits_inte<typename point_scalar_process<val_type>::type>::type,memory_traits_inte,openfpm::grow_policy_identity> ve;
openfpm::vector<typename point_scalar_process<val_type,is_sort::value>::type,
CudaMemory,
typename memory_traits_inte<typename point_scalar_process<val_type,is_sort::value>::type>::type,
memory_traits_inte,
openfpm::grow_policy_identity> ve;
auto & orig_v = o1.getVector();
......@@ -367,7 +401,7 @@ public:
ve.setMemory(exp_tmp);
ve.resize(orig_v.size_local());
point_scalar_process<val_type>::process(val,ve,o1);
point_scalar_process<val_type,is_sort::value>::process(val,ve,o1);
#else
std::cout << __FILE__ << ":" << __LINE__ << " error, to use expression on GPU you must compile with nvcc compiler " << std::endl;
#endif
......@@ -393,6 +427,16 @@ public:
}
}
/*! \brief get the NN object
*
* \return the NN object
*
*/
inline NN_type * getNN() const
{
return nn_type_result<typename exp1::NN_type,void>::getNN(o1);
}
//! it return the result of the expression
inline typename std::remove_reference<rtype>::type get()
{
......
......@@ -28,21 +28,20 @@ BOOST_AUTO_TEST_CASE(vector_dist_operators_list_ker_test)
vector_dist_ker_list<vector_dist_kernel> & vdkl = vd.private_get_vector_dist_ker_list();
auto vdk = vd.toKernel();
BOOST_REQUIRE_EQUAL(vdkl.n_entry(),0);
{
auto vA = getV<A>(vd,vdk);
auto vA = getV<A,comp_dev>(vd);
BOOST_REQUIRE_EQUAL(vdkl.n_entry(),1);
{
auto vB = getV<B>(vd,vdk);
auto vC = getV<C>(vd,vdk);
auto vB = getV<B,comp_dev>(vd);
auto vC = getV<C,comp_dev>(vd);
auto vVA = getV<VA>(vd,vdk);
auto vVB = getV<VB>(vd,vdk);
auto vVC = getV<VC>(vd,vdk);
auto vVA = getV<VA,comp_dev>(vd);
auto vVB = getV<VB,comp_dev>(vd);
auto vVC = getV<VC,comp_dev>(vd);
BOOST_REQUIRE_EQUAL(vdkl.n_entry(),6);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment