diff --git a/CMakeLists.txt b/CMakeLists.txt index 26b87bc7452a83fcb6c777b8afbf836ce913f394..30a8bc6e99b57166ba128ebd439a09434f60efdb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,11 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) project(openfpm_pdata LANGUAGES C CXX) -enable_testing() - -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/) - if (POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif() + cmake_policy(SET CMP0074 OLD) +endif () + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../cmake_modules/) set(BOOST_INCLUDE ${Boost_INCLUDE_DIR} CACHE PATH "Include directory for BOOST") set(PETSC_ROOT CACHE PATH "If compiling with linear algebra indicate the PETSC root directory") @@ -43,29 +41,28 @@ set(ENV{PATH} "$ENV{PATH}:${HDF5_ROOT}/bin") set(HDF5_PREFER_PARALLEL TRUE) if(ENABLE_GPU) - enable_language(CUDA) - find_package(CUDA) + enable_language(CUDA) + find_package(CUDA) if (CUDA_VERSION_MAJOR EQUAL 9 AND CUDA_VERSION_MINOR EQUAL 2) message("CUDA is compatible") set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda) + FILE(WRITE cuda_options " -Xcudafe \"--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda ") elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 ) message("CUDA is compatible") - set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111 --diag_suppress=2928 --diag_suppress=2931 --diag_suppress=2929 --diag_suppress=2930" --expt-extended-lambda) + set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda ) + FILE(WRITE cuda_options "-Xcudafe \"--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 \" --expt-extended-lambda") elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 ) message("CUDA is compatible") - set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda) - set(WARNING_SUPPRESSION_AND_OPTION_NVCC_TEXT "-Xcudafe \"--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2978 --diag_suppress=2979 --diag_suppress=1835 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128\" --expt-extended-lambda") + set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2979 --diag_suppress=186" --expt-extended-lambda) else() - message(FATAL_ERROR "CUDA is incompatible, version 9.2 is only supported") + message(FATAL_ERROR "CUDA is incompatible, version 9.2 and 10.1 is only supported") endif() - - endif() -find_package(Boost 1.68.0 COMPONENTS unit_test_framework iostreams program_options) -find_package(MPI) +find_package(Boost 1.72.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options) +find_package(MPI REQUIRED) find_package(PETSc) find_package(HDF5) find_package(Eigen3) @@ -137,6 +134,10 @@ else() message( FATAL_ERROR "MPI is required in order to install OpenFPM" ) endif() +if(PETSC_FOUND) + set(DEFINE_HAVE_PETSC "#define HAVE_PETSC") +endif() + if (Boost_FOUND) set(DEFINE_HAVE_BOOST "#define HAVE_BOOST") set(DEFINE_HAVE_BOOST_IOSTREAMS "#define HAVE_BOOST_IOSTREAMS") @@ -163,11 +164,20 @@ if(EIGEN3_FOUND) set(DEFINE_HAVE_EIGEN "#define HAVE_EIGEN") endif() + +if(EIGEN3_FOUND) + set(DEFINE_HAVE_EIGEN "#define HAVE_EIGEN") +endif() + if(LIBHILBERT_FOUND) - set(DEFINE_HAVE_LIBHILBERT "#define HAVE_LIBHILBERT 1") + set(DEFINE_HAVE_LIBHILBERT "#define HAVE_LIBHILBERT 1") else() - file(WRITE error_code "210") - message( FATAL_ERROR "LibHilbert is required in order to install OpenFPM") + file(WRITE error_code "210") + message( FATAL_ERROR "LibHilbert is required in order to install OpenFPM") +endif() + +if(SUITESPARSE_FOUND AND SuiteSparse_UMFPACK_FOUND) + set(DEFINE_HAVE_SUITESPARSE "#define HAVE_SUITESPARSE") endif() if(SUITESPARSE_FOUND AND SuiteSparse_UMFPACK_FOUND) diff --git a/run.sh b/run.sh index 4c12da58244a9423c4fecb37b488ade6c14adba7..3a89ffd91f60673986531980885855179d53fd5c 100755 --- a/run.sh +++ b/run.sh @@ -18,6 +18,7 @@ echo "RUN numerics test" cd .. source $HOME/openfpm_vars_$branch + cd openfpm_numerics mpirun -np $3 ../build/openfpm_numerics/src/numerics diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index afcedf063889fc0fff8c1bca74d0474c72fbf193..e161c5d69fbf09fc8509623920ed2e8b564147a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,8 +2,29 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) ########################### Executables -add_executable(numerics main.cpp Matrix/SparseMatrix_unit_tests.cpp interpolation/interpolation_unit_tests.cpp Vector/Vector_unit_tests.cpp Solvers/petsc_solver_unit_tests.cpp FiniteDifference/FDScheme_unit_tests.cpp FiniteDifference/eq_unit_test_3d.cpp FiniteDifference/eq_unit_test.cpp Operators/Vector/vector_dist_operators_unit_tests.cpp ../../openfpm_vcluster/src/VCluster/VCluster.cpp ../../openfpm_devices/src/memory/CudaMemory.cu ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp) +if (CUDA_FOUND) + set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu + Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu) +endif() + +add_executable(numerics ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} + main.cpp + Matrix/SparseMatrix_unit_tests.cpp + interpolation/interpolation_unit_tests.cpp + Vector/Vector_unit_tests.cpp + Solvers/petsc_solver_unit_tests.cpp + FiniteDifference/FDScheme_unit_tests.cpp + FiniteDifference/eq_unit_test_3d.cpp + FiniteDifference/eq_unit_test.cpp + Operators/Vector/vector_dist_operators_unit_tests.cpp + Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cpp + ../../openfpm_vcluster/src/VCluster/VCluster.cpp + ../../openfpm_devices/src/memory/CudaMemory.cu + ../../openfpm_devices/src/memory/HeapMemory.cpp + ../../openfpm_devices/src/memory/PtrMemory.cpp + ../../openfpm_devices/src/Memleak_check.cpp + ../../src/lib/pdata.cpp) ########################### @@ -16,9 +37,12 @@ endif() if(CUDA_FOUND) target_compile_options(numerics PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: ${WARNING_SUPPRESSION_AND_OPTION_NVCC} >) +<<<<<<< HEAD if (TEST_COVERAGE) target_compile_options(numerics PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage" >) endif() +======= +>>>>>>> sparse_cl endif() target_include_directories (numerics PUBLIC ${CUDA_INCLUDE_DIRS}) @@ -34,14 +58,22 @@ target_include_directories (numerics PUBLIC ${METIS_ROOT}/include) target_include_directories (numerics PUBLIC ${HDF5_ROOT}/include) target_include_directories (numerics PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) target_include_directories (numerics PUBLIC ${Boost_INCLUDE_DIRS}) +target_include_directories (numerics PUBLIC ${Vc_INCLUDE_DIR}) if(EIGEN3_FOUND) target_include_directories (numerics PUBLIC ${EIGEN3_INCLUDE_DIR}) endif() +link_directories(${PARMETIS_ROOT} ${METIS_ROOT}) target_link_libraries(numerics ${Boost_LIBRARIES}) +<<<<<<< HEAD target_link_libraries(numerics -L${PARMETIS_ROOT}/lib parmetis) target_link_libraries(numerics ${HDF5_LIBRARIES}) +======= +target_link_libraries(numerics ${PARMETIS_LIBRARIES}) +target_link_libraries(numerics -L${HDF5_ROOT}/lib hdf5 hdf5_hl) +>>>>>>> sparse_cl target_link_libraries(numerics -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES}) +target_link_libraries(numerics ${Vc_LIBRARIES}) if(PETSC_FOUND) target_include_directories (numerics PUBLIC ${PETSC_INCLUDES}) target_link_libraries(numerics ${PETSC_LIBRARIES}) diff --git a/src/FiniteDifference/eq_unit_test_3d.cpp b/src/FiniteDifference/eq_unit_test_3d.cpp index 9456a8e87af08fccb179110086d2a425bf0c1054..760b276b25960183fcc0f032c47999ca2678fa4f 100644 --- a/src/FiniteDifference/eq_unit_test_3d.cpp +++ b/src/FiniteDifference/eq_unit_test_3d.cpp @@ -214,7 +214,12 @@ template<typename solver_type,typename lid_nn_3d> void lid_driven_cavity_3d() #else - #if __GNUC__ == 7 + #if __GNUC__ == 8 + + std::string file1 = std::string("test/") + s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC8.vtk"; + std::string file2 = s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk"; + + #elif __GNUC__ == 7 std::string file1 = std::string("test/") + s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC7.vtk"; std::string file2 = s + "lid_driven_cavity_3d_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk"; diff --git a/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh b/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh new file mode 100644 index 0000000000000000000000000000000000000000..22fab49438e1918939b2aecd66bfbd75407caaec --- /dev/null +++ b/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh @@ -0,0 +1,369 @@ +/* + * vector_dist_operators_cuda.cuh + * + * Created on: May 31, 2019 + * Author: i-bird + */ + +#ifndef VECTOR_DIST_OPERATORS_CUDA_CUH_ +#define VECTOR_DIST_OPERATORS_CUDA_CUH_ + +constexpr unsigned int PROP_POS =(unsigned int)-1; + +/*! \brief selector for position or properties left side expression + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector, unsigned int prp> +struct pos_or_propL +{ + //! return the value (position or property) of the particle k in the vector v + static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(v.template getProp<prp>(k)) + { + return v.template getProp<prp>(k); + } +}; + +/*! \brief selector for position or properties left side expression + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector, unsigned int prp> +struct pos_or_propL_ker +{ + //! return the value (position or property) of the particle k in the vector v + __device__ static inline auto value(vector & v, const unsigned int & k) -> decltype(v.template getProp<prp>(k)) + { + return v.template getProp<prp>(k); + } +}; + + +/*! \brief selector for position or properties left side + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector> +struct pos_or_propL<vector,PROP_POS> +{ +#ifdef SE_CLASS3 + + //! return the value (position or property) of the particle k in the vector v + static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(getExprL(v.getPos(k).getReference())) + { + return getExprL(v.getPos(k).getReference()); + } + +#else + + //! return the value (position or property) of the particle k in the vector v + static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(ger<vector::dims,typename vector::stype>::getExprL(v.getPos(k))) + { + return ger<vector::dims,typename vector::stype>::getExprL(v.getPos(k)); + } + +#endif +}; + +/*! \brief selector for position or properties left side + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector> +struct pos_or_propL_ker<vector,PROP_POS> +{ +#ifdef SE_CLASS3 + + //! return the value (position or property) of the particle k in the vector v + static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(getExprL(v.getPos(k).getReference())) + { + return getExprL(v.getPos(k).getReference()); + } + +#else + + //! return the value (position or property) of the particle k in the vector v + __device__ static inline auto value(vector & v, const unsigned int & k) -> decltype(ger<vector::dims,typename vector::stype>::getExprL(v.getPos(k))) + { + return ger<vector::dims,typename vector::stype>::getExprL(v.getPos(k)); + } + +#endif +}; + +/*! \brief selector for position or properties right side position + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector, unsigned int prp> +struct pos_or_propR +{ + //! return the value (position or property) of the particle k in the vector v + __device__ __host__ static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(v.template getProp<prp>(k)) + { + return v.template getProp<prp>(k); + } + + //! return the value (position or property) of the particle k in the vector v + __device__ __host__ static inline auto value(vector & v, const unsigned int & k) -> decltype(v.template getProp<prp>(k)) + { + return v.template getProp<prp>(k); + } +}; + + +/*! \brief selector for position or properties right side + * + * \tparam vector type of the original vector + * + * \tparam prp property id + * + */ +template <typename vector> +struct pos_or_propR<vector,PROP_POS> +{ + //! return the value (position or property) of the particle k in the vector v + __device__ __host__ static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(ger<vector::dims,typename vector::stype>::getExprR(v.getPos(k))) + { + return ger<vector::dims,typename vector::stype>::getExprR(v.getPos(k)); + } + + //! return the value (position or property) of the particle k in the vector v + __device__ __host__ static inline auto value(vector & v, const unsigned int & k) -> decltype(ger<vector::dims,typename vector::stype>::getExprR(v.getPos(k))) + { + return ger<vector::dims,typename vector::stype>::getExprR(v.getPos(k)); + } +}; + +template<unsigned int prp ,bool is_sort, int impl> +struct vector_dist_op_compute_op +{ + template<typename vector, typename expr> + static void compute_expr(vector & v,expr & v_exp) + {} + + template<typename vector> + static void compute_const(vector & v,double d) + {} +}; + +template<unsigned int prp> +struct vector_dist_op_compute_op<prp,false,comp_host> +{ + template<typename vector, typename expr> + static void compute_expr(vector & v,expr & v_exp) + { + v_exp.init(); + + auto it = v.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + pos_or_propL<vector,prp>::value(v,key) = v_exp.value(key); + + ++it; + } + } + + template<typename vector> + static void compute_const(vector & v,double d) + { + auto it = v.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + pos_or_propL<vector,prp>::value(v,key) = d; + + ++it; + } + } +}; + +#ifdef __NVCC__ + +template<unsigned int prp, unsigned int dim ,typename vector, typename expr> +__global__ void compute_expr_ker_vv(vector vd, expr v_exp) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size()) {return;} + + for (unsigned int i = 0 ; i < dim ; i++) + { + vd.template get<prp>(p)[i] = v_exp.value(p).get(i); + } +} + +template<unsigned int prp, typename vector, typename expr> +__global__ void compute_expr_ker_v(vector vd, expr v_exp) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size()) {return;} + + vd.template get<prp>(p) = v_exp.value(p); +} + +template<unsigned int prp, typename vector, typename expr> +__global__ void compute_expr_ker(vector vd, expr v_exp) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size_local()) {return;} + + pos_or_propL_ker<vector,prp>::value(vd,p) = v_exp.value(p); +} + +template<unsigned int prp, typename vector> +__global__ void compute_double_ker(vector vd, double d) +{ + int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= vd.size_local()) {return;} + + pos_or_propL_ker<vector,prp>::value(vd,p) = d; +} + +/////////// SORTED VERSION // + +template<unsigned int prp, unsigned int dim ,typename vector, typename NN_type, typename expr> +__global__ void compute_expr_ker_sort_vv(vector vd, NN_type NN, expr v_exp) +{ + int p; + + GET_PARTICLE_SORT(p,NN); + + for (unsigned int i = 0 ; i < dim ; i++) + { + vd.template get<prp>(p)[i] = v_exp.value(p).get(i); + } +} + +template<unsigned int prp, typename vector, typename NN_type, typename expr> +__global__ void compute_expr_ker_sort_v(vector vd, NN_type NN, expr v_exp) +{ + int p; + + GET_PARTICLE_SORT(p,NN); + + vd.template get<prp>(p) = v_exp.value(p); +} + +template<unsigned int prp, typename vector, typename expr, typename NN_type> +__global__ void compute_expr_ker_sort(vector vd, NN_type NN, expr v_exp) +{ + int p; + + GET_PARTICLE_SORT(p,NN); + + pos_or_propL_ker<vector,prp>::value(vd,p) = v_exp.value(p); +} + + +///////////////////////////// + +template<unsigned int prp> +struct vector_dist_op_compute_op<prp,false,comp_dev> +{ + template<typename vector, typename expr> + static void compute_expr(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getDomainIteratorGPU(256); + + compute_expr_ker<prp><<<ite.wthr,ite.thr>>>(v,v_exp); + } + + template<typename vector, typename expr> + static void compute_expr_v(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getGPUIterator(256); + + compute_expr_ker_v<prp><<<ite.wthr,ite.thr>>>(v,v_exp); + } + + template<unsigned int dim, typename vector, typename expr> + static void compute_expr_vv(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getGPUIterator(256); + + compute_expr_ker_vv<prp,dim><<<ite.wthr,ite.thr>>>(v,v_exp); + } + + template<typename vector> + static void compute_const(vector & v,double d) + { + auto ite = v.getDomainIteratorGPU(256); + + compute_double_ker<prp><<<ite.wthr,ite.thr>>>(v,d); + } +}; + +template<unsigned int prp> +struct vector_dist_op_compute_op<prp,true,comp_dev> +{ + template<typename vector, typename expr> + static void compute_expr(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getDomainIteratorGPU(256); + + auto NN = v_exp.getNN(); + + compute_expr_ker_sort<prp><<<ite.wthr,ite.thr>>>(v,*NN,v_exp); + } + + template<typename vector, typename expr> + static void compute_expr_v(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getGPUIterator(256); + + auto NN = v_exp.getNN(); + + compute_expr_ker_sort_v<prp><<<ite.wthr,ite.thr>>>(v,*NN,v_exp); + } + + template<unsigned int dim, typename vector, typename expr> + static void compute_expr_vv(vector & v,expr & v_exp) + { + v_exp.init(); + + auto ite = v.getGPUIterator(256); + + auto NN = v_exp.getNN(); + + compute_expr_ker_sort_vv<prp,dim><<<ite.wthr,ite.thr>>>(v,*NN,v_exp); + } +}; + +#endif + + +#endif /* VECTOR_DIST_OPERATORS_CUDA_CUH_ */ diff --git a/src/Operators/Vector/tests/vector_dist_operators_tests_util.hpp b/src/Operators/Vector/tests/vector_dist_operators_tests_util.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9c3cb7afff400f9c91309b1a41cf86a371fab208 --- /dev/null +++ b/src/Operators/Vector/tests/vector_dist_operators_tests_util.hpp @@ -0,0 +1,1898 @@ +/* + * vector_dist_operators_tests_util.hpp + * + * Created on: May 31, 2019 + * Author: i-bird + */ + +#ifndef VECTOR_DIST_OPERATORS_TESTS_UTIL_HPP_ +#define VECTOR_DIST_OPERATORS_TESTS_UTIL_HPP_ + +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Space/Shape/Point.hpp" + +constexpr int A = 0; +constexpr int B = 1; +constexpr int C = 2; + +constexpr int VA = 3; +constexpr int VB = 4; +constexpr int VC = 5; + +constexpr int TA = 6; + +//////////////////// Here we define all the function to checl the operators + +template <unsigned int prp, unsigned int impl, typename vector> +bool check_values(vector & v,float a) +{ + if (impl == comp_dev) + {v.template deviceToHostProp<prp>();} + + bool ret = true; + auto it = v.getDomainIterator(); + + while (it.isNext()) + { + ret &= v.template getProp<prp>(it.get()) == a; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector> +bool check_values_complex_expr(vector & vd) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + float base1 = vd.template getProp<B>(key) + 2.0 + vd.template getProp<B>(key) - 2.0*vd.template getProp<C>(key) / 5.0; + float base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_pos_sum(vector & vd, const rtype & p) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + Point<vector::dims,typename vector::stype> xp = vd.getPos(key); + + rtype base1 = rtype(xp) + p; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_pos_sub(vector & vd, const rtype & p) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + Point<vector::dims,typename vector::stype> xp = vd.getPos(key); + + rtype base1 = rtype(xp) - p; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_pos_sub_minus(vector & vd, const rtype & p) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + Point<vector::dims,typename vector::stype> xp = vd.getPos(key); + + rtype base1 = -(rtype(xp) - p); + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_point_sub(vector & vd, const rtype & p) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = -vd.template getProp<B>(key); + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sum(vector & vd, double d) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd.template getProp<B>(key) + d; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sum(vector & vd1, vector & vd2) +{ + if (impl == comp_dev) + { + vd1.template deviceToHostProp<A,B>(); + vd2.template deviceToHostProp<C>(); + } + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<B>(key) + vd2.template getProp<C>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sum_3(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<B>(key) + vd1.template getProp<C>(key) + vd1.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sum_4(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) + (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); + rtype base2 = vd1.template getProp<A>(key); + + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub(vector & vd, double d) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd.template getProp<B>(key) - d; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub(double d, vector & vd) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = d - vd.template getProp<B>(key); + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub(vector & vd1, vector & vd2) +{ + if (impl == comp_dev) + { + vd1.template deviceToHostProp<A,C>(); + vd2.template deviceToHostProp<B>(); + } + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<C>(key) - vd2.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub_31(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<B>(key) - (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub_32(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) - vd1.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_sub_4(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) - (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)); + rtype base2 = vd1.template getProp<A>(key); + + if (impl == comp_dev) + { + ret &= (double)norm(base1 - base2) < 0.00001; + } + else + { + ret &= base1 == base2; + } + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_mul(vector & vd, double d) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd.template getProp<B>(key) * d; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_mul(vector & vd1, vector & vd2) +{ + if (impl == comp_dev) + { + vd1.template deviceToHostProp<A,C>(); + vd2.template deviceToHostProp<B>(); + } + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<C>(key) * vd2.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + if (impl == comp_dev) + { + ret &= (double)norm(base1 - base2) < 0.00001; + } + else + { + ret &= base1 == base2; + } + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_mul_3(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<B>(key) * (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); + rtype base2 = vd1.template getProp<A>(key); + + if (impl == comp_dev) + { + ret &= (double)norm(base1 - base2) < 0.00001; + } + else + { + ret &= base1 == base2; + } + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_mul_4(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) * (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); + rtype base2 = vd1.template getProp<A>(key); + + if (impl == comp_dev) + { + ret &= (double)norm(base1 - base2) < 0.00001; + } + else + { + ret &= base1 == base2; + } + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div(vector & vd, double d) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd.template getProp<B>(key) / d; + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div(double d, vector & vd) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,B>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = d / vd.template getProp<B>(key); + rtype base2 = vd.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div(vector & vd1, vector & vd2) +{ + if (impl == comp_dev) + { + vd1.template deviceToHostProp<A,C>(); + vd2.template deviceToHostProp<B>(); + } + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<C>(key) / vd2.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div_31(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = vd1.template getProp<B>(key) / (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div_32(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) / vd1.template getProp<B>(key); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + + +template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C, unsigned int impl> +bool check_values_div_4(vector & vd1) +{ + if (impl == comp_dev) + {vd1.template deviceToHostProp<A,B,C>();} + + bool ret = true; + auto it = vd1.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) / (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); + rtype base2 = vd1.template getProp<A>(key); + + ret &= base1 == base2; + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector> +bool check_values_scal_norm_dist(vector & vd) +{ + if (impl == comp_dev) + {vd.template deviceToHostProp<A,VB,VC>();} + + bool ret = true; + auto it = vd.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + float base1 = vd.template getProp<VB>(key) * vd.template getProp<VC>(key) + norm(vd.template getProp<VC>(key) + vd.template getProp<VB>(key)) + distance(vd.template getProp<VC>(key),vd.template getProp<VB>(key)); + float base2 = vd.template getProp<A>(key); + + if (impl == comp_dev) + { + ret &= (double)norm(base1 - base2) < 0.00001; + } + else + { + ret &= base1 == base2; + } + + ++it; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl,typename vector> +void fill_values(vector & v) +{ + auto it = v.getDomainIterator(); + + while (it.isNext()) + { + auto p = it.get(); + + v.getPos(p)[0] = (float)rand() / RAND_MAX; + v.getPos(p)[1] = (float)rand() / RAND_MAX; + v.getPos(p)[2] = (float)rand() / RAND_MAX; + + v.template getProp<A>(p) = fabs(sin(p.getKey()+1.0)); + v.template getProp<B>(p) = fabs(sin(2.0*p.getKey()+3.0)); + v.template getProp<C>(p) = fabs(sin(3.0*p.getKey()+18.0)); + + for (size_t k = 0 ; k < 3 ; k++) + { + v.template getProp<VA>(p)[k] = fabs(sin(p.getKey()+1.0+k)); + v.template getProp<VB>(p)[k] = fabs(sin(2.0*p.getKey()+1.0+3.0)); + v.template getProp<VC>(p)[k] = fabs(sin(3.0*p.getKey()+1.0+k)); + } + + ++it; + } + + if (impl == comp_dev) + { + v.template hostToDeviceProp<A,B,C,VA,VB,VC>(); + v.hostToDevicePos(); + } +} + +template<unsigned int impl,typename vector_type, + typename vA_type, + typename vB_type, + typename vC_type, + typename vVA_type, + typename vVB_type, + typename vVC_type, + typename vPOS_type> +void check_all_expressions_imp(vector_type & vd, + vA_type vA, + vB_type vB, + vC_type vC, + vVA_type vVA, + vVB_type vVB, + vVC_type vVC, + vPOS_type vPOS) +{ + // vector type + typedef vector_type vtype; + + vA = 1.0; + vB = 2.0f; + vC = 3.0; + + check_values<A,impl>(vd,1.0); + check_values<B,impl>(vd,2.0); + check_values<C,impl>(vd,3.0); + + vA = vB; + check_values<A,impl>(vd,2.0); + + fill_values<impl>(vd); + + vA = vB + 2.0 + vB - 2.0*vC / 5.0; + check_values_complex_expr<impl>(vd); + + // Various combination of 2 operator + + vA = vB + 2.0; + check_values_sum<float,vtype,A,B,C,impl>(vd,2.0); + vA = 2.0 + vB; + check_values_sum<float,vtype,A,B,C,impl>(vd,2.0); + vA = vC + vB; + check_values_sum<float,vtype,A,B,C,impl>(vd,vd); + + vA = vB - 2.0; + check_values_sub<float,vtype,A,B,C,impl>(vd,2.0); + vA = 2.0 - vB; + check_values_sub<float,vtype,A,B,C,impl>(2.0,vd); + vA = vC - vB; + check_values_sub<float,vtype,A,B,C,impl>(vd,vd); + + vA = vB * 2.0; + check_values_mul<float,vtype,A,B,C,impl>(vd,2.0); + vA = 2.0 * vB; + check_values_mul<float,vtype,A,B,C,impl>(vd,2.0); + vA = vC * vB; + check_values_mul<float,vtype,A,B,C,impl>(vd,vd); + + vA = vB / 2.0; + check_values_div<float,vtype,A,B,C,impl>(vd,2.0); + vA = 2.0 / vB; + check_values_div<float,vtype,A,B,C,impl>(2.0,vd); + vA = vC / vB; + check_values_div<float,vtype,A,B,C,impl>(vd,vd); + + // Variuos combination 3 operator + + vA = vB + (vC + vB); + check_values_sum_3<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) + vB; + check_values_sum_3<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) + (vC + vB); + check_values_sum_4<float,vtype,A,B,C,impl>(vd); + + vA = vB - (vC + vB); + check_values_sub_31<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) - vB; + check_values_sub_32<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) - (vC + vB); + check_values_sub_4<float,vtype,A,B,C,impl>(vd); + + vA = vB * (vC + vB); + check_values_mul_3<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) * vB; + check_values_mul_3<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) * (vC + vB); + check_values_mul_4<float,vtype,A,B,C,impl>(vd); + + vA = vB / (vC + vB); + check_values_div_31<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) / vB; + check_values_div_32<float,vtype,A,B,C,impl>(vd); + vA = (vC + vB) / (vC + vB); + check_values_div_4<float,vtype,A,B,C,impl>(vd); + + if (impl == comp_host) + { + auto test = vC + vB; + auto & v = test.getVector(); + BOOST_REQUIRE_EQUAL((void *)&v,(void *)&vd); + } + + // We try with vectors + + // Various combination of 2 operator + + vVA = vVB + 2.0; + check_values_sum<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = 2.0 + vVB; + check_values_sum<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = vVC + vVB; + check_values_sum<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,vd); + + vVA = vVB - 2.0; + check_values_sub<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = 2.0 - vVB; + check_values_sub<VectorS<3,float>,vtype,VA,VB,VC,impl>(2.0f,vd); + vVA = vVC - vVB; + check_values_sub<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,vd); + + vVA = vVB * 2.0; + check_values_mul<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = 2.0 * vVB; + check_values_mul<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = vVC * vVB; + check_values_mul<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,vd); + + vVA = vVB / 2.0; + check_values_div<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,2.0f); + vVA = 2.0 / vVB; + check_values_div<VectorS<3,float>,vtype,VA,VB,VC,impl>(2.0f,vd); + vVA = vVC / vVB; + check_values_div<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,vd); + + if (impl == comp_host) + { + auto test = vVB / 2.0; + auto & v = test.getVector(); + BOOST_REQUIRE_EQUAL((void *)&v,(void *)&vd); + } + + // Variuos combination 3 operator + + vVA = vVB + (vVC + vVB); + check_values_sum_3<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) + vVB; + check_values_sum_3<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) + (vVC + vVB); + check_values_sum_4<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + + vVA = vVB - (vVC + vVB); + check_values_sub_31<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) - vVB; + check_values_sub_32<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) - (vVC + vVB); + check_values_sub_4<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + + vVA = vVB * (vVC + vVB); + check_values_mul_3<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) * vVB; + check_values_mul_3<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) * (vVC + vVB); + check_values_mul_4<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vA = vVB * (vVC + vVB); + check_values_mul_3<float,vtype,A,VB,VC,impl>(vd); + vA = (vVC + vVB) * vVB; + check_values_mul_3<float,vtype,A,VB,VC,impl>(vd); + vA = (vVC + vVB) * (vVC + vVB); + check_values_mul_4<float,vtype,A,VB,VC,impl>(vd); + + if (impl == comp_host) + { + auto test = (vVC + vVB) * (vVC + vVB); + auto & v = test.getVector(); + BOOST_REQUIRE_EQUAL((void *)&v,(void *)&vd); + } + + vVA = vVB / (vVC + vVB); + check_values_div_31<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) / vVB; + check_values_div_32<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + vVA = (vVC + vVB) / (vVC + vVB); + check_values_div_4<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd); + + // normalization function + + vA = vVB * vVC + norm(vVC + vVB) + openfpm::distance(vVC,vVB); + check_values_scal_norm_dist<impl>(vd); + + Point<3,float> p0({2.0,2.0,2.0}); + auto p0_e = getVExpr(p0); + + vVA = vPOS + p0_e; + check_values_pos_sum<VectorS<3,float>,vtype,VA,VB,VC,impl>(vd,p0); + + vVA = vPOS - p0_e; + check_values_pos_sub<Point<3,float>,vtype,VA,VB,VC,impl>(vd,p0); + + vVA = -(vPOS - p0_e); + check_values_pos_sub_minus<Point<3,float>,vtype,VA,VB,VC,impl>(vd,p0); + + vVA = -vVB; + check_values_point_sub<Point<3,float>,vtype,VA,VB,VC,impl>(vd,p0); + + if (impl == comp_host) + { + auto test = vPOS + p0_e; + auto & v = test.getVector(); + BOOST_REQUIRE_EQUAL((void *)&v,(void *)&vd); + } + + // Just check it compile testing it will test the same code + // as the previuous one + vVC = exp(vVB); + vA = norm(vPOS); + vVA = vPOS + 2.0; + vVA = 2.0 + vPOS; + vVA = vPOS + vPOS; + vVA = vPOS - 2.0f; + vVA = 2.0 - vPOS; + vVA = vPOS - vPOS; + + if (impl == comp_host) + { + auto test = exp(vVB); + auto & v = test.getVector(); + BOOST_REQUIRE_EQUAL((void *)&v,(void *)&vd); + } + + vVA = vPOS * 2.0; + vVA = 2.0 * vPOS; + vVA = vPOS * vPOS; + + vVA = vPOS / 2.0f; + vVA = 2.0f / vPOS; + vVA = vPOS / vPOS; + + // Variuos combination 3 operator + + vVA = vPOS + (vPOS + vPOS); + vVA = (vPOS + vPOS) + vPOS; + vVA = (vPOS + vPOS) + (vPOS + vPOS); + + vVA = vPOS - (vPOS + vPOS); + vVA = (vPOS + vPOS) - vPOS; + vVA = (vVC + vPOS) - (vPOS + vPOS); + + vVA = vPOS * (vPOS + vPOS); + vVA = (vPOS + vPOS) * vPOS; + vVA = (vPOS + vPOS) * (vPOS + vPOS); + vA = vPOS * (vPOS + vPOS); + vA = (vPOS + vPOS) * vPOS; + vA = (vPOS + vPOS) * (vPOS + vPOS); + + vVA = vPOS / (vPOS + vPOS); + vVA = (vPOS + vPOS) / vPOS; + vVA = (vPOS + vPOS) / (vPOS + vPOS); +} + +template<unsigned int impl> +struct check_all_expressions +{ + template<typename vector_type> static void check(vector_type & vd) + { + auto vA = getV<A>(vd); + auto vB = getV<B>(vd); + auto vC = getV<C>(vd); + + auto vVA = getV<VA>(vd); + auto vVB = getV<VB>(vd); + auto vVC = getV<VC>(vd); + + auto vPOS = getV<PROP_POS>(vd); + + check_all_expressions_imp<impl>(vd,vA,vB,vC,vVA,vVB,vVC,vPOS); + } +}; + +template<> +struct check_all_expressions<comp_dev> +{ + template<typename vector_type> static void check(vector_type & vd) + { + auto vdk = vd.toKernel(); + + auto vA = getV<A>(vdk); + auto vB = getV<B>(vdk); + auto vC = getV<C>(vdk); + + auto vVA = getV<VA>(vdk); + auto vVB = getV<VB>(vdk); + auto vVC = getV<VC>(vdk); + + auto vPOS = getV<PROP_POS>(vdk); + + check_all_expressions_imp<comp_dev>(vd,vA,vB,vC,vVA,vVB,vVC,vPOS); + } +}; + + +template <unsigned impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel(vector & vd, Kernel & ker, NN_type & NN) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.template deviceToHostProp<A,C,VB,VC>(); + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + float base2 = 0.0; + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + float base1 = vd.template getProp<A>(p); + float prp_x = vd.template getProp<VC>(p) * vd.template getProp<VB>(p) + norm(vd.template getProp<VB>(p)); + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + + float prp_y = vd.template getProp<VC>(q) * vd.template getProp<VB>(q) + norm(vd.template getProp<VB>(q)); + + base2 += ker.value(xp,xq,prp_x,prp_y); + + ++Np; + } + + base2 += vd.template getProp<C>(p); + + if (impl == comp_host) + {ret &= base1 == base2;} + else + {ret &= fabs(base1 - base2) < 0.0001;} + + ++it2; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel_reduce(vector & vd, Kernel & ker, NN_type & NN) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.template deviceToHostProp<A,C,VB,VC>(); + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + float base1 = 0.0; + float base2 = 0.0; + float base3 = 0.0; + + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + float ker_accu = 0.0; + float prp_x = vd.template getProp<VC>(p) * vd.template getProp<VB>(p) + norm(vd.template getProp<VB>(p)); + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + + float prp_y = vd.template getProp<VC>(q) * vd.template getProp<VB>(q) + norm(vd.template getProp<VB>(q)); + + ker_accu += ker.value(xp,xq,prp_x,prp_y); + + ++Np; + } + + base2 += ker_accu; + + ++it2; + } + + auto it3 = vd.getDomainIterator(); + while (it3.isNext()) + { + auto p = it3.get(); + + base1 = vd.template getProp<A>(p); + base3 = vd.template getProp<C>(p) + base2; + + if (impl == comp_host) + {ret &= base1 == base3;} + else + {ret &= fabs(base1 - base3) < 0.001;} + + ++it3; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel2(vector & vd, Kernel & ker, NN_type & NN) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.template deviceToHostProp<VA,VB,VC>(); + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + // ### WIKI 13 ### + // + // Check that apply kernel work + // + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + Point<3,float> base2 = 0.0; + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + Point<3,float> base1 = vd.template getProp<VA>(p); + + Point<3,float> prp_x = 2.0 * vd.template getProp<VC>(p) + vd.template getProp<VB>(p); + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + + Point<3,float> prp_y = 2.0 * vd.template getProp<VC>(q) + vd.template getProp<VB>(q); + + base2 += ker.value(xp,xq,prp_x,prp_y); + + ++Np; + } + + base2 += vd.template getProp<VC>(p); + + if (impl == comp_host) + {ret &= base1 == base2;} + else + { + for (size_t i = 0 ; i < 3 ; i++) + {ret &= fabs(base1.get(i) - base2.get(i)) < 0.0001;} + } + + ++it2; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel3(vector & vd, Kernel & ker, NN_type & NN) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.template deviceToHostProp<VA,VC>(); + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + // ### WIKI 13 ### + // + // Check that apply kernel work + // + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + Point<3,float> base2 = 0.0; + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + Point<3,float> base1 = vd.template getProp<VA>(p); + + Point<3,float> prp_x = vd.template getProp<VC>(p); + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + + Point<3,float> prp_y = vd.template getProp<VC>(q); + + base2 += ker.value(xp,xq,prp_x,prp_y); + + ++Np; + } + + base2 += vd.template getProp<VC>(p); + + if (impl == comp_host) + {ret &= base1 == base2;} + else + { + for (size_t i = 0 ; i < 3 ; i++) + {ret &= fabs(base1.get(i) - base2.get(i)) < 0.0001;} + } + + ++it2; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel2_reduce(vector & vd, Kernel & ker, NN_type & NN) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.template deviceToHostProp<VA,VB,VC>(); + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + Point<3,float> base1 = 0.0; + Point<3,float> base2 = 0.0; + Point<3,float> base3 = 0.0; + + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + Point<3,float> ker_accu = 0.0; + Point<3,float> prp_x = 2.0f*vd.template getProp<VC>(p) + vd.template getProp<VB>(p); + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + Point<3,float> prp_y = 2.0f*vd.template getProp<VC>(q) + vd.template getProp<VB>(q); + + ker_accu += ker.value(xp,xq,prp_x,prp_y); + + ++Np; + } + + base2 += ker_accu; + + ++it2; + } + + auto it3 = vd.getDomainIterator(); + while (it3.isNext()) + { + auto p = it3.get(); + + base1 = vd.template getProp<VA>(p); + base3 = vd.template getProp<VC>(p) + base2; + + if (impl == comp_host) + {ret &= base1 == base3;} + else + { + for (size_t i = 0 ; i < 3 ; i++) + {ret &= fabs(base1.get(i) - base3.get(i)) < 0.002;} + + if (ret == false) + { + int debug = 0; + debug++; + } + } + + ++it3; + } + + BOOST_REQUIRE_EQUAL(ret,true); + + return ret; +} + +template <unsigned int impl, typename vector,typename Kernel, typename NN_type> +bool check_values_apply_kernel3_reduce(vector & vd, Kernel & ker, NN_type & NN, const Point<2,float> & p) +{ + bool ret = true; + + if (impl == comp_dev) + { + vd.deviceToHostPos(); + } + + auto it = vd.getDomainIterator(); + + Point<2,float> base2 = 0.0; + + auto it2 = vd.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + Point<3,float> xp = vd.getPos(p); + + Point<2,float> ker_accu = 0.0; + + // For each neighborhood particle + auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); + + while (Np.isNext()) + { + // Neighborhood particle q + auto q = Np.get(); + + if (q == p.getKey()) {++Np; continue;}; + + // position q + Point<3,float> xq = vd.getPos(q); + + ker_accu += ker.value(xp,xq); + + ++Np; + } + + base2 += ker_accu; + + ++it2; + } + + if (impl == comp_host) + { + BOOST_REQUIRE_EQUAL(p.get(0),base2.get(0)); + BOOST_REQUIRE_EQUAL(p.get(1),base2.get(1)); + } + else + { + BOOST_REQUIRE(fabs(p.get(0) - base2.get(0)) < 0.001); + BOOST_REQUIRE(fabs(p.get(1) - base2.get(1)) < 0.001); + } + + return ret; +} + +typedef vector_dist<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vector_type; + +#ifdef CUDA_GPU +typedef vector_dist_ker<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vector_type_ker; +#endif + +//! Exponential kernel +struct exp_kernel +{ + //! variance of the exponential kernel + float var; + + //! Exponential kernel giving variance + exp_kernel(float var) + :var(var) + {} + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * + * \return the result + * + */ + __device__ __host__ inline float value(const Point<3,float> & p, const Point<3,float> & q,float pA,float pB) + { + float dist = norm(p-q); + + return (pA + pB) * exp(dist * dist / var); + } + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * + * \return the result + * + */ + __device__ __host__ inline Point<3,float> value(const Point<3,float> & p, const Point<3,float> & q,const Point<3,float> & pA, const Point<3,float> & pB) + { + float dist = norm(p-q); + + return (pA + pB) * exp(dist * dist / var); + } + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * \param vd1 original vector + * + * \return the result + * + */ + template<typename vector_t> + __host__ __device__ inline float value(size_t p, size_t q, float pA, float pB, const vector_t & vd1) + { + Point<3,float> pp = vd1.getPos(p); + Point<3,float> pq = vd1.getPos(q); + + float dist = norm(pp-pq); + + return (pA + pB) * exp(dist * dist / var); + } + +#ifdef CUDA_GPU + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * \param vd1 original vector + * + * \return the result + * + */ + __device__ inline float value(size_t p, size_t q, float pA, float pB, const vector_type_ker & vd1) + { + Point<3,float> pp = vd1.getPos(p); + Point<3,float> pq = vd1.getPos(q); + + float dist = norm(pp-pq); + + return (pA + pB) * exp(dist * dist / var); + } + +#endif + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * \param vd1 original vector + * + * \return the result + * + */ + __host__ inline Point<3,float> value(size_t p, size_t q, const Point<3,float> & pA, const Point<3,float> & pB , const vector_type & vd1) + { + Point<3,float> pp = vd1.getPos(p); + Point<3,float> pq = vd1.getPos(q); + + float dist = norm(pp-pq); + + return (pA + pB) * exp(dist * dist / var); + } + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * \param vd1 original vector + * + * \return the result + * + */ + template<typename vector_t> + __host__ inline Point<3,float> value(size_t p, size_t q, const Point<3,float> & pA, const Point<3,float> & pB , const vector_t & vd1) + { + Point<3,float> pp = vd1.getPos(p); + Point<3,float> pq = vd1.getPos(q); + + float dist = norm(pp-pq); + + return (pA + pB) * exp(dist * dist / var); + } + +#ifdef CUDA_GPU + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * \param pA property value at p + * \param pB property value at q + * \param vd1 original vector + * + * \return the result + * + */ + __device__ inline Point<3,float> value(size_t p, size_t q, const Point<3,float> & pA, const Point<3,float> & pB , const vector_type_ker & vd1) + { + Point<3,float> pp = vd1.getPos(p); + Point<3,float> pq = vd1.getPos(q); + + float dist = norm(pp-pq); + + return (pA + pB) * exp(dist * dist / var); + } + +#endif + + /*! \brief Result of the exponential kernel + * + * \param p position of the particle p + * \param q position of the particle q + * + * \return the result + * + */ + __device__ __host__ inline Point<2,float> value(const Point<3,float> & p, const Point<3,float> & q) + { + float dist = norm(p-q); + + return exp(dist * dist / var); + } +}; + +template<unsigned int impl,typename vector, + typename vA_type, + typename vC_type, + typename vVA_type, + typename vVB_type, + typename vVC_type> +void vector_dist_op_ap_ker_impl(vector & vd, vA_type & vA, + vC_type & vC, + vVA_type & vVA, + vVB_type & vVB, + vVC_type & vVC, + unsigned int opt) +{ + // we apply an exponential kernel to calculate something + + auto cl = vd.template getCellListDev<impl>(0.05); + auto cl_host = vd.template getCellListDev<comp_host>(0.05); + exp_kernel ker(0.2); + + vA = applyKernel_in(vVC * vVB + norm(vVB),vd,cl,ker) + vC; + check_values_apply_kernel<impl>(vd,ker,cl_host); + + vVA = applyKernel_in(2.0*vVC + vVB ,vd,cl,ker) + vVC; + check_values_apply_kernel2<impl>(vd,ker,cl_host); + + vA = rsum(applyKernel_in(vVC * vVB + norm(vVB),vd,cl,ker)) + vC; + check_values_apply_kernel_reduce<impl>(vd,ker,cl_host); + + vVA = rsum(applyKernel_in(2.0*vVC + vVB ,vd,cl,ker)) + vVC; + check_values_apply_kernel2_reduce<impl>(vd,ker,cl_host); + + vA = applyKernel_in_gen(vVC * vVB + norm(vVB),vd,cl,ker) + vC; + check_values_apply_kernel<impl>(vd,ker,cl_host); + + vVA = applyKernel_in_gen(2.0*vVC + vVB ,vd,cl,ker) + vVC; + check_values_apply_kernel2<impl>(vd,ker,cl_host); + + vA = rsum(applyKernel_in_gen(vVC * vVB + norm(vVB),vd,cl,ker)) + vC; + check_values_apply_kernel_reduce<impl>(vd,ker,cl_host); + + vVA = rsum(applyKernel_in_gen(2.0*vVC + vVB ,vd,cl,ker)) + vVC; + check_values_apply_kernel2_reduce<impl>(vd,ker,cl_host); + + // Check it compile the code is the same + vVA = applyKernel_in_gen(vVC,vd,cl,ker) + vVC; + check_values_apply_kernel3<impl>(vd,ker,cl_host); + + vVA = applyKernel_in (vVC,vd,cl,ker) + vVC; + check_values_apply_kernel3<impl>(vd,ker,cl_host); + + Point<2,float> p = rsum(applyKernel_in_sim(vd,cl,ker)).get(); + check_values_apply_kernel3_reduce<impl>(vd,ker,cl_host,p); +} + +template<typename vector, + typename vA_type, + typename vC_type, + typename vVA_type, + typename vVB_type, + typename vVC_type> +void vector_dist_op_ap_ker_impl_sort(vector & vd, vA_type & vA, + vC_type & vC, + vVA_type & vVA, + vVB_type & vVB, + vVC_type & vVC, + unsigned int opt) +{ + // we apply an exponential kernel to calculate something + + auto cl_gpu = vd.getCellListGPU(0.05); + auto cl = cl_gpu.toKernel(); + auto cl_host = vd.template getCellListDev<comp_host>(0.05); + exp_kernel ker(0.2); + + vA = applyKernel_in_sort(vVC * vVB + norm(vVB),vd,cl,ker) + vC; + vd.template merge_sort<A>(cl_gpu); + check_values_apply_kernel<comp_dev>(vd,ker,cl_host); + + vVA = applyKernel_in_sort(2.0*vVC + vVB ,vd,cl,ker) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel2<comp_dev>(vd,ker,cl_host); + +/* vA = rsum(applyKernel_in_sort(vVC * vVB + norm(vVB),vd,cl,ker)) + vC; + vd.template merge_sort<A>(cl_gpu); + check_values_apply_kernel_reduce<comp_dev>(vd,ker,cl_host); + + vVA = rsum(applyKernel_in_sort(2.0*vVC + vVB ,vd,cl,ker)) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel2_reduce<comp_dev>(vd,ker,cl_host);*/ + + vA = applyKernel_in_gen_sort(vVC * vVB + norm(vVB),vd,cl,ker) + vC; + vd.template merge_sort<A>(cl_gpu); + check_values_apply_kernel<comp_dev>(vd,ker,cl_host); + + vVA = applyKernel_in_gen_sort(2.0*vVC + vVB ,vd,cl,ker) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel2<comp_dev>(vd,ker,cl_host); + +/* vA = rsum(applyKernel_in_gen_sort(vVC * vVB + norm(vVB),vd,cl,ker)) + vC; + vd.template merge_sort<A>(cl_gpu); + check_values_apply_kernel_reduce<comp_dev>(vd,ker,cl_host); + + vVA = rsum(applyKernel_in_gen_sort(2.0*vVC + vVB ,vd,cl,ker)) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel2_reduce<comp_dev>(vd,ker,cl_host);*/ + + // Check it compile the code is the same + vVA = applyKernel_in_gen_sort(vVC,vd,cl,ker) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel3<comp_dev>(vd,ker,cl_host); + + vVA = applyKernel_in_sort(vVC,vd,cl,ker) + vVC; + vd.template merge_sort<VA>(cl_gpu); + check_values_apply_kernel3<comp_dev>(vd,ker,cl_host); + +/* Point<2,float> p = rsum(applyKernel_in_sim_sort(vd,cl,ker)).get(); + check_values_apply_kernel3_reduce<comp_dev>(vd,ker,cl_host,p);*/ +} + +template<unsigned int impl> +struct check_all_apply_ker +{ + template<typename vector_type> static void check(vector_type & vd) + { + // fill vd with some value + fill_values<impl>(vd); + + vd.map(); + vd.template ghost_get<0,1,2,3,4,5,6>(); + + auto vA = getV<A>(vd); + auto vC = getV<C>(vd); + + auto vVA = getV<VA>(vd); + auto vVB = getV<VB>(vd); + auto vVC = getV<VC>(vd); + + vector_dist_op_ap_ker_impl<impl>(vd,vA,vC,vVA,vVB,vVC,NONE); + } +}; + + +template<> +struct check_all_apply_ker<comp_dev> +{ + template<typename vector_type> static void check(vector_type & vd) + { + auto vA = getV<A,comp_dev>(vd); + auto vC = getV<C,comp_dev>(vd); + + auto vVA = getV<VA,comp_dev>(vd); + auto vVB = getV<VB,comp_dev>(vd); + auto vVC = getV<VC,comp_dev>(vd); + + // fill vd with some value + fill_values<comp_dev>(vd); + + vd.map(RUN_ON_DEVICE); + vd.template ghost_get<0,1,2,3,4,5,6>(RUN_ON_DEVICE); + vd.template deviceToHostProp<0,1,2,3,4,5,6>(); + vd.deviceToHostPos(); + + vector_dist_op_ap_ker_impl<comp_dev>(vd,vA,vC,vVA,vVB,vVC,RUN_ON_DEVICE); + } +}; + + +struct check_all_apply_ker_sort +{ + template<typename vector_type> static void check(vector_type & vd) + { + auto vA = getV_sort<A>(vd); + auto vC = getV_sort<C>(vd); + + auto vVA = getV_sort<VA>(vd); + auto vVB = getV_sort<VB>(vd); + auto vVC = getV_sort<VC>(vd); + + // fill vd with some value + fill_values<comp_dev>(vd); + + vd.map(RUN_ON_DEVICE); + vd.template ghost_get<0,1,2,3,4,5,6>(RUN_ON_DEVICE); + vd.template deviceToHostProp<0,1,2,3,4,5,6>(); + vd.deviceToHostPos(); + + vector_dist_op_ap_ker_impl_sort(vd,vA,vC,vVA,vVB,vVC,RUN_ON_DEVICE); + } +}; + +#endif /* VECTOR_DIST_OPERATORS_TESTS_UTIL_HPP_ */ diff --git a/src/Operators/Vector/vector_dist_operators.hpp b/src/Operators/Vector/vector_dist_operators.hpp index 62f80d886524bfd7d0bedfa77ed05b54e95d3404..bbbb30fc893a9725b96fea003eb063a636d0574c 100644 --- a/src/Operators/Vector/vector_dist_operators.hpp +++ b/src/Operators/Vector/vector_dist_operators.hpp @@ -9,8 +9,9 @@ #define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_ #include "Vector/vector_dist.hpp" +#include "lib/pdata.hpp" +#include "cuda/vector_dist_operators_cuda.cuh" -#define PROP_POS (unsigned int)-1 #define PROP_CUSTOM (unsigned int)-2 #define VECT_SUM 1 @@ -28,6 +29,10 @@ #define VECT_APPLYKER_OUT_SIM 14 #define VECT_APPLYKER_REDUCE_SIM 15 +#define VECT_APPLYKER_IN_GEN_SORT 16 +#define VECT_APPLYKER_IN_SORT 17 +#define VECT_APPLYKER_IN_SIM_SORT 18 + #define VECT_NORM 56 #define VECT_NORM2 57 #define VECT_ABS 58 @@ -127,6 +132,86 @@ class vector_dist_expression_op }; +template<typename v1_type, typename v2_type> +struct vector_result +{ + typedef v1_type type; + + template<typename exp1, typename exp2> + static const type & getVector(const exp1 & o1, const exp2 & o2) + { + return o1.getVector(); + } + + template<typename exp1> + static const type & getVector(const exp1 & o1) + { + return o1.getVector(); + } +}; + + +template<typename v2_type> +struct vector_result<void,v2_type> +{ + typedef v2_type type; + + template<typename exp1, typename exp2> + static const type & getVector(const exp1 & o1, const exp2 & o2) + { + return o2.getVector(); + } + + template<typename exp2> + static const type & getVector(exp2 & o2) + { + return o2.getVector(); + } +}; + +template<typename NN1_type, typename NN2_type> +struct nn_type_result +{ + typedef NN1_type type; + + template<typename exp1, typename exp2> + static type * getNN(exp1 & o1, exp2 & o2) + { + return o1.getNN(); + } + + template<typename exp1> + static type * getNN(exp1 & o1) + { + return o1.getNN(); + } +}; + + +template<typename NN2_type> +struct nn_type_result<void,NN2_type> +{ + typedef NN2_type type; + + template<typename exp1, typename exp2> + static type * getNN(exp1 & o1, exp2 & o2) + { + return o2.getNN(); + } + + template<typename exp2> + static type * getNN(exp2 & o2) + { + return o2.getNN(); + } +}; + +template<bool s1, bool s2> +struct vector_is_sort_result +{ + typedef boost::mpl::bool_<s1 | s2> type; +}; + /*! \brief Sum operation * * \tparam exp1 expression1 @@ -144,11 +229,43 @@ class vector_dist_expression_op<exp1,exp2,VECT_SUM> public: + //! indicate if this vector is kernel type + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type; + //! constructor of the expression to sum two expression inline vector_dist_expression_op(const exp1 & o1, const exp2 & o2) :o1(o1),o2(o2) {} + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_type * getNN() const + { + return nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::getNN(o1,o2); + } + + /*! \brief Return the underlying vector + * + * \return the vector + * + */ + const vtype & getVector() const + { + return vector_result<typename exp1::vtype,typename exp2::vtype>::getVector(o1,o2); + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -173,6 +290,19 @@ public: return o1.value(key) + o2.value(key); } + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return return the result of the expression + * + */ + template<typename r_type=typename std::remove_reference<decltype(o1.value(0) + o2.value(0))>::type > + __device__ __host__ inline r_type value(const unsigned int & key) const + { + return o1.value(key) + o2.value(key); + } + }; /*! \brief Subtraction operation @@ -192,11 +322,42 @@ class vector_dist_expression_op<exp1,exp2,VECT_SUB> public: + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type; + //! Costruct a subtraction expression out of two expressions inline vector_dist_expression_op(const exp1 & o1, const exp2 & o2) :o1(o1),o2(o2) {} + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_type * getNN() const + { + return nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::getNN(o1,o2); + } + + /*! \brief Return the underlying vector + * + * \return the vector + * + */ + const vtype & getVector() + { + return vector_result<typename exp1::vtype,typename exp2::vtype>::getVector(o1,o2); + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -215,7 +376,21 @@ public: * \return the result of the expression * */ - template<typename r_type=typename std::remove_reference<decltype(o1.value(vect_dist_key_dx()) - o2.value(vect_dist_key_dx()))>::type > inline r_type value(const vect_dist_key_dx & key) const + template<typename r_type=typename std::remove_reference<decltype(o1.value(vect_dist_key_dx()) - o2.value(vect_dist_key_dx()))>::type > + inline r_type value(const vect_dist_key_dx & key) const + { + return o1.value(key) - o2.value(key); + } + + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result of the expression + * + */ + template<typename r_type=typename std::remove_reference<decltype(o1.value(vect_dist_key_dx()) - o2.value(vect_dist_key_dx()))>::type > + __device__ __host__ inline r_type value(const unsigned int & key) const { return o1.value(key) - o2.value(key); } @@ -238,11 +413,32 @@ class vector_dist_expression_op<exp1,exp2,VECT_MUL> public: + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type; + //! constructor from two expressions vector_dist_expression_op(const exp1 & o1, const exp2 & o2) :o1(o1),o2(o2) {} + /*! \brief Return the underlying vector + * + * \return the vector + * + */ + const vtype & getVector() + { + return vector_result<typename exp1::vtype,typename exp2::vtype>::getVector(o1,o2); + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -265,6 +461,19 @@ public: { return o1.value(key) * o2.value(key); } + + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result of the expression + * + */ + template<typename r_type=typename std::remove_reference<decltype(o1.value(vect_dist_key_dx()) * o2.value(vect_dist_key_dx()))>::type > + __device__ __host__ inline r_type value(const unsigned int & key) const + { + return o1.value(key) * o2.value(key); + } }; /*! \brief Division operation @@ -284,11 +493,42 @@ class vector_dist_expression_op<exp1,exp2,VECT_DIV> public: + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type; + //! constructor from two expressions vector_dist_expression_op(const exp1 & o1, const exp2 & o2) :o1(o1),o2(o2) {} + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_type * getNN() const + { + return nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::getNN(o1,o2); + } + + /*! \brief Return the underlying vector + * + * \return the vector + * + */ + const vtype & getVector() + { + return vector_result<typename exp1::vtype,typename exp2::vtype>::getVector(o1,o2); + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -311,85 +551,18 @@ public: { return o1.value(key) / o2.value(key); } -}; - -/*! \brief selector for position or properties left side expression - * - * \tparam vector type of the original vector - * - * \tparam prp property id - * - */ -template <typename vector, unsigned int prp> -struct pos_or_propL -{ - //! return the value (position or property) of the particle k in the vector v - static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(v.template getProp<prp>(k)) - { - return v.template getProp<prp>(k); - } -}; -/*! \brief selector for position or properties right side position - * - * \tparam vector type of the original vector - * - * \tparam prp property id - * - */ -template <typename vector, unsigned int prp> -struct pos_or_propR -{ - //! return the value (position or property) of the particle k in the vector v - static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(v.template getProp<prp>(k)) - { - return v.template getProp<prp>(k); - } -}; - -/*! \brief selector for position or properties left side - * - * \tparam vector type of the original vector - * - * \tparam prp property id - * - */ -template <typename vector> -struct pos_or_propL<vector,PROP_POS> -{ -#ifdef SE_CLASS3 - - //! return the value (position or property) of the particle k in the vector v - static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(getExprL(v.getPos(k).getReference())) - { - return getExprL(v.getPos(k).getReference()); - } - -#else - - //! return the value (position or property) of the particle k in the vector v - static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(getExprL(v.getPos(k))) - { - return getExprL(v.getPos(k)); - } - -#endif -}; - -/*! \brief selector for position or properties right side - * - * \tparam vector type of the original vector - * - * \tparam prp property id - * - */ -template <typename vector> -struct pos_or_propR<vector,PROP_POS> -{ - //! return the value (position or property) of the particle k in the vector v - static inline auto value(vector & v, const vect_dist_key_dx & k) -> decltype(getExprR(v.getPos(k))) + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result of the expression + * + */ + template<typename r_type=typename std::remove_reference<decltype(o1.value(vect_dist_key_dx()) / o2.value(vect_dist_key_dx()))>::type > + __device__ __host__ inline r_type value(const unsigned int & key) const { - return getExprR(v.getPos(k)); + return o1.value(key) / o2.value(key); } }; @@ -405,11 +578,42 @@ class vector_dist_expression_op<exp1,void,VECT_SUB_UNI> public: + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,void>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,false>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,void>::type NN_type; + //! constructor from an expresssion vector_dist_expression_op(const exp1 & o1) :o1(o1) {} + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_type * getNN() const + { + return nn_type_result<typename exp1::NN_type,void>::getNN(o1); + } + + /*! \brief Return the underlying vector + * + * \return the vector + * + */ + const vtype & getVector() + { + return vector_result<typename exp1::vtype,void>::getVector(o1); + } + //! initialize the expression tree inline void init() const { @@ -421,6 +625,57 @@ public: { return -(o1.value(key)); } + + //! return the result of the expression + template<typename r_type=typename std::remove_reference<decltype(-(o1.value(vect_dist_key_dx(0))))>::type > + __device__ __host__ inline r_type value(const unsigned int & key) const + { + return -(o1.value(key)); + } +}; + +/*! \brief Expression implementation computation selector + * + * + */ +template<int impl, bool vect_ker> +struct vector_dist_expression_comp_sel +{ + typedef boost::mpl::int_<impl> type; +}; + +template<> +struct vector_dist_expression_comp_sel<comp_host,true> +{ + typedef boost::mpl::int_<-1> type; +}; + +template<> +struct vector_dist_expression_comp_sel<comp_dev,false> +{ + typedef boost::mpl::int_<-1> type; +}; + +template<typename vector, bool is_ker = has_vector_kernel<vector>::type::value> +struct vector_expression_transform +{ + typedef vector& type; +}; + +template<typename vector> +struct vector_expression_transform<vector,true> +{ + typedef vector type; +}; + +template<typename vector> +struct v_mem_mutable +{ + vector v; + + v_mem_mutable(vector & v) + :v(v) + {} }; /*! \brief Main class that encapsulate a vector properties operand to be used for expressions construction @@ -433,21 +688,61 @@ template<unsigned int prp, typename vector> class vector_dist_expression { //! The vector - vector & v; + mutable v_mem_mutable<typename vector_expression_transform<vector>::type> v; + + vector_dist_ker_list<vector> * vdl; public: + typedef typename has_vector_kernel<vector>::type is_ker; + //! The type of the internal vector typedef vector vtype; + //! result for is sort + typedef boost::mpl::bool_<false> is_sort; + + //! NN_type + typedef void NN_type; + //! Property id of the point static const unsigned int prop = prp; + //! constructor for an external vector vector_dist_expression(vector & v) - :v(v) + :v(v),vdl(NULL) {} + //! constructor for an external vector + ~vector_dist_expression() + { + if (vdl != NULL) + {vdl->remove(v.v);} + } + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline void * getNN() const + { + return NULL; + } + + /*! \brief Return the vector on which is acting + * + * It return the vector used in getVExpr, to get this object + * + * \return the vector + * + */ + __device__ __host__ const vector & getVector() const + { + return v.v; + } + /*! \brief Return the vector on which is acting * * It return the vector used in getVExpr, to get this object @@ -455,9 +750,20 @@ public: * \return the vector * */ - vector & getVector() + __device__ __host__ vector & getVector() + { + return v.v; + } + + /*! \brief set vector_dist_ker_list + * + * \param vdkl vector_dist_ker_list + * + */ + void set_vector_dist_ker_list(vector_dist_ker_list<vector> & vdkl, bool is_sort) { - return v; + vdkl.add(v.v,is_sort); + vdl = &vdkl; } /*! \brief This function must be called before value @@ -475,9 +781,21 @@ public: * \return the result of the expression * */ - inline auto value(const vect_dist_key_dx & k) const -> decltype(pos_or_propR<vector,prp>::value(v,k)) + __host__ inline auto value(const vect_dist_key_dx & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) { - return pos_or_propR<vector,prp>::value(v,k); + return pos_or_propR<vector,prp>::value(v.v,k); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + __device__ inline auto value(const unsigned int & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) + { + return pos_or_propR<vector,prp>::value(v.v,k); } /*! \brief Fill the vector property with the evaluated expression @@ -489,20 +807,20 @@ public: */ template<unsigned int prp2> vector & operator=(const vector_dist_expression<prp2,vector> & v_exp) { - v_exp.init(); - - auto it = v.getDomainIterator(); - - while (it.isNext()) + if (has_vector_kernel<vector>::type::value == false) { - auto key = it.get(); - - pos_or_propL<vector,prp>::value(v,key) = v_exp.value(key); - - ++it; + vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_host, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp); + } + else + { + vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_dev, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp); } - return v; + return v.v; } /*! \brief Fill the vector property with the evaluated expression @@ -512,22 +830,27 @@ public: * \return itself * */ - template<typename exp1, typename exp2, unsigned int op> vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + template<typename exp1, typename exp2, unsigned int op> + vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) { - v_exp.init(); - - auto it = v.getDomainIterator(); - - while (it.isNext()) + if (has_vector_kernel<vector>::type::value == false) { - auto key = it.get(); - - pos_or_propL<vector,prp>::value(v,key) = v_exp.value(key); - - ++it; + vector_dist_op_compute_op<prp, + vector_dist_expression_op<exp1,exp2,op>::is_sort::value, + vector_dist_expression_comp_sel<comp_host, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp); + } + else + { + vector_dist_op_compute_op<prp, + vector_dist_expression_op<exp1,exp2,op>::is_sort::value, + vector_dist_expression_comp_sel<comp_dev, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp); } - return v; + return v.v; } /*! \brief Fill the vector property with the double @@ -539,18 +862,51 @@ public: */ vector & operator=(double d) { - auto it = v.getDomainIterator(); - - while (it.isNext()) + if (has_vector_kernel<vector>::type::value == false) + { + vector_dist_op_compute_op<prp, + false, + vector_dist_expression_comp_sel<comp_host, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_const(v.v,d); + } + else { - auto key = it.get(); + vector_dist_op_compute_op<prp, + false, + vector_dist_expression_comp_sel<comp_dev, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_const(v.v,d); + } - pos_or_propL<vector,prp>::value(v,key) = d; + return v.v; + } +}; - ++it; - } +template<typename vector, unsigned int impl> +struct switcher_get_v +{ + typedef vector type; + + static vector & get(vector & v) {return v;}; + + template<typename exp_type, typename vector_klist> + static void register_vector(exp_type & exp_v, vector_klist & v,bool is_sort) + { + } +}; + +template<typename vector> +struct switcher_get_v<vector,comp_dev> +{ + typedef decltype(std::declval<vector>().toKernel()) type; + + static type get(vector & v) {return v.toKernel();}; - return v; + template<typename exp_type, typename vector_klist> + static void register_vector(exp_type & exp_v, vector_klist & v,bool is_sort) + { + exp_v.set_vector_dist_ker_list(v.private_get_vector_dist_ker_list(),is_sort); } }; @@ -560,9 +916,31 @@ public: * \param v * */ -template <unsigned int prp,typename vector> inline vector_dist_expression<prp,vector > getV(vector & v) +template <unsigned int prp,unsigned int impl = comp_host, typename vector> +inline vector_dist_expression<prp,typename switcher_get_v<vector,impl>::type > getV(vector & v) { - vector_dist_expression<prp,vector > exp_v(v); + decltype(switcher_get_v<vector,impl>::get(v)) vk = switcher_get_v<vector,impl>::get(v); + vector_dist_expression<prp,typename switcher_get_v<vector,impl>::type > exp_v(vk); + + switcher_get_v<vector,impl>::register_vector(exp_v,v,false); + + return exp_v; +} + + +/*! \Create an expression from a vector property + * + * \tpatam prp property + * \param v + * + */ +template <unsigned int prp,typename vector> +inline vector_dist_expression<prp, typename switcher_get_v<vector,comp_dev>::type > getV_sort(vector & v) +{ + auto vk = v.toKernel_sorted(); + vector_dist_expression<prp,typename switcher_get_v<vector, comp_dev>::type > exp_v(vk); + + exp_v.set_vector_dist_ker_list(v.private_get_vector_dist_ker_list(),true); return exp_v; } @@ -580,6 +958,15 @@ class vector_dist_expression<prp,double> public: + typedef std::false_type is_ker; + + typedef void vtype; + + //! result for is sort + typedef boost::mpl::bool_<false> is_sort; + + typedef void NN_type; + //! constructor from a constant expression inline vector_dist_expression(const double & d) :d(d) @@ -606,6 +993,20 @@ public: { return d; } + + /*! \brief Evaluate the expression + * + * \param k ignored position in the vector + * + * It just return the value set in the constructor + * + * \return the constant value + * + */ + __host__ __device__ inline double value(const unsigned int & k) const + { + return d; + } }; /*! \brief Main class that encapsulate a float constant @@ -621,8 +1022,15 @@ class vector_dist_expression<prp,float> public: + typedef std::false_type is_ker; + //! type of object the structure return then evaluated - typedef float vtype; + typedef void vtype; + + //! result for is sort + typedef boost::mpl::bool_<false> is_sort; + + typedef void NN_type; //! constrictor from constant value inline vector_dist_expression(const float & d) @@ -650,6 +1058,20 @@ public: { return d; } + + /*! \brief Evaluate the expression + * + * \param k ignored position in the vector + * + * It just return the value set in the constructor + * + * \return the constant value set in the constructor + * + */ + __device__ __host__ inline float value(const unsigned int & k) const + { + return d; + } }; /* \brief sum two distributed vector expression @@ -1186,4 +1608,5 @@ operator/(const vector_dist_expression_op<exp1,exp2,op1> & va, const vector_dist #include "vector_dist_operators_extensions.hpp" #include "Operators/Vector/vector_dist_operator_assign.hpp" + #endif /* OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_ */ diff --git a/src/Operators/Vector/vector_dist_operators_apply_kernel.hpp b/src/Operators/Vector/vector_dist_operators_apply_kernel.hpp index 80ed67c6c76d5a9e673c25e51efa0ede2f8d6f3d..f9c322b10cbd3b19aa95311db65dfe5fbb1c792c 100644 --- a/src/Operators/Vector/vector_dist_operators_apply_kernel.hpp +++ b/src/Operators/Vector/vector_dist_operators_apply_kernel.hpp @@ -77,11 +77,34 @@ struct set_zero<Point<dim,T>> } }; +constexpr int NN_index_sort = 1; +constexpr int NN_index_unsort = 0; + +template<unsigned int impl> +struct NN_index +{ + template<typename NN_type> + __device__ __host__ static auto get(NN_type & NN) -> decltype(NN.get()) + { + return NN.get(); + } +}; + +template<> +struct NN_index<NN_index_sort> +{ + template<typename NN_type> + __device__ __host__ static auto get(NN_type & NN) -> decltype(NN.get_sort()) + { + return NN.get_sort(); + } +}; + /*! \brief Apply the kernel to particle differently that is a number or is an expression * * */ -template<typename T, typename vector, typename exp,typename NN_type, typename Kernel, typename rtype, bool is_exp=is_expression<T>::value> +template<unsigned int impl, typename T, typename vector, typename exp,typename NN_type, typename Kernel, typename rtype, bool is_exp=is_expression<T>::value> struct apply_kernel_is_number_or_expression { /*! \brief Apply the kernel expression to a particle @@ -95,7 +118,7 @@ struct apply_kernel_is_number_or_expression * \return the result of apply the kernel on the particle key * */ - inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const exp & v_exp, const vect_dist_key_dx & key, Kernel & lker) + __host__ __device__ inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const exp & v_exp, const vect_dist_key_dx & key, Kernel & lker) { // accumulator typename std::remove_reference<rtype>::type pse = set_zero<typename std::remove_reference<rtype>::type>::create(); @@ -110,7 +133,7 @@ struct apply_kernel_is_number_or_expression auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p)); while(NN.isNext()) { - auto nnp = NN.get(); + auto nnp = NN_index<impl>::get(NN); // Calculate contribution given by the kernel value at position p, // given by the Near particle, exclude itself @@ -141,7 +164,7 @@ struct apply_kernel_is_number_or_expression * * */ -template<typename vector, typename exp,typename NN_type, typename Kernel, typename rtype> +template<unsigned int impl, typename vector, typename exp,typename NN_type, typename Kernel, typename rtype> struct apply_kernel_is_number_or_expression_sim { /*! \brief Apply the kernel expression to a particle @@ -154,7 +177,7 @@ struct apply_kernel_is_number_or_expression_sim * \return the result of apply the kernel on the particle key * */ - inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const vect_dist_key_dx & key, Kernel & lker) + __device__ __host__ inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const vect_dist_key_dx & key, Kernel & lker) { // accumulator typename std::remove_reference<rtype>::type pse = set_zero<typename std::remove_reference<rtype>::type>::create(); @@ -166,7 +189,7 @@ struct apply_kernel_is_number_or_expression_sim auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p)); while(NN.isNext()) { - auto nnp = NN.get(); + auto nnp = NN_index<impl>::get(NN); // Calculate contribution given by the kernel value at position p, // given by the Near particle, exclude itself @@ -192,7 +215,7 @@ struct apply_kernel_is_number_or_expression_sim * * */ -template<typename T, typename vector, typename exp,typename NN_type, typename Kernel, typename rtype, bool is_exp=is_expression<T>::value> +template<unsigned int impl, typename T, typename vector, typename exp,typename NN_type, typename Kernel, typename rtype, bool is_exp=is_expression<T>::value> struct apply_kernel_is_number_or_expression_gen { /*! \brief Apply the kernel expression to a particle @@ -206,7 +229,7 @@ struct apply_kernel_is_number_or_expression_gen * \return the result of apply the kernel on the particle key * */ - inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const exp & v_exp, const vect_dist_key_dx & key, Kernel & lker) + __device__ __host__ inline static typename std::remove_reference<rtype>::type apply(const vector & vd, NN_type & cl, const exp & v_exp, const vect_dist_key_dx & key, Kernel & lker) { // accumulator typename std::remove_reference<rtype>::type pse = set_zero<typename std::remove_reference<rtype>::type>::create(); @@ -221,7 +244,7 @@ struct apply_kernel_is_number_or_expression_gen auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p)); while(NN.isNext()) { - auto nnp = NN.get(); + auto nnp = NN_index<impl>::get(NN); // Calculate contribution given by the kernel value at position p, // given by the Near particle, exclude itself @@ -244,6 +267,37 @@ struct apply_kernel_is_number_or_expression_gen } }; +template<typename T, bool mm> +struct mutable_or_not +{ + T obj; + + mutable_or_not(T & obj) + :obj(obj) + {} +}; + +template<typename T> +struct mutable_or_not<T,true> +{ + mutable T obj; + + mutable_or_not(T & obj) + :obj(obj) + {} +}; + +template<typename T, bool is_reference = std::is_reference<T>::value> +struct add_const_reference +{ + typedef const T type; +}; + +template<typename T> +struct add_const_reference<T,true> +{ + typedef typename std::remove_reference<T>::type const & type; +}; /*! \brief Apply kernel operation * @@ -261,23 +315,62 @@ class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN> //! Get the type that contain the particle positions typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + //! expression const exp1 o1; //! Cell-list - NN & cl; + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; //! kernel - Kernel & ker; + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; //! The vector that contain the particles - const vector_orig & vd; + typename add_const_reference<vector_orig>::type vd; //! Get the return type of applying the kernel to a particle typedef typename apply_kernel_rtype<decltype(o1.value(vect_dist_key_dx()))>::rtype rtype; public: + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<false> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -296,7 +389,120 @@ public: * \param vd vector containing the particle positions * */ - vector_dist_expression_op(const exp1 & o1, NN & cl, Kernel & ker, const vector_orig & vd) + vector_dist_expression_op(const exp1 & o1, NN_nr & cl, Kernel & ker, const vector_orig_nr & vd) + :o1(o1),cl(cl),ker(ker),vd(vd) + {} + + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result of the expression + * + */ + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + { + return apply_kernel_is_number_or_expression<NN_index_unsort, + decltype(o1.value(key)), + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,o1,key,ker.obj); + } +}; + +/*! \brief Apply kernel operation + * + * \tparam exp1 expression1 + * \tparam NN list + * + */ +template <typename exp1,typename vector_type> +class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN_SORT> +{ + //! Get the type of the Cell-list + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<0>>::type NN; + //! Get the type of the kernel + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<1>>::type Kernel; + //! Get the type that contain the particle positions + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + + //! expression + const exp1 o1; + + //! Cell-list + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; + + //! kernel + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; + + //! The vector that contain the particles + typename add_const_reference<vector_orig>::type vd; + + //! Get the return type of applying the kernel to a particle + typedef typename apply_kernel_rtype<decltype(o1.value(vect_dist_key_dx()))>::rtype rtype; + +public: + + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<true> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + + /*! \brief This function must be called before value + * + * it initialize the expression if needed + * + */ + inline void init() const + { + o1.init(); + } + + /*! \brief Constructor + * + * \param o1 expression + * \param cl Cell-list + * \param ker kernel function + * \param vd vector containing the particle positions + * + */ + vector_dist_expression_op(const exp1 & o1, NN_nr & cl, Kernel & ker, const vector_orig_nr & vd) :o1(o1),cl(cl),ker(ker),vd(vd) {} @@ -307,9 +513,15 @@ public: * \return the result of the expression * */ - inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const { - return apply_kernel_is_number_or_expression<decltype(o1.value(key)),vector_orig,exp1,NN,Kernel,rtype>::apply(vd,cl,o1,key,ker); + return apply_kernel_is_number_or_expression<NN_index_sort, + decltype(o1.value(key)), + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,o1,key,ker.obj); } }; @@ -329,19 +541,60 @@ class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN_SIM> //! Return the vector containing the position of the particles typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + //! Cell-list - NN & cl; + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; + //! kernel - Kernel & ker; + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; + //! vector with the particle positions - const vector_orig & vd; + const vector_orig vd; //! Get the return type of the expression - typedef typename apply_kernel_rtype<decltype(std::declval<Kernel>().value(Point<vector_orig::dims,typename vector_orig::stype>(0.0), Point<vector_orig::dims,typename vector_orig::stype>(0.0) ) )>::rtype rtype; + typedef typename apply_kernel_rtype<decltype(std::declval<Kernel_nr>().value(Point<vector_orig_nr::dims,typename vector_orig_nr::stype>(0.0), Point<vector_orig_nr::dims,typename vector_orig_nr::stype>(0.0) ) )>::rtype rtype; public: + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<false> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -369,12 +622,124 @@ public: * \return the result produced by the expression * */ - inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const { - return apply_kernel_is_number_or_expression_sim<vector_orig,exp1,NN,Kernel,rtype>::apply(vd,cl,key,ker); + return apply_kernel_is_number_or_expression_sim<NN_index_unsort, + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,key,ker.obj); } }; +/*! \brief Apply kernel operation + * + * \tparam exp1 expression1 + * \tparam NN list + * + */ +template <typename exp1,typename vector_type> +class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN_SIM_SORT> +{ + //! Return the type of the Cell-list + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<0>>::type NN; + //! Return the type of the kernel + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<1>>::type Kernel; + //! Return the vector containing the position of the particles + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + + //! Cell-list + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; + + //! kernel + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; + + //! vector with the particle positions + const vector_orig vd; + + //! Get the return type of the expression + typedef typename apply_kernel_rtype<decltype(std::declval<Kernel_nr>().value(Point<vector_orig_nr::dims,typename vector_orig_nr::stype>(0.0), Point<vector_orig_nr::dims,typename vector_orig_nr::stype>(0.0) ) )>::rtype rtype; + + +public: + + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<true> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + + /*! \brief This function must be called before value + * + * it initialize the expression if needed + * + */ + inline void init() const + { + } + + /*! \brief Constructor + * + * \param cl cell-list + * \param ker kernel + * \param vd Vector containing the particle positions + * + */ + vector_dist_expression_op(NN & cl, Kernel & ker, const vector_orig & vd) + :cl(cl),ker(ker),vd(vd) + {} + + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result produced by the expression + * + */ + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + { + return apply_kernel_is_number_or_expression_sim<NN_index_sort, + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,key,ker.obj); + } +}; /*! \brief Apply kernel operation * @@ -393,23 +758,62 @@ class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN_GEN> //! Get the type of the vector containing the set of particles typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + //! Expression const exp1 o1; - //! Cell list - NN & cl; + //! Cell-list + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; - //! Kernel - Kernel & ker; + //! kernel + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; //! Vector containing the particles - const vector_orig & vd; + const vector_orig vd; //! Return type of the expression typedef typename apply_kernel_rtype<decltype(o1.value(vect_dist_key_dx()))>::rtype rtype; public: + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<false> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + /*! \brief This function must be called before value * * it initialize the expression if needed @@ -439,9 +843,257 @@ public: * \return the result of the expression * */ - inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const { - return apply_kernel_is_number_or_expression_gen<decltype(o1.value(key)),vector_orig,exp1,NN,Kernel,rtype>::apply(vd,cl,o1,key,ker); + return apply_kernel_is_number_or_expression_gen<NN_index_unsort, + decltype(o1.value(key)), + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,o1,key,ker.obj); + } +}; + +/*! \brief Apply kernel operation + * + * \tparam exp1 expression1 + * \tparam NN list + * + */ +template <typename exp1,typename vector_type> +class vector_dist_expression_op<exp1,vector_type,VECT_APPLYKER_IN_GEN_SORT> +{ + //! Get the type of the Cell-list + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<0>>::type NN; + //! Get the type of the kernel + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<1>>::type Kernel; + + //! Get the type of the vector containing the set of particles + typedef typename boost::mpl::at<vector_type,boost::mpl::int_<2>>::type vector_orig; + + //! Return the type of the Cell-list + typedef typename std::remove_reference<NN>::type NN_nr; + //! Return the type of the kernel + typedef typename std::remove_reference<Kernel>::type Kernel_nr; + //! Return the vector containing the position of the particles + typedef typename std::remove_reference<vector_orig>::type vector_orig_nr; + + //! Expression + const exp1 o1; + + //! Cell-list + mutable_or_not<NN,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> cl; + + //! kernel + mutable_or_not<Kernel,is_gpu_celllist<NN>::type::value || is_gpu_ker_celllist<NN>::type::value> ker; + + //! Vector containing the particles + const vector_orig vd; + + //! Return type of the expression + typedef typename apply_kernel_rtype<decltype(o1.value(vect_dist_key_dx()))>::rtype rtype; + +public: + + //! indicate if this expression operate on kernel expression + typedef typename has_vector_kernel<vector_orig_nr>::type is_ker; + + //! vector type on which this expression work + typedef vector_orig_nr vtype; + + //! indicate that is apply_kernel is not a sorted version + typedef boost::mpl::bool_<true> is_sort; + + //! type of NN structure + typedef NN_nr NN_type; + + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_nr * getNN() const + { + return &cl.obj; + } + + /*! \brief get the vector + * + * \return the vector + * + */ + inline const vtype & getVector() const + { + return vd; + } + + /*! \brief This function must be called before value + * + * it initialize the expression if needed + * + */ + inline void init() const + { + o1.init(); + } + + /*! \brief Constructor + * + * \param o1 expression + * \param cl Cell-list + * \param ker Kernel + * \param vd vector containing the set of particles + * + */ + vector_dist_expression_op(const exp1 & o1, NN & cl, Kernel & ker, const vector_orig & vd) + :o1(o1),cl(cl),ker(ker),vd(vd) + {} + + /*! \brief Evaluate the expression + * + * \param key where to evaluate the expression + * + * \return the result of the expression + * + */ + __device__ __host__ inline typename std::remove_reference<rtype>::type value(const vect_dist_key_dx & key) const + { + return apply_kernel_is_number_or_expression_gen<NN_index_sort, + decltype(o1.value(key)), + vector_orig_nr, + exp1, + NN_nr, + Kernel_nr, + rtype>::apply(vd,cl.obj,o1,key,ker.obj); + } +}; + +template<typename cl_type, int impl=2*is_gpu_celllist<cl_type>::type::value + is_gpu_ker_celllist<cl_type>::type::value > +struct cl_selector_impl +{ + typedef cl_type& ctype; + + static ctype & get(cl_type & cl) + { + return cl; + } +}; + +template<typename cl_type> +struct cl_selector_impl<cl_type,2> +{ + typedef decltype(std::declval<cl_type>().toKernel()) ctype; + + static ctype get(cl_type & cl) + { + return cl.toKernel(); + } +}; + +template<typename cl_type> +struct cl_selector_impl<cl_type,1> +{ + typedef cl_type ctype; + + static ctype & get(cl_type & cl) + { + return cl; + } +}; + +template<typename kl_type, typename cl_type, int impl=2*is_gpu_celllist<cl_type>::type::value + is_gpu_ker_celllist<cl_type>::type::value > +struct kl_selector_impl +{ + typedef kl_type& ktype; +}; + +template<typename kl_type, typename cl_type> +struct kl_selector_impl<kl_type,cl_type,2> +{ + typedef kl_type ktype; +}; + +template<typename kl_type, typename cl_type> +struct kl_selector_impl<kl_type,cl_type,1> +{ + typedef kl_type ktype; +}; + +template<bool is_sorted, bool uwk, typename T> +struct unroll_with_to_kernel +{ + typedef T type; + + static T & get(T & v) + { + return v; + } +}; + +template<typename T> +struct unroll_with_to_kernel<false,true,T> +{ + typedef decltype(std::declval<T>().toKernel()) type; + + static type get(T & v) + { + return v.toKernel(); + } +}; + +template<typename T> +struct unroll_with_to_kernel<true,true,T> +{ + typedef decltype(std::declval<T>().toKernel_sorted()) type; + + static type get(T & v) + { + return v.toKernel_sorted(); + } +}; + +template<bool is_sorted, typename vl_type, typename cl_type, int impl=2*is_gpu_celllist<cl_type>::type::value + is_gpu_ker_celllist<cl_type>::type::value> +struct vl_selector_impl +{ + typedef vl_type& vtype; + + static vtype & get(vl_type & v) + { + return v; + } +}; + +template<typename vl_type, typename cl_type> +struct vl_selector_impl<false,vl_type,cl_type,2> +{ + typedef decltype(std::declval<vl_type>().toKernel()) vtype; + + static vtype & get(vl_type & v) + { + return v.toKernel(); + } +}; + +template<typename vl_type, typename cl_type> +struct vl_selector_impl<true,vl_type,cl_type,2> +{ + typedef decltype(std::declval<vl_type>().toKernel()) vtype; + + static vtype & get(vl_type & v) + { + return v.toKernel_sorted(); + } +}; + +template<bool is_sorted, typename vl_type, typename cl_type> +struct vl_selector_impl<is_sorted, vl_type,cl_type,1> +{ + typedef typename unroll_with_to_kernel<is_sorted,has_toKernel<vl_type>::type::value,vl_type>::type vtype; + + static vtype get(vl_type & v) + { + return unroll_with_to_kernel<is_sorted,has_toKernel<vl_type>::type::value,vl_type>::get(v); } }; @@ -458,15 +1110,57 @@ public: * */ template<typename exp1, typename exp2, unsigned int op1, typename NN, typename Kernel, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN> +inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN> applyKernel_in(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_type & vd, NN & cl, Kernel & ker) { - vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN> exp_sum(va,cl,ker,vd); + vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<false,vector_type,NN>::get(vd)); return exp_sum; } +///////////////////////////////////////////// Apply kernel operator //// +//////////////////////////////////////////////////////////////////////// + +/* \brief Apply kernel expression + * + * \param va vector expression one + * \param vb vector expression two + * + * \return an object that encapsulate the expression + * + */ +template<typename exp1, typename exp2, unsigned int op1, typename NN, typename Kernel, typename vector_type> +inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SORT> +applyKernel_in_sort(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_type & vd, NN & cl, Kernel & ker) +{ + vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SORT> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<true,vector_type,NN>::get(vd)); + + return exp_sum; +} + /* \brief Apply kernel expression * * \param va vector expression one @@ -476,14 +1170,52 @@ applyKernel_in(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_type * */ template<typename exp1, typename exp2, unsigned int op1, typename NN, typename Kernel, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_GEN> +inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN> applyKernel_in_gen(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_type & vd, NN & cl, Kernel & ker) { - vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_GEN> exp_sum(va,cl,ker,vd); + vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<false,vector_type,NN>::get(vd)); return exp_sum; } +/* \brief Apply kernel expression + * + * \param va vector expression one + * \param vb vector expression two + * + * \return an object that encapsulate the expression + * + */ +template<typename exp1, typename exp2, unsigned int op1, typename NN, typename Kernel, typename vector_type> +inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN_SORT> +applyKernel_in_gen_sort(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_type & vd, NN & cl, Kernel & ker) +{ + vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN_SORT> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<true,vector_type,NN>::get(vd)); + + return exp_sum; +} //////////////////////////////////////// For vector expression /////////////////////// @@ -495,15 +1227,53 @@ applyKernel_in_gen(const vector_dist_expression_op<exp1,exp2,op1> & va, vector_t * \return an object that encapsulate the expression * */ -template<unsigned int prp, typename NN, typename Kernel, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression<prp,vector_type>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN> -applyKernel_in(const vector_dist_expression<prp,vector_type> & va, vector_type & vd, NN & cl, Kernel & ker) +template<unsigned int prp, typename NN, typename Kernel, typename vector_type, typename vector_type_ker> +inline vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN> +applyKernel_in(const vector_dist_expression<prp,vector_type_ker> & va, vector_type & vd, NN & cl, Kernel & ker) { - vector_dist_expression_op<vector_dist_expression<prp,vector_type>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN> exp_sum(va,cl,ker,vd); + vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<false,vector_type,NN>::get(vd)); return exp_sum; } +/* \brief Apply kernel expression + * + * \param va vector expression one + * \param vb vector expression two + * + * \return an object that encapsulate the expression + * + */ +template<unsigned int prp, typename NN, typename Kernel, typename vector_type, typename vector_type_ker> +inline vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SORT> +applyKernel_in_sort(const vector_dist_expression<prp,vector_type_ker> & va, vector_type & vd, NN & cl, Kernel & ker) +{ + vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SORT> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<true,vector_type,NN>::get(vd)); + + return exp_sum; +} /* \brief Apply kernel expression * @@ -513,15 +1283,53 @@ applyKernel_in(const vector_dist_expression<prp,vector_type> & va, vector_type & * \return an object that encapsulate the expression * */ -template<unsigned int prp, typename NN, typename Kernel, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression<prp,vector_type>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_GEN> -applyKernel_in_gen(const vector_dist_expression<prp,vector_type> & va, vector_type & vd, NN & cl, Kernel & ker) +template<unsigned int prp, typename NN, typename Kernel, typename vector_type, typename vector_type_ker> +inline vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN> +applyKernel_in_gen(const vector_dist_expression<prp,vector_type_ker> & va, vector_type & vd, NN & cl, Kernel & ker) { - vector_dist_expression_op<vector_dist_expression<prp,vector_type>,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_GEN> exp_sum(va,cl,ker,vd); + vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<false,vector_type,NN>::get(vd)); return exp_sum; } +/* \brief Apply kernel expression + * + * \param va vector expression one + * \param vb vector expression two + * + * \return an object that encapsulate the expression + * + */ +template<unsigned int prp, typename NN, typename Kernel, typename vector_type, typename vector_type_ker> +inline vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN_SORT> +applyKernel_in_gen_sort(const vector_dist_expression<prp,vector_type_ker> & va, vector_type & vd, NN & cl, Kernel & ker) +{ + vector_dist_expression_op<vector_dist_expression<prp,vector_type_ker>, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_GEN_SORT> exp_sum(va, + cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<true,vector_type,NN>::get(vd)); + + return exp_sum; +} /* \brief Apply kernel expression * @@ -532,13 +1340,49 @@ applyKernel_in_gen(const vector_dist_expression<prp,vector_type> & va, vector_ty * */ template<typename NN, typename Kernel, typename vector_type> -inline vector_dist_expression_op<void,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_SIM> +inline vector_dist_expression_op<void, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SIM> applyKernel_in_sim(vector_type & vd, NN & cl, Kernel & ker) { - vector_dist_expression_op<void,boost::mpl::vector<NN,Kernel,vector_type>,VECT_APPLYKER_IN_SIM> exp_sum(cl,ker,vd); + vector_dist_expression_op<void, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<false,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SIM> exp_sum(cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<false,vector_type,NN>::get(vd)); return exp_sum; } +/* \brief Apply kernel expression + * + * \param va vector expression one + * \param vb vector expression two + * + * \return an object that encapsulate the expression + * + */ +template<typename NN, typename Kernel, typename vector_type> +inline vector_dist_expression_op<void, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SIM_SORT> +applyKernel_in_sim_sort(vector_type & vd, NN & cl, Kernel & ker) +{ + vector_dist_expression_op<void, + boost::mpl::vector<typename cl_selector_impl<NN>::ctype, + typename kl_selector_impl<Kernel,NN>::ktype, + typename vl_selector_impl<true,vector_type,NN>::vtype>, + VECT_APPLYKER_IN_SIM_SORT> exp_sum(cl_selector_impl<NN>::get(cl), + ker, + vl_selector_impl<true,vector_type,NN>::get(vd)); + + return exp_sum; +} #endif /* OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_APPLY_KERNEL_HPP_ */ diff --git a/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cpp b/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..add4d99cd2dae5eacb23d1c3b453b603f512aefc --- /dev/null +++ b/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cpp @@ -0,0 +1,38 @@ +/* + * vector_dist_operators_apply_kernel_unit_tests.hpp + * + * Created on: Jun 19, 2016 + * Author: i-bird + */ + +#include "config.h" +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include "Vector/vector_dist.hpp" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Operators/Vector/tests/vector_dist_operators_tests_util.hpp" + +BOOST_AUTO_TEST_SUITE( vector_dist_operators_apply_kernel_test_cpu ) + +BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_test ) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.05); + + vector_dist<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost); + + check_all_apply_ker<comp_host>::check(vd); +} + +BOOST_AUTO_TEST_SUITE_END() + + diff --git a/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu b/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu new file mode 100644 index 0000000000000000000000000000000000000000..a93b804bebeed681327d3111d8bb974c064846f4 --- /dev/null +++ b/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu @@ -0,0 +1,74 @@ +/* + * vector_dist_operators_apply_kernel_unit_tests.hpp + * + * Created on: Jun 19, 2016 + * Author: i-bird + */ + +#include "config.h" +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include "Vector/vector_dist.hpp" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Operators/Vector/tests/vector_dist_operators_tests_util.hpp" + +BOOST_AUTO_TEST_SUITE( vector_dist_operators_apply_kernel_test_gpu ) + +BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_host_gpu_test ) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.05); + + vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost); + + check_all_apply_ker<comp_host>::check(vd); +} + + +BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_gpu_test ) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.05); + + vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost); + + check_all_apply_ker<comp_dev>::check(vd); +} + +BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_gpu_sort_test ) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.05); + + vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost); + + check_all_apply_ker_sort::check(vd); +} + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.hpp b/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.hpp deleted file mode 100644 index f45954def93a966bb8d137972d55b91e079d8feb..0000000000000000000000000000000000000000 --- a/src/Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.hpp +++ /dev/null @@ -1,408 +0,0 @@ -/* - * vector_dist_operators_apply_kernel_unit_tests.hpp - * - * Created on: Jun 19, 2016 - * Author: i-bird - */ - -#ifndef OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_APPLY_KERNEL_UNIT_TESTS_HPP_ -#define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_APPLY_KERNEL_UNIT_TESTS_HPP_ - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel(vector & vd, Kernel & ker, NN_type & NN) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - // ### WIKI 13 ### - // - // Check that apply kernel work - // - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - float base2 = 0.0; - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - float base1 = vd.template getProp<A>(p); - - float prp_x = vd.template getProp<VC>(p) * vd.template getProp<VB>(p) + norm(vd.template getProp<VB>(p)); - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - - float prp_y = vd.template getProp<VC>(q) * vd.template getProp<VB>(q) + norm(vd.template getProp<VB>(q)); - - base2 += ker.value(xp,xq,prp_x,prp_y); - - ++Np; - } - - base2 += vd.template getProp<C>(p); - - ret &= base1 == base2; - - ++it2; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel_reduce(vector & vd, Kernel & ker, NN_type & NN) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - float base1 = 0.0; - float base2 = 0.0; - float base3 = 0.0; - - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - float ker_accu = 0.0; - float prp_x = vd.template getProp<VC>(p) * vd.template getProp<VB>(p) + norm(vd.template getProp<VB>(p)); - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - - float prp_y = vd.template getProp<VC>(q) * vd.template getProp<VB>(q) + norm(vd.template getProp<VB>(q)); - - ker_accu += ker.value(xp,xq,prp_x,prp_y); - - ++Np; - } - - base2 += ker_accu; - - ++it2; - } - - auto it3 = vd.getDomainIterator(); - while (it3.isNext()) - { - auto p = it3.get(); - - base1 = vd.template getProp<A>(p); - base3 = vd.template getProp<C>(p) + base2; - - ret &= base1 == base3; - - ++it3; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel2(vector & vd, Kernel & ker, NN_type & NN) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - // ### WIKI 13 ### - // - // Check that apply kernel work - // - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - Point<3,float> base2 = 0.0; - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - Point<3,float> base1 = vd.template getProp<VA>(p); - - Point<3,float> prp_x = 2.0 * vd.template getProp<VC>(p) + vd.template getProp<VB>(p); - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - - Point<3,float> prp_y = 2.0 * vd.template getProp<VC>(q) + vd.template getProp<VB>(q); - - base2 += ker.value(xp,xq,prp_x,prp_y); - - ++Np; - } - - base2 += vd.template getProp<VC>(p); - - ret &= base1 == base2; - - ++it2; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel3(vector & vd, Kernel & ker, NN_type & NN) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - // ### WIKI 13 ### - // - // Check that apply kernel work - // - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - Point<3,float> base2 = 0.0; - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - Point<3,float> base1 = vd.template getProp<VA>(p); - - Point<3,float> prp_x = vd.template getProp<VC>(p); - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - - Point<3,float> prp_y = vd.template getProp<VC>(q); - - base2 += ker.value(xp,xq,prp_x,prp_y); - - ++Np; - } - - base2 += vd.template getProp<VC>(p); - - ret &= base1 == base2; - - ++it2; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel2_reduce(vector & vd, Kernel & ker, NN_type & NN) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - Point<3,float> base1 = 0.0; - Point<3,float> base2 = 0.0; - Point<3,float> base3 = 0.0; - - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - Point<3,float> ker_accu = 0.0; - Point<3,float> prp_x = 2.0f*vd.template getProp<VC>(p) + vd.template getProp<VB>(p); - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - Point<3,float> prp_y = 2.0f*vd.template getProp<VC>(q) + vd.template getProp<VB>(q); - - ker_accu += ker.value(xp,xq,prp_x,prp_y); - - ++Np; - } - - base2 += ker_accu; - - ++it2; - } - - auto it3 = vd.getDomainIterator(); - while (it3.isNext()) - { - auto p = it3.get(); - - base1 = vd.template getProp<VA>(p); - base3 = vd.template getProp<VC>(p) + base2; - - ret &= base1 == base3; - - ++it3; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector,typename Kernel, typename NN_type> bool check_values_apply_kernel3_reduce(vector & vd, Kernel & ker, NN_type & NN, const Point<2,float> & p) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - Point<2,float> base2 = 0.0; - - auto it2 = vd.getDomainIterator(); - while (it2.isNext()) - { - auto p = it2.get(); - - Point<3,float> xp = vd.getPos(p); - - Point<2,float> ker_accu = 0.0; - - // For each neighborhood particle - auto Np = NN.template getNNIterator<NO_CHECK>(NN.getCell(xp)); - - while (Np.isNext()) - { - // Neighborhood particle q - auto q = Np.get(); - - if (q == p.getKey()) {++Np; continue;}; - - // position q - Point<3,float> xq = vd.getPos(q); - - ker_accu += ker.value(xp,xq); - - ++Np; - } - - base2 += ker_accu; - - ++it2; - } - - BOOST_REQUIRE_EQUAL(p.get(0),base2.get(0)); - BOOST_REQUIRE_EQUAL(p.get(1),base2.get(1)); - - return ret; -} - -BOOST_AUTO_TEST_CASE( vector_dist_operators_apply_kernel_test ) -{ - if (create_vcluster().getProcessingUnits() > 3) - return; - - Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); - - // Boundary conditions - size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; - - // ghost - Ghost<3,float> ghost(0.05); - - vector_dist<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vd(512,box,bc,ghost); - - auto vA = getV<A>(vd); - auto vC = getV<C>(vd); - - auto vVA = getV<VA>(vd); - auto vVB = getV<VB>(vd); - auto vVC = getV<VC>(vd); - - // fill vd with some value - fill_values(vd); - - vd.map(); - vd.ghost_get<0,1,2,3,4,5,6>(); - - // we apply an exponential kernel to calculate something - - auto cl = vd.getCellList(0.05); - exp_kernel ker(0.2); - - vA = applyKernel_in(vVC * vVB + norm(vVB),vd,cl,ker) + vC; - check_values_apply_kernel(vd,ker,cl); - - vVA = applyKernel_in(2.0*vVC + vVB ,vd,cl,ker) + vVC; - check_values_apply_kernel2(vd,ker,cl); - - vA = rsum(applyKernel_in(vVC * vVB + norm(vVB),vd,cl,ker),vd) + vC; - check_values_apply_kernel_reduce(vd,ker,cl); - - vVA = rsum(applyKernel_in(2.0*vVC + vVB ,vd,cl,ker),vd) + vVC; - check_values_apply_kernel2_reduce(vd,ker,cl); - - vA = applyKernel_in_gen(vVC * vVB + norm(vVB),vd,cl,ker) + vC; - check_values_apply_kernel(vd,ker,cl); - - vVA = applyKernel_in_gen(2.0*vVC + vVB ,vd,cl,ker) + vVC; - check_values_apply_kernel2(vd,ker,cl); - - vA = rsum(applyKernel_in_gen(vVC * vVB + norm(vVB),vd,cl,ker),vd) + vC; - check_values_apply_kernel_reduce(vd,ker,cl); - - vVA = rsum(applyKernel_in_gen(2.0*vVC + vVB ,vd,cl,ker),vd) + vVC; - check_values_apply_kernel2_reduce(vd,ker,cl); - - // Check it compile the code is the same - vVA = applyKernel_in_gen(vVC,vd,cl,ker) + vVC; - check_values_apply_kernel3(vd,ker,cl); - - vVA = applyKernel_in (vVC,vd,cl,ker) + vVC; - check_values_apply_kernel3(vd,ker,cl); - - // just check it compile - Point<2,float> p = rsum(applyKernel_in_sim(vd,cl,ker),vd).get(); - check_values_apply_kernel3_reduce(vd,ker,cl,p); -} - - -#endif /* OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_APPLY_KERNEL_UNIT_TESTS_HPP_ */ diff --git a/src/Operators/Vector/vector_dist_operators_extensions.hpp b/src/Operators/Vector/vector_dist_operators_extensions.hpp index adf208df586d0d55d7ffc828dfea92ff4d6fb324..de12ba21eddbba66909a8c674bae3cfe8b63bfcb 100644 --- a/src/Operators/Vector/vector_dist_operators_extensions.hpp +++ b/src/Operators/Vector/vector_dist_operators_extensions.hpp @@ -15,7 +15,8 @@ * \param v * */ -template <unsigned int dim, typename T> inline vector_dist_expression<16384,Point<dim,T> > getVExpr(Point<dim,T> & v) +template <unsigned int dim, typename T> +inline vector_dist_expression<16384,Point<dim,T> > getVExpr(Point<dim,T> & v) { vector_dist_expression<(unsigned int)16384,Point<dim,T>> exp_v(v); @@ -36,6 +37,14 @@ class vector_dist_expression<16384,point> public: + typedef void vtype; + + //! result for is sort + typedef boost::mpl::bool_<false> is_sort; + + //! NN_type + typedef void NN_type; + //! vector expression from a constant point vector_dist_expression(point p) :p(p) diff --git a/src/Operators/Vector/vector_dist_operators_functions.hpp b/src/Operators/Vector/vector_dist_operators_functions.hpp index 6f8fe79e34d3d005a0bef52d129e2ecfb87d504b..64bf9c90c37e2bdb1fc41f7988f94f885fd04198 100644 --- a/src/Operators/Vector/vector_dist_operators_functions.hpp +++ b/src/Operators/Vector/vector_dist_operators_functions.hpp @@ -8,6 +8,11 @@ #ifndef OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_FUNCTIONS_HPP_ #define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_FUNCTIONS_HPP_ +#ifdef __NVCC__ +#include "util/cuda/moderngpu/kernel_reduce.hxx" +#include "cuda/vector_dist_operators_cuda.cuh" +#endif + /*! A macro to define single value function specialization that apply the function component-wise * * \param fun function name @@ -23,20 +28,51 @@ class vector_dist_expression_op<exp1,void,OP_ID>\ const exp1 o1;\ \ public:\ +\ + typedef typename exp1::is_ker is_ker;\ +\ + typedef typename vector_result<typename exp1::vtype,void>::type vtype;\ +\ + typedef typename vector_is_sort_result<exp1::is_sort::value,false>::type is_sort;\ +\ + typedef typename nn_type_result<typename exp1::NN_type,void>::type NN_type;\ \ vector_dist_expression_op(const exp1 & o1)\ :o1(o1)\ {}\ \ + inline NN_type * getNN()\ + {\ + return nn_type_result<typename exp1::NN_type,void>::getNN(o1);\ + }\ +\ + const vtype & getVector()\ + {\ + return vector_result<typename exp1::vtype,void>::getVector(o1);\ + }\ +\ + inline const exp1 & getExpr() const\ + {\ + return o1;\ + }\ + \ inline void init() const\ {\ o1.init();\ }\ \ - template<typename r_type=typename std::remove_reference<decltype(fun_base(o1.value(vect_dist_key_dx(0))))>::type > inline r_type value(const vect_dist_key_dx & key) const\ + template<typename r_type=typename std::remove_reference<decltype(fun_base(o1.value(vect_dist_key_dx(0))))>::type > \ + inline r_type value(const vect_dist_key_dx & key) const\ + {\ + return fun_base(o1.value(key));\ + }\ +\ + template<typename r_type=typename std::remove_reference<decltype(fun_base(o1.value(vect_dist_key_dx(0))))>::type > \ + __device__ __host__ inline r_type value(const unsigned int & key) const\ {\ return fun_base(o1.value(key));\ }\ +\ };\ \ \ @@ -112,6 +148,25 @@ class vector_dist_expression_op<exp1,exp2,OP_ID>\ const exp2 o2;\ \ public:\ +\ + typedef std::integral_constant<bool,exp1::is_ker::value || exp1::is_ker::value> is_ker;\ +\ + typedef typename vector_result<typename exp1::vtype,typename exp2::vtype>::type vtype;\ +\ + typedef typename vector_is_sort_result<exp1::is_sort::value,exp2::is_sort::value>::type is_sort;\ +\ + typedef typename nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::type NN_type;\ +\ + inline NN_type * getNN()\ + {\ + return nn_type_result<typename exp1::NN_type,typename exp2::NN_type>::getNN(o1,o2);\ + }\ +\ + vtype & getVector()\ + {\ + return vector_result<typename exp1::vtype,typename exp2::vtype>::getVector(o1,o2);\ + }\ +\ \ vector_dist_expression_op(const exp1 & o1, const exp2 & o2)\ :o1(o1),o2(o2)\ @@ -206,6 +261,77 @@ CREATE_VDIST_ARG2_FUNC(pmul,pmul,VECT_PMUL) ////////// Special function reduce ///////////////////////// +template<typename val_type, bool is_sort, bool is_scalar = is_Point<val_type>::type::value> +struct point_scalar_process +{ + typedef aggregate<val_type> type; + + template<typename vector_type, typename expression> + static void process(val_type & val, vector_type & ve, expression & o1) + { +#ifdef __NVCC__ + + auto vek = ve.toKernel(); + vector_dist_op_compute_op<0,is_sort,comp_dev>::compute_expr_v(vek,o1); + + exp_tmp2[0].resize(sizeof(val_type)); + + auto & v_cl = create_vcluster<CudaMemory>(); + + mgpu::reduce((val_type *)ve.template getDeviceBuffer<0>(), ve.size(), (val_type *)(exp_tmp2[0].getDevicePointer()), mgpu::plus_t<val_type>(), v_cl.getmgpuContext()); + + exp_tmp2[0].deviceToHost(); + + val = *(val_type *)(exp_tmp2[0].getPointer()); +#else + std::cout << __FILE__ << ":" << __LINE__ << " error: to make expression work on GPU the file must be compiled on GPU" << std::endl; +#endif + } +}; + +template<typename val_type, bool is_sort> +struct point_scalar_process<val_type,is_sort,true> +{ + typedef val_type type; + + template<typename vector_type, typename expression> + static void process(val_type & val, vector_type & ve, expression & o1) + { +#ifdef __NVCC__ + +// auto ite = ve.getGPUIterator(256); + +// compute_expr_ker_vv<0,val_type::dims><<<ite.wthr,ite.thr>>>(ve.toKernel(),o1); + + auto vek = ve.toKernel(); + vector_dist_op_compute_op<0,is_sort,comp_dev>::template compute_expr_vv<val_type::dims>(vek,o1); + + exp_tmp2[0].resize(sizeof(val_type)); + + size_t offset = 0; + + auto & v_cl = create_vcluster<CudaMemory>(); + + for (size_t i = 0 ; i < val_type::dims ; i++) + { + mgpu::reduce(&((typename val_type::coord_type *)ve.template getDeviceBuffer<0>())[offset], + ve.size(), + (typename val_type::coord_type *)(exp_tmp2[0].getDevicePointer()), + mgpu::plus_t<typename val_type::coord_type>(), + v_cl.getmgpuContext()); + + exp_tmp2[0].deviceToHost(); + + val.get(i) = *(typename val_type::coord_type *)exp_tmp2[0].getPointer(); + + offset += ve.capacity(); + } + +#else + std::cout << __FILE__ << ":" << __LINE__ << " error: to make expression work on GPU the file must be compiled on GPU" << std::endl; +#endif + } +}; /*! \brief expression that encapsulate a vector reduction expression * @@ -213,8 +339,8 @@ CREATE_VDIST_ARG2_FUNC(pmul,pmul,VECT_PMUL) * \tparam vector_type type of vector on which the expression is acting * */ -template <typename exp1, typename vector_type> -class vector_dist_expression_op<exp1,vector_type,VECT_SUM_REDUCE> +template <typename exp1> +class vector_dist_expression_op<exp1,void,VECT_SUM_REDUCE> { //! expression on which apply the reduction @@ -226,36 +352,87 @@ class vector_dist_expression_op<exp1,vector_type,VECT_SUM_REDUCE> //! return type of the calculated value (without reference) mutable typename std::remove_reference<rtype>::type val; - //! vector on which we apply the reduction expression - const vector_type & vd; - public: + //! Indicate if it is an in kernel expression + typedef typename exp1::is_ker is_ker; + + //! return the vector type on which this expression operate + typedef typename vector_result<typename exp1::vtype,void>::type vtype; + + //! result for is sort + typedef typename vector_is_sort_result<exp1::is_sort::value,false>::type is_sort; + + //! NN_type + typedef typename nn_type_result<typename exp1::NN_type,void>::type NN_type; + //! constructor from an epxression exp1 and a vector vd - vector_dist_expression_op(const exp1 & o1, const vector_type & vd) - :o1(o1),val(0),vd(vd) + vector_dist_expression_op(const exp1 & o1) + :o1(o1),val(0) {} //! sum reduction require initialization where we calculate the reduction // this produce a cache for the calculated value inline void init() const { - o1.init(); + if (exp1::is_ker::value == true) + { - val = 0.0; +#ifdef __NVCC__ + typedef decltype(val) val_type; - auto it = vd.getDomainIterator(); + // we have to do it on GPU - while (it.isNext()) + openfpm::vector<typename point_scalar_process<val_type,is_sort::value>::type, + CudaMemory, + typename memory_traits_inte<typename point_scalar_process<val_type,is_sort::value>::type>::type, + memory_traits_inte, + openfpm::grow_policy_identity> ve; + + auto & orig_v = o1.getVector(); + + if (exp_tmp.ref() == 0) + {exp_tmp.incRef();} + + ve.setMemory(exp_tmp); + ve.resize(orig_v.size_local()); + + point_scalar_process<val_type,is_sort::value>::process(val,ve,o1); +#else + std::cout << __FILE__ << ":" << __LINE__ << " error, to use expression on GPU you must compile with nvcc compiler " << std::endl; +#endif + } + else { - auto key = it.get(); + const auto & orig_v = o1.getVector(); - val += o1.value(key); + o1.init(); - ++it; + val = 0.0; + + auto it = orig_v.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + val += o1.value(key); + + ++it; + } } } + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline NN_type * getNN() const + { + return nn_type_result<typename exp1::NN_type,void>::getNN(o1); + } + //! it return the result of the expression inline typename std::remove_reference<rtype>::type get() { @@ -271,21 +448,21 @@ public: }; //! Reduce function (it generate an expression) -template<typename exp1, typename exp2_, unsigned int op1, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2_,op1>,vector_type,VECT_SUM_REDUCE> -rsum(const vector_dist_expression_op<exp1,exp2_,op1> & va, const vector_type & vd) +template<typename exp1, typename exp2_, unsigned int op1> +inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2_,op1>,void,VECT_SUM_REDUCE> +rsum(const vector_dist_expression_op<exp1,exp2_,op1> & va) { - vector_dist_expression_op<vector_dist_expression_op<exp1,exp2_,op1>,vector_type,VECT_SUM_REDUCE> exp_sum(va,vd); + vector_dist_expression_op<vector_dist_expression_op<exp1,exp2_,op1>,void,VECT_SUM_REDUCE> exp_sum(va); return exp_sum; } //! Reduce function (It generate an expression) -template<unsigned int prp1, typename v1, typename vector_type> -inline vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_type,VECT_SUM_REDUCE> -rsum(const vector_dist_expression<prp1,v1> & va, const vector_type & vd) +template<unsigned int prp1, typename v1> +inline vector_dist_expression_op<vector_dist_expression<prp1,v1>,void,VECT_SUM_REDUCE> +rsum(const vector_dist_expression<prp1,v1> & va) { - vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_type,VECT_SUM_REDUCE> exp_sum(va,vd); + vector_dist_expression_op<vector_dist_expression<prp1,v1>,void,VECT_SUM_REDUCE> exp_sum(va); return exp_sum; } diff --git a/src/Operators/Vector/vector_dist_operators_unit_tests.cpp b/src/Operators/Vector/vector_dist_operators_unit_tests.cpp index 4215733d60b81ecf1193e5c4ac98af05e8f7dfa6..ce975c9be4ce5fd57fd05bd3b07048336db586b7 100644 --- a/src/Operators/Vector/vector_dist_operators_unit_tests.cpp +++ b/src/Operators/Vector/vector_dist_operators_unit_tests.cpp @@ -11,761 +11,13 @@ #define BOOST_TEST_DYN_LINK #include <boost/test/unit_test.hpp> -#include "Operators/Vector/vector_dist_operators.hpp" +#include "tests/vector_dist_operators_tests_util.hpp" -constexpr int A = 0; -constexpr int B = 1; -constexpr int C = 2; - -constexpr int VA = 3; -constexpr int VB = 4; -constexpr int VC = 5; - -constexpr int TA = 6; - -//////////////////// Here we define all the function to checl the operators - -template <unsigned int prp, typename vector> bool check_values(vector & v,float a) -{ - bool ret = true; - auto it = v.getDomainIterator(); - - while (it.isNext()) - { - ret &= v.template getProp<prp>(it.get()) == a; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector> bool check_values_complex_expr(vector & vd) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - float base1 = vd.template getProp<B>(key) + 2.0 + vd.template getProp<B>(key) - 2.0*vd.template getProp<C>(key) / 5.0; - float base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_pos_sum(vector & vd, const rtype & p) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - Point<vector::dims,typename vector::stype> xp = vd.getPos(key); - - rtype base1 = rtype(xp) + p; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_pos_sub(vector & vd, const rtype & p) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - Point<vector::dims,typename vector::stype> xp = vd.getPos(key); - - rtype base1 = rtype(xp) - p; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_pos_sub_minus(vector & vd, const rtype & p) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - Point<vector::dims,typename vector::stype> xp = vd.getPos(key); - - rtype base1 = -(rtype(xp) - p); - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_point_sub(vector & vd, const rtype & p) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = -vd.template getProp<B>(key); - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sum(vector & vd, double d) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd.template getProp<B>(key) + d; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sum(vector & vd1, vector & vd2) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<B>(key) + vd2.template getProp<C>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sum_3(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<B>(key) + vd1.template getProp<C>(key) + vd1.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sum_4(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) + (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); - rtype base2 = vd1.template getProp<A>(key); - - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub(vector & vd, double d) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd.template getProp<B>(key) - d; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub(double d, vector & vd) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = d - vd.template getProp<B>(key); - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub(vector & vd1, vector & vd2) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<C>(key) - vd2.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub_31(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<B>(key) - (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub_32(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) - vd1.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_sub_4(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) - (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_mul(vector & vd, double d) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd.template getProp<B>(key) * d; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_mul(vector & vd1, vector & vd2) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<C>(key) * vd2.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_mul_3(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<B>(key) * (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_mul_4(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) * (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div(vector & vd, double d) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd.template getProp<B>(key) / d; - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div(double d, vector & vd) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = d / vd.template getProp<B>(key); - rtype base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div(vector & vd1, vector & vd2) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<C>(key) / vd2.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div_31(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = vd1.template getProp<B>(key) / (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div_32(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<C>(key) + vd1.template getProp<B>(key)) / vd1.template getProp<B>(key); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - - -template <typename rtype, typename vector, unsigned int A, unsigned int B, unsigned int C> bool check_values_div_4(vector & vd1) -{ - bool ret = true; - auto it = vd1.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - rtype base1 = (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)) / (vd1.template getProp<B>(key) + vd1.template getProp<C>(key)); - rtype base2 = vd1.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector> bool check_values_scal_norm_dist(vector & vd) -{ - bool ret = true; - auto it = vd.getDomainIterator(); - - while (it.isNext()) - { - auto key = it.get(); - - float base1 = vd.template getProp<VB>(key) * vd.template getProp<VC>(key) + norm(vd.template getProp<VC>(key) + vd.template getProp<VB>(key)) + distance(vd.template getProp<VC>(key),vd.template getProp<VB>(key)); - float base2 = vd.template getProp<A>(key); - - ret &= base1 == base2; - - ++it; - } - - BOOST_REQUIRE_EQUAL(ret,true); - - return ret; -} - -template <typename vector> void fill_values(vector & v) -{ - auto it = v.getDomainIterator(); - - while (it.isNext()) - { - auto p = it.get(); - - v.getPos(p)[0] = (float)rand() / RAND_MAX; - v.getPos(p)[1] = (float)rand() / RAND_MAX; - v.getPos(p)[2] = (float)rand() / RAND_MAX; - - v.template getProp<A>(p) = fabs(sin(p.getKey()+1.0)); - v.template getProp<B>(p) = fabs(sin(2.0*p.getKey()+3.0)); - v.template getProp<C>(p) = fabs(sin(3.0*p.getKey()+18.0)); - - for (size_t k = 0 ; k < 3 ; k++) - { - v.template getProp<VA>(p)[k] = fabs(sin(p.getKey()+1.0+k)); - v.template getProp<VB>(p)[k] = fabs(sin(2.0*p.getKey()+1.0+3.0)); - v.template getProp<VC>(p)[k] = fabs(sin(3.0*p.getKey()+1.0+k)); - } - - ++it; - } -} - - -typedef vector_dist<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>,float>> vector_type; - -//! Exponential kernel -struct exp_kernel -{ - //! variance of the exponential kernel - float var; - - //! Exponential kernel giving variance - exp_kernel(float var) - :var(var) - {} - - /*! \brief Result of the exponential kernel - * - * \param p position of the particle p - * \param q position of the particle q - * \param pA property value at p - * \param pB property value at q - * - * \return the result - * - */ - inline float value(const Point<3,float> & p, const Point<3,float> & q,float pA,float pB) - { - float dist = norm(p-q); - - return (pA + pB) * exp(dist * dist / var); - } - - /*! \brief Result of the exponential kernel - * - * \param p position of the particle p - * \param q position of the particle q - * \param pA property value at p - * \param pB property value at q - * - * \return the result - * - */ - inline Point<3,float> value(const Point<3,float> & p, const Point<3,float> & q,const Point<3,float> & pA, const Point<3,float> & pB) - { - float dist = norm(p-q); - - return (pA + pB) * exp(dist * dist / var); - } - - /*! \brief Result of the exponential kernel - * - * \param p position of the particle p - * \param q position of the particle q - * \param pA property value at p - * \param pB property value at q - * \param vd1 original vector - * - * \return the result - * - */ - inline float value(size_t p, size_t q, float pA, float pB, const vector_type & vd1) - { - Point<3,float> pp = vd1.getPos(p); - Point<3,float> pq = vd1.getPos(q); - - float dist = norm(pp-pq); - - return (pA + pB) * exp(dist * dist / var); - } - - /*! \brief Result of the exponential kernel - * - * \param p position of the particle p - * \param q position of the particle q - * \param pA property value at p - * \param pB property value at q - * \param vd1 original vector - * - * \return the result - * - */ - inline Point<3,float> value(size_t p, size_t q, const Point<3,float> & pA, const Point<3,float> & pB , const vector_type & vd1) - { - Point<3,float> pp = vd1.getPos(p); - Point<3,float> pq = vd1.getPos(q); - - float dist = norm(pp-pq); - - return (pA + pB) * exp(dist * dist / var); - } - - /*! \brief Result of the exponential kernel - * - * \param p position of the particle p - * \param q position of the particle q - * - * \return the result - * - */ - inline Point<2,float> value(const Point<3,float> & p, const Point<3,float> & q) - { - float dist = norm(p-q); - - return exp(dist * dist / var); - } -}; //////////////////////////////////////////////////////////////////////////// BOOST_AUTO_TEST_SUITE( vector_dist_operators_test ) - BOOST_AUTO_TEST_CASE( vector_dist_operators_test ) { if (create_vcluster().getProcessingUnits() > 3) @@ -784,222 +36,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_operators_test ) vector_dist<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>>> vd(100,box,bc,ghost); - auto vA = getV<A>(vd); - auto vB = getV<B>(vd); - auto vC = getV<C>(vd); - - auto vVA = getV<VA>(vd); - auto vVB = getV<VB>(vd); - auto vVC = getV<VC>(vd); - - auto vPOS = getV<PROP_POS>(vd); - - vA = 1.0; - vB = 2.0f; - vC = 3.0; - - check_values<A>(vd,1.0); - check_values<B>(vd,2.0); - check_values<C>(vd,3.0); - - vA = vB; - check_values<A>(vd,2.0); - - fill_values(vd); - - vA = vB + 2.0 + vB - 2.0*vC / 5.0; - check_values_complex_expr(vd); - - // Various combination of 2 operator - - vA = vB + 2.0; - check_values_sum<float,vtype,A,B,C>(vd,2.0); - vA = 2.0 + vB; - check_values_sum<float,vtype,A,B,C>(vd,2.0); - vA = vC + vB; - check_values_sum<float,vtype,A,B,C>(vd,vd); - - vA = vB - 2.0; - check_values_sub<float,vtype,A,B,C>(vd,2.0); - vA = 2.0 - vB; - check_values_sub<float,vtype,A,B,C>(2.0,vd); - vA = vC - vB; - check_values_sub<float,vtype,A,B,C>(vd,vd); - - vA = vB * 2.0; - check_values_mul<float,vtype,A,B,C>(vd,2.0); - vA = 2.0 * vB; - check_values_mul<float,vtype,A,B,C>(vd,2.0); - vA = vC * vB; - check_values_mul<float,vtype,A,B,C>(vd,vd); - - vA = vB / 2.0; - check_values_div<float,vtype,A,B,C>(vd,2.0); - vA = 2.0 / vB; - check_values_div<float,vtype,A,B,C>(2.0,vd); - vA = vC / vB; - check_values_div<float,vtype,A,B,C>(vd,vd); - - // Variuos combination 3 operator - - vA = vB + (vC + vB); - check_values_sum_3<float,vtype,A,B,C>(vd); - vA = (vC + vB) + vB; - check_values_sum_3<float,vtype,A,B,C>(vd); - vA = (vC + vB) + (vC + vB); - check_values_sum_4<float,vtype,A,B,C>(vd); - - vA = vB - (vC + vB); - check_values_sub_31<float,vtype,A,B,C>(vd); - vA = (vC + vB) - vB; - check_values_sub_32<float,vtype,A,B,C>(vd); - vA = (vC + vB) - (vC + vB); - check_values_sub_4<float,vtype,A,B,C>(vd); - - vA = vB * (vC + vB); - check_values_mul_3<float,vtype,A,B,C>(vd); - vA = (vC + vB) * vB; - check_values_mul_3<float,vtype,A,B,C>(vd); - vA = (vC + vB) * (vC + vB); - check_values_mul_4<float,vtype,A,B,C>(vd); - - vA = vB / (vC + vB); - check_values_div_31<float,vtype,A,B,C>(vd); - vA = (vC + vB) / vB; - check_values_div_32<float,vtype,A,B,C>(vd); - vA = (vC + vB) / (vC + vB); - check_values_div_4<float,vtype,A,B,C>(vd); - - // We try with vectors - - // Various combination of 2 operator - - vVA = vVB + 2.0; - check_values_sum<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = 2.0 + vVB; - check_values_sum<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = vVC + vVB; - check_values_sum<VectorS<3,float>,vtype,VA,VB,VC>(vd,vd); - - vVA = vVB - 2.0; - check_values_sub<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = 2.0 - vVB; - check_values_sub<VectorS<3,float>,vtype,VA,VB,VC>(2.0f,vd); - vVA = vVC - vVB; - check_values_sub<VectorS<3,float>,vtype,VA,VB,VC>(vd,vd); - - vVA = vVB * 2.0; - check_values_mul<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = 2.0 * vVB; - check_values_mul<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = vVC * vVB; - check_values_mul<VectorS<3,float>,vtype,VA,VB,VC>(vd,vd); - - vVA = vVB / 2.0; - check_values_div<VectorS<3,float>,vtype,VA,VB,VC>(vd,2.0f); - vVA = 2.0 / vVB; - check_values_div<VectorS<3,float>,vtype,VA,VB,VC>(2.0f,vd); - vVA = vVC / vVB; - check_values_div<VectorS<3,float>,vtype,VA,VB,VC>(vd,vd); - - // Variuos combination 3 operator - - vVA = vVB + (vVC + vVB); - check_values_sum_3<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) + vVB; - check_values_sum_3<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) + (vVC + vVB); - check_values_sum_4<VectorS<3,float>,vtype,VA,VB,VC>(vd); - - vVA = vVB - (vVC + vVB); - check_values_sub_31<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) - vVB; - check_values_sub_32<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) - (vVC + vVB); - check_values_sub_4<VectorS<3,float>,vtype,VA,VB,VC>(vd); - - vVA = vVB * (vVC + vVB); - check_values_mul_3<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) * vVB; - check_values_mul_3<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) * (vVC + vVB); - check_values_mul_4<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vA = vVB * (vVC + vVB); - check_values_mul_3<float,vtype,A,VB,VC>(vd); - vA = (vVC + vVB) * vVB; - check_values_mul_3<float,vtype,A,VB,VC>(vd); - vA = (vVC + vVB) * (vVC + vVB); - check_values_mul_4<float,vtype,A,VB,VC>(vd); - - vVA = vVB / (vVC + vVB); - check_values_div_31<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) / vVB; - check_values_div_32<VectorS<3,float>,vtype,VA,VB,VC>(vd); - vVA = (vVC + vVB) / (vVC + vVB); - check_values_div_4<VectorS<3,float>,vtype,VA,VB,VC>(vd); - - // normalization function - - vA = vVB * vVC + norm(vVC + vVB) + openfpm::distance(vVC,vVB); - check_values_scal_norm_dist(vd); - - Point<3,float> p0({2.0,2.0,2.0}); - auto p0_e = getVExpr(p0); - - vVA = vPOS + p0_e; - check_values_pos_sum<VectorS<3,float>,vtype,VA,VB,VC>(vd,p0); - - vVA = vPOS - p0_e; - check_values_pos_sub<Point<3,float>,vtype,VA,VB,VC>(vd,p0); - - vVA = -(vPOS - p0_e); - check_values_pos_sub_minus<Point<3,float>,vtype,VA,VB,VC>(vd,p0); - - vVA = -vVB; - check_values_point_sub<Point<3,float>,vtype,VA,VB,VC>(vd,p0); - - // Just check it compile testing it will test the same code - // as the previuous one - vVC = exp(vVB); - vA = norm(vPOS); - vVA = vPOS + 2.0; - vVA = 2.0 + vPOS; - vVA = vPOS + vPOS; - vVA = vPOS - 2.0f; - vVA = 2.0 - vPOS; - vVA = vPOS - vPOS; - - vVA = vPOS * 2.0; - vVA = 2.0 * vPOS; - vVA = vPOS * vPOS; - - vVA = vPOS / 2.0f; - vVA = 2.0f / vPOS; - vVA = vPOS / vPOS; - - // Variuos combination 3 operator - - vVA = vPOS + (vPOS + vPOS); - vVA = (vPOS + vPOS) + vPOS; - vVA = (vPOS + vPOS) + (vPOS + vPOS); - - vVA = vPOS - (vPOS + vPOS); - vVA = (vPOS + vPOS) - vPOS; - vVA = (vVC + vPOS) - (vPOS + vPOS); - - vVA = vPOS * (vPOS + vPOS); - vVA = (vPOS + vPOS) * vPOS; - vVA = (vPOS + vPOS) * (vPOS + vPOS); - vA = vPOS * (vPOS + vPOS); - vA = (vPOS + vPOS) * vPOS; - vA = (vPOS + vPOS) * (vPOS + vPOS); - - vVA = vPOS / (vPOS + vPOS); - vVA = (vPOS + vPOS) / vPOS; - vVA = (vPOS + vPOS) / (vPOS + vPOS); + check_all_expressions<comp_host>::check(vd); } -#include "vector_dist_operators_apply_kernel_unit_tests.hpp" void reset(vector_dist<3,float,aggregate<float,float,float,float,float,float,float,float>> & v1) { diff --git a/src/Operators/Vector/vector_dist_operators_unit_tests.cu b/src/Operators/Vector/vector_dist_operators_unit_tests.cu new file mode 100644 index 0000000000000000000000000000000000000000..a920282d4978802cf8d5558e295a2e2604c4d1df --- /dev/null +++ b/src/Operators/Vector/vector_dist_operators_unit_tests.cu @@ -0,0 +1,90 @@ +#include "config.h" +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include "VCluster/VCluster.hpp" +#include "Vector/vector_dist.hpp" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Operators/Vector/tests/vector_dist_operators_tests_util.hpp" + +BOOST_AUTO_TEST_SUITE( vector_dist_operators_test_gpu ) + +BOOST_AUTO_TEST_CASE(vector_dist_operators_list_ker_test) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.2); + + vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>>> vd(127*create_vcluster().size(),box,bc,ghost); + + typedef decltype(vd.toKernel()) vector_dist_kernel; + + vector_dist_ker_list<vector_dist_kernel> & vdkl = vd.private_get_vector_dist_ker_list(); + + BOOST_REQUIRE_EQUAL(vdkl.n_entry(),0); + + { + auto vA = getV<A,comp_dev>(vd); + + BOOST_REQUIRE_EQUAL(vdkl.n_entry(),1); + + { + auto vB = getV<B,comp_dev>(vd); + auto vC = getV<C,comp_dev>(vd); + + auto vVA = getV<VA,comp_dev>(vd); + auto vVB = getV<VB,comp_dev>(vd); + auto vVC = getV<VC,comp_dev>(vd); + + BOOST_REQUIRE_EQUAL(vdkl.n_entry(),6); + + // fill vd with some value + fill_values<comp_dev>(vd); + + vd.map(RUN_ON_DEVICE); + + // check that the entry has been updated + BOOST_REQUIRE_EQUAL(vdkl.check(vd.toKernel()),true); + + vd.template ghost_get<0,1,2,3,4,5>(RUN_ON_DEVICE); + + // check that the entry has been updated + BOOST_REQUIRE_EQUAL(vdkl.check(vd.toKernel()),true); + } + + BOOST_REQUIRE_EQUAL(vdkl.n_entry(),1); + } + + BOOST_REQUIRE_EQUAL(vdkl.n_entry(),0); +} + +BOOST_AUTO_TEST_CASE( vector_dist_operators_gpu_test ) +{ + if (create_vcluster().getProcessingUnits() > 3) + return; + + Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); + + // Boundary conditions + size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; + + // ghost + Ghost<3,float> ghost(0.05); + + // vector type + typedef vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>>> vtype; + + vector_dist_gpu<3,float,aggregate<float,float,float,VectorS<3,float>,VectorS<3,float>,VectorS<3,float>>> vd(100,box,bc,ghost); + + check_all_expressions<comp_host>::check(vd); + check_all_expressions<comp_dev>::check(vd); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/initialize/initialize_wrapper.hpp b/src/initialize/initialize_wrapper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..79d8577c2ddd2a1c59eca4a8c2240896dcb175e2 --- /dev/null +++ b/src/initialize/initialize_wrapper.hpp @@ -0,0 +1,20 @@ +/* + * initialize_vcl.hpp + * + * Created on: Aug 21, 2018 + * Author: i-bird + */ + +#ifndef INITIALIZE_VCL_HPP_ +#define INITIALIZE_VCL_HPP_ + +/*! \brief If openfpm has to work on GPU we have to be sure openfpm_init is called on a file compiled with NVCC + * + * There are two implementation initialize.cpp and initialize.cu. In configuration stage the second implementation is chosen + * if the test has to run on GPU + * + */ +void openfpm_init_wrapper(int * argc, char *** argv); +void openfpm_finalize_wrapper(); + +#endif /* INITIALIZE_VCL_HPP_ */ diff --git a/src/initialize/initialize_wrapper_cpu.cpp b/src/initialize/initialize_wrapper_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d75490cdd25af1caecc5b4f17ad05b1fff129bc --- /dev/null +++ b/src/initialize/initialize_wrapper_cpu.cpp @@ -0,0 +1,13 @@ +#include "initialize_wrapper.hpp" +#include "VCluster/VCluster.hpp" + + +void openfpm_init_wrapper(int * argc, char *** argv) +{ + openfpm_init(argc,argv); +} + +void openfpm_finalize_wrapper() +{ + openfpm_finalize(); +} diff --git a/src/initialize/initialize_wrapper_cuda.cu b/src/initialize/initialize_wrapper_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..e2a8e24a384b833151d7bf811930e6567ace6100 --- /dev/null +++ b/src/initialize/initialize_wrapper_cuda.cu @@ -0,0 +1,14 @@ +#define PRINT_RANK_TO_GPU + +#include "initialize_wrapper.hpp" +#include "VCluster/VCluster.hpp" + +void openfpm_init_wrapper(int * argc, char *** argv) +{ + openfpm_init(argc,argv); +} + +void openfpm_finalize_wrapper() +{ + openfpm_finalize(); +} diff --git a/src/interpolation/interpolation_unit_tests.cpp b/src/interpolation/interpolation_unit_tests.cpp index ed795c1d0f794d016b39b418c255c532ee36b043..a51522d636044c42b3937fac020859fc814fe65a 100644 --- a/src/interpolation/interpolation_unit_tests.cpp +++ b/src/interpolation/interpolation_unit_tests.cpp @@ -335,8 +335,11 @@ BOOST_AUTO_TEST_CASE( interpolation_full_single_test_3D ) { auto p = it.get(); + // coverty[dont_call] vd.getPos(p)[0] = (double)rand()/RAND_MAX; + // coverty[dont_call] vd.getPos(p)[1] = (double)rand()/RAND_MAX; + // coverty[dont_call] vd.getPos(p)[2] = (double)rand()/RAND_MAX; vd.getProp<0>(p) = 5.0; diff --git a/src/unit_test_init_cleanup.hpp b/src/unit_test_init_cleanup.hpp index 7cf89f01b1dac63dbc8753048851ff822e90f0fa..16c185431951a21ff099469e53bac17f6579c210 100644 --- a/src/unit_test_init_cleanup.hpp +++ b/src/unit_test_init_cleanup.hpp @@ -9,11 +9,12 @@ #define UNIT_TEST_INIT_CLEANUP_HPP_ #include "VCluster/VCluster.hpp" +#include "initialize/initialize_wrapper.hpp" struct ut_start { ut_start() { BOOST_TEST_MESSAGE("Initialize global VCluster"); - openfpm_init(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); + openfpm_init_wrapper(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv); } ~ut_start() { diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC4.vtk index c7c3e836110014e00df7743d6e4b35af2e2d90ac..1379dd6be6d2cdd4557f5e59997622e1accec0b6 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk index c7c3e836110014e00df7743d6e4b35af2e2d90ac..1379dd6be6d2cdd4557f5e59997622e1accec0b6 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk index c7c3e836110014e00df7743d6e4b35af2e2d90ac..1379dd6be6d2cdd4557f5e59997622e1accec0b6 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC4.vtk index e3d345d2329752fcb9f048aa3b77de3a51b5c1f8..83ce366e1a12c2d12c5de4584ae55dc0d6150f67 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk index e3d345d2329752fcb9f048aa3b77de3a51b5c1f8..83ce366e1a12c2d12c5de4584ae55dc0d6150f67 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC8.vtk new file mode 100644 index 0000000000000000000000000000000000000000..83ce366e1a12c2d12c5de4584ae55dc0d6150f67 Binary files /dev/null and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC8.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC4.vtk index f8e11aaba84b4e7d75e3099143d0b1a960af4deb..94498c032ade432e14f7231acc29acb785f6094b 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk index f8e11aaba84b4e7d75e3099143d0b1a960af4deb..94498c032ade432e14f7231acc29acb785f6094b 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC8.vtk new file mode 100644 index 0000000000000000000000000000000000000000..94498c032ade432e14f7231acc29acb785f6094b Binary files /dev/null and b/test/petsc_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC8.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC4.vtk index fe1c80bfe16335d67e7fbcdfef60a197fc37bbfb..6baff92b24aeb3b42bbd85e41826f0d844118988 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk index fe1c80bfe16335d67e7fbcdfef60a197fc37bbfb..6baff92b24aeb3b42bbd85e41826f0d844118988 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk index fe1c80bfe16335d67e7fbcdfef60a197fc37bbfb..6baff92b24aeb3b42bbd85e41826f0d844118988 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC4.vtk index c29d84c449f6269e7e712b5ae8792d64a9f0148f..8563e0702d4b3d9fdbba8f005064f694accaf213 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk index c29d84c449f6269e7e712b5ae8792d64a9f0148f..8563e0702d4b3d9fdbba8f005064f694accaf213 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk index c29d84c449f6269e7e712b5ae8792d64a9f0148f..8563e0702d4b3d9fdbba8f005064f694accaf213 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC4.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC4.vtk index 48a4fc562e6606a2f8fde8ba774c40afd94bc4c0..64c67a448ecbea3b4e973c9b820bda26b4ae0d89 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC4.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC4.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk index 48a4fc562e6606a2f8fde8ba774c40afd94bc4c0..64c67a448ecbea3b4e973c9b820bda26b4ae0d89 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk differ diff --git a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk index 48a4fc562e6606a2f8fde8ba774c40afd94bc4c0..64c67a448ecbea3b4e973c9b820bda26b4ae0d89 100644 Binary files a/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk and b/test/petsc_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk index c520732338392209a40e04d210b4a5ccdfe24ee0..30c2ccd9c1493a2efc2108f21534d24a709820f1 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk index 4af8ade6159df032faa247752ec3d45651039e3e..180ebaae2acbdf47907fe10f2b05ddc7c28af8b8 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p1_grid_0_test_GCC8.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk index 2f9ccc685cd18de05b3ca99a224c62305d642c14..5ea925fbe2923ea32878e56455128e4be47f9dac 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_osx.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_osx.vtk index 28536f0dfc62427ab60a38da78d40377d221b365..20e2ff3974dda95200b97c36d01f0b5d68622b87 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_osx.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_0_test_osx.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk index 5740bad9d0d3812700ae881b4fc2944a4ed227de..39ecef6485ef2cda8412bc8f8d72a969c075ae2a 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_osx.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_osx.vtk index 9e0f5b26b6a44bf9aa1d5e3b3782801265e14f13..33bb99fa3ec39a90db1fcf97cf45e83f1bcf9277 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_osx.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p2_grid_1_test_osx.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk index e6fb5478ae898ceaddfaed5569a22e5760b8056d..4a4b14ee73711e94176122eed43416d7a93edf09 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk index e6fb5478ae898ceaddfaed5569a22e5760b8056d..ce0bda3f9a1a3fd8516fb1855ac9c0b13cedc128 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_0_test_GCC8.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk index 351bb23e0950058b7aef7cb0cd1464effb3782c1..219e0730f4896fdd1d15556eb707e0ea3d2cad5e 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk index 351bb23e0950058b7aef7cb0cd1464effb3782c1..2bf24d9716db722a8a62b855fbc153049f9c4dd5 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_1_test_GCC8.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk index 592a6aa8c074c024d8427fa9f0977d3eaf993885..3c664a808c3a0531c3566c286a3a53c4ec9044db 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk index a6d67e1830b0f1877ef98c0c917527237035b3f5..3c664a808c3a0531c3566c286a3a53c4ec9044db 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_3d_p3_grid_2_test_GCC8.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC5.vtk index 6b3e6a864395e8947f80e27770ef793a0821f5f9..79c606c69fcddb12dbc76e74a1c491d7b505755c 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC7.vtk index 6b3e6a864395e8947f80e27770ef793a0821f5f9..ecb652391e365f19bdc195d44ea76289da3b5014 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p1_grid_0_test_GCC7.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC5.vtk index fc2ae2315a0dc5a8459b2e75e6716fc6fa37a7bb..01a023cccbba7c769ee74aa1415ae2042b6bccb2 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC7.vtk index fc2ae2315a0dc5a8459b2e75e6716fc6fa37a7bb..52b58e2d1614260a6e2e05741835cd7f78836668 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_GCC7.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_osx.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_osx.vtk index 47e6f13a464ddde9d9e49d8a705bd3223be11ed9..52b58e2d1614260a6e2e05741835cd7f78836668 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_osx.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_0_test_osx.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC5.vtk index 680321a873531e14929de0617b277bdcbb6e4bf6..ed7b4f3459153f2230190a0ee658e0d3410dbb4d 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC7.vtk index 680321a873531e14929de0617b277bdcbb6e4bf6..9d2f3dbc919055cee5589c13017f4ddbbf0485b7 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_GCC7.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_osx.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_osx.vtk index 5635b4cbcf24c9ff35294bf8283a446b17b547b5..9d2f3dbc919055cee5589c13017f4ddbbf0485b7 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_osx.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p2_grid_1_test_osx.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test.vtk index 0ea6fb4bf28dc36a74dae446d5ab8051ccc8c044..93f2071fb33368ccd5a5639d1671f2fe7f8cf3c0 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC5.vtk index 0ea6fb4bf28dc36a74dae446d5ab8051ccc8c044..6e77a440785a6f911c87f451f07283ce7ec9d508 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC7.vtk index 0ea6fb4bf28dc36a74dae446d5ab8051ccc8c044..93f2071fb33368ccd5a5639d1671f2fe7f8cf3c0 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_0_test_GCC7.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test.vtk index 7654e634b58b7c5dc98c25a46befc14a1b906524..1bbfad87f99d9a1deab2f78605f7bbadaf08c082 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC5.vtk index 7654e634b58b7c5dc98c25a46befc14a1b906524..a2a208da5573ab669e0fc93faf9967705dbea689 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC7.vtk index 7654e634b58b7c5dc98c25a46befc14a1b906524..1bbfad87f99d9a1deab2f78605f7bbadaf08c082 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_1_test_GCC7.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test.vtk index 58abb3a17d4141ec87fedf51f2bb5c3113c9c065..e66f641eaf022bd8257f44fbd5a88bbed802c4e8 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC5.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC5.vtk index 58abb3a17d4141ec87fedf51f2bb5c3113c9c065..e66f641eaf022bd8257f44fbd5a88bbed802c4e8 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC5.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC5.vtk differ diff --git a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC7.vtk b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC7.vtk index 58abb3a17d4141ec87fedf51f2bb5c3113c9c065..e66f641eaf022bd8257f44fbd5a88bbed802c4e8 100644 Binary files a/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC7.vtk and b/test/umfpack_solver<double>_lid_driven_cavity_p3_grid_2_test_GCC7.vtk differ