From b45058b1122bae59e88a1ccb07269d94405fb591 Mon Sep 17 00:00:00 2001 From: Serhii Yaskovets <yaskovet@sbalzarini-mac-38.mpi-cbg.de> Date: Thu, 27 Apr 2023 11:46:03 +0200 Subject: [PATCH] Revert "Revert "Merge remote-tracking branch 'origin/FD_solver'"" This reverts commit 99cc3f75f8a17deba4bc67774e6c0bc9582a5c52. --- src/CMakeLists.txt | 30 +- src/DCPSE/DCPSE_op/DCPSE_Solver.cuh | 801 ++++++++++ src/DCPSE/DCPSE_op/DCPSE_Solver.hpp | 325 +++- src/DCPSE/DCPSE_op/DCPSE_op.hpp | 1009 ++++++++++--- src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp | 1008 +++++++++++++ src/DCPSE/DCPSE_op/EqnsStruct.hpp | 738 ++++++++- .../DCPSE_op/tests/DCPSE_op_Solver_test.cpp | 188 ++- .../DCPSE_op/tests/DCPSE_op_Solver_test.cu | 1333 +++++++++++++++++ .../DCPSE_op/tests/DCPSE_op_Surface_tests.cpp | 1112 ++++++++++++++ .../DCPSE_op/tests/DCPSE_op_subset_test.cu | 616 ++++++++ src/DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp | 350 ++++- .../tests/DCPSE_op_test_base_tests.cpp | 290 +++- src/DCPSE/Dcpse.cuh | 1031 +++++++++++++ src/DCPSE/Dcpse.hpp | 626 ++++++-- src/DCPSE/DcpseDiagonalScalingMatrix.hpp | 25 +- src/DCPSE/DcpseInterpolation.hpp | 107 ++ src/DCPSE/Monomial.cuh | 204 +++ src/DCPSE/Monomial.hpp | 1 + src/DCPSE/MonomialBasis.hpp | 118 +- src/DCPSE/Support.hpp | 45 +- src/DCPSE/SupportBuilder.cuh | 146 ++ src/DCPSE/SupportBuilder.hpp | 364 +++-- src/DCPSE/Vandermonde.hpp | 51 +- src/DCPSE/VandermondeRowBuilder.hpp | 20 +- src/DCPSE/tests/Support_unit_tests.cpp | 34 +- src/DCPSE/tests/Vandermonde_unit_tests.cpp | 4 +- src/FiniteDifference/FD_expressions.hpp | 798 +++++++++- src/FiniteDifference/FD_op_Tests.cpp | 99 ++ src/Matrix/SparseMatrix_petsc.hpp | 7 +- src/OdeIntegrators/OdeIntegrators.hpp | 205 ++- .../tests/OdeIntegrator_grid_tests.cpp | 576 +++++++ .../tests/OdeIntegratores_base_tests.cpp | 19 +- .../tests/Odeintegrators_test_gpu.cu | 104 ++ ...algebra_ofp.hpp => vector_algebra_ofp.hpp} | 792 +++++----- src/OdeIntegrators/vector_algebra_ofp_gpu.hpp | 993 ++++++++++++ .../cuda/vector_dist_operators_cuda.cuh | 9 +- .../Vector/vector_dist_operators.hpp | 586 +++++--- src/Solvers/petsc_solver.hpp | 199 ++- .../interpolation_unit_tests.cpp | 443 ++++++ src/interpolation/lambda_kernel.hpp | 39 +- src/level_set/closest_point/closest_point.hpp | 226 ++- .../closest_point_unit_tests.cpp | 185 ++- src/util/SphericalHarmonics.hpp | 19 +- 43 files changed, 14284 insertions(+), 1591 deletions(-) create mode 100644 src/DCPSE/DCPSE_op/DCPSE_Solver.cuh create mode 100644 src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp create mode 100644 src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu create mode 100644 src/DCPSE/DCPSE_op/tests/DCPSE_op_Surface_tests.cpp create mode 100644 src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu create mode 100644 src/DCPSE/Dcpse.cuh create mode 100644 src/DCPSE/DcpseInterpolation.hpp create mode 100644 src/DCPSE/Monomial.cuh create mode 100644 src/DCPSE/SupportBuilder.cuh create mode 100644 src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp create mode 100644 src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu rename src/OdeIntegrators/{boost_vector_algebra_ofp.hpp => vector_algebra_ofp.hpp} (86%) create mode 100644 src/OdeIntegrators/vector_algebra_ofp_gpu.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 37f1b92b..c7589304 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,6 +17,9 @@ endif() if (NOT CUDA_ON_BACKEND STREQUAL "None") set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu + #DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu + 
OdeIntegrators/tests/Odeintegrators_test_gpu.cu + #DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu) endif() @@ -33,7 +36,8 @@ if ( CUDA_ON_BACKEND STREQUAL "HIP" AND HIP_FOUND ) hip_add_executable(numerics ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} - OdeIntegrators/tests/OdeIntegratores_base_tests.cpp + OdeIntegrators/tests/OdeIntegratores_base_tests.cpp + OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cpp DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests FiniteDifference/FD_Solver_test.cpp @@ -74,6 +78,7 @@ else() add_executable(numerics ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} OdeIntegrators/tests/OdeIntegratores_base_tests.cpp + OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cpp DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests FiniteDifference/FD_Solver_test.cpp @@ -100,6 +105,7 @@ else() Operators/Vector/vector_dist_operators_unit_tests.cpp Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cpp ../../src/lib/pdata.cpp + DCPSE/DCPSE_op/tests/DCPSE_op_Surface_tests.cpp # BoundaryConditions/tests/method_of_images_cylinder_unit_test.cpp # level_set/closest_point/closest_point_unit_tests.cpp level_set/redistancing_Sussman/tests/redistancingSussman_fast_unit_test.cpp @@ -141,6 +147,7 @@ if(CUDA_FOUND) if (TEST_COVERAGE) target_compile_options(numerics PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage" >) endif() + target_link_libraries(numerics -lcublas) endif() target_include_directories (numerics PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) @@ -157,6 +164,8 @@ target_include_directories (numerics PUBLIC ${HDF5_ROOT}/include) target_include_directories (numerics PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) target_include_directories (numerics PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (numerics PUBLIC ${Vc_INCLUDE_DIR}) +target_include_directories (numerics PUBLIC ${BLITZ_ROOT}/include) +target_include_directories (numerics PUBLIC ${ALGOIM_ROOT}/include) target_include_directories (numerics PUBLIC ${ALPAKA_ROOT}/include) target_include_directories (numerics PUBLIC ${MPI_C_INCLUDE_DIRS}) if(EIGEN3_FOUND) @@ -243,7 +252,7 @@ install(FILES FiniteDifference/Average.hpp FiniteDifference/FDScheme.hpp FiniteDifference/Laplacian.hpp FiniteDifference/mul.hpp - FiniteDifference/sum.hpp + FiniteDifference/sum.hpp FiniteDifference/Upwind_gradient.hpp FiniteDifference/Eno_Weno.hpp FiniteDifference/FD_op.hpp @@ -271,18 +280,23 @@ install(FILES Operators/Vector/vector_dist_operators_extensions.hpp COMPONENT OpenFPM) install(FILES Operators/Vector/cuda/vector_dist_operators_cuda.cuh - DESTINATION openfpm_numerics/include/Operators/Vector/cuda + DESTINATION openfpm_numerics/include/Operators/Vector/cuda COMPONENT OpenFPM) install(FILES DCPSE/Dcpse.hpp + DCPSE/Dcpse.cuh + DCPSE/DCPSE_op/DCPSE_Solver.cuh DCPSE/DcpseDiagonalScalingMatrix.hpp DCPSE/DcpseRhs.hpp DCPSE/Monomial.hpp + DCPSE/Monomial.cuh DCPSE/MonomialBasis.hpp DCPSE/Support.hpp + DCPSE/SupportBuilder.cuh DCPSE/SupportBuilder.hpp DCPSE/Vandermonde.hpp DCPSE/VandermondeRowBuilder.hpp + DCPSE/DcpseInterpolation.hpp DESTINATION openfpm_numerics/include/DCPSE COMPONENT OpenFPM) @@ -297,11 +311,13 @@ install(FILES util/eq_solve_common.hpp install(FILES DCPSE/DCPSE_op/DCPSE_op.hpp DCPSE/DCPSE_op/DCPSE_Solver.hpp DCPSE/DCPSE_op/EqnsStruct.hpp + DCPSE/DCPSE_op/DCPSE_surface_op.hpp DESTINATION openfpm_numerics/include/DCPSE/DCPSE_op COMPONENT OpenFPM) install(FILES 
OdeIntegrators/OdeIntegrators.hpp - OdeIntegrators/boost_vector_algebra_ofp.hpp + OdeIntegrators/vector_algebra_ofp.hpp + OdeIntegrators/vector_algebra_ofp_gpu.hpp DESTINATION openfpm_numerics/include/OdeIntegrators COMPONENT OpenFPM) @@ -310,7 +326,7 @@ install(FILES Draw/DrawParticles.hpp Draw/PointIteratorSkin.hpp Draw/DrawDisk.hpp Draw/DrawSphere.hpp - DESTINATION openfpm_numerics/include/Draw + DESTINATION openfpm_numerics/include/Draw COMPONENT OpenFPM) install(FILES interpolation/interpolation.hpp @@ -338,6 +354,10 @@ install(FILES DMatrix/EMatrix.hpp DESTINATION openfpm_numerics/include/DMatrix COMPONENT OpenFPM) +install(FILES level_set/closest_point/closest_point.hpp + DESTINATION openfpm_numerics/include/level_set/closest_point + COMPONENT OpenFPM) + #if(BUILD_TESTING) # add_executable(particle_test test.cu) diff --git a/src/DCPSE/DCPSE_op/DCPSE_Solver.cuh b/src/DCPSE/DCPSE_op/DCPSE_Solver.cuh new file mode 100644 index 00000000..8045214f --- /dev/null +++ b/src/DCPSE/DCPSE_op/DCPSE_Solver.cuh @@ -0,0 +1,801 @@ +// +// Created by Serhii +// + +#ifndef OPENFPM_PDATA_DCPSE_SOLVER_CUH +#define OPENFPM_PDATA_DCPSE_SOLVER_CUH + +// #include "DCPSE_op.hpp" +#include "DCPSE/DCPSE_op/DCPSE_op.hpp" +#include "Matrix/SparseMatrix.hpp" +#include "Vector/Vector.hpp" +#include "NN/CellList/CellDecomposer.hpp" +#include "Vector/Vector_util.hpp" +#include "Vector/vector_dist.hpp" +#include "Solvers/umfpack_solver.hpp" +#include "Solvers/petsc_solver.hpp" +#include "util/eq_solve_common.hpp" + +/*enum eq_struct +{ + VECTOR, + SCALAR +};*/ + +//template<unsigned int prp_id> using prop_id = boost::mpl::int_<prp_id>; +/*! \brief Create a Matrix System for Ax=b + * + * This Class is for creating a placeholder for the matrix system + * + * Ax = b + * + * + * \param Sys_eqs Equation Structure which has information about the system. Refer to EqnStruct.cpp for examples + * \param parts Particle set + * + */ +template<typename Sys_eqs, typename particles_type> +class DCPSE_scheme_gpu { + + //! type of the sparse matrix + typename Sys_eqs::SparseMatrix_type A; + + //! Vector b + typename Sys_eqs::Vector_type b; + + //! Sparse matrix triplet type + typedef typename Sys_eqs::SparseMatrix_type::triplet_type triplet; + + //! Distributed grid map + typedef vector_dist_gpu<Sys_eqs::dims, typename Sys_eqs::stype, aggregate<size_t>> p_map_type; + + //! mapping grid + p_map_type p_map; + + //! Grid points that has each processor + openfpm::vector<size_t> pnt; + + //! Particles used to impose the system + particles_type &parts; + + //! colums shift map + //int col_sm[Sys_eqs::nvar]; + + //! Each point in the grid has a global id, to decompose correctly the Matrix each processor contain a + //! contiguos range of global id, example processor 0 can have from 0 to 234 and processor 1 from 235 to 512 + //! no processors can have holes in the sequence, this number indicate where the sequence start for this + //! processor + size_t s_pnt; + + //! row of the matrix + size_t row; + + //! row on b + size_t row_b; + + //! Total number of points + size_t tot; + + //! solver options + options_solver opt; + + size_t offset; + + + /*! 
\brief Construct the gmap structure + * + */template<typename options> + void construct_pmap(options opt = options_solver::STANDARD) { + Vcluster<> &v_cl = create_vcluster(); + + // Calculate the size of the local domain + size_t sz = p_map.size_local(); + + // Get the total size of the local grids on each processors + v_cl.allGather(sz, pnt); + v_cl.execute(); + s_pnt = 0; + + // calculate the starting point for this processor + for (size_t i = 0; i < v_cl.getProcessUnitID(); i++) + s_pnt += pnt.get(i); + + tot = sz; + v_cl.sum(tot); + v_cl.execute(); + + // resize b if needed + if (opt == options_solver::STANDARD) { + b.resize(Sys_eqs::nvar * tot, Sys_eqs::nvar * sz); + } else if (opt == options_solver::LAGRANGE_MULTIPLIER) { + if (v_cl.rank() == v_cl.size() - 1) { + b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz + 1); + } else { + b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz); + } + } + //Use Custom number of constraints using opt as an integer + else { + if (v_cl.rank() == v_cl.size() - 1) { + b.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz - offset); + } else { + b.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz); + } + } + + // Calculate the starting point + + // Counter + size_t cnt = 0; + + // Create the re-mapping grid + auto it = p_map.getDomainIterator(); + + while (it.isNext()) { + auto key = it.get(); + + for (int i = 0; i < particles_type::dims; i++) { + p_map.getPos(key)[i] = parts.getPos(key)[i]; + } + + p_map.template getProp<0>(key) = cnt + s_pnt; + + ++cnt; + ++it; + } + + // sync the ghost + p_map.template ghost_get<0>(); + } + + //! Encapsulation of the b term as constant + struct constant_b { + //! scalar + typename Sys_eqs::stype scal; + + /*! \brief Constrictor from a scalar + * + * \param scal scalar + * + */ + constant_b(typename Sys_eqs::stype scal) { + this->scal = scal; + } + + /*! \brief Get the b term on a grid point + * + * \note It does not matter the grid point it is a scalar + * + * \param key grid position (unused because it is a constant) + * + * \return the scalar + * + */ + typename Sys_eqs::stype get(size_t key) { + return scal; + } + }; + + //! Encapsulation of the b term as constant + template<unsigned int prp_id> + struct variable_b { + //! scalar + typename Sys_eqs::stype scal; + + particles_type &parts; + + /*! \brief Constrictor from a scalar + * + * \param scal scalar + * + */ + variable_b(particles_type &parts) + : parts(parts) {} + + /*! \brief Get the b term on a grid point + * + * \note It does not matter the grid point it is a scalar + * + * \param key grid position (unused because it is a constant) + * + * \return the scalar + * + */ + inline typename Sys_eqs::stype get(size_t key) { + return parts.template getProp<prp_id>(key); + } + }; + + + /*! 
\brief Check if the Matrix is consistent + * + */ + void consistency() { + openfpm::vector<triplet> &trpl = A.getMatrixTriplets(); + + // A and B must have the same rows + if (row != row_b) { + std::cerr << "Error " << __FILE__ << ":" << __LINE__ + << " the term B and the Matrix A for Ax=B must contain the same number of rows " << row << "!=" << row_b << "\n"; + return; + } + if (row_b != p_map.size_local() * Sys_eqs::nvar) { + std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " your system is underdetermined you set " + << row_b << " conditions " << " but i am expecting " << p_map.size_local() * Sys_eqs::nvar + << std::endl; + return; + } + + // Indicate all the non zero rows + openfpm::vector<unsigned char> nz_rows; + nz_rows.resize(row_b); + + for (size_t i = 0; i < trpl.size(); i++) { + if (trpl.get(i).row() - s_pnt * Sys_eqs::nvar >= nz_rows.size()) { + std::cerr << "Error " << __FILE__ << ":" << __LINE__ + << " It seems that you are setting colums that does not exist \n"; + } + if (trpl.get(i).value() != 0) { nz_rows.get(trpl.get(i).row() - s_pnt * Sys_eqs::nvar) = true; } + } + + // Indicate all the non zero colums + // This check can be done only on single processor + + Vcluster<> &v_cl = create_vcluster(); + if (v_cl.getProcessingUnits() == 1) { + openfpm::vector<unsigned> nz_cols; + nz_cols.resize(row_b); + + for (size_t i = 0; i < trpl.size(); i++) { + if (trpl.get(i).value() != 0) { nz_cols.get(trpl.get(i).col()) = true; } + } + + // all the rows must have a non zero element + for (size_t i = 0; i < nz_rows.size(); i++) { + if (nz_rows.get(i) == false) { + std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Ill posed matrix row " << i + << " is not filled " << " equation: " << "\n"; + } + } + + // all the colums must have a non zero element + for (size_t i = 0; i < nz_cols.size(); i++) { + if (nz_cols.get(i) == false) + std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Ill posed matrix colum " << i + << " is not filled\n"; + } + } + } + + /*! \brief Solve an equation + * + * \warning exp must be a scalar type + * + * \param exp where to store the result + * + */ + template<typename solType, typename expr_type> + void copy_impl(solType & x, expr_type exp, unsigned int comp) + { + auto & parts = exp.getVector(); + + auto it = parts.getDomainIterator(); + + while (it.isNext()) { + auto p = it.get(); + exp.value(p) = x(p.getKey() * Sys_eqs::nvar + comp + s_pnt * Sys_eqs::nvar); + ++it; + } + } + + template<typename solType, typename exp1, typename ... othersExp> + void copy_nested(solType &x, unsigned int &comp, exp1 exp, othersExp ... exps) { + copy_impl(x, exp, comp); + comp++; + + copy_nested(x, comp, exps ...); + } + + + template<typename solType, typename exp1> + void copy_nested(solType &x, unsigned int &comp, exp1 exp) { + copy_impl(x, exp, comp); + comp++; + } + +public: + + /*! \brief Set the structure of the system of equation + * + * For example for stokes-flow where you are solving for V = velocity (Vector) and P = pressure (scalar) + * + * you should call this function with + * + * setEquationStructure({eq_struct::VECTOR,eq_struct::SCALAR}) + * + */ +/* void setEquationStructure(std::initializer_list<eq_struct> l) + { + int i = 0; + for (eq_struct e : l) + { + if (e == eq_struct::VECTOR) + { + for (int j = 0 ; j < Sys_eqs::dims ; j++) + { + col_sm[i+j] = i; + } + i += Sys_eqs::dims; + } + else + { + col_sm[i] = i; + } + } + }*/ + + + /*! 
\brief Solve an equation + * + * \warning exp must be a scalar type + * + * \param exp where to store the result + * + */ + template<typename ... expr_type> + void solve(expr_type ... exps) { + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; + typename Sys_eqs::solver_type solver; +// umfpack_solver<double> solver; + auto x = solver.solve(getA(opt), getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + + /*! \brief Solve an equation + * + * \warning exp must be a scalar type + * + * \param Solver Manually created Solver instead from the Equation structure + * \param exp where to store the result + * + */ + template<typename SolverType, typename ... expr_type> + void solve_with_solver(SolverType &solver, expr_type ... exps) { +#ifdef SE_CLASS1 + + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; +#endif + auto x = solver.solve(getA(opt), getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + + /*! \brief Solve an equation + * + * \warning exp must be a scalar type + * + * \param exp where to store the result + * + */ + template<typename SolverType, typename ... expr_type> + void try_solve_with_solver(SolverType &solver, expr_type ... exps) { + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; + + auto x = solver.try_solve(getA(opt), getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + + void reset_b() + { + row_b = 0; + } + + void reset(particles_type &part, options_solver opt = options_solver::STANDARD) + { + row = 0; + row_b = 0; + + p_map.clear(); + p_map.resize(part.size_local()); + + A.getMatrixTriplets().clear(); + + construct_pmap(opt); + } + + void reset_nodec() + { + row = 0; + row_b = 0; + + A.getMatrixTriplets().clear(); + } + + /*! \brief Constructor for the solver + * + * + * \param parts Particle set + * \param option_solver opt=options_solver::LAGRANGE_MULTIPLIER can be used for purely Neumann system + * + */ + DCPSE_scheme_gpu(particles_type &part, options_solver opt = options_solver::STANDARD) + : parts(part), p_map(part.getDecomposition(), 0), row(0), row_b(0), opt(opt) { + p_map.resize(part.size_local()); + + construct_pmap(opt); + } + + /*DCPSE_scheme_gpu(particles_type &part, int option_num) + : parts(part), p_map(part.getDecomposition(), 0), row(0), row_b(0),opt(options_solver::CUSTOM),offset(option_num) { + p_map.resize(part.size_local()); + construct_pmap(option_num); + }*/ + + + /*! \brief Impose an operator in the Matrix System + * + * This function impose an operator on a particular particle region to produce the system + * + * Ax = b + * + * + * \param op Operator to impose (A term) + * \param subset Vector with indices of particles where the operator has to be imposed + * \param prp_id<>() Property number in the aggregate (Scalar only) for imposing on the RHS b. 
+ * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename T, typename index_type, unsigned int prp_id> + void impose(const T &op, openfpm::vector<index_type> &subset, + const prop_id<prp_id> &num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + variable_b<prp_id> vb(parts); + + impose_git(op, vb, id.getId(), itd); + } + + /*! \brief Impose b part only in the Matrix System Ax=b + * + * This function impose RHS of an existing Ax=b system. + * + * + * \param subset Vector with indices of particles where the operator has to be imposed + * \param num right hand side of the term (b term) Constant in this case + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename index_type, unsigned int prp_id> + void impose_b(openfpm::vector<index_type> &subset, + const prop_id<prp_id> &num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + variable_b<prp_id> vb(parts); + + impose_git_b(vb, id.getId(), itd); + } + + /*! \brief Impose an operator in the Matrix System + * + * This function impose an operator on a particular particle region to produce the system + * + * Ax = b + * + * + * \param op Operator to impose (A term) + * \param subset Vector with indices of particles where the operator has to be imposed + * \param RHS Expression of the Vector to be imposed + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename T, typename index_type, typename RHS_type, typename sfinae = typename std::enable_if<!std::is_fundamental<RHS_type>::type::value>::type> + void impose(const T &op, openfpm::vector<index_type> &subset, + const RHS_type &rhs, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + impose_git(op, rhs, id.getId(), itd); + } + /*! \brief Impose b part only in the Matrix System Ax=b + * + * This function impose RHS of an existing Ax=b system. + * + * + * + * \param subset Vector with indices of particles where the operator has to be imposed + * \param Expression of the Vector to be imposed + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename index_type, typename RHS_type, typename sfinae = typename std::enable_if<!std::is_fundamental<RHS_type>::type::value>::type> + void impose_b(openfpm::vector<index_type> &subset, + const RHS_type &rhs, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + impose_git_b(rhs, id.getId(), itd); + } + + /*! \brief Impose an operator in the Matrix System + * + * This function impose an operator on a particular particle region to produce the system + * + * Ax = b + * + * + * \param op Operator to impose (A term) + * \param subset Vector with indices of particles where the operator has to be imposed + * \param num Constant for all the particles + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename T, typename index_type> + void impose(const T &op, + openfpm::vector<index_type> &subset, + const typename Sys_eqs::stype num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + constant_b b(num); + + impose_git(op, b, id.getId(), itd); + } + /*! \brief Impose b part only in the Matrix System Ax=b + * + * This function impose RHS of an existing Ax=b system. 
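+ *
+ * A minimal re-assembly sketch (hedged, not part of this header: it assumes a
+ * 2D single-field equation structure such as equations2d1, caller-built index
+ * vectors `bulk` and `boundary`, a field expression f = getV<0>(particles), and
+ * a previously constructed DCPSE Laplacian operator `Lap`):
+ *
+ * \code
+ * DCPSE_scheme_gpu<equations2d1, decltype(particles)> scheme(particles);
+ * scheme.impose(Lap(f), bulk, prop_id<1>());   // assemble A, b taken from property 1
+ * scheme.impose(f, boundary, 0.0);             // Dirichlet condition on the boundary
+ * scheme.solve(f);
+ *
+ * scheme.reset_b();                            // keep A, rebuild only the right-hand side
+ * scheme.impose_b(bulk, prop_id<1>());
+ * scheme.impose_b(boundary, 0.0);
+ * scheme.solve(f);
+ * \endcode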
+ * + * + * + * \param subset Vector with indices of particles where the operator has to be imposed + * \param num Constant for all the particles + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template< typename index_type> + void impose_b(openfpm::vector<index_type> &subset, + const typename Sys_eqs::stype num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + constant_b b(num); + + impose_git_b(b, id.getId(), itd); + } + + /*! \brief produce the Matrix + * + * \return the Sparse matrix produced + * + */ + template<typename options> + typename Sys_eqs::SparseMatrix_type &getA(options opt) { +#ifdef SE_CLASS1 + consistency(); +#endif + if (opt == options_solver::STANDARD) { + A.resize(tot * Sys_eqs::nvar, tot * Sys_eqs::nvar, + p_map.size_local() * Sys_eqs::nvar, + p_map.size_local() * Sys_eqs::nvar); + } + else if (opt == options_solver::LAGRANGE_MULTIPLIER) { + auto &v_cl = create_vcluster(); + openfpm::vector<triplet> &trpl = A.getMatrixTriplets(); + + if (v_cl.rank() == v_cl.size() - 1) { + A.resize(tot * Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1, + p_map.size_local() * Sys_eqs::nvar + 1, + p_map.size_local() * Sys_eqs::nvar + 1); + + for (int i = 0; i < tot * Sys_eqs::nvar; i++) { + triplet t1; + + t1.row() = tot * Sys_eqs::nvar; + t1.col() = i; + t1.value() = 1; + + trpl.add(t1); + } + + for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) { + triplet t2; + + t2.row() = i + s_pnt * Sys_eqs::nvar; + t2.col() = tot * Sys_eqs::nvar; + t2.value() = 1; + + trpl.add(t2); + } + + triplet t3; + + t3.col() = tot * Sys_eqs::nvar; + t3.row() = tot * Sys_eqs::nvar; + t3.value() = 0; + + trpl.add(t3); + + row_b++; + row++; + } + else { + A.resize(tot * Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1, + p_map.size_local() * Sys_eqs::nvar, + p_map.size_local() * Sys_eqs::nvar); + + for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) { + triplet t2; + + t2.row() = i + s_pnt * Sys_eqs::nvar; + t2.col() = tot * Sys_eqs::nvar; + t2.value() = 1; + + trpl.add(t2); + } + } + + + } + + else{ + auto &v_cl = create_vcluster(); + if (v_cl.rank() == v_cl.size() - 1) { + A.resize(tot * Sys_eqs::nvar - offset, tot * Sys_eqs::nvar - offset, + p_map.size_local() * Sys_eqs::nvar - offset, + p_map.size_local() * Sys_eqs::nvar - offset); + } + else { + A.resize(tot * Sys_eqs::nvar - offset, tot * Sys_eqs::nvar - offset, + p_map.size_local() * Sys_eqs::nvar, + p_map.size_local() * Sys_eqs::nvar); + } + } + + return A; + + } + + /*! \brief produce the B vector + * + * \return the vector produced + * + */ + typename Sys_eqs::Vector_type &getB(options_solver opt = options_solver::STANDARD) { +#ifdef SE_CLASS1 + consistency(); +#endif + if (opt == options_solver::LAGRANGE_MULTIPLIER) { + auto &v_cl = create_vcluster(); + if (v_cl.rank() == v_cl.size() - 1) { + + b(tot * Sys_eqs::nvar) = 0; + } + } + return b; + } + + + template<typename bop, typename iterator> + void impose_git_b(bop num, + long int id, + const iterator &it_d) { + auto it = it_d; + // iterate all particles points + while (it.isNext()) { + // get the particle + auto key = it.get(); + // Calculate the non-zero colums + b(p_map.template getProp<0>(key) * Sys_eqs::nvar + id) = num.get(key); +// std::cout << "b=(" << p_map.template getProp<0>(key)*Sys_eqs::nvar + id << "," << num.get(key)<<")" <<"\n"; + + // if SE_CLASS1 is defined check the position +#ifdef SE_CLASS1 + // T::position(key,gs,s_pos); +#endif + ++row_b; + ++it; + } + } + + /*! 
\brief Impose an operator + * + * This function impose an operator on a particular grid region to produce the system + * + * Ax = b + * + * ## Stokes equation 2D, lid driven cavity with one splipping wall + * \snippet eq_unit_test.hpp Copy the solution to grid + * + * \param op Operator to impose (A term) + * \param num right hand side of the term (b term) + * \param id Equation id in the system that we are imposing + * \param it_d iterator that define where you want to impose + * + */ + template<typename T, typename bop, typename iterator> + void impose_git(const T &op, + bop num, + long int id, + const iterator &it_d) { + openfpm::vector<triplet> &trpl = A.getMatrixTriplets(); + + auto it = it_d; + + //std::unordered_map<long int, typename particles_type::stype> cols; + + tsl::hopscotch_map<long int, typename particles_type::stype> cols; + + // iterate all particles points + while (it.isNext()) { + // get the particle + auto key = it.get(); + +/* + if (key == 298 && create_vcluster().rank() == 1) + { + int debug = 0; + debug++; + } +*/ + + // Calculate the non-zero colums + typename Sys_eqs::stype coeff = 1.0; + op.template value_nz<Sys_eqs>(p_map, key, cols, coeff, 0); + + // indicate if the diagonal has been set + bool is_diag = false; + + // create the triplet + for (auto it = cols.begin(); it != cols.end(); ++it) { + trpl.add(); + trpl.last().row() = p_map.template getProp<0>(key) * Sys_eqs::nvar + id; + trpl.last().col() = it->first; + trpl.last().value() = it->second; + if (trpl.last().row() == trpl.last().col()) + {is_diag = true;} + } + + // If does not have a diagonal entry put it to zero + if (is_diag == false) + { + trpl.add(); + trpl.last().row() = p_map.template getProp<0>(key) * Sys_eqs::nvar + id; + trpl.last().col() = p_map.template getProp<0>(key) * Sys_eqs::nvar + id; + trpl.last().value() = 0.0; + } + b(p_map.template getProp<0>(key) * Sys_eqs::nvar + id) = num.get(key); + cols.clear(); + + // if SE_CLASS1 is defined check the position +#ifdef SE_CLASS1 + // T::position(key,gs,s_pos); +#endif + + ++row; + ++row_b; + ++it; + } + } + +}; + + + +#endif //OPENFPM_PDATA_DCPSE_SOLVER_CUH diff --git a/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp b/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp index a9143de5..63f80d28 100644 --- a/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp +++ b/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp @@ -44,6 +44,9 @@ class DCPSE_scheme { //! Vector b typename Sys_eqs::Vector_type b; + //! Vector x_ig (initial guess) + typename Sys_eqs::Vector_type x_ig; + //! Sparse matrix triplet type typedef typename Sys_eqs::SparseMatrix_type::triplet_type triplet; @@ -74,6 +77,9 @@ class DCPSE_scheme { //! row on b size_t row_b; + //! row on x_ig + size_t row_x_ig; + //! 
Total number of points size_t tot; @@ -108,19 +114,25 @@ class DCPSE_scheme { // resize b if needed if (opt == options_solver::STANDARD) { b.resize(Sys_eqs::nvar * tot, Sys_eqs::nvar * sz); + x_ig.resize(Sys_eqs::nvar * tot, Sys_eqs::nvar * sz); + } else if (opt == options_solver::LAGRANGE_MULTIPLIER) { if (v_cl.rank() == v_cl.size() - 1) { - b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz + 1); + b.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (sz + 1)); + x_ig.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (sz + 1)); } else { b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz); + x_ig.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz); } } //Use Custom number of constraints using opt as an integer else { if (v_cl.rank() == v_cl.size() - 1) { b.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz - offset); + x_ig.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz - offset); } else { b.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz); + x_ig.resize(Sys_eqs::nvar * tot - offset, Sys_eqs::nvar * sz); } } @@ -211,25 +223,35 @@ class DCPSE_scheme { /*! \brief Check if the Matrix is consistent * */ - void consistency() { + void consistency(options_solver opt) + { openfpm::vector<triplet> &trpl = A.getMatrixTriplets(); + Vcluster<> &v_cl = create_vcluster(); // A and B must have the same rows if (row != row_b) { std::cerr << "Error " << __FILE__ << ":" << __LINE__ - << " the term B and the Matrix A for Ax=B must contain the same number of rows " << row << "!=" << row_b << "\n"; + << " the term B and the Matrix A for Ax=B must contain the same number of rows " << row + << "!=" << row_b << "\n"; return; } + if (row_b != p_map.size_local() * Sys_eqs::nvar) { std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " your system is underdetermined you set " << row_b << " conditions " << " but i am expecting " << p_map.size_local() * Sys_eqs::nvar << std::endl; return; } - // Indicate all the non zero rows openfpm::vector<unsigned char> nz_rows; - nz_rows.resize(row_b); + + + if (v_cl.rank() == v_cl.size()-1 && opt == options_solver::LAGRANGE_MULTIPLIER) { + nz_rows.resize(row_b+Sys_eqs::nvar); + } + else{ + nz_rows.resize(row_b); + }; for (size_t i = 0; i < trpl.size(); i++) { if (trpl.get(i).row() - s_pnt * Sys_eqs::nvar >= nz_rows.size()) { @@ -242,10 +264,14 @@ class DCPSE_scheme { // Indicate all the non zero colums // This check can be done only on single processor - Vcluster<> &v_cl = create_vcluster(); if (v_cl.getProcessingUnits() == 1) { openfpm::vector<unsigned> nz_cols; - nz_cols.resize(row_b); + if (v_cl.rank() == v_cl.size()-1 && opt == options_solver::LAGRANGE_MULTIPLIER) { + nz_cols.resize(row_b+Sys_eqs::nvar); + } + else{ + nz_cols.resize(row_b); + }; for (size_t i = 0; i < trpl.size(); i++) { if (trpl.get(i).value() != 0) { nz_cols.get(trpl.get(i).col()) = true; } @@ -382,6 +408,78 @@ public: copy_nested(x, comp, exps ...); } + /*! \brief Solve an equation + * + * \warning exp must be a scalar type + * + * \param Solver Manually created Solver instead from the Equation structure + * \param exp where to store the result + * + */ + template<typename SolverType, typename ... expr_type> + void solve_with_solver_ig(SolverType &solver,expr_type ... 
exps) { +#ifdef SE_CLASS1 + + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; +#endif + auto x = solver.solve(getA(opt),get_x_ig(opt),getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + + /*! \brief Successive Solve an equation + * + * \warning exp must be a scalar type + * + * \param Solver Manually created Solver instead from the Equation structure + * \param exp where to store the result + * + */ + template<typename SolverType, typename ... expr_type> + void solve_with_solver_successive(SolverType &solver,expr_type ... exps) { +#ifdef SE_CLASS1 + + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; +#endif + auto x = solver.solve_successive(getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + + /*! \brief Successive Solve an equation with inital guess + * + * \warning exp must be a scalar type + * + * \param Solver Manually created Solver instead from the Equation structure + * \param exp where to store the result + * + */ + template<typename SolverType, typename ... expr_type> + void solve_with_solver_ig_successive(SolverType &solver,expr_type ... exps) { +#ifdef SE_CLASS1 + + if (sizeof...(exps) != Sys_eqs::nvar) { + std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\ + dimensionality, I am expecting " << Sys_eqs::nvar << + " properties " << std::endl; + }; +#endif + auto x = solver.solve_successive(get_x_ig(opt),getB(opt)); + + unsigned int comp = 0; + copy_nested(x, comp, exps ...); + } + /*! \brief Solve an equation with a given Nullspace * * \warning exp must be a scalar type @@ -424,7 +522,7 @@ public: " properties " << std::endl; }; #endif - auto x = solver.with_constant_nullspace_solve(getA(opt), getB(opt)); + auto x = solver.with_nullspace_solve(getA(opt), getB(opt)); unsigned int comp = 0; copy_nested(x, comp, exps ...); @@ -455,13 +553,19 @@ public: { row_b = 0; } + void reset_x_ig() + { + row_x_ig = 0; + } void reset(particles_type &part, options_solver opt = options_solver::STANDARD) { row = 0; row_b = 0; + row_x_ig = 0; - p_map.clear(); + + p_map.clear(); p_map.resize(part.size_local()); A.getMatrixTriplets().clear(); @@ -473,6 +577,7 @@ public: { row = 0; row_b = 0; + row_x_ig = 0; A.getMatrixTriplets().clear(); } @@ -543,6 +648,27 @@ public: impose_git_b(vb, id.getId(), itd); } + /*! \brief Impose x as initial guess for the Matrix System Ax=b + * + * This function impose an initial guess for the matrix solver Ax=b + * + * + * \param subset Vector with indices of particles where the operator has to be imposed + * \param the constant guess num. + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename index_type, unsigned int prp_id> + void impose_x_ig(openfpm::vector<index_type> &subset, + const prop_id<prp_id> &num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + variable_b<prp_id> vx(parts); + + impose_git_x(vx, id.getId(), itd); + } + /*! 
\brief Impose an operator in the Matrix System * * This function impose an operator on a particular particle region to produce the system @@ -582,6 +708,22 @@ public: auto itd = subset.template getIteratorElements<0>(); impose_git_b(rhs, id.getId(), itd); } + /*! \brief Impose initial guess x in the Matrix System Ax=b + * + * This function impose initial guess x of an existing Ax=b system. + * + * \param subset Vector with indices of particles where the operator has to be imposed as a guess + * \param num Constant for all the particles + * \param id Equation id in the system that we are imposing given by ed_id type + * + */ + template<typename index_type, typename RHS_type, typename sfinae = typename std::enable_if<!std::is_fundamental<RHS_type>::type::value>::type> + void impose_x_ig(openfpm::vector<index_type> &subset, + const RHS_type &rhs, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + impose_git_x(rhs, id.getId(), itd); + } /*! \brief Impose an operator in the Matrix System * @@ -629,6 +771,28 @@ public: impose_git_b(b, id.getId(), itd); } + /*! \brief Impose initial guess x in the Matrix System Ax=b +* +* This function impose RHS of an existing Ax=b system. +* +* +* +* \param subset Vector with indices of particles where the operator has to be imposed as a guess +* \param num Constant for all the particles +* \param id Equation id in the system that we are imposing given by ed_id type +* +*/ + template< typename index_type> + void impose_x_ig(openfpm::vector<index_type> &subset, + const typename Sys_eqs::stype num, + eq_id id = eq_id()) { + auto itd = subset.template getIteratorElements<0>(); + + constant_b x_ig(num); + + impose_git_x(x_ig, id.getId(), itd); + } + /*! \brief produce the Matrix * * \return the Sparse matrix produced @@ -636,9 +800,6 @@ public: */ template<typename options> typename Sys_eqs::SparseMatrix_type &getA(options opt) { -#ifdef SE_CLASS1 - consistency(); -#endif if (opt == options_solver::STANDARD) { A.resize(tot * Sys_eqs::nvar, tot * Sys_eqs::nvar, p_map.size_local() * Sys_eqs::nvar, @@ -649,60 +810,45 @@ public: openfpm::vector<triplet> &trpl = A.getMatrixTriplets(); if (v_cl.rank() == v_cl.size() - 1) { - A.resize(tot * Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1, - p_map.size_local() * Sys_eqs::nvar + 1, - p_map.size_local() * Sys_eqs::nvar + 1); - - for (int i = 0; i < tot * Sys_eqs::nvar; i++) { - triplet t1; - - t1.row() = tot * Sys_eqs::nvar; - t1.col() = i; - t1.value() = 1; - - trpl.add(t1); - } - - for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) { - triplet t2; - - t2.row() = i + s_pnt * Sys_eqs::nvar; - t2.col() = tot * Sys_eqs::nvar; - t2.value() = 1; - - trpl.add(t2); + A.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (tot + 1), + Sys_eqs::nvar * (p_map.size_local() + 1), + Sys_eqs::nvar * (p_map.size_local() + 1)); + for (int j = 0; j < Sys_eqs::nvar; j++) { + for (int i = 0; i < tot; i++) { + triplet t1; + t1.row() = tot * Sys_eqs::nvar + j; + t1.col() = i * Sys_eqs::nvar + j; + t1.value() = 1; + trpl.add(t1); + } + for (int i = 0; i < p_map.size_local(); i++) { + triplet t2; + t2.row() = s_pnt + i * Sys_eqs::nvar + j; + t2.col() = tot * Sys_eqs::nvar + j; + t2.value() = 1; + trpl.add(t2); + } + triplet t3; + t3.col() = tot * Sys_eqs::nvar + j; + t3.row() = tot * Sys_eqs::nvar + j; + t3.value() = 0; + trpl.add(t3); } - - triplet t3; - - t3.col() = tot * Sys_eqs::nvar; - t3.row() = tot * Sys_eqs::nvar; - t3.value() = 0; - - trpl.add(t3); - - row_b++; - row++; - } - else { - A.resize(tot * 
Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1, + } else { + A.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (tot + 1), p_map.size_local() * Sys_eqs::nvar, p_map.size_local() * Sys_eqs::nvar); - - for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) { - triplet t2; - - t2.row() = i + s_pnt * Sys_eqs::nvar; - t2.col() = tot * Sys_eqs::nvar; - t2.value() = 1; - - trpl.add(t2); + for (int j = 0; j < Sys_eqs::nvar; j++) { + for (int i = 0; i < p_map.size_local(); i++) { + triplet t2; + t2.row() = s_pnt + i * Sys_eqs::nvar + j; + t2.col() = tot * Sys_eqs::nvar + j; + t2.value() = 1; + trpl.add(t2); + } } } - - } - else{ auto &v_cl = create_vcluster(); if (v_cl.rank() == v_cl.size() - 1) { @@ -714,9 +860,11 @@ public: A.resize(tot * Sys_eqs::nvar - offset, tot * Sys_eqs::nvar - offset, p_map.size_local() * Sys_eqs::nvar, p_map.size_local() * Sys_eqs::nvar); - } } - + } +#ifdef SE_CLASS1 + consistency(opt); +#endif return A; } @@ -727,19 +875,38 @@ public: * */ typename Sys_eqs::Vector_type &getB(options_solver opt = options_solver::STANDARD) { -#ifdef SE_CLASS1 - consistency(); -#endif +/*#ifdef SE_CLASS1 + consistency(opt); +#endif*/ if (opt == options_solver::LAGRANGE_MULTIPLIER) { auto &v_cl = create_vcluster(); if (v_cl.rank() == v_cl.size() - 1) { - - b(tot * Sys_eqs::nvar) = 0; + for(int j=0;j<Sys_eqs::nvar;j++) + {b(tot * Sys_eqs::nvar+j) = 0;} } } return b; } + /*! \brief produce the B vector + * + * \return the vector produced + * + */ + typename Sys_eqs::Vector_type &get_x_ig(options_solver opt = options_solver::STANDARD) { +/*#ifdef SE_CLASS1 + consistency(opt); +#endif*/ + if (opt == options_solver::LAGRANGE_MULTIPLIER) { + auto &v_cl = create_vcluster(); + if (v_cl.rank() == v_cl.size() - 1) { + for(int j=0;j<Sys_eqs::nvar;j++) + {x_ig(tot * Sys_eqs::nvar+j) = 0;} + } + } + return x_ig; + } + template<typename bop, typename iterator> void impose_git_b(bop num, @@ -763,6 +930,29 @@ public: } } + template<typename xop, typename iterator> + void impose_git_x(xop num, + long int id, + const iterator &it_d) { + auto it = it_d; + // iterate all particles points + while (it.isNext()) { + // get the particle + auto key = it.get(); + // Calculate the non-zero colums + x_ig(p_map.template getProp<0>(key) * Sys_eqs::nvar + id) = num.get(key); +// std::cout << "b=(" << p_map.template getProp<0>(key)*Sys_eqs::nvar + id << "," << num.get(key)<<")" <<"\n"; + + // if SE_CLASS1 is defined check the position +#ifdef SE_CLASS1 + // T::position(key,gs,s_pos); +#endif + ++row_x_ig; + ++it; + } + } + + /*! \brief Impose an operator * * This function impose an operator on a particular grid region to produce the system @@ -839,6 +1029,7 @@ public: ++row; ++row_b; + ++row_x_ig; ++it; } } diff --git a/src/DCPSE/DCPSE_op/DCPSE_op.hpp b/src/DCPSE/DCPSE_op/DCPSE_op.hpp index b8a9a5c8..a3e4e596 100644 --- a/src/DCPSE/DCPSE_op/DCPSE_op.hpp +++ b/src/DCPSE/DCPSE_op/DCPSE_op.hpp @@ -12,6 +12,10 @@ #include "Decomposition/CartDecomposition.hpp" #include "DCPSE/Dcpse.hpp" #include "Operators/Vector/vector_dist_operators.hpp" +#if defined(__NVCC__) +#include "DCPSE/Dcpse.cuh" +#endif + const double dcpse_oversampling_factor = 1.9; const double rcut_verlet = 3.1; @@ -646,6 +650,8 @@ public: {} }; */ + + /*! \brief Class for Creating the DCPSE Operator Dx and objects and computes DCPSE Kernels. 
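 *
 * A minimal usage sketch (hedged: it assumes the plain Derivative_x alias for
 * Derivative_x_T<Dcpse>, a vector_dist named `particles` with scalar properties
 * 0 and 1, and the expression helper getV from vector_dist_operators.hpp):
 *
 * \code
 * auto f   = getV<0>(particles);        // field to differentiate
 * auto dxf = getV<1>(particles);        // where the derivative is written
 * Derivative_x Dx(particles, 2, 3.1);   // 2nd order kernels, rCut = 3.1
 * dxf = Dx(f);                          // evaluate df/dx on every particle
 * Dx.save(particles, "Dx_kernels");     // optionally persist the computed kernels
 * \endcode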
* * @@ -658,7 +664,8 @@ public: * \return Operator Dx which is a function on Vector_dist_Expressions * */ -class Derivative_x { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_x_T { void *dcpse; @@ -676,51 +683,70 @@ public: * */ template<typename particles_type> - Derivative_x(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_x_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(0) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } template<unsigned int prp, typename particles_type> void DrawKernelNN(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernelNN<prp>(particles, k); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
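 *
 * A short sketch (hedged: assumes the particle set has just been redistributed,
 * e.g. after advecting the particles, so the kernel supports are stale):
 *
 * \code
 * particles.map();            // re-balance particles across processors
 * particles.ghost_get<0>();   // refresh the ghost layer the kernels read from
 * Dx.update(particles);       // recompute the DCPSE kernels on the new supports
 * \endcode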
* * @@ -728,7 +754,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -747,7 +773,8 @@ public: * \return Operator Dy which is a function on Vector_dist_Expressions * */ -class Derivative_y { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_y_T { void *dcpse; @@ -766,50 +793,64 @@ public: * */ template<typename particles_type> - Derivative_y(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_y_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(1) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; - - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; - + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse2 = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse2 = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse2->template DrawKernel<prp>(particles, k); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! 
\brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -817,7 +858,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -835,7 +876,8 @@ public: * \return Operator Dz which is a function on Vector_dist_Expressions * */ -class Derivative_z { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_z_T { void *dcpse; @@ -853,29 +895,49 @@ public: * */ template<typename particles_type> - Derivative_z(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_z_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(2) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
* * @@ -883,14 +945,14 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } @@ -911,7 +973,8 @@ public: * \return Operator Grad which is a function on Vector_dist_Expressions * */ -class Gradient { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Gradient_T { void *dcpse; @@ -931,35 +994,38 @@ public: * */ template<typename particles_type> - Gradient(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Gradient_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { - typedef Dcpse<particles_type::dims, particles_type> DCPSE_type; + typedef Dcpse_type<particles_type::dims, particles_type> DCPSE_type; dcpse = new unsigned char[particles_type::dims * sizeof(DCPSE_type)]; - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(i) = 1; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + + if (i) + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, dcpse_ptr[0], p, ord, rCut, oversampling_factor, opt); + else + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } } template<typename particles_type> void deallocate(particles_type &parts) { for (int i = 0; i < particles_type::dims; i++) { - delete &(((Dcpse<particles_type::dims, particles_type> *) dcpse)[i]); + delete &(((Dcpse_type<particles_type::dims, particles_type> *) dcpse)[i]); } } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE_V>(arg, *(dcpse_type(*)[operand_type::vtype::dims]) dcpse); @@ -967,7 +1033,7 @@ public: template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].template DrawKernel<prp>(particles, i, k); @@ -982,7 +1048,7 @@ public: */ template<typename 
particles_type> void update(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].initializeUpdate(particles); } @@ -1005,7 +1071,8 @@ public: * \return Operator which is a function on Vector_dist_Expressions * */ -class Curl2D { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Curl2D_T { void *dcpse; public: @@ -1024,29 +1091,29 @@ public: * */ template<typename particles_type> - Curl2D(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Curl2D_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { - typedef Dcpse<particles_type::dims, particles_type> DCPSE_type; + typedef Dcpse_type<particles_type::dims, particles_type> DCPSE_type; dcpse = new unsigned char[particles_type::dims * sizeof(DCPSE_type)]; - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; Point<particles_type::dims, unsigned int> p; + p.zero(); p.get(1) = 1; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + new(dcpse_ptr) Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + p.zero(); p.get(0) = 1; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + new(dcpse_ptr+1) Dcpse_type<particles_type::dims, particles_type>(parts, dcpse_ptr[0], p, ord, rCut, oversampling_factor, opt); } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_CURL2D> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_CURL2D> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE_V_CURL2D>(arg, *(dcpse_type(*)[operand_type::vtype::dims]) dcpse); @@ -1066,7 +1133,8 @@ public: * \return Operator which is a function on Vector_dist_Expressions * */ -class Laplacian { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Laplacian_T { void *dcpse; @@ -1086,28 +1154,30 @@ public: * */ template<typename particles_type> - Laplacian(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Laplacian_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { - typedef Dcpse<particles_type::dims, particles_type> DCPSE_type; - + typedef Dcpse_type<particles_type::dims, particles_type> DCPSE_type; dcpse = new unsigned char[particles_type::dims * sizeof(DCPSE_type)]; - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> 
*) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(i) = 2; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + + if (i) + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, dcpse_ptr[0], p, ord, rCut, oversampling_factor, opt); + else + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_SUM> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_SUM> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE_V_SUM>(arg, *(dcpse_type(*)[operand_type::vtype::dims]) dcpse); @@ -1116,7 +1186,7 @@ public: template<typename particles_type> void checkMomenta(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].checkMomenta(particles); @@ -1126,7 +1196,7 @@ public: template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].template DrawKernel<prp>(particles, k); @@ -1135,7 +1205,7 @@ public: } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * @@ -1144,7 +1214,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].initializeUpdate(particles); } @@ -1168,7 +1238,8 @@ public: * \return Operator which is a function on Vector_dist_Expressions. 
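The vector-valued operators in this hunk (Curl2D_T above, Laplacian_T, and Divergence_T/Advection_T below) allocate one Dcpse kernel per spatial dimension in a raw byte buffer and placement-new the components into it; with this patch the components for i > 0 are constructed from dcpse_ptr[0], so they reuse the support and Vandermonde data of the first component instead of rebuilding them. Usage is unchanged: the operator is built from a particle set and applied to a vector_dist expression. A minimal sketch, assuming the usual OpenFPM headers, an initialized environment, and an illustrative two-property layout (CONC, LAP) that is not part of this patch:

    // Hypothetical property indices and discretization parameters.
    constexpr unsigned int CONC = 0, LAP = 1;
    Box<2, double> domain({0.0, 0.0}, {1.0, 1.0});
    size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
    Ghost<2, double> ghost(0.1);
    vector_dist<2, double, aggregate<double, double>> particles(4096, domain, bc, ghost);
    // ... place particles and fill the CONC property, then:
    particles.map();
    particles.ghost_get<CONC>();

    double rCut = 3.1 / 64.0;              // support radius, an illustrative choice
    Laplacian Lap(particles, 2, rCut);     // 2nd-order DCPSE Laplacian (sum of the d^2/dx_i^2 kernels)

    auto f    = getV<CONC>(particles);
    auto lapf = getV<LAP>(particles);
    lapf = Lap(f);                         // evaluates the operator on every particle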
Computes Divergence of Vectors * */ -class Divergence { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Divergence_T { void *dcpse; @@ -1188,28 +1259,31 @@ public: * */ template<typename particles_type> - Divergence(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Divergence_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { - typedef Dcpse<particles_type::dims, particles_type> DCPSE_type; + typedef Dcpse_type<particles_type::dims, particles_type> DCPSE_type; dcpse = new unsigned char[particles_type::dims * sizeof(DCPSE_type)]; - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(i) = 1; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + + if (i) + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, dcpse_ptr[0], p, ord, rCut, oversampling_factor, opt); + else + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_DIV> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE_V_DIV> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE_V_DIV>(arg, *(dcpse_type(*)[operand_type::vtype::dims]) dcpse); @@ -1222,7 +1296,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].initializeUpdate(particles); } @@ -1245,7 +1319,8 @@ public: * \return Operator which is a function on Vector_dist_Expressions. 
Computes Advection of Vectors Adv(v,u) = v.Grad(u) * */ -class Advection { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Advection_T { void *dcpse; @@ -1265,30 +1340,33 @@ public: * */ template<typename particles_type> - Advection(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Advection_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { - typedef Dcpse<particles_type::dims, particles_type> DCPSE_type; + typedef Dcpse_type<particles_type::dims, particles_type> DCPSE_type; dcpse = new unsigned char[particles_type::dims * sizeof(DCPSE_type)]; - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(i) = 1; - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; + + if (i) + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, dcpse_ptr[0], p, ord, rCut, oversampling_factor, opt); + else + new(&dcpse_ptr[i]) Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } } template<typename operand_type1, typename operand_type2> - vector_dist_expression_op<operand_type1, std::pair<operand_type2, Dcpse<operand_type2::vtype::dims, typename operand_type2::vtype>>, VECT_DCPSE_V_DOT> + vector_dist_expression_op<operand_type1, std::pair<operand_type2, Dcpse_type<operand_type2::vtype::dims, typename operand_type2::vtype>>, VECT_DCPSE_V_DOT> operator()(operand_type1 arg, operand_type2 arg2) { - typedef Dcpse<operand_type2::vtype::dims, typename operand_type2::vtype> dcpse_type; + typedef Dcpse_type<operand_type2::vtype::dims, typename operand_type2::vtype> dcpse_type; return vector_dist_expression_op<operand_type1, std::pair<operand_type2, dcpse_type>, VECT_DCPSE_V_DOT>(arg, arg2, @@ -1297,7 +1375,7 @@ public: template<typename particles_type> void checkMomenta(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].checkMomenta(particles); @@ -1307,7 +1385,7 @@ public: template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { dcpse_ptr[i].template DrawKernel<prp>(particles, i, k); @@ -1322,7 +1400,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; for (int i = 0; i < particles_type::dims; i++) { 
dcpse_ptr[i].initializeUpdate(particles); } @@ -1344,7 +1422,8 @@ public: * \return Operator Dxy which is a function on Vector_dist_Expressions * */ -class Derivative_xy { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xy_T { void *dcpse; @@ -1362,7 +1441,7 @@ public: * */ template<typename particles_type> - Derivative_xy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_xy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1370,31 +1449,25 @@ public: p.get(0) = 1; p.get(1) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; - - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; - + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_ptr[0].template DrawKernel<prp>(particles, k); @@ -1402,11 +1475,31 @@ public: template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
* * @@ -1414,7 +1507,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -1431,7 +1524,8 @@ public: * \return Operator Dyz which is a function on Vector_dist_Expressions * */ -class Derivative_yz { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_yz_T { void *dcpse; @@ -1449,7 +1543,7 @@ public: * */ template<typename particles_type> - Derivative_yz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_yz_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1457,31 +1551,25 @@ public: p.get(1) = 1; p.get(2) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; - - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; - + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_ptr[0].template DrawKernel<prp>(particles, k); @@ -1489,11 +1577,31 @@ public: template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. 
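The save()/load() pair added to the operators in this hunk persists the computed DCPSE kernels through the underlying Dcpse object, so a later run can restore them instead of redoing the support search and kernel computation. A minimal sketch of the intended round trip; the file name is illustrative, and whether construction itself can skip the initial kernel computation depends on the Dcpse support options and is not shown here:

    // First run: build the operator and persist its kernels.
    Derivative_xy Dxy(particles, 2, rCut);
    Dxy.save(particles, "Dxy_kernels.bin");

    // Later run, after reconstructing the same particle set:
    Derivative_xy Dxy_reloaded(particles, 2, rCut);
    Dxy_reloaded.load(particles, "Dxy_kernels.bin");   // restores kernels from file
    getV<LAP>(particles) = Dxy_reloaded(getV<CONC>(particles));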
+ */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1501,7 +1609,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -1518,7 +1626,8 @@ public: * \return Operator Dxz which is a function on Vector_dist_Expressions * */ -class Derivative_xz { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xz_T { void *dcpse; @@ -1536,7 +1645,7 @@ public: * */ template<typename particles_type> - Derivative_xz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_xz_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1544,31 +1653,25 @@ public: p.get(0) = 1; p.get(2) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; - - new(dcpse_ptr) Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); - dcpse_ptr++; - + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - Dcpse<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse<particles_type::dims, particles_type> *) dcpse; + Dcpse_type<particles_type::dims, particles_type> *dcpse_ptr = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_ptr[0].template DrawKernel<prp>(particles, k); @@ -1576,11 +1679,31 @@ public: template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. 
+ */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1588,7 +1711,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -1606,7 +1729,8 @@ public: * \return Operator Dxx which is a function on Vector_dist_Expressions * */ -class Derivative_xx { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xx_T { void *dcpse; @@ -1624,7 +1748,7 @@ public: * */ template<typename particles_type> - Derivative_xx(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_xx_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1632,36 +1756,56 @@ public: p.get(0) = 2; p.get(1) = 0; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. 
+ */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1669,7 +1813,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -1687,7 +1831,8 @@ public: * \return Operator Dyy which is a function on Vector_dist_Expressions * */ -class Derivative_yy { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_yy_T { void *dcpse; @@ -1705,7 +1850,7 @@ public: * */ template<typename particles_type> - Derivative_yy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_yy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1713,36 +1858,56 @@ public: p.get(0) = 0; p.get(1) = 2; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. 
+ */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1750,7 +1915,7 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } @@ -1767,7 +1932,8 @@ public: * \return Operator Dzz which is a function on Vector_dist_Expressions * */ -class Derivative_zz { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_zz_T { void *dcpse; @@ -1785,43 +1951,63 @@ public: * */ template<typename particles_type> - Derivative_zz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_zz_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; p.zero(); p.get(2) = 2; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename particles_type> void deallocate(particles_type &parts) { - delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + delete (Dcpse_type<particles_type::dims, particles_type> *) dcpse; } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. 
+ */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1829,21 +2015,21 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } }; - -class Derivative_xxx { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xxx_T { void *dcpse; public: template<typename particles_type> - Derivative_xxx(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_xxx_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1851,32 +2037,52 @@ public: p.get(0) = 3; p.get(1) = 0; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. 
+ */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. * * @@ -1884,21 +2090,21 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } }; - -class Derivative_xxy { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xxy_T { void *dcpse; public: template<typename particles_type> - Derivative_xxy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_xxy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1906,32 +2112,52 @@ public: p.get(0) = 2; p.get(1) = 1; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
* * @@ -1939,21 +2165,21 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } }; - -class Derivative_yyx { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_yyx_T { void *dcpse; public: template<typename particles_type> - Derivative_yyx(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_yyx_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -1961,32 +2187,52 @@ public: p.get(0) = 1; p.get(1) = 2; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
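Every operator in this file exposes the same update() hook, which forwards to Dcpse::initializeUpdate() and rebuilds the neighbour supports and kernel coefficients for the current particle positions; it is intended to be called after the particles have moved and been redistributed. A minimal sketch of the expected cycle, reusing the Dxy operator and the illustrative property indices from the earlier sketch, with the advection step left as a placeholder:

    // Remap-then-update cycle when particles move between evaluations.
    for (int step = 0; step < n_steps; ++step) {
        // ... advance particle positions ...
        particles.map();               // redistribute particles across processors
        particles.ghost_get<CONC>();   // refresh ghost values of the evaluated property

        Dxy.update(particles);         // recompute DCPSE kernels on the new configuration
        getV<LAP>(particles) = Dxy(getV<CONC>(particles));
    }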
* * @@ -1994,21 +2240,21 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } }; - -class Derivative_yyy { +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_yyy_T { void *dcpse; public: template<typename particles_type> - Derivative_yyy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + Derivative_yyy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, double oversampling_factor = dcpse_oversampling_factor, support_options opt = support_options::RADIUS) { Point<particles_type::dims, unsigned int> p; @@ -2016,32 +2262,52 @@ public: p.get(0) = 0; p.get(1) = 3; - dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); } template<typename operand_type> - vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> operator()(operand_type arg) { - typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); } template<typename particles_type> void checkMomenta(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->checkMomenta(particles); } template<unsigned int prp, typename particles_type> void DrawKernel(particles_type &particles, int k) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->template DrawKernel<prp>(particles, k); } + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
* * @@ -2049,20 +2315,355 @@ public: */ template<typename particles_type> void update(particles_type &particles) { - auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; dcpse_temp->initializeUpdate(particles); } }; +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xxxx_T { + + void *dcpse; + +public: + + template<typename particles_type> + Derivative_xxxx_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + double oversampling_factor = dcpse_oversampling_factor, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 4; + p.get(1) = 0; + + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + } + + template<typename operand_type> + + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
+ * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->initializeUpdate(particles); + + } +}; + + +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_yyyy_T { + + void *dcpse; + +public: + + template<typename particles_type> + Derivative_yyyy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + double oversampling_factor = dcpse_oversampling_factor, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 0; + p.get(1) = 4; + + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + } + + template<typename operand_type> + + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
+ * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->initializeUpdate(particles); + + } +}; + +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_xxyy_T { + + void *dcpse; + +public: + + template<typename particles_type> + Derivative_xxyy_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + double oversampling_factor = dcpse_oversampling_factor, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 2; + p.get(1) = 2; + + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + } + + template<typename operand_type> + + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
+ * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->initializeUpdate(particles); + + } +}; + + +template<template<unsigned int, typename, typename...> class Dcpse_type = Dcpse> +class Derivative_G_T { + + void *dcpse; + +public: + + template<typename particles_type> + Derivative_G_T(particles_type &parts, unsigned int ord, typename particles_type::stype rCut, + const Point<particles_type::dims, unsigned int> &p,double oversampling_factor = dcpse_oversampling_factor, + support_options opt = support_options::RADIUS) { + dcpse = new Dcpse_type<particles_type::dims, particles_type>(parts, p, ord, rCut, oversampling_factor, opt); + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
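Unlike the fixed-stencil classes above, Derivative_G_T receives the differentiation multi-index as a runtime Point argument, so any mixed derivative can be requested without adding a dedicated class. A minimal sketch, reusing the illustrative particle set and property indices from the earlier examples:

    // d^3/(dx^2 dy) through the generic operator.
    Point<2, unsigned int> p;
    p.zero();
    p.get(0) = 2;   // derivative order in x
    p.get(1) = 1;   // derivative order in y
    Derivative_G DG(particles, 3, rCut, p);   // ord = 3 covers the full third-order basis

    getV<LAP>(particles) = DG(getV<CONC>(particles));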
+ * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse_type<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->initializeUpdate(particles); + + } +}; -//template<typename operand_type1, typename operand_type2/*, typename sfinae=typename std::enable_if< -// std::is_same<typename operand_type1::it_is_a_node,int>::value -// >::type*/ > -//plus<operand_type1,operand_type2> operator+(const operand_type1 & op1, const operand_type2 & op2) -//{ -// return plus<operand_type1,operand_type2>(op1,op2); -//} -#endif /* Eigen */ +//typedef PPInterpolation_T<Dcpse> PPInterpolation; +typedef Derivative_x_T<Dcpse> Derivative_x; +typedef Derivative_y_T<Dcpse> Derivative_y; +typedef Derivative_z_T<Dcpse> Derivative_z; +typedef Gradient_T<Dcpse> Gradient; +typedef Curl2D_T<Dcpse> Curl2D; +typedef Laplacian_T<Dcpse> Laplacian; +typedef Divergence_T<Dcpse> Divergence; +typedef Advection_T<Dcpse> Advection; +typedef Derivative_xy_T<Dcpse> Derivative_xy; +typedef Derivative_yz_T<Dcpse> Derivative_yz; +typedef Derivative_xz_T<Dcpse> Derivative_xz; +typedef Derivative_xx_T<Dcpse> Derivative_xx; +typedef Derivative_yy_T<Dcpse> Derivative_yy; +typedef Derivative_zz_T<Dcpse> Derivative_zz; +typedef Derivative_xxx_T<Dcpse> Derivative_xxx; +typedef Derivative_xxy_T<Dcpse> Derivative_xxy; +typedef Derivative_yyx_T<Dcpse> Derivative_yyx; +typedef Derivative_yyy_T<Dcpse> Derivative_yyy; +typedef Derivative_xxxx_T<Dcpse> Derivative_xxxx; +typedef Derivative_yyyy_T<Dcpse> Derivative_yyyy; +typedef Derivative_xxyy_T<Dcpse> Derivative_xxyy; +typedef Derivative_G_T<Dcpse> Derivative_G; + + +#if defined(__NVCC__) +typedef Derivative_x_T<Dcpse_gpu> Derivative_x_gpu; +typedef Derivative_y_T<Dcpse_gpu> Derivative_y_gpu; +typedef Derivative_z_T<Dcpse_gpu> Derivative_z_gpu; +typedef Gradient_T<Dcpse_gpu> Gradient_gpu; +typedef Curl2D_T<Dcpse_gpu> Curl2D_gpu; +typedef Laplacian_T<Dcpse_gpu> Laplacian_gpu; +typedef Divergence_T<Dcpse_gpu> Divergence_gpu; +typedef Advection_T<Dcpse_gpu> Advection_gpu; +typedef Derivative_xy_T<Dcpse_gpu> Derivative_xy_gpu; +typedef Derivative_yz_T<Dcpse_gpu> Derivative_yz_gpu; +typedef Derivative_xz_T<Dcpse_gpu> Derivative_xz_gpu; +typedef Derivative_xx_T<Dcpse_gpu> Derivative_xx_gpu; +typedef Derivative_yy_T<Dcpse_gpu> Derivative_yy_gpu; +typedef Derivative_zz_T<Dcpse_gpu> Derivative_zz_gpu; +typedef Derivative_xxx_T<Dcpse_gpu> Derivative_xxx_gpu; +typedef Derivative_xxy_T<Dcpse_gpu> Derivative_xxy_gpu; +typedef Derivative_yyx_T<Dcpse_gpu> Derivative_yyx_gpu; +typedef Derivative_yyy_T<Dcpse_gpu> Derivative_yyy_gpu; +typedef Derivative_G_T<Dcpse_gpu> Derivative_G_gpu; +#endif + +#endif /*EIGEN */ #endif /* DCPSE_OP_HPP_ */ diff --git a/src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp b/src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp new file mode 100644 index 00000000..c2a11d4e --- /dev/null +++ b/src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp @@ -0,0 +1,1008 @@ +// +// Created by Abhinav Singh on 15.11.21. +// + +#ifndef OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP +#define OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP +#ifdef HAVE_EIGEN + +#include "DCPSE/DCPSE_op/DCPSE_op.hpp" + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_x { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. 
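The typedef block that closes DCPSE_op.hpp in the hunk above maps every *_T template onto the host Dcpse implementation under the familiar names (Derivative_x, Gradient, Laplacian, ...), and, in nvcc translation units, additionally onto Dcpse_gpu under *_gpu names, so host and device operators share one code path and differ only in the kernel backend. A minimal sketch of selecting the backend at compile time; whether the GPU variant expects a GPU-resident particle set is determined by Dcpse_gpu and is only assumed here:

    // Same operator template, two backends selected through the typedefs.
    Derivative_x Dx(particles, 2, rCut);              // host kernels (Dcpse)
    #if defined(__NVCC__)
    Derivative_x_gpu Dx_gpu(particles_gpu, 2, rCut);  // device kernels (Dcpse_gpu), assumed GPU particle set
    #endif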
+ * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. + * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_x(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + } +}; + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_y { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. 
of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. + * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_y(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(1) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_z { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_z(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(2) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + +template<unsigned int NORMAL_ID> +class Laplace_Beltrami { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
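+     *
+     * A brief sketch of the intended use (placeholder names, assuming the particle set
+     * samples a closed surface and carries unit normals in property NORMAL):
+     * \code
+     * constexpr int CONC = 0, LAP = 1, NORMAL = 2;
+     * Laplace_Beltrami<NORMAL> SLap(particles, 2, rCut, nSpacing);
+     * auto C    = getV<CONC>(particles);
+     * auto LapC = getV<LAP>(particles);
+     *
+     * LapC = SLap(C);             // surface Laplacian of C
+     * C    = C + dt * SLap(C);    // e.g. one explicit diffusion step on the surface
+     * \endcode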
+     *
+     * \return Operator Dxx which is a function on Vector_dist_Expressions
+     *
+     */
+    template<typename particles_type>
+    Laplace_Beltrami(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing,
+                     support_options opt = support_options::RADIUS) {
+        Point<particles_type::dims, unsigned int> p;
+        p.zero();
+        p.get(0) = 2;
+        p.get(1) = 2;
+        p.get(2) = 2;
+
+        dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt);
+    }
+
+    template<typename particles_type>
+    void deallocate(particles_type &parts) {
+        delete (Dcpse<particles_type::dims, particles_type> *) dcpse;
+    }
+
+    template<typename operand_type>
+    vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE>
+    operator()(operand_type arg) {
+        typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type;
+
+        return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse);
+    }
+
+    template<typename particles_type>
+    void checkMomenta(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->checkMomenta(particles);
+
+    }
+
+    template<unsigned int prp, typename particles_type>
+    void DrawKernel(particles_type &particles, int k) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template DrawKernel<prp>(particles, k);
+
+    }
+
+    /*! \brief Method for Saving the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be saved.
+     */
+    template<typename particles_type>
+    void save(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->save(file);
+    }
+    /*! \brief Method for Loading the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be loaded from.
+     */
+    template<typename particles_type>
+    void load(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->load(file);
+    }
+    /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     */
+    template<typename particles_type>
+    void update(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->initializeUpdate(particles);
+        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
+    }
+};
+
+template<unsigned int NORMAL_ID>
+class SurfaceDerivative_xx {
+
+    void *dcpse;
+
+public:
+    /*! \brief Class for creating the surface DCPSE Operator Dxx and computing the DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     * \param ord order of convergence of the operator
+     * \param rCut Argument for cell list construction
+     * \param nSpacing spacing used when placing the auxiliary particles along the surface normal
+     * \param opt support option; defaults to support_options::RADIUS, which selects all particles inside rCut
+     *
+     * \return Operator Dxx which is a function on Vector_dist_Expressions
+     *
+     */
+    template<typename particles_type>
+    SurfaceDerivative_xx(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing,
+                         support_options opt = support_options::RADIUS) {
+        Point<particles_type::dims, unsigned int> p;
+        p.zero();
+        p.get(0) = 2;
+
+        dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt);
+    }
+
+    template<typename particles_type>
+    void deallocate(particles_type &parts) {
+        delete (Dcpse<particles_type::dims, particles_type> *) dcpse;
+    }
+
+    template<typename operand_type>
+    vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE>
+    operator()(operand_type arg) {
+        typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type;
+
+        return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse);
+    }
+
+    template<typename particles_type>
+    void checkMomenta(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->checkMomenta(particles);
+
+    }
+
+    template<unsigned int prp, typename particles_type>
+    void DrawKernel(particles_type &particles, int k) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template DrawKernel<prp>(particles, k);
+
+    }
+
+    /*! \brief Method for Saving the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be saved.
+     */
+    template<typename particles_type>
+    void save(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->save(file);
+    }
+    /*! \brief Method for Loading the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be loaded from.
+     */
+    template<typename particles_type>
+    void load(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->load(file);
+    }
+    /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     */
+    template<typename particles_type>
+    void update(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->initializeUpdate(particles);
+        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
+    }
+};
+
+
+template<unsigned int NORMAL_ID>
+class SurfaceDerivative_yy {
+
+    void *dcpse;
+
+public:
+    /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computes DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     * \param ord order of convergence of the operator
+     * \param rCut Argument for cell list construction
+     * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support
+     * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling.
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_yy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(1) = 2; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_zz { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
+     *
+     * \return Operator Dxx which is a function on Vector_dist_Expressions
+     *
+     */
+    template<typename particles_type>
+    SurfaceDerivative_zz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing,
+                         support_options opt = support_options::RADIUS) {
+        Point<particles_type::dims, unsigned int> p;
+        p.zero();
+        p.get(2) = 2;
+
+        dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt);
+    }
+
+    template<typename particles_type>
+    void deallocate(particles_type &parts) {
+        delete (Dcpse<particles_type::dims, particles_type> *) dcpse;
+    }
+
+    template<typename operand_type>
+    vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE>
+    operator()(operand_type arg) {
+        typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type;
+
+        return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse);
+    }
+
+    template<typename particles_type>
+    void checkMomenta(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->checkMomenta(particles);
+
+    }
+
+    template<unsigned int prp, typename particles_type>
+    void DrawKernel(particles_type &particles, int k) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template DrawKernel<prp>(particles, k);
+
+    }
+
+    /*! \brief Method for Saving the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be saved.
+     */
+    template<typename particles_type>
+    void save(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->save(file);
+    }
+    /*! \brief Method for Loading the DCPSE Operator.
+     *
+     * \param parts particle set
+     * \param file name for data to be loaded from.
+     */
+    template<typename particles_type>
+    void load(particles_type &particles, const std::string &file) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->load(file);
+    }
+    /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     */
+    template<typename particles_type>
+    void update(particles_type &particles) {
+        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->initializeUpdate(particles);
+        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
+
+    }
+};
+
+template<unsigned int NORMAL_ID>
+class SurfaceDerivative_xy {
+
+    void *dcpse;
+
+public:
+    /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computes DCPSE Kernels.
+     *
+     *
+     * \param parts particle set
+     * \param ord order of convergence of the operator
+     * \param rCut Argument for cell list construction
+     * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support
+     * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling.
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_xy(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 1; + p.get(1) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_yz { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_yz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(1) = 1; + p.get(2) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_xz { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
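+     *
+     * Besides being applied inside expressions, the operator exposes a few housekeeping
+     * calls; a short sketch (file name and property index are placeholders):
+     * \code
+     * SurfaceDerivative_xz<NORMAL> Sdxz(particles, 2, rCut, nSpacing);
+     * Sdxz.checkMomenta(particles);               // verify the discrete moment conditions
+     * Sdxz.DrawKernel<KER>(particles, 0);         // store the kernel of particle 0 in property KER
+     * Sdxz.save(particles, "surf_dxz_kernels");   // write the computed kernels to disk ...
+     * Sdxz.load(particles, "surf_dxz_kernels");   // ... and reuse them in a later run
+     * \endcode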
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_xz(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + support_options opt = support_options::RADIUS) { + Point<particles_type::dims, unsigned int> p; + p.zero(); + p.get(0) = 1; + p.get(2) = 1; + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; + +template<unsigned int NORMAL_ID> +class SurfaceDerivative_G { + + void *dcpse; + +public: + /*! \brief Class for Creating the DCPSE Operator Dxx and objects and computs DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. 
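+     *
+     * Unlike the fixed-direction operators above, the differentiation multi-index is passed
+     * explicitly here; a sketch (placeholder names, a 3D particle set is assumed):
+     * \code
+     * Point<3, unsigned int> p;
+     * p.zero();
+     * p.get(0) = 1;               // request the mixed derivative d^2/dxdz
+     * p.get(2) = 1;
+     * SurfaceDerivative_G<NORMAL> SdG(particles, 2, rCut, nSpacing, p);
+     * getV<OUT>(particles) = SdG(getV<CONC>(particles));
+     * \endcode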
+ * + * \return Operator Dxx which is a function on Vector_dist_Expressions + * + */ + template<typename particles_type> + SurfaceDerivative_G(particles_type &parts, unsigned int ord, typename particles_type::stype rCut,typename particles_type::stype nSpacing, + const Point<particles_type::dims, unsigned int> &p,support_options opt = support_options::RADIUS) { + + dcpse = new Dcpse<particles_type::dims, particles_type>(parts, p, ord, rCut,nSpacing,value_t<NORMAL_ID>(), opt); + } + + template<typename particles_type> + void deallocate(particles_type &parts) { + delete (Dcpse<particles_type::dims, particles_type> *) dcpse; + } + + template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + } + + template<typename particles_type> + void checkMomenta(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->checkMomenta(particles); + + } + + template<unsigned int prp, typename particles_type> + void DrawKernel(particles_type &particles, int k) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template DrawKernel<prp>(particles, k); + + } + + /*! \brief Method for Saving the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be saved. + */ + template<typename particles_type> + void save(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->save(file); + } + /*! \brief Method for Loading the DCPSE Operator. + * + * \param parts particle set + * \param file name for data to be loaded from. + */ + template<typename particles_type> + void load(particles_type &particles, const std::string &file) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->load(file); + } + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. + * + * + * \param parts particle set + */ + template<typename particles_type> + void update(particles_type &particles) { + auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse; + dcpse_temp->template createNormalParticles<NORMAL_ID>(particles); + dcpse_temp->initializeUpdate(particles); + dcpse_temp->accumulateAndDeleteNormalParticles(particles); + + } +}; +#endif //Eigen +#endif //OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP diff --git a/src/DCPSE/DCPSE_op/EqnsStruct.hpp b/src/DCPSE/DCPSE_op/EqnsStruct.hpp index 417216ac..6d08198f 100644 --- a/src/DCPSE/DCPSE_op/EqnsStruct.hpp +++ b/src/DCPSE/DCPSE_op/EqnsStruct.hpp @@ -9,6 +9,7 @@ #include "Solvers/umfpack_solver.hpp" #include "Solvers/petsc_solver.hpp" +#ifdef HAVE_PETSC //! Specify the general characteristic of system to solve struct equations2d1 { @@ -59,7 +60,6 @@ struct equations2d2 { typedef petsc_solver<double> solver_type; }; - struct equations2d1p { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; @@ -181,7 +181,6 @@ struct equations2d4 { typedef petsc_solver<double> solver_type; }; - struct equations3d3 { //! dimensionaly of the equation ( 3D problem ...) 
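    // These trait structs only bundle the problem dimension, the number of unknowns, the
    // boundary conditions and the linear-algebra backend; they are consumed by the DCPSE
    // solver front end. A minimal sketch with equations2d1 (particle set, subsets and
    // property ids are placeholders, PETSc backend assumed):
    //
    //   DCPSE_scheme<equations2d1, decltype(particles)> Solver(particles);
    //   auto v       = getV<0>(particles);
    //   auto Poisson = Lap(v);                      // Lap is a DCPSE Laplacian operator
    //   Solver.impose(Poisson, bulk, prop_id<1>()); // interior: Lap(v) = rhs stored in property 1
    //   Solver.impose(v, boundary, 0);              // Dirichlet condition v = 0 on the boundary
    //   Solver.solve(v);                            // assemble and solve with petsc_solver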
static const unsigned int dims = 3; @@ -278,7 +277,7 @@ struct equations3d3Pyz { typedef petsc_solver<double> solver_type; }; -struct equations3d3EPxz { +struct equations3d3Pxz { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system @@ -287,29 +286,29 @@ struct equations3d3EPxz { //! boundary at X and Y static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; - //! type of space float, double, ... + //! type of space float, double, .. typedef double stype; //! type of base particles typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations3d3EPz { +struct equations3d1Pz { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC,PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -318,28 +317,31 @@ struct equations3d3EPz { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations3d3Pxz { + +#ifdef __NVCC__ +struct equations2d1_gpu { + //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 3; + static const unsigned int dims=2; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar=1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; - //! type of space float, double, .. + //! type of space float, double, ... typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; @@ -350,20 +352,20 @@ struct equations3d3Pxz { typedef petsc_solver<double> solver_type; }; -struct equations3d1Pz { +struct equations2d2_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 3; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 1; + static const unsigned int nvar = 2; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; //! 
type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; @@ -374,58 +376,79 @@ struct equations3d1Pz { typedef petsc_solver<double> solver_type; }; +struct equations2d1p_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; -//! Specify the general characteristic of system to solve -struct equations2d1E { + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; +struct equations2d2p_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims=2; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar=1; + static const unsigned int nvar = 2; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC}; //! type of space float, double, ... typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations2d2E { +struct equations2d3p_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 2; + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC}; //! type of space float, double, ... typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations2d3E { +struct equations2d3_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -438,18 +461,18 @@ struct equations2d3E { typedef double stype; //! 
type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations2d4E { +struct equations2d4_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -462,50 +485,145 @@ struct equations2d4E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; +struct equations3d3_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; -struct equations2d1pE { + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d1_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims = 3; //! number of fields in the system static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; //! type of space float, double, ... typedef double stype; + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! 
type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pyz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pxz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + //! type of base particles typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; -struct equations2d2pE { +struct equations3d1Pz_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 2; + static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -514,22 +632,28 @@ struct equations2d2pE { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double> Vector_type; + typedef Vector<double, PETSC_BASE> Vector_type; - typedef umfpack_solver<double> solver_type; + typedef petsc_solver<double> solver_type; }; +#endif //__NVCC__ + +#endif //HAVE_PETSC + + +//! Specify the general characteristic of system to solve +struct equations2d1E { -struct equations2d3pE { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims=2; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar=1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -546,16 +670,16 @@ struct equations2d3pE { typedef umfpack_solver<double> solver_type; }; -struct equations3d3E { +struct equations2d2E { //! dimensionaly of the equation ( 3D problem ...) 
- static const unsigned int dims = 3; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 2; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; - //! type of space float, double, .. + //! type of space float, double, ... typedef double stype; //! type of base particles @@ -570,14 +694,14 @@ struct equations3d3E { typedef umfpack_solver<double> solver_type; }; -struct equations3d1E { +struct equations2d3E { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 3; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 1; + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -594,5 +718,467 @@ struct equations3d1E { typedef umfpack_solver<double> solver_type; }; +struct equations2d4E { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 4; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + + +struct equations2d1pE { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d2pE { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d3pE { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! 
type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3E { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d1E { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + + +struct equations3d3EPxz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3EPz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + + +#ifdef __NVCC__ +struct equations2d1E_gpu { + + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims=2; + //! number of fields in the system + static const unsigned int nvar=1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! 
type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d2E_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d3E_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d4E_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 4; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + + +struct equations2d1pE_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d2pE_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... 
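+    // The *_gpu variants differ from the structs above only in the particle container:
+    // they pair with vector_dist_gpu and the GPU DCPSE operators (a sketch with
+    // placeholder names, valid only in a CUDA build):
+    //
+    //   vector_dist_gpu<2, double, aggregate<double, double>> particles_gpu(0, box, bc, ghost);
+    //   Derivative_x_gpu Dx_gpu(particles_gpu, 2, rCut);
+    //   DCPSE_scheme<equations2d1E_gpu, decltype(particles_gpu)> Solver(particles_gpu);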
+ typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations2d3pE_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3E_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d1E_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3EPxz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3EPz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC,PERIODIC}; + + //! type of space float, double, ... 
+ typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; +#endif //__NVCC__ #endif //OPENFPM_PDATA_EQNSSTRUCT_HPP diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp index a41ce710..133849d9 100644 --- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp @@ -1001,6 +1001,24 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests) Solver.impose(-D_y, dw_p, prop_id<1>()); Solver.impose(-D_x, l_p, prop_id<1>()); Solver.impose(D_x, r_p, prop_id<1>()); + + Solver.reset_b(); + Solver.impose_b(bulk, prop_id<1>()); + Solver.impose_b(up_p, prop_id<1>()); + Solver.impose_b(dw_p, prop_id<1>()); + Solver.impose_b(l_p, prop_id<1>()); + Solver.impose_b(r_p, prop_id<1>()); + + Solver.solve_with_solver(solver,sol); + + + Solver.reset_b(); + Solver.impose_b(bulk, prop_id<1>()); + Solver.impose_b(up_p, prop_id<1>()); + Solver.impose_b(dw_p, prop_id<1>()); + Solver.impose_b(l_p, prop_id<1>()); + Solver.impose_b(r_p, prop_id<1>()); + Solver.solve_with_solver(solver,sol); // Solver.solve(sol); @@ -1022,6 +1040,174 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests) //domain.write("Neumann"); } + BOOST_AUTO_TEST_CASE(dcpse_poisson_Neumann2d) { + const size_t sz[2] = {31,31}; + Box<2, double> box({0, 0}, {1.0, 1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + double rCut = 3.1 * spacing; + Ghost<2, double> ghost(spacing * 3.1); + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist<2, double, aggregate<double[2],double[2],double[2],double[2],double[2]>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_x Dx(domain, 2, rCut,1.9,support_options::N_PARTICLES); + Derivative_y Dy(domain, 2, rCut,1.9,support_options::N_PARTICLES); + Laplacian Lap(domain, 2, rCut, 1.9,support_options::N_PARTICLES); + petsc_solver<double> solver; + solver.setRestart(500); + solver.setSolver(KSPGMRES); + solver.setPreconditioner(PCSVD); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - 
spacing / 2.0});
+
+        Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0},
+                             {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0});
+
+        openfpm::vector<Box<2, double>> boxes;
+        boxes.add(up);
+        boxes.add(down);
+        boxes.add(left);
+        boxes.add(right);
+
+        // Create a writer and write
+        VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box;
+        vtk_box.add(boxes);
+        //vtk_box.write("vtk_box.vtk");
+
+        auto it2 = domain.getDomainIterator();
+
+        while (it2.isNext()) {
+            auto p = it2.get();
+            Point<2, double> xp = domain.getPos(p);
+            //domain.getProp<3>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1];
+            if (up.isInside(xp) == true) {
+                up_p.add();
+                up_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] = sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] = sin(5*xp.get(0));
+            } else if (down.isInside(xp) == true) {
+                dw_p.add();
+                dw_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] = sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] = sin(5*xp.get(0));
+            } else if (left.isInside(xp) == true) {
+                l_p.add();
+                l_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] = sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] = sin(5*xp.get(0));
+            } else if (right.isInside(xp) == true) {
+                r_p.add();
+                r_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] = sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] = sin(5*xp.get(0));
+            } else {
+                bulk.add();
+                bulk.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] = -10*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02);
+                domain.getProp<1>(p)[1] = -10*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02);
+            }
+
+            ++it2;
+        }
+
+        DCPSE_scheme<equations2d2,decltype(domain)> Solver(domain,options_solver::LAGRANGE_MULTIPLIER);
+        eq_id vx,vy;
+        vx.setId(0);
+        vy.setId(1);
+        auto Poisson0 = -Lap(v[0]);
+        auto D_x0 = Dx(v[0]);
+        auto D_y0 = Dy(v[0]);
+        auto Poisson1 = -Lap(v[1]);
+        auto D_x1 = Dx(v[1]);
+        auto D_y1 = Dy(v[1]);
+
+        Solver.impose(Poisson0, bulk, RHS[0],vx);
+        Solver.impose(Poisson1, bulk, RHS[1],vy);
+        Solver.impose(D_y0, up_p, RHS[0],vx);
+        Solver.impose(-D_y0, dw_p, RHS[0],vx);
+        Solver.impose(-D_x0, l_p, RHS[0],vx);
+        Solver.impose(D_x0, r_p, RHS[0],vx);
+
+        Solver.impose(D_y1, up_p, RHS[1],vy);
+        Solver.impose(-D_y1, dw_p, RHS[1],vy);
+        Solver.impose(-D_x1, l_p, RHS[1],vy);
+        Solver.impose(D_x1, r_p, RHS[1],vy);
+        Solver.solve_with_solver(solver,sol[0],sol[1]);
+
+//        Solver.solve(sol);
+        domain.ghost_get<2>();
+        anasol[0]=-Lap(sol[0]);
+        anasol[1]=-Lap(sol[1]);
+        double worst1 = 0.0,worst2 = 0.0;
+
+        for(int j=0;j<bulk.size();j++)
+        {   auto p=bulk.get<0>(j);
+            if (fabs(domain.getProp<3>(p)[0]- domain.getProp<1>(p)[0]) >= worst1) {
+                worst1 = fabs(domain.getProp<3>(p)[0] - domain.getProp<1>(p)[0]);
+            }
+            if (fabs(domain.getProp<3>(p)[1]- domain.getProp<1>(p)[1]) >= worst2) {
+                worst2 = fabs(domain.getProp<3>(p)[1] - domain.getProp<1>(p)[1]);
+            }
+            domain.getProp<4>(p)[0] = fabs(domain.getProp<1>(p)[0] - domain.getProp<3>(p)[0]);
+            domain.getProp<4>(p)[1] = fabs(domain.getProp<1>(p)[1] - domain.getProp<3>(p)[1]);
+
+        }
+        //Auto Error
+        BOOST_REQUIRE(worst1 < 1.0);
+        BOOST_REQUIRE(worst2 < 1.0);
+
+        domain.write("Neumann2d");
+    }
+
     BOOST_AUTO_TEST_CASE(dcpse_slice_solver) {
 //        int rank;
 //        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -1162,7 +1348,7 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests)
 
         vx.setId(0);
         vy.setId(1);
-        DCPSE_scheme<equations2d2,decltype(domain)> Solver( domain);
+        DCPSE_scheme<equations2d2,decltype(domain)> Solver(domain);
         auto Poisson0 =
Lap(v[0]); auto Poisson1 = Lap(v[1]); //auto D_x = Dx(v[1]); diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu new file mode 100644 index 00000000..7bcc95b7 --- /dev/null +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu @@ -0,0 +1,1333 @@ +/* + * DCPSE_op_Solver_test.cu + * + * Created on: Jan 7, 2020 + * Author: Abhinav Singh, Pietro Incardona, Serhii + * + */ +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_VECTOR_SIZE 40 +#include "config.h" + + +#define BOOST_TEST_DYN_LINK + +#include "util/util_debug.hpp" +#include <boost/test/unit_test.hpp> +#include <iostream> +#include "../DCPSE_op.hpp" +#include "../DCPSE_Solver.hpp" +#include "../DCPSE_Solver.cuh" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Vector/vector_dist_subset.hpp" +#include "../EqnsStruct.hpp" +#include "Decomposition/Distribution/SpaceDistribution.hpp" + +BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests_cu) + +BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + size_t edgeSemiSize = 257; + const size_t sz[3] = {edgeSemiSize, edgeSemiSize,edgeSemiSize}; + Box<3, double> box({0, 0,0}, {1,1,1}); + size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + double rCut = 3.1 * spacing; + Ghost<3, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing * spacing/ (2 * 4); + + vector_dist_gpu<3, double, aggregate<double, VectorS<3, double>, VectorS<3, double>, VectorS<3, double>, VectorS<3, double>,double,double>> domain( + 0, box, bc, ghost); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) { + domain.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing; + domain.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing; + domain.getLastPos()[1] = y;//+gaussian(rng); + mem_id k2 = key.get(2); + double z = k2 * spacing; + domain.getLastPos()[2] = z;//+gaussian(rng); + // Here fill the function value + domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]) + sin(domain.getLastPos()[2]) ; + domain.template getLastProp<1>()[0] = cos(domain.getLastPos()[0]); + domain.template getLastProp<1>()[1] = cos(domain.getLastPos()[1]) ; + domain.template getLastProp<1>()[2] = cos(domain.getLastPos()[2]); + // Here fill the validation value for Df/Dx + domain.template getLastProp<2>()[0] = 0;//cos(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]); + domain.template getLastProp<2>()[1] = 0;//-sin(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]); + domain.template getLastProp<3>()[0] = 0;//cos(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]); + domain.template getLastProp<3>()[1] = 0;//-sin(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]); + domain.template getLastProp<3>()[2] = 0; + + domain.template getLastProp<4>()[0] = -cos(domain.getLastPos()[0]) * sin(domain.getLastPos()[0]); + domain.template getLastProp<4>()[1] = -cos(domain.getLastPos()[1]) * sin(domain.getLastPos()[1]); + domain.template getLastProp<4>()[2] = -cos(domain.getLastPos()[2]) * sin(domain.getLastPos()[2]); + + + /* domain.template getLastProp<4>()[0] = cos(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) + + 
cos(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1])); + domain.template getLastProp<4>()[1] = -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) - + sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1])); + domain.template getLastProp<4>()[2] = -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) - + sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));*/ + domain.template getLastProp<5>() = cos(domain.getLastPos()[0]) * cos(domain.getLastPos()[0])+cos(domain.getLastPos()[1]) * cos(domain.getLastPos()[1])+cos(domain.getLastPos()[2]) * cos(domain.getLastPos()[2]) ; + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Advection_gpu Adv(domain, 2, rCut, 1.9,support_options::RADIUS); + auto v = getV<1>(domain); + auto P = getV<0>(domain); + auto dv = getV<3>(domain); + auto dP = getV<6>(domain); + + +// typedef boost::mpl::int_<std::is_fundamental<point_expression_op<Point<2U, double>, point_expression<double>, Point<2U, double>, 3>>::value>::blabla blabla; +// std::is_fundamental<decltype(o1.value(key))> + + domain.ghost_get<1>(); + dv = Adv(v, v); + auto it2 = domain.getDomainIterator(); + + double worst1 = 0.0; + + while (it2.isNext()) { + auto p = it2.get(); + + if (fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]) > worst1) { + worst1 = fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]); + + } + + ++it2; + } + //std::cout << "Maximum Error in component 2: " << worst1 << std::endl; + BOOST_REQUIRE(worst1 < 0.03); + + //Adv.checkMomenta(domain); + //Adv.DrawKernel<2>(domain,0); + + //domain.deleteGhost(); + + dP = Adv(v, P);//+Dy(P); + auto it3 = domain.getDomainIterator(); + + double worst2 = 0.0; + + while (it3.isNext()) { + auto p = it3.get(); + if (fabs(domain.getProp<6>(p) - domain.getProp<5>(p)) > worst2) { + worst2 = fabs(domain.getProp<6>(p) - domain.getProp<5>(p)); + + } + + ++it3; + } + domain.deleteGhost(); + BOOST_REQUIRE(worst2 < 0.03); + + +} + + BOOST_AUTO_TEST_CASE(dcpse_op_solver) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {31, 31}; + Box<2, double> box({0, 0}, {1.0, 1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<2, double> ghost(spacing * 3); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Laplacian_gpu Lap(domain, 2, rCut); + + DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + + auto v = getV<0>(domain); + auto RHS = getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + + // Here fill me + + Box<2, 
double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> up_d({box.getLow(0) - spacing / 2.0, box.getHigh(1) - 8*spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - 6*spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> down_u({box.getLow(0) - spacing / 2.0, box.getLow(1) + 3*spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + 4*spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> left_r({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right_l({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(up_d); + boxes.add(down); + boxes.add(down_u); + boxes.add(left); + boxes.add(left_r); + boxes.add(right); + boxes.add(right_l); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + vtk_box.write("vtk_box.vtk"); + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + domain.getProp<2>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1]; + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 3 + xp.get(0)*xp.get(0); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 1 + xp.get(0)*xp.get(0); + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 1 + 2*xp.get(1)*xp.get(1); + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 2 + 2*xp.get(1)*xp.get(1); + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + } + + + ++it2; + } + + + auto eq1 = Lap(v); + + Solver.impose(eq1, bulk, 6); + Solver.impose(v, up_p, RHS); + Solver.impose(v, dw_p, RHS); + Solver.impose(v, l_p, prop_id<1>()); + Solver.impose(v, r_p, prop_id<1>()); + Solver.solve(v); + anasol=Lap(v); + + double worst1 = 0.0; + + it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(domain.getProp<0>(p) - domain.getProp<2>(p)) >= worst1) { + worst1 = fabs(domain.getProp<0>(p) - domain.getProp<2>(p)); + } + + domain.getProp<1>(p) = fabs(domain.getProp<0>(p) - domain.getProp<2>(p)); + + ++it2; + } + + domain.write("particles"); + BOOST_REQUIRE(worst1 < 0.03); + } + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + BOOST_AUTO_TEST_CASE(dcpse_poisson_Robin_anal) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {81,81}; + Box<2, double> box({0, 0}, {0.5, 0.5}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<2, double> ghost(spacing * 3.1); + double rCut = 3.1 * 
spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double,double,double>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + + // Add multi res patch 1 + + { + const size_t sz2[2] = {40,40}; + Box<2,double> bx({0.25 + it.getSpacing(0)/4.0,0.25 + it.getSpacing(0)/4.0},{sz2[0]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0, sz2[1]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<2,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + + auto key = it2.get(); + double x = key.get(0) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[1] = y; + + ++it2; + } + } + + // Add multi res patch 2 + + { + const size_t sz2[2] = {40,40}; + Box<2,double> bx({0.25 + 21.0*spacing/8.0,0.25 + 21.0*spacing/8.0},{sz2[0]*spacing/4.0 + 0.25 + 21.0*spacing/8.0, sz2[1]*spacing/4.0 + 0.25 + 21*spacing/8.0}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<2,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + + auto key = it2.get(); + double x = key.get(0) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[1] = y; + + ++it2; + } + } + + /////////////////////// + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_x_gpu Dx(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/); + Derivative_y_gpu Dy(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/); + Laplacian_gpu Lap(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/); + + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + openfpm::vector<aggregate<int>> ref_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto DCPSE_sol=getV<5>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + 
spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } + ++it2; + } + + domain.ghost_get<1,3>(); + + DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain); + auto Poisson = Lap(v); + auto D_x = Dx(v); + auto D_y = Dy(v); + Solver.impose(Poisson, bulk, prop_id<1>()); + Solver.impose(D_y, up_p, 0); + Solver.impose(D_x, r_p, 0); + Solver.impose(v, dw_p, 0); + Solver.impose(v, l_p, 0); + + petsc_solver<double> solver; + + solver.setPreconditioner(PCBJACOBI); + solver.setRestart(500); + + Solver.solve_with_solver(solver,sol); + + //solver.print_preconditioner(); + + domain.ghost_get<2>(); + + DCPSE_sol=Lap(sol); + domain.ghost_get<5>(); + + double worst1 = 0.0; + + v=abs(DCPSE_sol-RHS); + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p) - domain.getProp<2>(p)) >= worst1) { + worst1 = fabs(domain.getProp<3>(p) - domain.getProp<2>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<3>(p) - domain.getProp<2>(p)); + + } + //std::cout << "Maximum Analytic Error: " << worst1 << std::endl; + + //domain.ghost_get<4>(); + //domain.write("Robin_anasol"); + BOOST_REQUIRE(worst1 < 0.03); + + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //81 0.00131586 + //161 0.000328664 + //320 8.30297e-05 + //520 3.12398e-05 + //1024 8.08087e-06 + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + BOOST_AUTO_TEST_CASE(dcpse_poisson_Dirichlet_anal) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {200,200}; + Box<2, double> box({0, 0}, {1, 1}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<2, double> ghost(spacing * 3.1); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double,double,double>> domain(0, box, 
bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::RADIUS); + Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::RADIUS); + Laplacian_gpu Lap(domain, 2, rCut, 1.9,support_options::RADIUS); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> bulkF; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + openfpm::vector<aggregate<int>> ref_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto DCPSE_sol=getV<5>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + bulkF.add(); + bulkF.last().get<0>() = p.getKey(); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + bulkF.add(); + bulkF.last().get<0>() = p.getKey(); + + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + bulkF.add(); + bulkF.last().get<0>() = p.getKey(); + + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + bulkF.add(); + bulkF.last().get<0>() = p.getKey(); + + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + 
bulkF.add(); + bulkF.last().get<0>() = p.getKey(); + } + ++it2; + } + DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain); + auto Poisson = Lap(v); + auto D_x = Dx(v); + auto D_y = Dy(v); + Solver.impose(Poisson, bulk, prop_id<1>()); + Solver.impose(v, up_p, prop_id<1>()); + Solver.impose(v, r_p, prop_id<1>()); + Solver.impose(v, dw_p, prop_id<1>()); + Solver.impose(v, l_p, prop_id<1>()); + Solver.solve(sol); + DCPSE_sol=Lap(sol); + + + for (int j = 0; j < up_p.size(); j++) { + auto p = up_p.get<0>(j); + domain.getProp<5>(p) = 0; + } + for (int j = 0; j < dw_p.size(); j++) { + auto p = dw_p.get<0>(j); + domain.getProp<5>(p) = 0; + } + for (int j = 0; j < l_p.size(); j++) { + auto p = l_p.get<0>(j); + domain.getProp<5>(p) = 0; + } + for (int j = 0; j < r_p.size(); j++) { + auto p = r_p.get<0>(j); + domain.getProp<5>(p) = 0; + } + + double worst1 = 0.0; + + v=abs(DCPSE_sol-RHS); + + for(int j=0;j<bulkF.size();j++) + { auto p=bulkF.get<0>(j); + if (fabs(domain.getProp<3>(p) - domain.getProp<2>(p)) >= worst1) { + worst1 = fabs(domain.getProp<3>(p) - domain.getProp<2>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<3>(p) - domain.getProp<2>(p)); + + } + // std::cout << "Maximum Analytic Error: " << worst1 << std::endl; + + BOOST_REQUIRE(worst1 < 0.03); + + // domain.write("Dirichlet_anasol"); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + BOOST_AUTO_TEST_CASE(dcpse_poisson_Periodic) { + //https://fenicsproject.org/docs/dolfin/1.4.0/python/demo/documented/periodic/python/documentation.html + // int rank; + // MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {31,31}; + Box<2, double> box({0, 0}, {1.0, 1.0}); + size_t bc[2] = {PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<2, double> ghost(spacing * 3.1); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double,double,VectorS<2, double>>> domain(0, box, bc, ghost); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1)*0.99999; + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + + Laplacian_gpu Lap(domain, 2, rCut, 1.9, support_options::RADIUS); + + DCPSE_scheme_gpu<equations2d1p_gpu,decltype(domain)> Solver( domain); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto u = getV<5>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> 
right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + //domain.getProp<3>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1]; + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 0; + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = 0; + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = xp.get(0)*sin(5*M_PI*xp.get(1))+exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02); + } + + ++it2; + } + + domain.ghost_get<1>(); + auto Poisson = -Lap(v); + Solver.impose(Poisson, bulk, prop_id<1>()); + Solver.impose(v, up_p, 0); + Solver.impose(v, dw_p, 0); + Solver.solve(v); + + domain.ghost_get<0>(); + anasol=-Lap(v); + double worst1 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p) - domain.getProp<1>(p)) >= worst1) { + worst1 = fabs(domain.getProp<3>(p) - domain.getProp<1>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<1>(p) - domain.getProp<3>(p)); + + } + //Auto Error + BOOST_REQUIRE(worst1 < 1.0); + + //domain.write("Poisson_Periodic"); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + BOOST_AUTO_TEST_CASE(dcpse_poisson_Robin) { + //http://e6.ijs.si/medusa/wiki/index.php/Poisson%27s_equation#Full_Neumann_boundary_conditions +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {31,31}; + Box<2, double> box({0, 0}, {1.0, 1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<2, double> ghost(spacing * 3); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double,double,VectorS<2, double>>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_y_gpu Dy(domain, 2, rCut); + Laplacian_gpu Lap(domain, 2, rCut); + + DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + + auto v = getV<0>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto u = getV<5>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - 
spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + //domain.getProp<3>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1]; + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5.0*xp.get(0)); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5.0*xp.get(0)); + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5.0*xp.get(0)); + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5.0*xp.get(0)); + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -10.0*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02); + } + + ++it2; + } + + petsc_solver<double> pet_sol; + pet_sol.setPreconditioner(PCNONE); + + auto Poisson = Lap(v); + auto D_y = Dy(v); + Solver.impose(Poisson, bulk, prop_id<1>()); + Solver.impose(D_y, up_p, prop_id<1>()); + Solver.impose(-D_y, dw_p, prop_id<1>()); + Solver.impose(v, l_p, 0); + Solver.impose(v, r_p, 0); + + Solver.solve_with_solver(pet_sol,sol); + domain.ghost_get<2>(); + + anasol=Lap(sol); + double worst1 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p) - domain.getProp<1>(p)) >= worst1) { + worst1 = fabs(domain.getProp<3>(p) - domain.getProp<1>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<1>(p) - domain.getProp<3>(p)); + + } + //Auto Error + BOOST_REQUIRE(worst1 < 1.0); + + //std::cout << "WORST: " << worst1 << std::endl; + + //domain.write("Mixed"); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + BOOST_AUTO_TEST_CASE(dcpse_poisson_Neumann) { + //https://fenicsproject.org/docs/dolfin/1.4.0/python/demo/documented/neumann-poisson/python/documentation.html +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {31,31}; + Box<2, double> box({0, 0}, {1.0, 1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + double rCut = 3.1 * spacing; + Ghost<2, double> ghost(spacing * 3.1); + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double,double,double,double,double>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + 
domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_x_gpu Dx(domain, 2, rCut); + Derivative_y_gpu Dy(domain, 2, rCut); + Laplacian_gpu Lap(domain, 2, rCut); + petsc_solver<double> solver; + solver.setRestart(500); + solver.setSolver(KSPGMRES); + solver.setPreconditioner(PCSVD); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + + auto v = getV<0>(domain); + //auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + //domain.getProp<3>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1]; + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5*xp.get(0)); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5*xp.get(0)); + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5*xp.get(0)); + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = sin(5*xp.get(0)); + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -10*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02); + } + + ++it2; + } + + DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain,options_solver::LAGRANGE_MULTIPLIER); + auto Poisson = -Lap(v); + auto D_x = Dx(v); + auto D_y = Dy(v); + Solver.impose(Poisson, bulk, prop_id<1>()); + Solver.impose(D_y, up_p, prop_id<1>()); + Solver.impose(-D_y, dw_p, prop_id<1>()); + Solver.impose(-D_x, l_p, prop_id<1>()); + Solver.impose(D_x, r_p, prop_id<1>()); + Solver.solve_with_solver(solver,sol); + +// Solver.solve(sol); + domain.ghost_get<2>(); + anasol=-Lap(sol); + double worst1 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p) - domain.getProp<1>(p)) >= worst1) { + worst1 = fabs(domain.getProp<3>(p) - domain.getProp<1>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<1>(p) - domain.getProp<3>(p)); + + } + //Auto Error + BOOST_REQUIRE(worst1 < 1.0); + + //domain.write("Neumann"); + } + + + 
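+        // For reference, the pure-Neumann pattern exercised by the test above reduces to the
+        // following sketch (a summary reusing the operator and particle-set names defined in
+        // that test; options_solver::LAGRANGE_MULTIPLIER is the ingredient that fixes the
+        // additive constant a pure Neumann problem otherwise leaves undetermined):
+        //
+        //     DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain,options_solver::LAGRANGE_MULTIPLIER);
+        //     Solver.impose(-Lap(v), bulk, prop_id<1>());   // -Lap(u) = f in the bulk
+        //     Solver.impose( Dy(v), up_p, prop_id<1>());    // du/dn = g on each wall
+        //     Solver.impose(-Dy(v), dw_p, prop_id<1>());
+        //     Solver.impose(-Dx(v), l_p,  prop_id<1>());
+        //     Solver.impose( Dx(v), r_p,  prop_id<1>());
+        //     Solver.solve_with_solver(solver, sol);        // KSPGMRES + PCSVD petsc_solver, as above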
BOOST_AUTO_TEST_CASE(dcpse_slice_solver) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[2] = {31,31}; + Box<2, double> box({0, 0}, {1, 1}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + double rCut = 3.1 * spacing; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<VectorS<2, double>,VectorS<2, double>,VectorS<2, double>,VectorS<2, double>,VectorS<2, double>,VectorS<2, double>>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::RADIUS); + Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::RADIUS); + Laplacian_gpu Lap(domain, 2, rCut,1.9,support_options::RADIUS); + + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + openfpm::vector<aggregate<int>> ref_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto DCPSE_sol=getV<5>(domain); + + // Here fill me + + Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0}); + + Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0}); + + Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0}); + + openfpm::vector<Box<2, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + // domain.write("Slice_anasol"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = domain.getPos(p); + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p)[0] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[0] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + domain.getProp<1>(p)[1] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[1] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p)[0] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[0] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + domain.getProp<1>(p)[1] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[1] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (left.isInside(xp) == true) { + 
l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p)[0] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[0] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + domain.getProp<1>(p)[1] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[1] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p)[0] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[0] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + domain.getProp<1>(p)[1] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[1] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p)[0] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[0] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + domain.getProp<1>(p)[1] = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p)[1] = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } + ++it2; + } + + eq_id vx,vy; + + vx.setId(0); + vy.setId(1); + + DCPSE_scheme_gpu<equations2d2_gpu,decltype(domain)> Solver( domain); + auto Poisson0 = Lap(v[0]); + auto Poisson1 = Lap(v[1]); + //auto D_x = Dx(v[1]); + //auto D_y = Dy(v[1]); + Solver.impose(Poisson0, bulk, RHS[0],vx); + Solver.impose(Poisson1, bulk, RHS[1],vy); + Solver.impose(v[0], up_p, RHS[0],vx); + Solver.impose(v[1], up_p, RHS[1],vy); + Solver.impose(v[0], r_p, RHS[0],vx); + Solver.impose(v[1], r_p, RHS[1],vy); + Solver.impose(v[0], dw_p, RHS[0],vx); + Solver.impose(v[1], dw_p, RHS[1],vy); + Solver.impose(v[0], l_p, RHS[0],vx); + Solver.impose(v[1], l_p, RHS[1],vy); + Solver.solve(sol[0],sol[1]); + DCPSE_sol=Lap(sol); + double worst1 = 0.0; + double worst2 = 0.0; + + + v=sol-RHS; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p)[0] - domain.getProp<2>(p)[0]) >= worst1) { + worst1 = fabs(domain.getProp<3>(p)[0] - domain.getProp<2>(p)[0]); + } + domain.getProp<4>(p)[0] = fabs(domain.getProp<3>(p)[0] - domain.getProp<2>(p)[0]); + + } + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<3>(p)[1] - domain.getProp<2>(p)[1]) >= worst2) { + worst2 = fabs(domain.getProp<3>(p)[1] - domain.getProp<2>(p)[1]); + } + domain.getProp<4>(p)[1] = fabs(domain.getProp<3>(p)[1] - domain.getProp<2>(p)[1]); + + } + //std::cout << "Maximum Analytic Error in slice x: " << worst1 << std::endl; + //std::cout << "Maximum Analytic Error in slice y: " << worst2 << std::endl; + //domain.write("Slice_anasol"); + BOOST_REQUIRE(worst1 < 0.03); + BOOST_REQUIRE(worst2 < 0.03); + + } + + +BOOST_AUTO_TEST_SUITE_END() + + diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Surface_tests.cpp b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Surface_tests.cpp new file mode 100644 index 00000000..1cbe5ee1 --- /dev/null +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Surface_tests.cpp @@ -0,0 +1,1112 @@ +// +// Created by Abhinav Singh on 15.11.21. 
+// + +#include "config.h" +#ifdef HAVE_EIGEN +#ifdef HAVE_PETSC + + +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_VECTOR_SIZE 40 + +#define BOOST_TEST_DYN_LINK + +#include "util/util_debug.hpp" +#include <boost/test/unit_test.hpp> +#include <iostream> +#include "../DCPSE_surface_op.hpp" +#include "../DCPSE_Solver.hpp" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Vector/vector_dist_subset.hpp" +#include <iostream> +#include "util/SphericalHarmonics.hpp" + +BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests) + BOOST_AUTO_TEST_CASE(dcpse_surface_simple) { + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t n=256; + size_t sz[2] = {n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double rCut{3.9 * grid_spacing}; + + Box<2,double> domain{{boxP1,boxP1},{boxP2,boxP2}}; + size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; + Ghost<2,double> ghost{rCut + grid_spacing/8.0}; + auto &v_cl=create_vcluster(); + + vector_dist_ws<2, double, aggregate<double,double,double[2],double,double[2]>> Sparticles(0, domain,bc,ghost); + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + // 1. Particles on a line + if (v_cl.rank() == 0) { + for (int i = 0; i < n; ++i) { + double xp = -1.5+i*grid_spacing; + Sparticles.add(); + Sparticles.getLastPos()[0] = xp; + Sparticles.getLastPos()[1] = 0; + Sparticles.getLastProp<3>() = std::sin(xp); + Sparticles.getLastProp<2>()[0] = 0; + Sparticles.getLastProp<2>()[1] = 1.0; + Sparticles.getLastProp<1>() = -std::sin(xp);//sin(theta)*exp(-finalT/(radius*radius)); + Sparticles.getLastSubset(0); + } + } + Sparticles.map(); + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + Sparticles.ghost_get<0,3>(); + //Sparticles.write("Sparticles"); + //Here template parameters are Normal property no. 
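+    // (i.e. the template argument <2> selects the property slot that stores the surface normal,
+    //  property 2 above; the remaining constructor arguments appear to be (particles, DCPSE order,
+    //  cut-off radius, surface spacing), matching how the operators are built throughout these
+    //  tests:
+    //
+    //      SurfaceDerivative_xx<NORMAL_ID> SDxx(particles, 2 /*order*/, rCut, grid_spacing);
+    //
+    //  NORMAL_ID is only an illustrative name for the normal property index.)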
+ SurfaceDerivative_xx<2> SDxx(Sparticles, 2, rCut,grid_spacing); + SurfaceDerivative_yy<2> SDyy(Sparticles, 2, rCut,grid_spacing); + //SurfaceDerivative_x<2> SDx(Sparticles, 4, rCut,grid_spacing); + //SurfaceDerivative_y<2> SDy(Sparticles, 4, rCut,grid_spacing); + auto INICONC = getV<3>(Sparticles); + auto CONC = getV<0>(Sparticles); + auto TEMP = getV<4>(Sparticles); + auto normal = getV<2>(Sparticles); + //auto ANASOL = getV<1>(domain); + CONC=SDxx(INICONC)+SDyy(INICONC); + //TEMP[0]=(-normal[0]*normal[0]+1.0) * SDx(INICONC) - normal[0]*normal[1] * SDy(INICONC); + //TEMP[1]=(-normal[1]*normal[1]+1.0) * SDy(INICONC) - normal[0]*normal[1] * SDx(INICONC); + //Sparticles.ghost_get<4>(); + //CONC=SDxx(TEMP[0]) + SDyy(TEMP[1]); + auto it2 = Sparticles.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)) > worst) { + worst = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + } + + ++it2; + } + Sparticles.deleteGhost(); + //std::cout<<v_cl.rank()<<":WORST:"<<worst<<std::endl; + //Sparticles.write("Sparticles"); + BOOST_REQUIRE(worst < 0.03); +} + BOOST_AUTO_TEST_CASE(dcpse_surface_circle) { + + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t n=512; + auto &v_cl=create_vcluster(); + //std::cout<<v_cl.rank()<<":Enter res: "<<std::endl; + //std::cin>>n; + size_t sz[2] = {n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double rCut{5.1 * grid_spacing}; + + Box<2,double> domain{{boxP1,boxP1},{boxP2,boxP2}}; + size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; + Ghost<2,double> ghost{rCut + grid_spacing/8.0}; + vector_dist_ws<2, double, aggregate<double,double,double[2],double,double[2],double>> Sparticles(0, domain,bc,ghost); + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + // Surface prameters + const double radius{1.0}; + std::array<double,2> center{0.0,0.0}; + Point<2,double> coord; + const double pi{3.14159265358979323846}; + + // 1. Particles on surface + double theta{0.0}; + double dtheta{2*pi/double(n)}; + if (v_cl.rank() == 0) { + for (int i = 0; i < n; ++i) { + coord[0] = center[0] + radius * std::cos(theta); + coord[1] = center[1] + radius * std::sin(theta); + Sparticles.add(); + Sparticles.getLastPos()[0] = coord[0]; + Sparticles.getLastPos()[1] = coord[1]; + Sparticles.getLastProp<3>() = std::sin(theta); + Sparticles.getLastProp<2>()[0] = std::cos(theta); + Sparticles.getLastProp<2>()[1] = std::sin(theta); + Sparticles.getLastProp<1>() = -std::sin(theta);;//sin(theta)*exp(-finalT/(radius*radius)); + Sparticles.getLastSubset(0); + theta += dtheta; + } + } + Sparticles.map(); + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + Sparticles.ghost_get<0,3>(); + //Sparticles.write("Sparticles"); + //Here template parameters are Normal property no. 
+ SurfaceDerivative_xx<2> SDxx(Sparticles, 2, rCut,grid_spacing); + SurfaceDerivative_yy<2> SDyy(Sparticles, 2, rCut,grid_spacing); + //SurfaceDerivative_xy<2> SDxy(Sparticles, 3, rCut,grid_spacing); + //SurfaceDerivative_x<2> SDx(Sparticles, 3, rCut,grid_spacing); + //SurfaceDerivative_y<2> SDy(Sparticles, 3, rCut,grid_spacing); + auto INICONC = getV<3>(Sparticles); + auto CONC = getV<0>(Sparticles); + auto TEMP = getV<4>(Sparticles); + auto normal = getV<2>(Sparticles); + + CONC=SDxx(INICONC)+SDyy(INICONC); + //TEMP[0]=(-normal[0]*normal[0]+1.0) * SDx(INICONC) - normal[0]*normal[1] * SDy(INICONC); + //TEMP[1]=(-normal[1]*normal[1]+1.0) * SDy(INICONC) - normal[0]*normal[1] * SDx(INICONC); + //TEMP[0]=(-normal[0]*normal[0]+1.0); + //TEMP[1]=normal[0]*normal[1]; + //Sparticles.ghost_get<2,4>(); + //CONC=SDx(TEMP[0]) + SDy(TEMP[1]); + //CONC= (SDx(TEMP[0])*SDx(INICONC)+TEMP[0]*SDxx(INICONC)-(SDx(TEMP[1])*SDy(INICONC)+TEMP[1]*SDxy(INICONC)))+ + // (SDy((-normal[1]*normal[1]+1.0))*SDy(INICONC)+(-normal[1]*normal[1]+1.0)*SDyy(INICONC)-(SDy(TEMP[1])*SDx(INICONC)+TEMP[1]*SDxy(INICONC))); + auto it2 = Sparticles.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + Sparticles.getProp<5>(p) = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + if (fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)) > worst) { + worst = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + } + ++it2; + } + Sparticles.deleteGhost(); + //Sparticles.write("Sparticles"); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); +} + BOOST_AUTO_TEST_CASE(dcpse_surface_solver_circle) { + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t n=512,k=2; + auto &v_cl=create_vcluster(); + /*if(v_cl.rank()==0) + std::cout<<v_cl.rank()<<":Enter res: "<<std::endl; + std::cin>>n; + if(v_cl.rank()==0) + std::cout<<v_cl.rank()<<":Enter Freq: "<<std::endl; + std::cin>>k;*/ + size_t sz[2] = {n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double rCut{3.9 * grid_spacing}; + + Box<2,double> domain{{boxP1,boxP1},{boxP2,boxP2}}; + size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; + Ghost<2,double> ghost{rCut + grid_spacing/8.0}; + vector_dist_ws<2, double, aggregate<double,double,double[2],double,double[2],double>> Sparticles(0, domain,bc,ghost); + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + // Surface prameters + const double radius{1.0}; + std::array<double,2> center{0.0,0.0}; + Point<2,double> coord; + const double pi{3.14159265358979323846}; + + // 1. Particles on surface + double theta{0.0}; + double dtheta{2*pi/double(n)}; + if (v_cl.rank() == 0) { + for (int i = 0; i < n; ++i) { + coord[0] = center[0] + radius * std::cos(theta); + coord[1] = center[1] + radius * std::sin(theta); + Sparticles.add(); + Sparticles.getLastPos()[0] = coord[0]; + Sparticles.getLastPos()[1] = coord[1]; + Sparticles.getLastProp<3>() = -openfpm::math::intpowlog(k,2)*std::sin(k*theta); + Sparticles.getLastProp<2>()[0] = std::cos(theta); + Sparticles.getLastProp<2>()[1] = std::sin(theta); + Sparticles.getLastProp<1>() = std::sin(k*theta);;//sin(theta)*exp(-finalT/(radius*radius)); + Sparticles.getLastSubset(0); + if(coord[0]==1. && coord[1]==0.) 
+ {Sparticles.getLastSubset(1);} + theta += dtheta; + } + } + Sparticles.map(); + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + Sparticles.ghost_get<0>(); + //Sparticles.write("Sparticles"); + vector_dist_subset<2, double, aggregate<double,double,double[2],double,double[2],double>> Sparticles_bulk(Sparticles,0); + vector_dist_subset<2, double, aggregate<double,double,double[2],double,double[2],double>> Sparticles_boundary(Sparticles,1); + auto & bulk=Sparticles_bulk.getIds(); + auto & boundary=Sparticles_boundary.getIds(); + //Here template parameters are Normal property no. + SurfaceDerivative_xx<2> SDxx(Sparticles, 2, rCut,grid_spacing); + SurfaceDerivative_yy<2> SDyy(Sparticles, 2, rCut,grid_spacing); + auto INICONC = getV<3>(Sparticles); + auto CONC = getV<0>(Sparticles); + auto TEMP = getV<4>(Sparticles); + auto normal = getV<2>(Sparticles); + auto ANASOL = getV<1>(Sparticles); + DCPSE_scheme<equations2d1,decltype(Sparticles)> Solver(Sparticles); + Solver.impose(SDxx(CONC)+SDyy(CONC), bulk, INICONC); + Solver.impose(CONC, boundary, ANASOL); + Solver.solve(CONC); + auto it2 = Sparticles.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + Sparticles.getProp<5>(p) = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + if (fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)) > worst) { + worst = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + } + ++it2; + } + Sparticles.deleteGhost(); + //Sparticles.write("Sparticles"); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); +} +BOOST_AUTO_TEST_CASE(dcpse_surface_sphere) { + auto & v_cl = create_vcluster(); + timer tt; + tt.start(); + size_t n=512; + size_t n_sp=n; + // Domain + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t sz[3] = {n,n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double grid_spacing_surf=grid_spacing*30; + double rCut{2.5 * grid_spacing_surf}; + + Box<3,double> domain{{boxP1,boxP1,boxP1},{boxP2,boxP2,boxP2}}; + size_t bc[3] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + Ghost<3,double> ghost{rCut + grid_spacing/8.0}; + + constexpr int K = 1; + // particles + vector_dist_ws<3, double, aggregate<double,double,double[3],double,double[3],double>> Sparticles(0, domain,bc,ghost); + // 1. 
particles on the Spherical surface + double Golden_angle=M_PI * (3.0 - sqrt(5.0)); + if (v_cl.rank() == 0) { + //std::vector<Vector3f> data; + //GenerateSphere(1,data); + for(int i=1;i<n_sp;i++) + { + double y = 1.0 - (i /double(n_sp - 1.0)) * 2.0; + double radius = sqrt(1 - y * y); + double Golden_theta = Golden_angle * i; + double x = cos(Golden_theta) * radius; + double z = sin(Golden_theta) * radius; + Sparticles.add(); + Sparticles.getLastPos()[0] = x; + Sparticles.getLastPos()[1] = y; + Sparticles.getLastPos()[2] = z; + double rm=sqrt(x*x+y*y+z*z); + Sparticles.getLastProp<2>()[0] = x/rm; + Sparticles.getLastProp<2>()[1] = y/rm; + Sparticles.getLastProp<2>()[2] = z/rm; + Sparticles.getLastProp<4>()[0] = 1.0 ; + Sparticles.getLastProp<4>()[1] = std::atan2(sqrt(x*x+y*y),z); + Sparticles.getLastProp<4>()[2] = std::atan2(y,x); + if(i<=2*(K)+1) + {Sparticles.getLastSubset(1);} + else + {Sparticles.getLastSubset(0);} + } + //std::cout << "n: " << n << " - grid spacing: " << grid_spacing << " - rCut: " << rCut << "Surf Normal spacing" << grid_spacing<<std::endl; + } + + Sparticles.map(); + Sparticles.ghost_get<3>(); + + vector_dist_subset<3,double,aggregate<double,double,double[3],double,double[3],double>> Sparticles_bulk(Sparticles,0); + vector_dist_subset<3,double,aggregate<double,double,double[3],double,double[3],double>> Sparticles_boundary(Sparticles,1); + auto &bulkIds=Sparticles_bulk.getIds(); + auto &bdrIds=Sparticles_boundary.getIds(); + std::unordered_map<const lm,double,key_hash,key_equal> Alm; + //Setting max mode l_max + //Setting amplitudes to 1 + for(int l=0;l<=K;l++){ + for(int m=-l;m<=l;m++){ + Alm[std::make_tuple(l,m)]=0; + } + } + Alm[std::make_tuple(1,0)]=1; + auto it2 = Sparticles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xP = Sparticles.getProp<4>(p); + /*double Sum=0; + for(int m=-spL;m<=spL;++m) + { + Sum+=openfpm::math::Y(spL,m,xP[1],xP[2]); + }*/ + //Sparticles.getProp<ANADF>(p) = Sum;//openfpm::math::Y(K,K,xP[1],xP[2]);openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm);; + Sparticles.getProp<3>(p)=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<1>(p)=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + ++it2; + } + auto f=getV<3>(Sparticles); + auto Df=getV<0>(Sparticles); + + SurfaceDerivative_xx<2> Sdxx{Sparticles,2,rCut,grid_spacing_surf}; + SurfaceDerivative_yy<2> Sdyy{Sparticles,2,rCut,grid_spacing_surf}; + SurfaceDerivative_zz<2> Sdzz{Sparticles,2,rCut,grid_spacing_surf}; + //Laplace_Beltrami<2> SLap{Sparticles,2,rCut,grid_spacing_surf}; + //Sdyy.DrawKernel<5>(Sparticles,0); + //Sdzz.DrawKernel<5>(Sparticles,0); +/* std::cout<<"SDXX:"<<std::endl; + Sdxx.checkMomenta(Sparticles); + std::cout<<"SDYY:"<<std::endl; + Sdyy.checkMomenta(Sparticles); + std::cout<<"SDZZ:"<<std::endl; + Sdzz.checkMomenta(Sparticles);*/ + + Sparticles.ghost_get<3>(); + Df=(Sdxx(f)+Sdyy(f)+Sdzz(f)); + //Df=SLap(f); + auto it3 = Sparticles.getDomainIterator(); + double worst = 0.0; + while (it3.isNext()) { + auto p = it3.get(); + //Sparticles.getProp<5>(p) = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + if (fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)) > worst) { + worst = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + } + ++it3; + } + Sparticles.deleteGhost(); + //Sparticles.write("Sparticles"); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); +} + + +BOOST_AUTO_TEST_CASE(dcpse_surface_sphere_old) { + auto & v_cl = create_vcluster(); + timer tt; + 
tt.start(); + size_t n=512; + size_t n_sp=n; + // Domain + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t sz[3] = {n,n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double grid_spacing_surf=grid_spacing*30; + double rCut{2.5 * grid_spacing_surf}; + + Box<3,double> domain{{boxP1,boxP1,boxP1},{boxP2,boxP2,boxP2}}; + size_t bc[3] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + Ghost<3,double> ghost{rCut + grid_spacing/8.0}; + + constexpr int K = 1; + // particles + vector_dist_ws<3, double, aggregate<double,double,double[3],double,double[3],double>> Sparticles(0, domain,bc,ghost); + // 1. particles on the Spherical surface + double Golden_angle=M_PI * (3.0 - sqrt(5.0)); + if (v_cl.rank() == 0) { + //std::vector<Vector3f> data; + //GenerateSphere(1,data); + std::unordered_map<const lm,double,key_hash,key_equal> Alm; + //Setting max mode l_max + //Setting amplitudes to 1 + for(int l=0;l<=K;l++){ + for(int m=-l;m<=l;m++){ + Alm[std::make_tuple(l,m)]=0; + } + } + Alm[std::make_tuple(1,0)]=1; + for(int i=1;i<n_sp;i++) + { + double y = 1.0 - (i /double(n_sp - 1.0)) * 2.0; + double radius = sqrt(1 - y * y); + double Golden_theta = Golden_angle * i; + double x = cos(Golden_theta) * radius; + double z = sin(Golden_theta) * radius; + Sparticles.add(); + Sparticles.getLastPos()[0] = x; + Sparticles.getLastPos()[1] = y; + Sparticles.getLastPos()[2] = z; + double rm=sqrt(x*x+y*y+z*z); + Sparticles.getLastProp<2>()[0] = x/rm; + Sparticles.getLastProp<2>()[1] = y/rm; + Sparticles.getLastProp<2>()[2] = z/rm; + Sparticles.getLastProp<4>()[0] = 1.0 ; + Sparticles.getLastProp<4>()[1] = std::atan2(sqrt(x*x+y*y),z); + Sparticles.getLastProp<4>()[2] = std::atan2(y,x); + double m1=openfpm::math::sumY_Scalar<K>(1.0,std::atan2(sqrt(x*x+y*y),z),std::atan2(y,x),Alm); + double m2=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(1.0,std::atan2(sqrt(x*x+y*y),z),std::atan2(y,x),Alm); + Sparticles.getLastProp<3>()=m1; + Sparticles.getLastProp<1>()=m2; + Sparticles.getLastSubset(0); + for(int j=1;j<=2;++j){ + Sparticles.add(); + Sparticles.getLastPos()[0] = x+j*grid_spacing_surf*x/rm; + Sparticles.getLastPos()[1] = y+j*grid_spacing_surf*y/rm; + Sparticles.getLastPos()[2] = z+j*grid_spacing_surf*z/rm; + Sparticles.getLastProp<3>()=m1; + Sparticles.getLastSubset(1); + //Sparticles.getLastProp<1>(p)=m2; + Sparticles.add(); + Sparticles.getLastPos()[0] = x-j*grid_spacing_surf*x/rm; + Sparticles.getLastPos()[1] = y-j*grid_spacing_surf*y/rm; + Sparticles.getLastPos()[2] = z-j*grid_spacing_surf*z/rm; + Sparticles.getLastProp<3>()=m1; + Sparticles.getLastSubset(1); + //Sparticles.getLastProp<1>(p)=m2; + } + } + //std::cout << "n: " << n << " - grid spacing: " << grid_spacing << " - rCut: " << rCut << "Surf Normal spacing" << grid_spacing<<std::endl; + } + + Sparticles.map(); + Sparticles.ghost_get<3>(); + //Sparticles.write("SparticlesInit"); + + vector_dist_subset<3,double,aggregate<double,double,double[3],double,double[3],double>> Sparticles_bulk(Sparticles,0); + vector_dist_subset<3,double,aggregate<double,double,double[3],double,double[3],double>> Sparticles_boundary(Sparticles,1); + auto &bulkIds=Sparticles_bulk.getIds(); + auto &bdrIds=Sparticles_boundary.getIds(); + /*auto it2 = Sparticles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xP = Sparticles.getProp<4>(p); + *//*double Sum=0; + for(int m=-spL;m<=spL;++m) + { + Sum+=openfpm::math::Y(spL,m,xP[1],xP[2]); + }*//* + //Sparticles.getProp<ANADF>(p) = 
Sum;//openfpm::math::Y(K,K,xP[1],xP[2]);openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm);; + Sparticles.getProp<3>(p)=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<1>(p)=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + ++it2; + }*/ + auto f=getV<3>(Sparticles); + auto Df=getV<0>(Sparticles); + + //SurfaceDerivative_xx<2> Sdxx{Sparticles,2,rCut,grid_spacing_surf}; + //SurfaceDerivative_yy<2> Sdyy{Sparticles,2,rCut,grid_spacing_surf}; + //SurfaceDerivative_zz<2> Sdzz{Sparticles,2,rCut,grid_spacing_surf}; + Derivative_xx Sdxx{Sparticles,2,rCut}; + //std::cout<<"Dxx Done"<<std::endl; + Derivative_yy Sdyy{Sparticles,2,rCut}; + //std::cout<<"Dyy Done"<<std::endl; + Derivative_zz Sdzz{Sparticles,2,rCut}; + //std::cout<<"Dzz Done"<<std::endl; + + //Laplace_Beltrami<2> SLap{Sparticles,2,rCut,grid_spacing_surf}; + //SLap.DrawKernel<5>(Sparticles,73); + //Sdxx.DrawKernel<5>(Sparticles,0); + //Sdyy.DrawKernel<5>(Sparticles,0); + //Sdzz.DrawKernel<5>(Sparticles,0); +/* std::cout<<"SDXX:"<<std::endl; + Sdxx.checkMomenta(Sparticles); + std::cout<<"SDYY:"<<std::endl; + Sdyy.checkMomenta(Sparticles); + std::cout<<"SDZZ:"<<std::endl; + Sdzz.checkMomenta(Sparticles);*/ + + Sparticles.ghost_get<3>(); + Df=(Sdxx(f)+Sdyy(f)+Sdzz(f)); + //Df=SLap(f); + //auto it3 = Sparticles_bulk.getDomainIterator(); + double worst = 0.0; + for (int j = 0; j < bulkIds.size(); j++) { + auto p = bulkIds.get<0>(j); + //Sparticles.getProp<5>(p) = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + if (fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)) > worst) { + worst = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + } + } + Sparticles.deleteGhost(); + //Sparticles.write("SparticlesNoo"); + //std::cout<<"Worst: "<<worst<<std::endl; + BOOST_REQUIRE(worst < 0.03); +} + +/*BOOST_AUTO_TEST_CASE(dcpse_surface_sphere_proj) { + auto & v_cl = create_vcluster(); + timer tt; + tt.start(); + size_t n=512; + size_t n_sp=n; + // Domain + double boxP1{-1.5}, boxP2{1.5}; + double boxSize{boxP2 - boxP1}; + size_t sz[3] = {n,n,n}; + double grid_spacing{boxSize/(sz[0]-1)}; + double grid_spacing_surf=grid_spacing*30; + double rCut{2.5 * grid_spacing_surf}; + + Box<3,double> domain{{boxP1,boxP1,boxP1},{boxP2,boxP2,boxP2}}; + size_t bc[3] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC}; + Ghost<3,double> ghost{rCut + grid_spacing/8.0}; + + constexpr int K = 1; + // particles + vector_dist_ws<3, double, aggregate<VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,double>> Sparticles(0, domain,bc,ghost); + // 1. 
particles on the Spherical surface + double Golden_angle=M_PI * (3.0 - sqrt(5.0)); + if (v_cl.rank() == 0) { + //std::vector<Vector3f> data; + //GenerateSphere(1,data); + for(int i=1;i<n_sp;i++) + { + double y = 1.0 - (i /double(n_sp - 1.0)) * 2.0; + double radius = sqrt(1 - y * y); + double Golden_theta = Golden_angle * i; + double x = cos(Golden_theta) * radius; + double z = sin(Golden_theta) * radius; + Sparticles.add(); + Sparticles.getLastPos()[0] = x; + Sparticles.getLastPos()[1] = y; + Sparticles.getLastPos()[2] = z; + double rm=sqrt(x*x+y*y+z*z); + Sparticles.getLastProp<2>()[0] = x/rm; + Sparticles.getLastProp<2>()[1] = y/rm; + Sparticles.getLastProp<2>()[2] = z/rm; + Sparticles.getLastProp<4>()[0] = 1.0 ; + Sparticles.getLastProp<4>()[1] = std::atan2(sqrt(x*x+y*y),z); + Sparticles.getLastProp<4>()[2] = std::atan2(y,x); + if(i<=2*(K)+1) + {Sparticles.getLastSubset(1);} + else + {Sparticles.getLastSubset(0);} + } + //std::cout << "n: " << n << " - grid spacing: " << grid_spacing << " - rCut: " << rCut << "Surf Normal spacing" << grid_spacing<<std::endl; + } + + Sparticles.map(); + Sparticles.ghost_get<3>(); + + vector_dist_subset<3,double,aggregate<VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,double>> Sparticles_bulk(Sparticles,0); + vector_dist_subset<3,double,aggregate<VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,VectorS<3,double>,double>> Sparticles_boundary(Sparticles,1); + auto &bulkIds=Sparticles_bulk.getIds(); + auto &bdrIds=Sparticles_boundary.getIds(); + std::unordered_map<const lm,double,key_hash,key_equal> Alm; + //Setting max mode l_max + //Setting amplitudes to 1 + for(int l=0;l<=K;l++){ + for(int m=-l;m<=l;m++){ + Alm[std::make_tuple(l,m)]=0; + } + } + Alm[std::make_tuple(1,0)]=1; + auto it2 = Sparticles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xP = Sparticles.getProp<4>(p); + *//*double Sum=0; + for(int m=-spL;m<=spL;++m) + { + Sum+=openfpm::math::Y(spL,m,xP[1],xP[2]); + }*//* + //Sparticles.getProp<ANADF>(p) = Sum;//openfpm::math::Y(K,K,xP[1],xP[2]);openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm);; + Sparticles.getProp<3>(p)[0]=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<3>(p)[1]=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<3>(p)[2]=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<1>(p)[0]=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<1>(p)[1]=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + Sparticles.getProp<1>(p)[2]=-(K)*(K+1)*openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Alm); + ++it2; + } + auto f=getV<3>(Sparticles); + auto Df=getV<0>(Sparticles); + + SurfaceProjectedGradient<2> SGP{Sparticles,2,rCut,grid_spacing_surf}; + + Sparticles.ghost_get<3>(); + Df=SGP(f); + //Df=SLap(f); + auto it3 = Sparticles.getDomainIterator(); + double worst = 0.0; + while (it3.isNext()) { + auto p = it3.get(); + //Sparticles.getProp<5>(p) = fabs(Sparticles.getProp<1>(p) - Sparticles.getProp<0>(p)); + if (fabs(Sparticles.getProp<1>(p)[0] - Sparticles.getProp<0>(p)[0]) > worst) { + worst = fabs(Sparticles.getProp<1>(p)[0] - Sparticles.getProp<0>(p)[0]); + } + ++it3; + } + Sparticles.deleteGhost(); + //Sparticles.write("Sparticles"); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); +}*/ + + + BOOST_AUTO_TEST_CASE(dcpse_surface_adaptive) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[3] 
= {81,81,1}; + Box<3, double> box({0, 0,-5}, {0.5, 0.5,5}); + size_t bc[3] = {NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<3, double> ghost(spacing * 3.1); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist<3, double, aggregate<double,double,double,double,double,double,double[3]>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + domain.getLastPos()[2] = 0; + + ++it; + } + + // Add multi res patch 1 + + { + const size_t sz2[3] = {40,40,1}; + Box<3,double> bx({0.25 + it.getSpacing(0)/4.0,0.25 + it.getSpacing(0)/4.0,-0.5},{sz2[0]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0, sz2[1]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0,0.5}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<3,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + + auto key = it2.get(); + double x = key.get(0) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[1] = y; + domain.getLastPos()[2] = 0; + + ++it2; + } + } + + // Add multi res patch 2 + + { + const size_t sz2[3] = {40,40,1}; + Box<3,double> bx({0.25 + 21.0*spacing/8.0,0.25 + 21.0*spacing/8.0,-5},{sz2[0]*spacing/4.0 + 0.25 + 21.0*spacing/8.0, sz2[1]*spacing/4.0 + 0.25 + 21*spacing/8.0,5}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<3,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + auto key = it2.get(); + double x = key.get(0) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[1] = y; + domain.getLastPos()[2] = 0; + + ++it2; + } + } + + /////////////////////// + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + openfpm::vector<aggregate<int>> ref_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto DCPSE_sol=getV<5>(domain); + + // Here fill me + + Box<3, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0,5}); + + Box<3, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0,5}); + + Box<3, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0,-5}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 
2.0,5}); + + Box<3, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0,5}); + + openfpm::vector<Box<3, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a writer and write + VTKWriter<openfpm::vector<Box<3, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xp = domain.getPos(p); + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } + domain.getProp<6>(p)[0] = 0; + domain.getProp<6>(p)[1] = 0; + domain.getProp<6>(p)[2] = 1; + + ++it2; + } + + domain.ghost_get<1,2,3>(); + SurfaceDerivative_xx<6> Dxx(domain, 2, rCut,3.9,support_options::ADAPTIVE); + +/* v=0; + auto itNNN=domain.getDomainIterator(); + while(itNNN.isNext()){ + auto p=itNNN.get().getKey(); + Dxx.DrawKernel<0,decltype(domain)>(domain,p); + domain.write_frame("Kernel",p); + v=0; + ++itNNN; + } + +*/ + //Dxx.DrawKernel<5,decltype(domain)>(domain,6161); + //domain.write_frame("Kernel",6161); + + SurfaceDerivative_yy<6> Dyy(domain, 2, rCut,3.9,support_options::ADAPTIVE); + SurfaceDerivative_zz<6> Dzz(domain, 2, rCut,3.9,support_options::ADAPTIVE); + + Dxx.save(domain,"Sdxx_test"); + Dyy.save(domain,"Sdyy_test"); + Dzz.save(domain,"Sdzz_test"); + + domain.ghost_get<2>(); + sol=Dxx(anasol)+Dyy(anasol)+Dzz(anasol); + domain.ghost_get<5>(); + + double worst1 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) >= worst1) { + worst1 = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + + } + //std::cout << "Maximum Analytic Error: " << worst1 << std::endl; + //domain.ghost_get<4>(); + //domain.write("Robin_anasol"); + BOOST_REQUIRE(worst1 < 0.03); + + } + + + BOOST_AUTO_TEST_CASE(dcpse_surface_adaptive_load) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const size_t sz[3] = {81,81,1}; + Box<3, double> box({0, 0,-5}, {0.5, 0.5,5}); + size_t bc[3] = {NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + double spacing = box.getHigh(0) / (sz[0] - 1); + Ghost<3, double> ghost(spacing * 3.1); + double rCut = 3.1 * spacing; + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist<3, double, 
aggregate<double,double,double,double,double,double,double[3]>> domain(0, box, bc, ghost); + + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + while (it.isNext()) { + domain.add(); + + auto key = it.get(); + double x = key.get(0) * it.getSpacing(0); + domain.getLastPos()[0] = x; + double y = key.get(1) * it.getSpacing(1); + domain.getLastPos()[1] = y; + + domain.getLastPos()[2] = 0; + + ++it; + } + + // Add multi res patch 1 + + { + const size_t sz2[3] = {40,40,1}; + Box<3,double> bx({0.25 + it.getSpacing(0)/4.0,0.25 + it.getSpacing(0)/4.0,-0.5},{sz2[0]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0, sz2[1]*it.getSpacing(0)/2.0 + 0.25 + it.getSpacing(0)/4.0,0.5}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<3,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + + auto key = it2.get(); + double x = key.get(0) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/2.0 + 0.25 + spacing/4.0; + domain.getLastPos()[1] = y; + domain.getLastPos()[2] = 0; + + ++it2; + } + } + + // Add multi res patch 2 + + { + const size_t sz2[3] = {40,40,1}; + Box<3,double> bx({0.25 + 21.0*spacing/8.0,0.25 + 21.0*spacing/8.0,-5},{sz2[0]*spacing/4.0 + 0.25 + 21.0*spacing/8.0, sz2[1]*spacing/4.0 + 0.25 + 21*spacing/8.0,5}); + openfpm::vector<size_t> rem; + + auto it = domain.getDomainIterator(); + + while (it.isNext()) + { + auto k = it.get(); + + Point<3,double> xp = domain.getPos(k); + + if (bx.isInside(xp) == true) + { + rem.add(k.getKey()); + } + + ++it; + } + + domain.remove(rem); + + auto it2 = domain.getGridIterator(sz2); + while (it2.isNext()) { + domain.add(); + + auto key = it2.get(); + double x = key.get(0) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[0] = x; + double y = key.get(1) * spacing/4.0 + 0.25 + 21*spacing/8.0; + domain.getLastPos()[1] = y; + domain.getLastPos()[2] = 0; + + ++it2; + } + } + + /////////////////////// + + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> up_p; + openfpm::vector<aggregate<int>> dw_p; + openfpm::vector<aggregate<int>> l_p; + openfpm::vector<aggregate<int>> r_p; + openfpm::vector<aggregate<int>> ref_p; + + auto v = getV<0>(domain); + auto RHS=getV<1>(domain); + auto sol = getV<2>(domain); + auto anasol = getV<3>(domain); + auto err = getV<4>(domain); + auto DCPSE_sol=getV<5>(domain); + + // Here fill me + + Box<3, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0,5}); + + Box<3, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0,5}); + + Box<3, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0,-5}, + {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0,5}); + + Box<3, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0,-5}, + {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0,5}); + + openfpm::vector<Box<3, double>> boxes; + boxes.add(up); + boxes.add(down); + boxes.add(left); + boxes.add(right); + + // Create a 
writer and write + VTKWriter<openfpm::vector<Box<3, double>>, VECTOR_BOX> vtk_box; + vtk_box.add(boxes); + //vtk_box.write("vtk_box.vtk"); + + + auto it2 = domain.getDomainIterator(); + + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xp = domain.getPos(p); + if (up.isInside(xp) == true) { + up_p.add(); + up_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } else if (down.isInside(xp) == true) { + dw_p.add(); + dw_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (left.isInside(xp) == true) { + l_p.add(); + l_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else if (right.isInside(xp) == true) { + r_p.add(); + r_p.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + + } else { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + domain.getProp<1>(p) = -2*M_PI*M_PI*sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + domain.getProp<3>(p) = sin(M_PI*xp.get(0))*sin(M_PI*xp.get(1)); + } + domain.getProp<6>(p)[0] = 0; + domain.getProp<6>(p)[1] = 0; + domain.getProp<6>(p)[2] = 1; + + ++it2; + } + + domain.ghost_get<1,2,3>(); + SurfaceDerivative_xx<6> Dxx(domain, 2, rCut,3.9,support_options::LOAD); + SurfaceDerivative_yy<6> Dyy(domain, 2, rCut,3.9,support_options::LOAD); + SurfaceDerivative_zz<6> Dzz(domain, 2, rCut,3.9,support_options::LOAD); + Dxx.load(domain,"Sdxx_test"); + Dyy.load(domain,"Sdyy_test"); + Dzz.load(domain,"Sdzz_test"); + + domain.ghost_get<2>(); + sol=Dxx(anasol)+Dyy(anasol)+Dzz(anasol); + domain.ghost_get<5>(); + + double worst1 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) >= worst1) { + worst1 = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + } + domain.getProp<4>(p) = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + + } + //std::cout << "Maximum Analytic Error: " << worst1 << std::endl; + + //domain.ghost_get<4>(); + //domain.write("Robin_anasol"); + BOOST_REQUIRE(worst1 < 0.03); + + } + + +BOOST_AUTO_TEST_SUITE_END() + +#endif +#endif \ No newline at end of file diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu new file mode 100644 index 00000000..3018f5c2 --- /dev/null +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu @@ -0,0 +1,616 @@ +/* + * DCPSE_op_test.cpp + * + * Created on: May 15, 2020 + * Author: Abhinav Singh + * + */ +#include "config.h" +#ifdef HAVE_EIGEN +#ifdef HAVE_PETSC + + +#define BOOST_TEST_DYN_LINK + +#include "util/util_debug.hpp" +#include <boost/test/unit_test.hpp> +#include <iostream> +#include "../DCPSE_op.hpp" +#include "../DCPSE_Solver.hpp" +#include "../DCPSE_Solver.cuh" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Vector/vector_dist_subset.hpp" +#include "../EqnsStruct.hpp" + +//template<typename T> +//struct Debug; + +#if 0 +BOOST_AUTO_TEST_SUITE(dcpse_op_subset_suite_tests_cu) + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_tests) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, 
{1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + + vector_dist_ws_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles(0, box, + bc, + ghost); + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + std::mt19937 rng{6666666}; + + std::normal_distribution<> gaussian{0, sigma2}; + +// openfpm::vector<aggregate<int>> bulk; +// openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing[0]; + Particles.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing[1]; + Particles.getLastPos()[1] = y;//+gaussian(rng); + // Here fill the function value + Particles.template getLastProp<0>() = sin(Particles.getLastPos()[0]) + sin(Particles.getLastPos()[1]); + + if (k0 != 0 && k1 != 0 && k0 != sz[0] -1 && k1 != sz[1] - 1) + { +// bulk.add(); +// bulk.template get<0>(bulk.size()-1) = Particles.size_local() - 1; + + Particles.getLastSubset(0); + } + else + { +// boundary.add(); +// boundary.template get<0>(boundary.size()-1) = Particles.size_local() - 1; + Particles.getLastSubset(1); + } + + + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + + Particles.map(); + Particles.ghost_get<0>(); + + auto git = Particles.getGhostIterator(); + + while (git.isNext()) + { + auto p = git.get(); + + Particles.template getProp<0>(p) = std::numeric_limits<double>::quiet_NaN(); + + ++git; + } + + vector_dist_subset_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles_bulk(Particles,0); + vector_dist_subset_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles_boundary(Particles,1); + auto & boundary = Particles_boundary.getIds(); + + // move particles + auto P = getV<0>(Particles); + auto Out = getV<1>(Particles); + auto Pb = getV<2>(Particles); + auto Out_V = getV<3>(Particles); + + + auto P_bulk = getV<2>(Particles_bulk); + auto Out_bulk = getV<1>(Particles_bulk); + auto Out_V_bulk = getV<3>(Particles_bulk); + Out=10; + P_bulk = 5; + + P_bulk=Pb+Out; +// Particles.write("Test_output_subset"); + + // Create the subset + + /* Derivative_x Dx(Particles, 2, rCut); + Derivative_y Dy(Particles, 2, rCut); + Derivative_x Dx_bulk(Particles_bulk, 2, rCut); +*/ + Derivative_x_gpu Dx_bulk(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Dy_bulk(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + + Out_bulk = Dx_bulk(P); + Out_V_bulk[0] = P + Dx_bulk(P); + Out_V_bulk[1] = Out_V[0] +Dy_bulk(P); + + // Check + bool is_nan = false; + + auto & v_cl = create_vcluster(); + if (v_cl.size() > 1) + { + auto it2 = Particles_bulk.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + /* BOOST_REQUIRE_EQUAL(Particles_bulk.getProp<2>(p),15.0); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.005 ); + 
BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[0] - Particles_bulk.getProp<0>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.001 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[1] - Particles_bulk.getProp<3>(p)[0] - cos(Particles_bulk.getPos(p)[1])) < 0.001 );*/ + + is_nan |= std::isnan(Particles_bulk.template getProp<1>(p)); + + // Particles_bulk.template getProp<0>(p) = fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])); + + ++it2; + } + + BOOST_REQUIRE_EQUAL(is_nan,true); + } + +// P_bulk = Dx_bulk(P_bulk); <------------ Incorrect produce error message +// P = Dx_bulk(P); <------- Incorrect produce overflow + + Particles.ghost_get<0>(); + + for (int i = 0 ; i < boundary.size() ; i++) + { + Particles.template getProp<0>(boundary.template get<0>(i)) = std::numeric_limits<double>::quiet_NaN(); + } + + Particles.ghost_get<0>(); + + Out_bulk = Dx_bulk(P); + Out_V_bulk[0] = P + Dx_bulk(P); + Out_V_bulk[1] = Out_V[0] +Dy_bulk(P); + + auto it2 = Particles_bulk.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + BOOST_REQUIRE_EQUAL(Particles_bulk.getProp<2>(p),15.0); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.005 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[0] - Particles_bulk.getProp<0>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.001 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[1] - Particles_bulk.getProp<3>(p)[0] - cos(Particles_bulk.getPos(p)[1])) < 0.001 ); + + ++it2; + } + + + + } + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_PC_lid) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + constexpr int x = 0; + constexpr int y = 1; + size_t edgeSemiSize = 20; + const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<2, double> box({0, 0}, {1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + auto &v_cl = create_vcluster(); + + typedef aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double> particle_type; + + vector_dist_ws_gpu<2, double, particle_type> Particles(0, box,bc,ghost); + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + +// openfpm::vector<aggregate<int>> bulk; +// openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double xp0 = k0 * spacing[0]; + Particles.getLastPos()[0] = xp0; + mem_id k1 = key.get(1); + double yp0 = k1 * spacing[1]; + Particles.getLastPos()[1] = yp0; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + Particles.map(); + Particles.ghost_get<0>(); + + auto it2 = Particles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = Particles.getPos(p); + if (xp[0] != 0 && xp[1] != 0 && xp[0] != 1.0 && xp[1] != 1.0) { +// bulk.add(); +// bulk.last().get<0>() = p.getKey(); + Particles.setSubset(p,0); + Particles.getProp<3>(p)[x] = 3.0; + Particles.getProp<3>(p)[y] = 3.0; + } else { +// boundary.add(); +// boundary.last().get<0>() = p.getKey(); + Particles.setSubset(p,1); + Particles.getProp<3>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<3>(p)[y] = 
xp[0]*xp[0]-2*xp[0]*xp[1]; + } + Particles.getProp<6>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<6>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + Particles.getProp<7>(p) = xp[0]+xp[1]-1.0; + + ++it2; + } + + vector_dist_subset_gpu<2, double, particle_type> Particles_bulk(Particles,0); + vector_dist_subset_gpu<2, double, particle_type> Particles_boundary(Particles,1); + auto & bulk = Particles_bulk.getIds(); + auto & boundary = Particles_boundary.getIds(); + + auto P = getV<0>(Particles); + auto V = getV<1>(Particles); + auto RHS = getV<2>(Particles); + auto dV = getV<3>(Particles); + auto div = getV<4>(Particles); + auto V_star = getV<5>(Particles); + + + auto P_bulk = getV<0>(Particles_bulk); + auto RHS_bulk =getV<2>(Particles_bulk); + + P_bulk = 0; + + Derivative_x_gpu Dx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_xx_gpu Dxx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_yy_gpu Dyy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Dy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_x_gpu Bulk_Dx(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Bulk_Dy(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + + int n = 0, nmax = 5, ctr = 0, errctr=1, Vreset = 0; + double V_err=1; + if (Vreset == 1) { + P_bulk = 0; + P = 0; + Vreset = 0; + } + P=0; + eq_id vx,vy; + vx.setId(0); + vy.setId(1); + double sum, sum1, sum_k,V_err_eps=1e-3,V_err_old; + auto Stokes1=Dxx(V[x])+Dyy(V[x]); + auto Stokes2=Dxx(V[y])+Dyy(V[y]); + + petsc_solver<double> solverPetsc; + //solverPetsc.setSolver(KSPGMRES); + //solverPetsc.setRestart(250); + //solverPetsc.setPreconditioner(PCJACOBI); + V_star=0; + RHS[x] = dV[x]; + RHS[y] = dV[y]; + while (V_err >= V_err_eps && n <= nmax) { + Particles.ghost_get<0>(SKIP_LABELLING); + RHS_bulk[x] = dV[x] + Bulk_Dx(P); + RHS_bulk[y] = dV[y] + Bulk_Dy(P); + DCPSE_scheme_gpu<equations2d2_gpu, decltype(Particles)> Solver(Particles); + Solver.impose(Stokes1, bulk, RHS[0], vx); + Solver.impose(Stokes2, bulk, RHS[1], vy); + Solver.impose(V[x], boundary, RHS[0], vx); + Solver.impose(V[y], boundary, RHS[1], vy); + + /*auto A=Solver.getA(options_solver::STANDARD); + //A.getMatrixTriplets().save("Tripletes"); + A.write("Mat_lid");*/ + + Solver.solve_with_solver(solverPetsc, V[x], V[y]); + Particles.ghost_get<1>(SKIP_LABELLING); + div = -(Dx(V[x]) + Dy(V[y])); + P_bulk = P + div; + sum = 0; + sum1 = 0; + + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + sum += (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) * + (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) + + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]) * + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]); + sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] + + Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1]; + } + + sum = sqrt(sum); + sum1 = sqrt(sum1); + V_star = V; + v_cl.sum(sum); + v_cl.sum(sum1); + v_cl.execute(); + V_err_old = V_err; + V_err = sum / sum1; + if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) { + errctr++; + //alpha_P -= 0.1; + } else { + errctr = 0; + } + if (n > 3) { + if (errctr > 3) { + std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN ERROR" << std::endl; + Vreset = 1; + break; + } else { + Vreset = 0; + } + } + n++; + if (v_cl.rank() == 0) { + std::cout << "Rel l2 cgs err in V = " << V_err << " at " << n << std::endl; + } + } + 
double worst1 = 0.0; + double worst2 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]) >= worst1) { + worst1 = fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]); + } + } + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]) >= worst2) { + worst2 = fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]); + } + } + //Particles.deleteGhost(); + //Particles.write("PC_subset_lid"); + std::cout << "Maximum Analytic Error in Vx: " << worst1 << std::endl; + std::cout << "Maximum Analytic Error in Vy: " << worst2 << std::endl; + BOOST_REQUIRE(worst1 < 0.03); + BOOST_REQUIRE(worst2 < 0.03); + + } + + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_PC_lid2) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + constexpr int x = 0; + constexpr int y = 1; + size_t edgeSemiSize = 20; + const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<2, double> box({0, 0}, {1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + auto &v_cl = create_vcluster(); + + vector_dist<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double>> Particles(0, box, + bc, + ghost); + vector_dist<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double>> Particles_subset(Particles.getDecomposition(), 0); + + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + size_t pointId = 0; + double minNormOne = 999; + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double xp0 = k0 * spacing[0]; + Particles.getLastPos()[0] = xp0; + mem_id k1 = key.get(1); + double yp0 = k1 * spacing[1]; + Particles.getLastPos()[1] = yp0; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + Particles.map(); + Particles.ghost_get<0>(); + auto it2 = Particles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = Particles.getPos(p); + if (xp[0] != 0 && xp[1] != 0 && xp[0] != 1.0 && xp[1] != 1.0) { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + Particles.getProp<3>(p)[x] = 3.0; + Particles.getProp<3>(p)[y] = 3.0; + } else { + boundary.add(); + boundary.last().get<0>() = p.getKey(); + Particles.getProp<3>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<3>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + } + Particles.getProp<6>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<6>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + Particles.getProp<7>(p) = xp[0]+xp[1]-1.0; + + ++it2; + } + + for (int i = 0; i < bulk.size(); i++) { + Particles_subset.add(); + Particles_subset.getLastPos()[0] = Particles.getPos(bulk.template get<0>(i))[0]; + Particles_subset.getLastPos()[1] = Particles.getPos(bulk.template get<0>(i))[1]; + } + Particles_subset.map(); + Particles_subset.ghost_get<0>(); + + + + auto P = getV<0>(Particles); + auto V = getV<1>(Particles); + auto RHS = getV<2>(Particles); + auto dV = getV<3>(Particles); + auto div = 
getV<4>(Particles); + auto V_star = getV<5>(Particles); + + auto P_bulk = getV<0>(Particles_subset); + auto Grad_bulk= getV<2>(Particles_subset); + + P_bulk = 0; + + Derivative_x Dx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_x Bulk_Dx(Particles_subset, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_xx Dxx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_yy Dyy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y Dy(Particles, 2, rCut,sampling_factor, support_options::RADIUS),Bulk_Dy(Particles_subset, 2, rCut,sampling_factor, support_options::RADIUS);; + + int n = 0, nmax = 5, ctr = 0, errctr=0, Vreset = 0; + double V_err=1; + if (Vreset == 1) { + P_bulk = 0; + P = 0; + Vreset = 0; + } + P=0; + eq_id vx,vy; + vx.setId(0); + vy.setId(1); + double sum, sum1, sum_k,V_err_eps=1e-3,V_err_old; + auto Stokes1=Dxx(V[x])+Dyy(V[x]); + auto Stokes2=Dxx(V[y])+Dyy(V[y]); + + petsc_solver<double> solverPetsc; + //solverPetsc.setSolver(KSPGMRES); + //solverPetsc.setRestart(250); + //solverPetsc.setPreconditioner(PCJACOBI); + V_star=0; + while (V_err >= V_err_eps && n <= nmax) { + RHS[x] = dV[x]; + RHS[y] = dV[y]; + Particles_subset.ghost_get<0>(SKIP_LABELLING); + + Grad_bulk[x] = Bulk_Dx(P_bulk); + Grad_bulk[y] = Bulk_Dy(P_bulk); + for (int i = 0; i < bulk.size(); i++) { + Particles.template getProp<2>(bulk.template get<0>(i))[x] += Particles_subset.getProp<2>(i)[x]; + Particles.template getProp<2>(bulk.template get<0>(i))[y] += Particles_subset.getProp<2>(i)[y]; + } + + DCPSE_scheme<equations2d2_gpu, decltype(Particles)> Solver(Particles); + Solver.impose(Stokes1, bulk, RHS[0], vx); + Solver.impose(Stokes2, bulk, RHS[1], vy); + Solver.impose(V[x], boundary, RHS[0], vx); + Solver.impose(V[y], boundary, RHS[1], vy); + Solver.solve_with_solver(solverPetsc, V[x], V[y]); + Particles.ghost_get<1>(SKIP_LABELLING); + div = -(Dx(V[x]) + Dy(V[y])); + P = P + div; + for (int i = 0; i < bulk.size(); i++) { + Particles_subset.getProp<0>(i) = Particles.template getProp<0>(bulk.template get<0>(i)); + } + sum = 0; + sum1 = 0; + + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + sum += (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) * + (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) + + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]) * + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]); + sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] + + Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1]; + } + + sum = sqrt(sum); + sum1 = sqrt(sum1); + V_star=V; + v_cl.sum(sum); + v_cl.sum(sum1); + v_cl.execute(); + V_err_old = V_err; + V_err = sum / sum1; + if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) { + errctr++; + //alpha_P -= 0.1; + } else { + errctr = 0; + } + if (n > 3) { + if (errctr > 3) { + std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN ERROR" << std::endl; + Vreset = 1; + break; + } else { + Vreset = 0; + } + } + n++; + if (v_cl.rank() == 0) { + std::cout << "Rel l2 cgs err in V = " << V_err << " at " << n << std::endl; + + } + } + double worst1 = 0.0; + double worst2 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]) >= worst1) { + worst1 = fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]); + } + } + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[1] - 
Particles.getProp<1>(p)[1]) >= worst2) { + worst2 = fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]); + } + } + + std::cout << "Maximum Analytic Error in slice x: " << worst1 << std::endl; + std::cout << "Maximum Analytic Error in slice y: " << worst2 << std::endl; + BOOST_REQUIRE(worst1 < 0.03); + BOOST_REQUIRE(worst2 < 0.03); + + //Particles.write("PC_subset_lid2"); + } +BOOST_AUTO_TEST_SUITE_END() +#endif +#endif + +#endif //if 0 \ No newline at end of file diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp index 29967137..0a28bc08 100644 --- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp @@ -307,13 +307,13 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests3) } //Is failing on Ubuntu CI with 5 cores. Needs investigation. -/* BOOST_AUTO_TEST_CASE(Sph_harm) { + BOOST_AUTO_TEST_CASE(Sph_harm) { BOOST_REQUIRE(openfpm::math::Y(2,1,0.5,0)+0.459674<0.00001); //These would be a requirement once Boost releases their fix // //BOOST_REQUIRE(boost::math::legendre_p(0,-1,1)=?); double nu=1.0; - size_t grd_sz=20; + size_t grd_sz=13; const size_t sz[3] = {grd_sz,grd_sz,grd_sz}; Box<3, double> box({-1.0, -1.0,-1.0}, {1.0,1.0,1.0}); size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC}; @@ -523,7 +523,7 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests3) int ctr = 0, errctr, Vreset = 0; V_err = 1; n = 0; - tt.start(); + double solvetime=0; while (V_err >= V_err_eps && n <= nmax) { //Particles.write_frame("StokesSphere",n); Particles.ghost_get<0>(SKIP_LABELLING); @@ -540,7 +540,10 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests3) Solver.impose(V[0], Surface, V_B[0], vx); Solver.impose(V[1], Surface, V_B[1], vy); Solver.impose(V[2], Surface, V_B[2], vz); + tt.start(); Solver.solve_with_solver(solverPetsc, V[0], V[1], V[2]); + tt.stop(); + solvetime+=tt.getwct(); //Solver.solve(V[0],V[1],V[2]); //std::cout << "Stokes Solved" << std::endl; Particles.ghost_get<1>(); @@ -586,7 +589,7 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests3) n++; } - + //std::cout << "Total Solver time (wct):"<<solvetime<< std::endl; V_t=0; double worst=0; @@ -613,19 +616,350 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests3) v_cl.sum(worst); v_cl.sum(L2); v_cl.execute(); -*//* if (v_cl.rank() == 0) { +/* + if (v_cl.rank() == 0) { std::cout<<"Gd,Surf,Bulk Size: "<<grd_sz<<","<<Surface.size()<<","<<bulk.size()<<std::endl; std::cout << "L2_Final: " <<sqrt(L2)<<","<<sqrt(L2/(bulk.size()+Surface.size())) << std::endl; std::cout << "L_inf_Final: " << worst << std::endl; - }*//* + } std::cout << "L_inf_Final_test: " << worst; - //Particles.write("StokesSphere"); + Particles.write("StokesSphere");*/ BOOST_REQUIRE(worst<1e-3); - }*/ + } + + + BOOST_AUTO_TEST_CASE(Sph_harm_ig) { + BOOST_REQUIRE(openfpm::math::Y(2,1,0.5,0)+0.459674<0.00001); + //These would be a requirement once Boost releases their fix + // + //BOOST_REQUIRE(boost::math::legendre_p(0,-1,1)=?); + double nu=1.0; + size_t grd_sz=13; + const size_t sz[3] = {grd_sz,grd_sz,grd_sz}; + Box<3, double> box({-1.0, -1.0,-1.0}, {1.0,1.0,1.0}); + size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC}; + double spacing = 2.0 / (sz[0] - 1); + double rCut = 3.9*spacing; + double R=1.0; + Ghost<3, double> ghost(rCut); + // P V v_B RHS V_t P_anal RHS2 Polar cord + vector_dist_ws<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles(0, box, bc, ghost); + + + auto &v_cl = 
create_vcluster(); + +// openfpm::vector<aggregate<int>> bulk; +// openfpm::vector<aggregate<int>> Surface; + + auto it = Particles.getGridIterator(sz); + while (it.isNext()) { + auto key = it.get(); + double x = -1.0+key.get(0) * it.getSpacing(0); + double y = -1.0+key.get(1) * it.getSpacing(1); + double z = -1.0+key.get(2) * it.getSpacing(2); + double r=sqrt(x*x+y*y+z*z); + if (r<R-spacing/2.0) { + Particles.add(); + Particles.getLastPos()[0] = x; + Particles.getLastPos()[1] = y; + Particles.getLastPos()[2] = z; + Particles.getLastProp<8>()[0] = r; + if (r==0){ + Particles.getLastProp<8>()[1] = 0.0; + } + else{ + Particles.getLastProp<8>()[1] = std::atan2(sqrt(x*x+y*y),z); + } + Particles.getLastProp<8>()[2] = std::atan2(y,x); + } + ++it; + } + + int n_sp=int(grd_sz)*int(grd_sz)*3; + + double Golden_angle=M_PI * (3.0 - sqrt(5.0)); + + for(int i=1;i<=n_sp;i++) + { + double y = 1.0 - (i /double(n_sp - 1.0)) * 2.0; + double radius = sqrt(1 - y * y); + double Golden_theta = Golden_angle * i; + double x = cos(Golden_theta) * radius; + double z = sin(Golden_theta) * radius; + + if (acos(z)==0 || acos(z)==M_PI){ + std::cout<<"Theta 0/Pi "<<std::endl; + continue; + } + + Particles.add(); + Particles.getLastPos()[0] = x; + Particles.getLastPos()[1] = y; + Particles.getLastPos()[2] = z; + Particles.getLastProp<8>()[0] = 1.0 ; + Particles.getLastProp<8>()[1] = std::atan2(sqrt(x*x+y*y),z); + Particles.getLastProp<8>()[2] = std::atan2(y,x); + } + Particles.map(); + Particles.ghost_get<0>(); + + std::unordered_map<const lm,double,key_hash,key_equal> Vr; + std::unordered_map<const lm,double,key_hash,key_equal> V1; + std::unordered_map<const lm,double,key_hash,key_equal> V2; + //Setting max mode l_max + constexpr int K = 2; + //Setting amplitudes to 0 + for(int l=0;l<=K;l++){ + for(int m=-l;m<=l;m++){ + Vr[std::make_tuple(l,m)]=0.0; + V1[std::make_tuple(l,m)]=0.0; + V2[std::make_tuple(l,m)]=0.0; + } + + + } + //Setting some amplitude for boundary velocity + V1[std::make_tuple(1,0)]=1.0; + + auto it2 = Particles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<3, double> xp = Particles.getPos(p); + Point<3, double> xP = Particles.getProp<8>(p); + Particles.getProp<0>(p) =0; + if (xP[0]==1.0) { +// Surface.add(); +// Surface.last().get<0>() = p.getKey(); + Particles.getProp<0>(p) = 0; + std::vector<double> SVel; + SVel=openfpm::math::sumY<K>(xP[0],xP[1],xP[2],Vr,V1,V2); + double SP=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Vr); + Particles.getProp<2>(p)[0] = SVel[0]; + Particles.getProp<2>(p)[1] = SVel[1]; + Particles.getProp<2>(p)[2] = SVel[2]; + Particles.getProp<9>(p)[0] = SVel[0]; + Particles.getProp<9>(p)[1] = SVel[1]; + Particles.getProp<9>(p)[2] = SVel[2]; + Particles.getProp<5>(p) = SP; + Particles.setSubset(p,1); + + } + else { +// bulk.add(); +// bulk.last().get<0>() = p.getKey(); + Particles.setSubset(p,0); + Particles.getProp<0>(p) = 0; + Particles.getProp<1>(p)[0] = 0; + Particles.getProp<1>(p)[1] = 0; + Particles.getProp<1>(p)[2] = 0; + } + ++it2; + } + + vector_dist_subset<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles_bulk(Particles,0); + vector_dist_subset<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles_surface(Particles,1); + + auto & bulk = Particles_bulk.getIds(); + auto & Surface = 
Particles_surface.getIds(); + + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + Point<3, double> xp = Particles.getPos(p); + Point<3, double> xP = Particles.getProp<8>(p); + + std::unordered_map<const lm,double,key_hash,key_equal> Ur; + std::unordered_map<const lm,double,key_hash,key_equal> U2; + std::unordered_map<const lm,double,key_hash,key_equal> U1; + std::unordered_map<const lm,double,key_hash,key_equal> Plm; + + for (int l = 0; l <= K; l++) { + for (int m = -l; m <= l; m++) { + auto Er= Vr.find(std::make_tuple(l,m)); + auto E1= V1.find(std::make_tuple(l,m)); + auto E2= V2.find(std::make_tuple(l,m)); + std::vector<double> Sol=openfpm::math::sph_anasol_u(nu,l,m,Er->second,E1->second,E2->second,xP[0]); + Ur[std::make_tuple(l,m)]=Sol[0]; + U1[std::make_tuple(l,m)]=Sol[1]; + U2[std::make_tuple(l,m)]=Sol[2]; + Plm[std::make_tuple(l,m)]=Sol[3]; + } + + } + + if(fabs(xP[0])>=1e-5 && xP[1]>1e-5 && (M_PI-xP[1])>=1e-5) + { + std::vector<double> SVel = openfpm::math::sumY<K>(xP[0], xP[1], xP[2], Ur, U1, U2); + Particles.getProp<9>(p)[0] = SVel[0]; + Particles.getProp<9>(p)[1] = SVel[1]; + Particles.getProp<9>(p)[2] = SVel[2]; + Particles.getProp<5>(p) = openfpm::math::sumY_Scalar<K>(xP[0], xP[1], xP[2], Plm); + } + } + + auto P = getV<0>(Particles); + auto V = getV<1>(Particles); + auto V_B = getV<2>(Particles); + V.setVarId(0); + auto DIV = getV<3>(Particles); + auto V_t = getV<4>(Particles); + auto P_anal = getV<5>(Particles); + auto temp=getV<6>(Particles); + auto RHS = getV<7>(Particles); + auto P_bulk = getV<0>(Particles_bulk); + auto RHS_bulk = getV<7>(Particles_bulk); + auto V_anal = getV<9>(Particles); + + V_t=V; + P=0; + P_bulk=0; + eq_id vx,vy,vz; + + vx.setId(0); + vy.setId(1); + vz.setId(2); + + double sampling=3.1; + double sampling2=1.9; + double rCut2=3.9*spacing; + + Derivative_x Dx(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dx(Particles_bulk, 2, rCut,sampling, support_options::RADIUS); + Derivative_y Dy(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dy(Particles_bulk, 2, rCut,sampling, support_options::RADIUS); + Derivative_z Dz(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dz(Particles_bulk, 2, rCut,sampling, support_options::RADIUS); + Derivative_xx Dxx(Particles, 2, rCut2,sampling2,support_options::RADIUS); + Derivative_yy Dyy(Particles, 2, rCut2,sampling2,support_options::RADIUS); + Derivative_zz Dzz(Particles, 2, rCut2,sampling2,support_options::RADIUS); + + //std::cout << "DCPSE KERNELS DONE" << std::endl; + petsc_solver<double> solverPetsc; + solverPetsc.setPreconditioner(PCNONE); + timer tt; + double sum=0,sum1=0; + V_t=V; + double V_err_eps = 1e-5; + + double V_err = 1, V_err_old; + int n = 0; + int nmax = 30; + int ctr = 0, errctr, Vreset = 0; + V_err = 1; + n = 0; + double solvetime=0; + while (V_err >= V_err_eps && n <= nmax) { + //Particles.write_frame("StokesSphere",n); + Particles.ghost_get<0>(SKIP_LABELLING); + RHS_bulk[0] = B_Dx(P); + RHS_bulk[1] = B_Dy(P); + RHS_bulk[2] = B_Dz(P); + DCPSE_scheme<equations3d3, decltype(Particles)> Solver(Particles); + auto Stokes1 = nu * (Dxx(V[0])+Dyy(V[0])+Dzz(V[0])); + auto Stokes2 = nu * (Dxx(V[1])+Dyy(V[1])+Dzz(V[1])); + auto Stokes3 = nu * (Dxx(V[2])+Dyy(V[2])+Dzz(V[2])); + Solver.impose(Stokes1, bulk, RHS[0], vx); + Solver.impose(Stokes2, bulk, RHS[1], vy); + Solver.impose(Stokes3, bulk, RHS[2], vz); + Solver.impose(V[0], Surface, V_B[0], vx); + Solver.impose(V[1], Surface, V_B[1], vy); + Solver.impose(V[2], Surface, V_B[2], vz); + Solver.impose_x_ig(bulk, V[0], vx); + 
Solver.impose_x_ig(bulk, V[1], vy); + Solver.impose_x_ig(bulk, V[2], vz); + Solver.impose_x_ig(Surface, V[0], vx); + Solver.impose_x_ig(Surface, V[1], vy); + Solver.impose_x_ig(Surface, V[2], vz); + tt.start(); + Solver.solve_with_solver_ig(solverPetsc, V[0], V[1], V[2]); + tt.stop(); + solvetime+=tt.getwct(); + //Solver.solve(V[0],V[1],V[2]); + //std::cout << "Stokes Solved" << std::endl; + Particles.ghost_get<1>(); + DIV = -(Dx(V[0])+Dy(V[1])+Dz(V[2])); + P_bulk = P + DIV; + sum = 0; + sum1 = 0; + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + sum += (Particles.getProp<4>(p)[0] - Particles.getProp<1>(p)[0]) * + (Particles.getProp<4>(p)[0] - Particles.getProp<1>(p)[0]) + + (Particles.getProp<4>(p)[1] - Particles.getProp<1>(p)[1]) * + (Particles.getProp<4>(p)[1] - Particles.getProp<1>(p)[1]) + + (Particles.getProp<4>(p)[2] - Particles.getProp<1>(p)[2]) * + (Particles.getProp<4>(p)[2] - Particles.getProp<1>(p)[2]); + sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] + + Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1] + + Particles.getProp<1>(p)[2] * Particles.getProp<1>(p)[2]; + } + sum = sqrt(sum); + sum1 = sqrt(sum1); + v_cl.sum(sum); + v_cl.sum(sum1); + v_cl.execute(); + V_t = V; + Particles.ghost_get<1>(SKIP_LABELLING); + V_err_old = V_err; + V_err = sum / sum1; + if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-14) { + errctr++; + } else { + errctr = 0; + } + if (n > 3) { + if (errctr > 1) { + std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN ERROR" << std::endl; + Vreset = 1; + break; + } else { + Vreset = 0; + } + } + n++; + + } + //std::cout << "Total Solver time (wct):"<<solvetime<< std::endl; + + V_t=0; + + double worst=0; + double L2=0; + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + Point<3,double> xP=Particles.getProp<8>(p); + if(xP[0]>=1e-5 && xP[1]>1e-5 && (M_PI-xP[1])>=1e-5) + { + double dx=Particles.getProp<9>(p)[0] - Particles.getProp<1>(p)[0]; + double dy=Particles.getProp<9>(p)[1] - Particles.getProp<1>(p)[1]; + double dz=Particles.getProp<9>(p)[2] - Particles.getProp<1>(p)[2]; + Particles.getProp<4>(p)[0]=fabs(dx); + Particles.getProp<4>(p)[1]=fabs(dy); + Particles.getProp<4>(p)[2]=fabs(dz); + L2 += dx*dx+dy*dy+dz*dz; + if (std::max({fabs(dx),fabs(dy),fabs(dz)}) > worst) { + worst = std::max({fabs(dx),fabs(dy),fabs(dz)}); + } + + } + } + + v_cl.sum(worst); + v_cl.sum(L2); + v_cl.execute(); + + /* if (v_cl.rank() == 0) { + std::cout<<"Gd,Surf,Bulk Size: "<<grd_sz<<","<<Surface.size()<<","<<bulk.size()<<std::endl; + std::cout << "L2_Final: " <<sqrt(L2)<<","<<sqrt(L2/(bulk.size()+Surface.size())) + << std::endl; + std::cout << "L_inf_Final: " << worst + << std::endl; + } + std::cout << "L_inf_Final_test: " << worst;*/ + //Particles.write("StokesSphere"); + BOOST_REQUIRE(worst<1e-3); + + } BOOST_AUTO_TEST_SUITE_END() #endif diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp index 49ba1367..47286818 100644 --- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp @@ -23,6 +23,7 @@ #include "Operators/Vector/vector_dist_operators.hpp" #include "Vector/vector_dist_subset.hpp" #include "../EqnsStruct.hpp" +#include "DCPSE/DcpseInterpolation.hpp" BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests) BOOST_AUTO_TEST_CASE(dcpse_op_tests) { @@ -60,7 +61,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_tests) { domain.getLastPos()[1] = y;//+gaussian(rng); // Here fill the 
function value domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); - domain.template getLastProp<2>() = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); + domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); ++counter; ++it; } @@ -68,7 +69,6 @@ BOOST_AUTO_TEST_CASE(dcpse_op_tests) { domain.map(); domain.ghost_get<0>(); - Derivative_x Dx(domain, 2, rCut); Derivative_y Dy(domain, 2, rCut); Gradient Grad(domain, 2, rCut); @@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_tests) { auto v = getV<1>(domain); auto P = getV<0>(domain); - v = Dx(P) + Dy(P); + v = 2*Dx(P) + Dy(P); auto it2 = domain.getDomainIterator(); double worst = 0.0; @@ -96,6 +96,288 @@ BOOST_AUTO_TEST_CASE(dcpse_op_tests) { } + BOOST_AUTO_TEST_CASE(dcpse_op_save_load) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 2 * M_PI / (sz[0] - 1); + spacing[1] = 2 * M_PI / (sz[1] - 1); + Ghost<2, double> ghost(spacing[0] * 3.9); + double rCut = 3.9 * spacing[0]; + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + + vector_dist<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> domain(0, box, + bc, + ghost); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) { + domain.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing[0]; + domain.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing[1]; + domain.getLastPos()[1] = y;//+gaussian(rng); + // Here fill the function value + domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); + domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + Derivative_x Dx(domain, 2, rCut); + Derivative_y Dy(domain, 2, rCut); + auto v = getV<1>(domain); + auto v2 = getV<3>(domain); + auto P = getV<0>(domain); + v2 = 2*Dx(P) + Dy(P); + Dx.save(domain,"DX_test"); + Dy.save(domain,"DY_test"); + Derivative_x DxLoaded(domain, 2, rCut,1,support_options::LOAD); + Derivative_y DyLoaded(domain, 2, rCut,1,support_options::LOAD); + DxLoaded.load(domain,"DX_test"); + DyLoaded.load(domain,"DY_test"); + v= 2*DxLoaded(P)+DyLoaded(P); + auto it2 = domain.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + + if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) { + worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + } + + ++it2; + } + domain.deleteGhost(); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); + } + + BOOST_AUTO_TEST_CASE(dcpse_op_save_load2) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 2 * M_PI / (sz[0] - 1); + spacing[1] = 2 * M_PI / (sz[1] - 1); + Ghost<2, double> ghost(spacing[0] * 3.9); + double rCut = 3.9 * spacing[0]; + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 
4); + + vector_dist<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> domain(0, box, + bc, + ghost); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) { + domain.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing[0]; + domain.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing[1]; + domain.getLastPos()[1] = y;//+gaussian(rng); + // Here fill the function value + domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); + domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + auto v = getV<1>(domain); + auto v2 = getV<3>(domain); + auto P = getV<0>(domain); + Derivative_x DxLoaded(domain, 2, rCut,1,support_options::LOAD); + Derivative_y DyLoaded(domain, 2, rCut,1,support_options::LOAD); + DxLoaded.load(domain,"DX_test"); + DyLoaded.load(domain,"DY_test"); + v= 2*DxLoaded(P)+DyLoaded(P); + auto it2 = domain.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + + if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) { + worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + } + + ++it2; + } + domain.deleteGhost(); + //std::cout<<worst; + BOOST_REQUIRE(worst < 0.03); + } + + BOOST_AUTO_TEST_CASE(dcpse_op_tests_fa) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 2 * M_PI / (sz[0] - 1); + spacing[1] = 2 * M_PI / (sz[1] - 1); + Ghost<2, double> ghost(spacing[0] * 3.9); + double rCut = 3.9 * spacing[0]; + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + + typedef vector_dist<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> vector_type; + + vector_type domain(0, box,bc,ghost); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) { + domain.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing[0]; + domain.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing[1]; + domain.getLastPos()[1] = y;//+gaussian(rng); + // Here fill the function value + domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); + domain.template getLastProp<2>() = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain.ghost_get<0>(); + + PPInterpolation<vector_type,vector_type> Fx(domain,domain, 2, rCut); + auto v = getV<1>(domain); + auto P = getV<0>(domain); + + Fx.p2p<0,1>(); + auto it2 = domain.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(domain.getProp<1>(p) - domain.getProp<0>(p)) > worst) { + worst = fabs(domain.getProp<1>(p) - domain.getProp<0>(p)); + } + ++it2; + } + //std::cout<<"Worst:"<<worst<<std::endl; + domain.deleteGhost(); + 
//domain.write_frame("test",0,0.024,BINARY); + BOOST_REQUIRE(worst < 0.03); + } + + BOOST_AUTO_TEST_CASE(dcpse_op_tests_mfa) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 2 * M_PI / (sz[0] - 1); + spacing[1] = 2 * M_PI / (sz[1] - 1); + Ghost<2, double> ghost(spacing[0] * 3.9); + double rCut = 3.9 * spacing[0]; + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / ( 4); + std::normal_distribution<> gaussian{0, sigma2}; + std::mt19937 rng{6666666}; + typedef vector_dist<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> vector_dist; + + vector_dist domain(0, box,bc,ghost); + vector_dist domain2(domain.getDecomposition(),0); + + //Init_DCPSE(domain) + BOOST_TEST_MESSAGE("Init domain..."); + + auto it = domain.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) { + domain.add(); + domain2.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + mem_id k1 = key.get(1); + double x = k0 * spacing[0]; + double y = k1 * spacing[1]; + domain.getLastPos()[0] = x;//+ gaussian(rng); + domain.getLastPos()[1] = y;//+gaussian(rng); + if(x!=0 && y!=0 && x!=box.getHigh(0) && y!=box.getHigh(1)){ + domain2.getLastPos()[0] = x+ gaussian(rng); + domain2.getLastPos()[1] = y+ gaussian(rng); + } + else{ + domain2.getLastPos()[0] = x; + domain2.getLastPos()[1] = y; + } + // Here fill the function value + domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); + domain.template getLastProp<1>() = 0.0; + domain2.template getLastProp<0>() = sin(domain2.getLastPos()[0]) + sin(domain2.getLastPos()[1]); + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync domain across processors..."); + + domain.map(); + domain2.map(); + domain.ghost_get<0>(); + domain2.ghost_get<0>(); + + PPInterpolation<vector_dist,vector_dist> Fx(domain2,domain, 2, rCut); + //auto v = getV<1>(domain); + //auto P = getV<0>(domain); + Fx.p2p<0,1>(); + auto it2 = domain.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + //domain.template getProp<2>(p) = domain.getProp<1>(p) - domain.getProp<0>(p); + if (fabs(domain.getProp<1>(p) - domain.getProp<0>(p)) > worst) { + worst = fabs(domain.getProp<1>(p) - domain.getProp<0>(p)); + } + ++it2; + } + //std::cout<<"Worst:"<<worst<<std::endl; + domain.deleteGhost(); + //domain.write("test1"); + //domain2.write("test2"); + BOOST_REQUIRE(worst < 0.03); + } + BOOST_AUTO_TEST_CASE(dcpse_op_test_lap) { size_t edgeSemiSize = 81; @@ -209,7 +491,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_tests) { domain.getLastPos()[0] = x;//+ gaussian(rng); mem_id k1 = key.get(1); double y = k1 * spacing[1]; - domain.getLastPos()[1] = y;//+gaussian(rng); + domain.getLastPos()[1] = y;//+gaussian(rng); // Here fill the function value domain.template getLastProp<1>()[0] = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]); domain.template getLastProp<1>()[1] = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]); diff --git a/src/DCPSE/Dcpse.cuh b/src/DCPSE/Dcpse.cuh new file mode 100644 index 00000000..637f995b --- /dev/null +++ b/src/DCPSE/Dcpse.cuh @@ -0,0 +1,1031 @@ +// +// Created by Serhii +// +#ifndef OPENFPM_PDATA_DCPSE_CUH +#define OPENFPM_PDATA_DCPSE_CUH + +#if defined(__NVCC__) && defined(HAVE_EIGEN) + +#include "Vector/vector_dist.hpp" 
+#include "MonomialBasis.hpp" +#include "DMatrix/EMatrix.hpp" +#include "SupportBuilder.hpp" +#include "SupportBuilder.cuh" +#include "Support.hpp" +#include "Vandermonde.hpp" +#include "DcpseDiagonalScalingMatrix.hpp" +#include "DcpseRhs.hpp" + +#include <chrono> + +// CUDA +#include <cuda.h> +#include <cuda_runtime.h> +#include <cusolverDn.h> + + +template<unsigned int dim, typename particles_type, typename T, typename monomialBasis_type, typename supportKey_type, typename localEps_type, typename calcKernels_type> +__global__ void calcKernels_gpu(particles_type, monomialBasis_type, supportKey_type, supportKey_type, T**, localEps_type, size_t, calcKernels_type); + +template<unsigned int dim, typename T, typename particles_type, typename monomialBasis_type, typename supportKey_type, typename localEps_type, typename matrix_type> +__global__ void assembleLocalMatrices_gpu( particles_type, Point<dim, unsigned int>, unsigned int, monomialBasis_type, supportKey_type, supportKey_type, supportKey_type, + T**, T**, localEps_type, localEps_type, matrix_type, size_t, size_t); + + +template<unsigned int dim, typename vector_type, class T = typename vector_type::stype> +class Dcpse_gpu { + static_assert(std::is_floating_point<T>::value, "CUBLAS supports only float or double"); + +public: + typedef typename vector_type::value_type part_type; + typedef vector_type vtype; + + #ifdef SE_CLASS1 + int update_ctr=0; + #endif + // This works in this way: + // 1) User constructs this by giving a domain of points (where one of the properties is the value of our f), + // the signature of the differential operator and the error order bound. + // 2) The machinery for assembling and solving the linear system for coefficients starts... + // 3) The user can then call an evaluate(point) method to get the evaluation of the differential operator + // on the given point. 
+private: + const Point<dim, unsigned int> differentialSignature; + const unsigned int differentialOrder; + MonomialBasis<dim> monomialBasis; + + // shared local support previosly built by another operator + bool isSharedSupport = false; + openfpm::vector_custd<size_t> supportRefs; // Each MPI rank has just access to the local ones + openfpm::vector_custd<size_t> kerOffsets; + openfpm::vector_custd<size_t> supportKeys1D; + + openfpm::vector_custd<T> localEps; // Each MPI rank has just access to the local ones + openfpm::vector_custd<T> localEpsInvPow; // Each MPI rank has just access to the local ones + openfpm::vector_custd<T> calcKernels; + + openfpm::vector<size_t> subsetKeyPid; + + vector_type & particles; + double rCut; + unsigned int convergenceOrder; + double supportSizeFactor; + + size_t maxSupportSize; + size_t supportKeysTotalN; + + support_options opt; + +public: +#ifdef SE_CLASS1 + int getUpdateCtr() const + { + return update_ctr; + } +#endif + + // Here we require the first element of the aggregate to be: + // 1) the value of the function f on the point + Dcpse_gpu(vector_type &particles, + Point<dim, unsigned int> differentialSignature, + unsigned int convergenceOrder, + T rCut, + T supportSizeFactor = 1, + support_options opt = support_options::N_PARTICLES) + :particles(particles), + differentialSignature(differentialSignature), + differentialOrder(Monomial<dim>(differentialSignature).order()), + monomialBasis(differentialSignature.asArray(), convergenceOrder), + maxSupportSize(0), + supportKeysTotalN(0), + opt(opt) + { + particles.ghost_get_subset(); + + if (supportSizeFactor < 1) + initializeAdaptive(particles, convergenceOrder, rCut); + else + initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor); + } + + Dcpse_gpu(vector_type &particles, + const Dcpse_gpu<dim, vector_type, T>& other, + Point<dim, unsigned int> differentialSignature, + unsigned int convergenceOrder, + T rCut, + T supportSizeFactor = 1, + support_options opt = support_options::N_PARTICLES) + :particles(particles), opt(opt), + differentialSignature(differentialSignature), + differentialOrder(Monomial<dim>(differentialSignature).order()), + monomialBasis(differentialSignature.asArray(), convergenceOrder), + subsetKeyPid(other.subsetKeyPid), + supportRefs(other.supportRefs), + supportKeys1D(other.supportKeys1D), + kerOffsets(other.kerOffsets), + maxSupportSize(other.maxSupportSize), + supportKeysTotalN(other.supportKeysTotalN), + isSharedSupport(true) + { + particles.ghost_get_subset(); + + if (supportSizeFactor < 1) + initializeAdaptive(particles, convergenceOrder, rCut); + else + initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor); + } + + template<unsigned int prp> + void DrawKernel(vector_type &particles, int k) + { + size_t xpK = k; + size_t kerOff = kerOffsets.get(k); + + size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + + particles.template getProp<prp>(xqK) += calcKernels.get(kerOff+i); + } + } + + template<unsigned int prp> + void DrawKernelNN(vector_type &particles, int k) + { + size_t xpK = k; + size_t kerOff = kerOffsets.get(k); + size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + + particles.template 
getProp<prp>(xqK) = 1.0; + } + } + + template<unsigned int prp> + void DrawKernel(vector_type &particles, int k, int i) + { + size_t xpK = k; + size_t kerOff = kerOffsets.get(k); + size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + + particles.template getProp<prp>(xqK)[i] += calcKernels.get(kerOff+i); + } + } + + void checkMomenta(vector_type &particles) + { + openfpm::vector<aggregate<double,double>> momenta; + openfpm::vector<aggregate<double,double>> momenta_accu; + + momenta.resize(monomialBasis.size()); + momenta_accu.resize(monomialBasis.size()); + + for (int i = 0; i < momenta.size(); i++) + { + momenta.template get<0>(i) = 3000000000.0; + momenta.template get<1>(i) = -3000000000.0; + } + + size_t N = particles.size_local(); + for (size_t j = 0; j < N; ++j) + { + double eps = localEps.get(j); + + for (int i = 0; i < momenta.size(); i++) + { + momenta_accu.template get<0>(i) = 0.0; + } + + size_t xpK = supportRefs.get(j); + Point<dim, T> xp = particles.getPos(xpK); + + size_t kerOff = kerOffsets.get(xpK); + size_t supportKeysSize = kerOffsets.get(j+1)-kerOffsets.get(j); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(j)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + Point<dim, T> xq = particles.getPos(xqK); + Point<dim, T> normalizedArg = (xp - xq) / eps; + + auto ker = calcKernels.get(kerOff+i); + + int counter = 0; + size_t N = monomialBasis.getElements().size(); + + for (size_t i = 0; i < N; ++i) + { + const Monomial<dim> &m = monomialBasis.getElement(i); + + T mbValue = m.evaluate(normalizedArg); + momenta_accu.template get<0>(counter) += mbValue * ker; + + ++counter; + } + } + + for (int i = 0; i < momenta.size(); i++) + { + if (momenta_accu.template get<0>(i) < momenta.template get<0>(i)) + { + momenta.template get<0>(i) = momenta_accu.template get<0>(i); + } + + if (momenta_accu.template get<1>(i) > momenta.template get<1>(i)) + { + momenta.template get<1>(i) = momenta_accu.template get<0>(i); + } + } + } + + for (int i = 0; i < momenta.size(); i++) + { + std::cout << "MOMENTA: " << monomialBasis.getElements()[i] << "Min: " << momenta.template get<0>(i) << " " << "Max: " << momenta.template get<1>(i) << std::endl; + } + } + + /** + * Computes the value of the differential operator on all the particles, + * using the f values stored at the fValuePos position in the aggregate + * and storing the resulting Df values at the DfValuePos position in the aggregate. + * @tparam fValuePos Position in the aggregate of the f values to use. + * @tparam DfValuePos Position in the aggregate of the Df values to store. + * @param particles The set of particles to iterate over. 
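+     *
+     * Editorial note (sketch of the formula the loop below implements, not from the
+     * original source): with m the order of the differential signature, w_pq the
+     * precomputed kernel coefficients stored in calcKernels and eps_p the local scaling,
+     * the loop evaluates the discrete DCPSE approximation
+     *
+     *   D^m f(x_p) ~ eps_p^(-m) * sum_q ( f(x_q) - (-1)^m f(x_p) ) * w_pq,
+     *
+     * which is why the code flips the sign of f(x_p) for even-order operators.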
+ */ + template<unsigned int fValuePos, unsigned int DfValuePos> + void computeDifferentialOperator(vector_type &particles) { + char sign = 1; + if (differentialOrder % 2 == 0) { + sign = -1; + } + + size_t N = particles.size_local(); + for (size_t j = 0; j < N; ++j) + { + double epsInvPow = localEpsInvPow.get(j); + + T Dfxp = 0; + size_t xpK = supportRefs.get(j); + Point<dim, typename vector_type::stype> xp = particles.getPos(xpK); + T fxp = sign * particles.template getProp<fValuePos>(xpK); + size_t kerOff = kerOffsets.get(xpK); + + size_t supportKeysSize = kerOffsets.get(j+1)-kerOffsets.get(j); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(j)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + T fxq = particles.template getProp<fValuePos>(xqK); + + Dfxp += (fxq + fxp) * calcKernels.get(kerOff+i); + } + Dfxp *= epsInvPow; + + particles.template getProp<DfValuePos>(xpK) = Dfxp; + } + } + + + /*! \brief Get the number of neighbours + * + * \return the number of neighbours + * + */ + inline int getNumNN(const vect_dist_key_dx &key) + { + return kerOffsets.get(key.getKey()+1)-kerOffsets.get(key.getKey()); + } + + /*! \brief Get the coefficent j (Neighbour) of the particle key + * + * \param key particle + * \param j neighbour + * + * \return the coefficent + * + */ + inline T getCoeffNN(const vect_dist_key_dx &key, int j) + { + size_t base = kerOffsets.get(key.getKey()); + return calcKernels.get(base + j); + } + + /*! \brief Get the number of neighbours + * + * \return the number of neighbours + * + */ + inline size_t getIndexNN(const vect_dist_key_dx &key, int j) + { + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.getKey())]; + return supportKeys[j]; + } + + + inline T getSign() + { + T sign = 1.0; + if (differentialOrder % 2 == 0) { + sign = -1; + } + + return sign; + } + + T getEpsilonInvPrefactor(const vect_dist_key_dx &key) + { + return localEpsInvPow.get(key.getKey()); + } + + /** + * Computes the value of the differential operator for one particle for o1 representing a scalar + * + * \param key particle + * \param o1 source property + * \return the selected derivative + * + */ + template<typename op_type> + auto computeDifferentialOperator(const vect_dist_key_dx &key, + op_type &o1) -> decltype(is_scalar<std::is_fundamental<decltype(o1.value( + key))>::value>::analyze(key, o1)) { + + typedef decltype(is_scalar<std::is_fundamental<decltype(o1.value(key))>::value>::analyze(key, o1)) expr_type; + + T sign = 1.0; + if (differentialOrder % 2 == 0) { + sign = -1; + } + + size_t localKey = subsetKeyPid.get(key.getKey()); + double eps = localEps.get(localKey); + double epsInvPow = localEpsInvPow.get(localKey); + + auto &particles = o1.getVector(); + +#ifdef SE_CLASS1 + if(particles.getMapCtr()!=this->getUpdateCtr()) + { + std::cerr<<__FILE__<<":"<<__LINE__<<" Error: You forgot a DCPSE operator update after map."<<std::endl; + } +#endif + + expr_type Dfxp = 0; + size_t xpK = supportRefs.get(localKey); + Point<dim, T> xp = particles.getPos(xpK); + expr_type fxp = sign * o1.value(key); + size_t kerOff = kerOffsets.get(xpK); + + size_t supportKeysSize = kerOffsets.get(localKey+1)-kerOffsets.get(localKey); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(localKey)]; + + for (int i = 0; i < supportKeysSize; i++) + { + size_t xqK = supportKeys[i]; + expr_type fxq = o1.value(vect_dist_key_dx(xqK)); + Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+i); + } + Dfxp = Dfxp * 
epsInvPow; + + // T trueDfxp = particles.template getProp<2>(xpK); + // Store Dfxp in the right position + return Dfxp; + } + + /** + * Computes the value of the differential operator for one particle for o1 representing a vector + * + * \param key particle + * \param o1 source property + * \param i component + * \return the selected derivative + * + */ + template<typename op_type> + auto computeDifferentialOperator(const vect_dist_key_dx &key, + op_type &o1, + int i) -> typename decltype(is_scalar<std::is_fundamental<decltype(o1.value( + key))>::value>::analyze(key, o1))::coord_type { + + typedef typename decltype(is_scalar<std::is_fundamental<decltype(o1.value(key))>::value>::analyze(key, o1))::coord_type expr_type; + + T sign = 1.0; + if (differentialOrder % 2 == 0) { + sign = -1; + } + + size_t localKey = subsetKeyPid.get(key.getKey()); + double eps = localEps.get(localKey); + double epsInvPow = localEpsInvPow(localKey); + + auto &particles = o1.getVector(); + +#ifdef SE_CLASS1 + if(particles.getMapCtr()!=this->getUpdateCtr()) + { + std::cerr<<__FILE__<<":"<<__LINE__<<" Error: You forgot a DCPSE operator update after map."<<std::endl; + } +#endif + + expr_type Dfxp = 0; + size_t xpK = supportRefs.get(localKey); + + Point<dim, T> xp = particles.getPos(xpK); + expr_type fxp = sign * o1.value(key)[i]; + size_t kerOff = kerOffsets.get(xpK); + size_t supportKeysSize = kerOffsets.get(localKey+1)-kerOffsets.get(localKey); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(localKey)]; + + for (int j = 0; j < supportKeysSize; j++) + { + size_t xqK = supportKeys[j]; + expr_type fxq = o1.value(vect_dist_key_dx(xqK))[i]; + Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+j); + } + Dfxp = Dfxp * epsInvPow; + // + //T trueDfxp = particles.template getProp<2>(xpK); + // Store Dfxp in the right position + return Dfxp; + } + + void initializeUpdate(vector_type &particles) + { +#ifdef SE_CLASS1 + update_ctr=particles.getMapCtr(); +#endif + + kerOffsets.clear(); + supportKeys1D.clear(); + supportRefs.clear(); + localEps.clear(); + localEpsInvPow.clear(); + calcKernels.clear(); + subsetKeyPid.clear(); + + initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor); + } + +private: + + void initializeAdaptive(vector_type &particles, + unsigned int convergenceOrder, + double rCut) { + // Still need to be tested +#ifdef SE_CLASS1 + this->update_ctr=particles.getMapCtr(); +#endif + + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + kerOffsets.resize(particles.size_local()+1); + + const T condVTOL = 1e2; + + if (!isSharedSupport) { + SupportBuilder<vector_type,vector_type> supportBuilder(particles,particles, differentialSignature, rCut,differentialOrder == 0); + unsigned int requiredSupportSize = monomialBasis.size(); + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + + auto it = particles.getDomainIterator(); + while (it.isNext()) { + auto key_o = particles.getOriginKey(it.get()); + subsetKeyPid.get(key_o.getKey()) = it.get().getKey(); + + Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + tempSupportKeys.get(key_o.getKey()) = support.getKeys(); + kerOffsets.get(key_o.getKey()) = supportKeysTotalN; + 
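+                // Note (descriptive comment, added editorially): the supports are kept in a
+                // CSR-like layout. kerOffsets.get(i) stores the running prefix sum
+                // supportKeysTotalN, so the neighbours of particle i end up in
+                // supportKeys1D[kerOffsets(i) .. kerOffsets(i+1)) once the flat array is
+                // filled after this loop; maxSupportSize is tracked to size the per-thread
+                // scratch matrix B used later on the GPU.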
+ if (maxSupportSize < support.size()) + maxSupportSize = support.size(); + supportKeysTotalN += support.size(); + + EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); + // Vandermonde matrix computation + Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> + vandermonde(support, monomialBasis, particles,particles); + vandermonde.getMatrix(V); + + T condV = conditionNumber(V, condVTOL); + T eps = vandermonde.getEps(); + + if (condV > condVTOL) { + requiredSupportSize *= 2; + std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug + continue; + } else requiredSupportSize = monomialBasis.size(); + + ++it; + } + + kerOffsets.get(supportRefs.size()) = supportKeysTotalN; + supportKeys1D.resize(supportKeysTotalN); + + size_t offset = 0; + for (size_t i = 0; i < tempSupportKeys.size(); ++i) + for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset) + supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j); + } + + kerOffsets.hostToDevice(); supportKeys1D.hostToDevice(); + assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched); + } + + void initializeAdaptive(vector_type &particles, + unsigned int convergenceOrder, + float rCut) { + // Still need to be tested +#ifdef SE_CLASS1 + this->update_ctr=particles.getMapCtr(); +#endif + + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + kerOffsets.resize(particles.size_local()+1); + + const T condVTOL = 1e2; + + if (!isSharedSupport) { + SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0); + unsigned int requiredSupportSize = monomialBasis.size(); + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + + auto it = particles.getDomainIterator(); + while (it.isNext()) { + auto key_o = particles.getOriginKey(it.get()); + subsetKeyPid.get(key_o.getKey()) = it.get().getKey(); + + Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + tempSupportKeys.get(key_o.getKey()) = support.getKeys(); + kerOffsets.get(key_o.getKey()) = supportKeysTotalN; + + if (maxSupportSize < support.size()) + maxSupportSize = support.size(); + supportKeysTotalN += support.size(); + + EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); + // Vandermonde matrix computation + Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> + vandermonde(support, monomialBasis, particles); + vandermonde.getMatrix(V); + + T condV = conditionNumber(V, condVTOL); + T eps = vandermonde.getEps(); + + if (condV > condVTOL) { + requiredSupportSize *= 2; + std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug + continue; + } else requiredSupportSize = monomialBasis.size(); + + ++it; + } + + kerOffsets.get(supportRefs.size()) = supportKeysTotalN; + supportKeys1D.resize(supportKeysTotalN); + + size_t offset = 0; + for (size_t i = 0; i < tempSupportKeys.size(); ++i) + for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset) + supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j); + } + + kerOffsets.hostToDevice(); supportKeys1D.hostToDevice(); + 
assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched); + } + + void initializeStaticSize(vector_type &particles, + unsigned int convergenceOrder, + double rCut, + double supportSizeFactor) { +#ifdef SE_CLASS1 + this->update_ctr=particles.getMapCtr(); +#endif + this->rCut=rCut; + this->supportSizeFactor=supportSizeFactor; + this->convergenceOrder=convergenceOrder; + + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + +std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + auto it = particles.getDomainIterator(); + + if (opt==support_options::RADIUS) { + if (!isSharedSupport) { + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + ++it; + } + + SupportBuilderGPU<vector_type> supportBuilder(particles, rCut); + supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + } + } else { + if (!isSharedSupport){ + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0); + kerOffsets.resize(supportRefs.size()+1); + + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); + + Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + tempSupportKeys.get(key_o.getKey()) = support.getKeys(); + kerOffsets.get(key_o.getKey()) = supportKeysTotalN; + + if (maxSupportSize < support.size()) maxSupportSize = support.size(); + supportKeysTotalN += support.size(); + ++it; + } + + kerOffsets.get(supportRefs.size()) = supportKeysTotalN; + supportKeys1D.resize(supportKeysTotalN); + + size_t offset = 0; + for (size_t i = 0; i < tempSupportKeys.size(); ++i) + for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset) + supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j); + } + + kerOffsets.hostToDevice(); supportKeys1D.hostToDevice(); + } + +std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); +std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1); +std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." 
<< std::endl; + + assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched); + } + + // ad hoc solution to template specialization for float/double + void initializeStaticSize(vector_type &particles, + unsigned int convergenceOrder, + float rCut, + float supportSizeFactor) { +#ifdef SE_CLASS1 + this->update_ctr=particles.getMapCtr(); +#endif + this->rCut=rCut; + this->supportSizeFactor=supportSizeFactor; + this->convergenceOrder=convergenceOrder; + + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + +std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + auto it = particles.getDomainIterator(); + + if (opt==support_options::RADIUS) { + if (!isSharedSupport) { + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + ++it; + } + + SupportBuilderGPU<vector_type> supportBuilder(particles, rCut); + supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + } + } else { + if (!isSharedSupport){ + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0); + kerOffsets.resize(supportRefs.size()+1); + + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); + + Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); + supportRefs.get(key_o.getKey()) = key_o.getKey(); + tempSupportKeys.get(key_o.getKey()) = support.getKeys(); + kerOffsets.get(key_o.getKey()) = supportKeysTotalN; + + if (maxSupportSize < support.size()) maxSupportSize = support.size(); + supportKeysTotalN += support.size(); + ++it; + } + + kerOffsets.get(supportRefs.size()) = supportKeysTotalN; + supportKeys1D.resize(supportKeysTotalN); + + size_t offset = 0; + for (size_t i = 0; i < tempSupportKeys.size(); ++i) + for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset) + supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j); + } + + kerOffsets.hostToDevice(); supportKeys1D.hostToDevice(); + } + +std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); +std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1); +std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." 
<< std::endl; + + assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched); + } + + template<typename cublasLUDec_type, typename cublasTriangSolve_type> + void assembleLocalMatrices(cublasLUDec_type cublasLUDecFunc, cublasTriangSolve_type cublasTriangSolveFunc) { + std::chrono::high_resolution_clock::time_point t3 = std::chrono::high_resolution_clock::now(); + + // move monomial basis to kernel + auto& basis = monomialBasis.getBasis(); + openfpm::vector_custd<Monomial_gpu<dim>> basisTemp(basis.begin(), basis.end()); + basisTemp.template hostToDevice(); + MonomialBasis<dim, aggregate<Monomial_gpu<dim>>, openfpm::vector_custd_ker, memory_traits_inte> monomialBasisKernel(basisTemp.toKernel()); + + size_t numMatrices = supportRefs.size(); + size_t monomialBasisSize = monomialBasis.size(); + + int numSMs, numSMsMult = 1; + cudaDeviceGetAttribute(&numSMs, cudaDevAttrMultiProcessorCount, 0); + size_t numThreads = numSMs*numSMsMult*256; + std::cout << "numThreads " << numThreads << " numMatrices " << numMatrices << std::endl; + + // B is an intermediate matrix + openfpm::vector_custd<T> BMat(numThreads * maxSupportSize * monomialBasisSize); + // allocate device space for A, b + openfpm::vector_custd<T> AMat(numMatrices*monomialBasisSize*monomialBasisSize); + openfpm::vector_custd<T> bVec(numMatrices*monomialBasisSize); + + // create array of pointers to pass T** pointers to cublas subroutines + openfpm::vector_custd<T*> AMatPointers(numMatrices); + openfpm::vector_custd<T*> bVecPointers(numMatrices); + + auto AMatKernel = AMat.toKernel(); T* AMatKernelPointer = (T*) AMatKernel.getPointer(); + for (size_t i = 0; i < numMatrices; i++) AMatPointers.get(i) = AMatKernelPointer + i*monomialBasisSize*monomialBasisSize; + + auto bVecKernel = bVec.toKernel(); T* bVecKernelPointer = (T*) bVecKernel.getPointer(); + for (size_t i = 0; i < numMatrices; i++) bVecPointers.get(i) = bVecKernelPointer + i*monomialBasisSize; + + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time_span0 = std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t3); + std::cout << "Preallocation took " << time_span0.count() * 1000. << " milliseconds." << std::endl; + + // assemble local matrices on GPU + std::chrono::high_resolution_clock::time_point t9 = std::chrono::high_resolution_clock::now(); + particles.hostToDevicePos(); + supportRefs.template hostToDevice(); + AMatPointers.template hostToDevice(); + bVecPointers.template hostToDevice(); + + auto AMatPointersKernel = AMatPointers.toKernel(); T** AMatPointersKernelPointer = (T**) AMatPointersKernel.getPointer(); + auto bVecPointersKernel = bVecPointers.toKernel(); T** bVecPointersKernelPointer = (T**) bVecPointersKernel.getPointer(); + + assembleLocalMatrices_gpu<<<numSMsMult*numSMs, 256>>>(particles.toKernel(), differentialSignature, differentialOrder, monomialBasisKernel, supportRefs.toKernel(), kerOffsets.toKernel(), supportKeys1D.toKernel(), + AMatPointersKernelPointer, bVecPointersKernelPointer, localEps.toKernel(), localEpsInvPow.toKernel(), BMat.toKernel(), numMatrices, maxSupportSize); + + localEps.template deviceToHost(); + localEpsInvPow.template deviceToHost(); + + std::chrono::high_resolution_clock::time_point t10 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time_span3 = std::chrono::duration_cast<std::chrono::duration<double>>(t10 - t9); + std::cout << "assembleLocalMatrices_gpu took " << time_span3.count() * 1000. << " milliseconds." 
<< std::endl; + + //cublas lu solver + std::chrono::high_resolution_clock::time_point t7 = std::chrono::high_resolution_clock::now(); + cublasHandle_t cublas_handle; cublasCreate_v2(&cublas_handle); + + openfpm::vector_custd<int> infoArray(numMatrices); auto infoArrayKernel = infoArray.toKernel(); + cublasLUDecFunc(cublas_handle, monomialBasisSize, AMatPointersKernelPointer, monomialBasisSize, NULL, (int*) infoArrayKernel.getPointer(), numMatrices); + cudaDeviceSynchronize(); + + infoArray.template deviceToHost(); + for (size_t i = 0; i < numMatrices; i++) + if (infoArray.get(i) != 0) fprintf(stderr, "Factorization of matrix %d Failed: Matrix may be singular\n", i); + + const double alpha = 1.f; + cublasTriangSolveFunc(cublas_handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, monomialBasisSize, 1, &alpha, AMatPointersKernelPointer, monomialBasisSize, bVecPointersKernelPointer, monomialBasisSize, numMatrices); + cublasTriangSolveFunc(cublas_handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, monomialBasisSize, 1, &alpha, AMatPointersKernelPointer, monomialBasisSize, bVecPointersKernelPointer, monomialBasisSize, numMatrices); + cudaDeviceSynchronize(); + + std::chrono::high_resolution_clock::time_point t8 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time_span4 = std::chrono::duration_cast<std::chrono::duration<double>>(t8 - t7); + std::cout << "cublas took " << time_span4.count() * 1000. << " milliseconds." << std::endl; + + std::chrono::high_resolution_clock::time_point t5 = std::chrono::high_resolution_clock::now(); + // populate the calcKernels on GPU + calcKernels.resize(supportKeysTotalN); + localEps.template hostToDevice(); + auto it2 = particles.getDomainIteratorGPU(512); + calcKernels_gpu<dim><<<it2.wthr,it2.thr>>>(particles.toKernel(), monomialBasisKernel, kerOffsets.toKernel(), supportKeys1D.toKernel(), bVecPointersKernelPointer, localEps.toKernel(), numMatrices, calcKernels.toKernel()); + calcKernels.template deviceToHost(); + + std::chrono::high_resolution_clock::time_point t6 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time_span5 = std::chrono::duration_cast<std::chrono::duration<double>>(t6 - t5); + std::cout << "calcKernels_gpu took " << time_span5.count() * 1000. << " milliseconds." << std::endl; + + // free the resources + cublasDestroy_v2(cublas_handle); + + std::chrono::high_resolution_clock::time_point t4 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t4 - t3); + std::cout << "Matrices inverse took " << time_span.count() * 1000. << " milliseconds." 
<< std::endl; + } + + T computeKernel(Point<dim, T> x, EMatrix<T, Eigen::Dynamic, 1> & a) const { + unsigned int counter = 0; + T res = 0, expFactor = exp(-norm2(x)); + + size_t N = monomialBasis.getElements().size(); + for (size_t i = 0; i < N; ++i) + { + const Monomial<dim> &m = monomialBasis.getElement(i); + + T coeff = a(counter); + T mbValue = m.evaluate(x); + res += coeff * mbValue * expFactor; + ++counter; + } + return res; + } + + + // template <unsigned int a_dim> + // T computeKernel(Point<dim, T> x, const T (& a) [a_dim]) const { + T computeKernel(Point<dim, T> x, const T* a) const { + unsigned int counter = 0; + T res = 0, expFactor = exp(-norm2(x)); + + size_t N = monomialBasis.getElements().size(); + for (size_t i = 0; i < N; ++i) + { + const Monomial<dim> &m = monomialBasis.getElement(i); + + T coeff = a[counter]; + T mbValue = m.evaluate(x); + res += coeff * mbValue * expFactor; + ++counter; + } + return res; + } + + + T conditionNumber(const EMatrix<T, -1, -1> &V, T condTOL) const { + std::cout << "conditionNumber" << std::endl; + Eigen::JacobiSVD<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> svd(V); + T cond = svd.singularValues()(0) + / svd.singularValues()(svd.singularValues().size() - 1); + if (cond > condTOL) { + std::cout + << "WARNING: cond(V) = " << cond + << " is greater than TOL = " << condTOL + << ", numPoints(V) = " << V.rows() + << std::endl; // debug + } + return cond; + } + +}; + + +template<unsigned int dim, typename T, typename particles_type, typename monomialBasis_type, typename supportKey_type, typename localEps_type, typename matrix_type> +__global__ void assembleLocalMatrices_gpu( + particles_type particles, Point<dim, unsigned int> differentialSignature, unsigned int differentialOrder, monomialBasis_type monomialBasis, + supportKey_type supportRefs, supportKey_type kerOffsets, supportKey_type supportKeys1D, T** h_A, T** h_b, localEps_type localEps, localEps_type localEpsInvPow, + matrix_type BMat, size_t numMatrices, size_t maxSupportSize) + { + auto p_key = GET_PARTICLE(particles); + size_t monomialBasisSize = monomialBasis.size(); + size_t BStartPos = maxSupportSize * monomialBasisSize * p_key; T* B = &((T*)BMat.getPointer())[BStartPos]; + const auto& basisElements = monomialBasis.getElements(); + int rhsSign = (Monomial_gpu<dim>(differentialSignature).order() % 2 == 0) ? 
1 : -1; + + for (; + p_key < numMatrices; + p_key += blockDim.x * gridDim.x) + { + Point<dim, T> xa = particles.getPos(p_key); + + size_t supportKeysSize = kerOffsets.get(p_key+1)-kerOffsets.get(p_key); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)]; + size_t xpK = supportRefs.get(p_key); + + assert(supportKeysSize >= monomialBasis.size()); + + T FACTOR = 2, avgNeighbourSpacing = 0; + for (int i = 0 ; i < supportKeysSize; i++) { + Point<dim,T> off = xa; off -= particles.getPosOrig(supportKeys[i]); + for (size_t j = 0; j < dim; ++j) + avgNeighbourSpacing += fabs(off.value(j)); + } + + avgNeighbourSpacing /= supportKeysSize; + T eps = FACTOR * avgNeighbourSpacing; + + assert(eps != 0); + + localEps.get(p_key) = eps; + localEpsInvPow.get(p_key) = 1.0 / pow(eps,differentialOrder); + + // EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> B = E * V; + for (int i = 0; i < supportKeysSize; ++i) + for (int j = 0; j < monomialBasisSize; ++j) { + Point<dim,T> off = xa; off -= particles.getPosOrig(supportKeys[i]); + const Monomial_gpu<dim>& m = basisElements.get(j); + + T V_ij = m.evaluate(off) / pow(eps, m.order()); + T E_ii = exp(- norm2(off) / (2.0 * eps * eps)); + B[i*monomialBasisSize+j] = E_ii * V_ij; + } + + T sum = 0.0; + // EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> A = B.transpose() * B; + for (int i = 0; i < monomialBasisSize; ++i) + for (int j = 0; j < monomialBasisSize; ++j) { + for (int k = 0; k < supportKeysSize; ++k) + sum += B[k*monomialBasisSize+i] * B[k*monomialBasisSize+j]; + + h_A[p_key][i*monomialBasisSize+j] = sum; sum = 0.0; + } + + // Compute RHS vector b + for (size_t i = 0; i < monomialBasisSize; ++i) { + const Monomial_gpu<dim>& dm = basisElements.get(i).getDerivative(differentialSignature); + h_b[p_key][i] = rhsSign * dm.evaluate(Point<dim, T>(0)); + } + } +} + +template<unsigned int dim, typename particles_type, typename T, typename monomialBasis_type, typename supportKey_type, typename localEps_type, typename calcKernels_type> +__global__ void calcKernels_gpu(particles_type particles, monomialBasis_type monomialBasis, supportKey_type kerOffsets, supportKey_type supportKeys1D, + T** h_b, localEps_type localEps, size_t numMatrices, calcKernels_type calcKernels) + { + auto p_key = GET_PARTICLE(particles); + Point<dim, T> xa = particles.getPos(p_key); + + size_t monomialBasisSize = monomialBasis.size(); + const auto& basisElements = monomialBasis.getElements(); + size_t supportKeysSize = kerOffsets.get(p_key+1)-kerOffsets.get(p_key); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)]; + + T* calcKernelsLocal = &((T*)calcKernels.getPointer())[kerOffsets.get(p_key)]; + T eps = localEps.get(p_key); + + for (size_t j = 0; j < supportKeysSize; ++j) + { + size_t xqK = supportKeys[j]; + Point<dim, T> xq = particles.getPosOrig(xqK); + Point<dim, T> offNorm = (xa - xq) / eps; + T expFactor = exp(-norm2(offNorm)); + + T res = 0; + for (size_t i = 0; i < monomialBasisSize; ++i) { + const Monomial_gpu<dim> &m = basisElements.get(i); + T mbValue = m.evaluate(offNorm); + T coeff = h_b[p_key][i]; + + res += coeff * mbValue * expFactor; + } + calcKernelsLocal[j] = res; + } +} + +#endif +#endif //OPENFPM_PDATA_DCPSE_CUH + diff --git a/src/DCPSE/Dcpse.hpp b/src/DCPSE/Dcpse.hpp index b88d514e..c43987fe 100644 --- a/src/DCPSE/Dcpse.hpp +++ b/src/DCPSE/Dcpse.hpp @@ -1,6 +1,7 @@ // -// Created by tommaso on 29/03/19. -// Modified by Abhinav and Pietro +// DCPSE Created by tommaso on 29/03/19. 
+// Modified, Updated and Maintained by Abhinav and Pietro +//Surface Operators by Abhinav Singh on 07/10/2021 #ifndef OPENFPM_PDATA_DCPSE_HPP #define OPENFPM_PDATA_DCPSE_HPP @@ -14,6 +15,9 @@ #include "Vandermonde.hpp" #include "DcpseDiagonalScalingMatrix.hpp" #include "DcpseRhs.hpp" +#include "hash_map/hopscotch_map.h" + +template<unsigned int N> struct value_t {}; template<bool cond> struct is_scalar { @@ -35,7 +39,7 @@ struct is_scalar<false> { }; }; -template<unsigned int dim, typename vector_type> +template<unsigned int dim, typename vector_type,typename vector_type2=vector_type> class Dcpse { public: @@ -53,27 +57,110 @@ public: // 2) The machinery for assembling and solving the linear system for coefficients starts... // 3) The user can then call an evaluate(point) method to get the evaluation of the differential operator // on the given point. + ////c=HOverEpsilon. Note that the Eps value is computed by <h>/c (<h>=local average spacing for each particle and its support). This factor c is used in the Vandermonde.hpp. + double HOverEpsilon=0.9; private: const Point<dim, unsigned int> differentialSignature; const unsigned int differentialOrder; const MonomialBasis<dim> monomialBasis; - //std::vector<EMatrix<T, Eigen::Dynamic, 1>> localCoefficients; // Each MPI rank has just access to the local ones - std::vector<Support> localSupports; // Each MPI rank has just access to the local ones - std::vector<T> localEps; // Each MPI rank has just access to the local ones - std::vector<T> localEpsInvPow; // Each MPI rank has just access to the local ones - std::vector<T> localSumA; - openfpm::vector<size_t> kerOffsets; + bool isSharedLocalSupport = false; + openfpm::vector<Support> localSupports; // Each MPI rank has just access to the local ones + openfpm::vector<T> localEps; // Each MPI rank has just access to the local ones + openfpm::vector<T> localEpsInvPow; // Each MPI rank has just access to the local ones + + openfpm::vector<size_t> kerOffsets,accKerOffsets; openfpm::vector<T> calcKernels; + openfpm::vector<T> accCalcKernels; + openfpm::vector<T> nSpacings; + vector_type & particlesFrom; + vector_type2 & particlesTo; + double rCut,supportSizeFactor=1,nSpacing,AdapFac; + unsigned int convergenceOrder,nCount; - vector_type & particles; - double rCut; - unsigned int convergenceOrder; - double supportSizeFactor; + bool isSurfaceDerivative=false; + size_t initialParticleSize; - support_options opt; + support_options opt; public: + template<unsigned int NORMAL_ID> + void createNormalParticles(vector_type &particles) + { + particles.template ghost_get<NORMAL_ID>(SKIP_LABELLING); + initialParticleSize=particles.size_local_with_ghost(); + auto it = particles.getDomainAndGhostIterator(); + while(it.isNext()){ + auto key=it.get(); + Point<dim,T> xp=particles.getPos(key), Normals=particles.template getProp<NORMAL_ID>(key); + if(opt==support_options::ADAPTIVE) + { + nSpacing=nSpacings.get(key.getKey()); + } + for(int i=1;i<=nCount;i++){ + particles.addAtEnd(); + for(size_t j=0;j<dim;j++) + {particles.getLastPosEnd()[j]=xp[j]+i*nSpacing*Normals[j];} + particles.addAtEnd(); + for(size_t j=0;j<dim;j++) + {particles.getLastPosEnd()[j]=xp[j]-i*nSpacing*Normals[j];} + } + ++it; + } + } + + void accumulateAndDeleteNormalParticles(vector_type &particles) + { + tsl::hopscotch_map<size_t, size_t> nMap; + auto it = particles.getDomainIterator(); + auto supportsIt = localSupports.begin(); + openfpm::vector_std<size_t> supportBuffer; + accCalcKernels.clear(); + accKerOffsets.clear(); + 
accKerOffsets.resize(initialParticleSize); + accKerOffsets.fill(-1); + while(it.isNext()){ + supportBuffer.clear(); + nMap.clear(); + auto key=it.get(); + Support support = *supportsIt; + size_t xpK = support.getReferencePointKey(); + size_t kerOff = kerOffsets.get(xpK); + auto &keys = support.getKeys(); + accKerOffsets.get(xpK)=accCalcKernels.size(); + for (int i = 0 ; i < keys.size() ; i++) + { + size_t xqK = keys.get(i); + int real_particle=(xqK-initialParticleSize)/(2.*nCount); + if(real_particle<0) + { + real_particle=xqK; + } + auto found=nMap.find(real_particle); + if(found!=nMap.end()){ + accCalcKernels.get(found->second)+=calcKernels.get(kerOff+i); + } + else{ + supportBuffer.add(); + supportBuffer.get(supportBuffer.size()-1)=real_particle; + accCalcKernels.add(); + accCalcKernels.get(accCalcKernels.size()-1)=calcKernels.get(kerOff+i); + nMap[real_particle]=accCalcKernels.size()-1; + } + } + keys.swap(supportBuffer); + localSupports.get(xpK) = support; + ++supportsIt; + ++it; + } + particles.resizeAtEnd(initialParticleSize); + localEps.resize(initialParticleSize); + localEpsInvPow.resize(initialParticleSize); + localSupports.resize(initialParticleSize); + calcKernels.swap(accCalcKernels); + kerOffsets.swap(accKerOffsets); + } + #ifdef SE_CLASS1 int getUpdateCtr() const { @@ -89,36 +176,124 @@ public: T rCut, T supportSizeFactor = 1, //Maybe change this to epsilon/h or h/epsilon = c 0.9. Benchmark support_options opt = support_options::RADIUS) - :particles(particles), + :particlesFrom(particles), + particlesTo(particles), differentialSignature(differentialSignature), differentialOrder(Monomial<dim>(differentialSignature).order()), monomialBasis(differentialSignature.asArray(), convergenceOrder), opt(opt) { - // This - particles.ghost_get_subset(); + particles.ghost_get_subset(); // This communicates which ghost particles to be excluded from support if (supportSizeFactor < 1) { - initializeAdaptive(particles, convergenceOrder, rCut); + initializeAdaptive(particles, particles, convergenceOrder, rCut); } else { - initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor); + initializeStaticSize(particles, particles, convergenceOrder, rCut, supportSizeFactor); } } + //Surface DCPSE Constructor + template<unsigned int NORMAL_ID> + Dcpse(vector_type &particles, + Point<dim, unsigned int> differentialSignature, + unsigned int convergenceOrder, + T rCut, + T nSpacing, + value_t< NORMAL_ID >, + support_options opt = support_options::RADIUS) + :particlesFrom(particles), + particlesTo(particles), + differentialSignature(differentialSignature), + differentialOrder(Monomial<dim>(differentialSignature).order()), + monomialBasis(differentialSignature.asArray(), convergenceOrder), + opt(opt),isSurfaceDerivative(true),nSpacing(nSpacing),nCount(floor(rCut/nSpacing)) + { + particles.ghost_get_subset(); // This communicates which ghost particles to be excluded from support + + if(opt==support_options::ADAPTIVE) { + this->AdapFac=nSpacing; + if(dim==2){ + nCount=3; + } + else{ + nCount=2; + } + SupportBuilder<vector_type,vector_type2> + supportBuilder(particlesFrom,particlesTo, differentialSignature, rCut, differentialOrder == 0); + supportBuilder.setAdapFac(nSpacing); + auto it = particlesTo.getDomainAndGhostIterator(); + while (it.isNext()) { + auto key_o = particlesTo.getOriginKey(it.get()); + Support support = supportBuilder.getSupport(it,monomialBasis.size(),opt); + nSpacings.add(supportBuilder.getLastMinspacing()); + ++it; + } + + } + if(opt!=support_options::LOAD) { + 
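            // Surface embedding: for every particle, place nCount virtual copies on each
            // side along its normal, spaced by nSpacing; the kernels are built on this
            // extended set, and the virtual particles are merged back and deleted below.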
createNormalParticles<NORMAL_ID>(particles); +#ifdef SE_CLASS1 + particles.write("WithNormalParticlesQC"); +#endif + } + initializeStaticSize(particles, particles, convergenceOrder, rCut, supportSizeFactor); + if(opt!=support_options::LOAD) { + accumulateAndDeleteNormalParticles(particles); + } + } + + Dcpse(vector_type &particles, + const Dcpse<dim, vector_type>& other, + Point<dim, unsigned int> differentialSignature, + unsigned int convergenceOrder, + T rCut, + T supportSizeFactor = 1, + support_options opt = support_options::RADIUS) + :particlesFrom(particles), particlesTo(particles), opt(opt), + differentialSignature(differentialSignature), + differentialOrder(Monomial<dim>(differentialSignature).order()), + monomialBasis(differentialSignature.asArray(), convergenceOrder), + localSupports(other.localSupports), + isSharedLocalSupport(true) + { + particles.ghost_get_subset(); + if (supportSizeFactor < 1) + initializeAdaptive(particles, particles, convergenceOrder, rCut); + else + initializeStaticSize(particles, particles, convergenceOrder, rCut, supportSizeFactor); + } + Dcpse(vector_type &particlesFrom,vector_type2 &particlesTo, + Point<dim, unsigned int> differentialSignature, + unsigned int convergenceOrder, + T rCut, + T supportSizeFactor = 1, + support_options opt = support_options::RADIUS) + :particlesFrom(particlesFrom),particlesTo(particlesTo), + differentialSignature(differentialSignature), + differentialOrder(Monomial<dim>(differentialSignature).order()), + monomialBasis(differentialSignature.asArray(), convergenceOrder), + opt(opt) + { + particlesFrom.ghost_get_subset(); + if (supportSizeFactor < 1) + initializeAdaptive(particlesFrom,particlesTo,convergenceOrder, rCut); + else + initializeStaticSize(particlesFrom,particlesTo,convergenceOrder, rCut, supportSizeFactor); + } template<unsigned int prp> void DrawKernel(vector_type &particles, int k) { - Support support = localSupports[k]; + Support support = localSupports.get(k); size_t xpK = k; size_t kerOff = kerOffsets.get(k); auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); particles.template getProp<prp>(xqK) += calcKernels.get(kerOff+i); } } @@ -126,13 +301,13 @@ public: template<unsigned int prp> void DrawKernelNN(vector_type &particles, int k) { - Support support = localSupports[k]; + Support support = localSupports.get(k); size_t xpK = k; size_t kerOff = kerOffsets.get(k); auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); particles.template getProp<prp>(xqK) = 1.0; } } @@ -140,16 +315,138 @@ public: template<unsigned int prp> void DrawKernel(vector_type &particles, int k, int i) { - Support support = localSupports[k]; + + Support support = localSupports.get(k); size_t xpK = k; size_t kerOff = kerOffsets.get(k); auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); particles.template getProp<prp>(xqK)[i] += calcKernels.get(kerOff+i); } } + /* + * breif Particle to Particle Interpolation Evaluation + */ + template<unsigned int prp1,unsigned int prp2> + void p2p() + { + typedef typename std::remove_reference<decltype(particlesTo.template getProp<prp2>(0))>::type T2; + + auto it = particlesTo.getDomainIterator(); + auto supportsIt = localSupports.begin(); + auto epsItInvPow = localEpsInvPow.begin(); + while (it.isNext()){ + double epsInvPow = *epsItInvPow; + T2 Dfxp = 0; + Support support = 
*supportsIt; + size_t xpK = support.getReferencePointKey(); + //Point<dim, typename vector_type::stype> xp = particlesTo.getPos(xpK); + //T fxp = sign * particlesTo.template getProp<fValuePos>(xpK); + size_t kerOff = kerOffsets.get(xpK); + auto & keys = support.getKeys(); + for (int i = 0 ; i < keys.size() ; i++) + { + size_t xqK = keys.get(i); + T2 fxq = particlesFrom.template getProp<prp1>(xqK); + Dfxp += fxq * calcKernels.get(kerOff+i); + } + Dfxp = epsInvPow*Dfxp; + // + //T trueDfxp = particles.template getProp<2>(xpK); + // Store Dfxp in the right position + particlesTo.template getProp<prp2>(xpK) = Dfxp; + // + ++it; + ++supportsIt; + ++epsItInvPow; + } + } + /*! \brief Save the DCPSE computations + * + */ + void save(const std::string &file){ + auto & v_cl=create_vcluster(); + size_t req = 0; + + Packer<decltype(localSupports),HeapMemory>::packRequest(localSupports,req); + Packer<decltype(localEps),HeapMemory>::packRequest(localEps,req); + Packer<decltype(localEpsInvPow),HeapMemory>::packRequest(localEpsInvPow,req); + Packer<decltype(calcKernels),HeapMemory>::packRequest(calcKernels,req); + Packer<decltype(kerOffsets),HeapMemory>::packRequest(kerOffsets,req); + + // allocate the memory + HeapMemory pmem; + //pmem.allocate(req); + ExtPreAlloc<HeapMemory> mem(req,pmem); + + //Packing + Pack_stat sts; + Packer<decltype(localSupports),HeapMemory>::pack(mem,localSupports,sts); + Packer<decltype(localEps),HeapMemory>::pack(mem,localEps,sts); + Packer<decltype(localEpsInvPow),HeapMemory>::pack(mem,localEpsInvPow,sts); + Packer<decltype(calcKernels),HeapMemory>::pack(mem,calcKernels,sts); + Packer<decltype(kerOffsets),HeapMemory>::pack(mem,kerOffsets,sts); + + // Save into a binary file + std::ofstream dump (file+"_"+std::to_string(v_cl.rank()), std::ios::out | std::ios::binary); + if (dump.is_open() == false) + { std::cerr << __FILE__ << ":" << __LINE__ <<" Unable to write since dump is open at rank "<<v_cl.rank()<<std::endl; + return; + } + dump.write ((const char *)pmem.getPointer(), pmem.size()); + return; + } + /*! 
\brief Load the DCPSE computations + * + * + */ + void load(const std::string & file) + { + auto & v_cl=create_vcluster(); + std::ifstream fs (file+"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary | std::ios::ate ); + if (fs.is_open() == false) + { + std::cerr << __FILE__ << ":" << __LINE__ << " error, opening file: " << file << std::endl; + return; + } + + // take the size of the file + size_t sz = fs.tellg(); + + fs.close(); + + // reopen the file without ios::ate to read + std::ifstream input (file+"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary ); + if (input.is_open() == false) + {//some message here maybe + return;} + + // Create the HeapMemory and the ExtPreAlloc memory + size_t req = 0; + req += sz; + HeapMemory pmem; + ExtPreAlloc<HeapMemory> mem(req,pmem); + + mem.allocate(pmem.size()); + + // read + input.read((char *)pmem.getPointer(), sz); + + //close the file + input.close(); + + //Unpacking + Unpack_stat ps; + Unpacker<decltype(localSupports),HeapMemory>::unpack(mem,localSupports,ps); + Unpacker<decltype(localEps),HeapMemory>::unpack(mem,localEps,ps); + Unpacker<decltype(localEpsInvPow),HeapMemory>::unpack(mem,localEpsInvPow,ps); + Unpacker<decltype(calcKernels),HeapMemory>::unpack(mem,calcKernels,ps); + Unpacker<decltype(kerOffsets),HeapMemory>::unpack(mem,kerOffsets,ps); + return; + } + void checkMomenta(vector_type &particles) { @@ -184,23 +481,24 @@ public: auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); Point<dim, T> xq = particles.getPosOrig(xqK); Point<dim, T> normalizedArg = (xp - xq) / eps; auto ker = calcKernels.get(kerOff+i); int counter = 0; - for (const Monomial<dim> &m : monomialBasis.getElements()) - { - T mbValue = m.evaluate(normalizedArg); + size_t N = monomialBasis.getElements().size(); + for (size_t i = 0; i < N; ++i) + { + const Monomial<dim> &m = monomialBasis.getElement(i); + T mbValue = m.evaluate(normalizedArg); momenta_accu.template get<0>(counter) += mbValue * ker; ++counter; } - } for (int i = 0 ; i < momenta.size() ; i++) @@ -222,9 +520,9 @@ public: ++epsIt; } - for (int i = 0 ; i < momenta.size() ; i++) + for (size_t i = 0 ; i < momenta.size() ; i++) { - std::cout << "MOMENTA: " << monomialBasis.getElements()[i] << "Min: " << momenta.template get<0>(i) << " " << "Max: " << momenta.template get<1>(i) << std::endl; + std::cout << "MOMENTA: " << monomialBasis.getElement(i) << "Min: " << momenta.template get<0>(i) << " " << "Max: " << momenta.template get<1>(i) << std::endl; } } @@ -252,13 +550,13 @@ public: T Dfxp = 0; Support support = *supportsIt; size_t xpK = support.getReferencePointKey(); - Point<dim, typename vector_type::stype> xp = particles.getPos(support.getReferencePointKey()); + //Point<dim, typename vector_type::stype> xp = particles.getPos(support.getReferencePointKey()); T fxp = sign * particles.template getProp<fValuePos>(xpK); size_t kerOff = kerOffsets.get(xpK); auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); T fxq = particles.template getProp<fValuePos>(xqK); Dfxp += (fxq + fxp) * calcKernels.get(kerOff+i); @@ -283,7 +581,7 @@ public: */ inline int getNumNN(const vect_dist_key_dx &key) { - return localSupports[key.getKey()].size(); + return localSupports.get(key.getKey()).size(); } /*! 
\brief Get the coefficent j (Neighbour) of the particle key @@ -307,14 +605,14 @@ public: */ inline size_t getIndexNN(const vect_dist_key_dx &key, int j) { - return localSupports[key.getKey()].getKeys()[j]; + return localSupports.get(key.getKey()).getKeys().get(j); } inline T getSign() { T sign = 1.0; - if (differentialOrder % 2 == 0) { + if (differentialOrder % 2 == 0 && differentialOrder!=0) { sign = -1; } @@ -323,7 +621,7 @@ public: T getEpsilonInvPrefactor(const vect_dist_key_dx &key) { - return localEpsInvPow[key.getKey()]; + return localEpsInvPow.get(key.getKey()); } /** @@ -346,8 +644,8 @@ public: sign = -1; } - double eps = localEps[key.getKey()]; - double epsInvPow = localEpsInvPow[key.getKey()]; + double eps = localEps.get(key.getKey()); + double epsInvPow = localEpsInvPow.get(key.getKey()); auto &particles = o1.getVector(); @@ -359,15 +657,15 @@ public: #endif expr_type Dfxp = 0; - Support support = localSupports[key.getKey()]; + Support support = localSupports.get(key.getKey()); size_t xpK = support.getReferencePointKey(); - Point<dim, T> xp = particles.getPos(xpK); + //Point<dim, T> xp = particles.getPos(xpK); expr_type fxp = sign * o1.value(key); size_t kerOff = kerOffsets.get(xpK); auto & keys = support.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - size_t xqK = keys[i]; + size_t xqK = keys.get(i); expr_type fxq = o1.value(vect_dist_key_dx(xqK)); Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+i); } @@ -402,11 +700,10 @@ public: sign = -1; } - double eps = localEps[key.getKey()]; - double epsInvPow = localEpsInvPow[key.getKey()]; + double eps = localEps.get(key.getKey()); + double epsInvPow = localEpsInvPow.get(key.getKey()); auto &particles = o1.getVector(); - #ifdef SE_CLASS1 if(particles.getMapCtr()!=this->getUpdateCtr()) { @@ -415,15 +712,15 @@ public: #endif expr_type Dfxp = 0; - Support support = localSupports[key.getKey()]; + Support support = localSupports.get(key.getKey()); size_t xpK = support.getReferencePointKey(); - Point<dim, T> xp = particles.getPos(xpK); + //Point<dim, T> xp = particles.getPos(xpK); expr_type fxp = sign * o1.value(key)[i]; size_t kerOff = kerOffsets.get(xpK); auto & keys = support.getKeys(); for (int j = 0 ; j < keys.size() ; j++) { - size_t xqK = keys[j]; + size_t xqK = keys.get(j); expr_type fxq = o1.value(vect_dist_key_dx(xqK))[i]; Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+j); } @@ -434,128 +731,93 @@ public: return Dfxp; } - void initializeUpdate(vector_type &particles) + + void initializeUpdate(vector_type &particlesFrom,vector_type2 &particlesTo) { #ifdef SE_CLASS1 - update_ctr=particles.getMapCtr(); + update_ctr=particlesFrom.getMapCtr(); #endif localSupports.clear(); - localSupports.resize(particles.size_local_orig()); localEps.clear(); - localEps.resize(particles.size_local_orig()); localEpsInvPow.clear(); - localEpsInvPow.resize(particles.size_local_orig()); calcKernels.clear(); kerOffsets.clear(); - kerOffsets.resize(particles.size_local_orig()); - kerOffsets.fill(-1); - - SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); - unsigned int requiredSupportSize = monomialBasis.size() * supportSizeFactor; - - auto it = particles.getDomainIterator(); - while (it.isNext()) { - // Get the points in the support of the DCPSE kernel and store the support for reuse - //Support<vector_type> support = supportBuilder.getSupport(it, requiredSupportSize,opt); - Support support = supportBuilder.getSupport(it, requiredSupportSize,opt); - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), 
monomialBasis.size()); - - auto key_o = particles.getOriginKey(it.get()); - - // Vandermonde matrix computation - Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> - vandermonde(support, monomialBasis,particles); - vandermonde.getMatrix(V); - - T eps = vandermonde.getEps(); - - localSupports[key_o.getKey()] = support; - localEps[key_o.getKey()] = eps; - localEpsInvPow[key_o.getKey()] = 1.0 / openfpm::math::intpowlog(eps,differentialOrder); - // Compute the diagonal matrix E - DcpseDiagonalScalingMatrix<dim> diagonalScalingMatrix(monomialBasis); - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> E(support.size(), support.size()); - diagonalScalingMatrix.buildMatrix(E, support, eps, particles); - // Compute intermediate matrix B - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> B = E * V; - // Compute matrix A - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> A = B.transpose() * B; - - // Compute RHS vector b - DcpseRhs<dim> rhs(monomialBasis, differentialSignature); - EMatrix<T, Eigen::Dynamic, 1> b(monomialBasis.size(), 1); - rhs.template getVector<T>(b); - // Get the vector where to store the coefficients... - EMatrix<T, Eigen::Dynamic, 1> a(monomialBasis.size(), 1); - // ...solve the linear system... - a = A.colPivHouseholderQr().solve(b); - // ...and store the solution for later reuse - kerOffsets.get(key_o.getKey()) = calcKernels.size(); + initializeStaticSize(particlesFrom,particlesTo, convergenceOrder, rCut, supportSizeFactor); + } - Point<dim, T> xp = particles.getPosOrig(key_o); + void initializeUpdate(vector_type &particles) + { +#ifdef SE_CLASS1 + update_ctr=particles.getMapCtr(); +#endif - for (auto &xqK : support.getKeys()) - { - Point<dim, T> xq = particles.getPosOrig(xqK); - Point<dim, T> normalizedArg = (xp - xq) / eps; + localSupports.clear(); + localEps.clear(); + localEpsInvPow.clear(); + calcKernels.clear(); + kerOffsets.clear(); - calcKernels.add(computeKernel(normalizedArg, a)); - } - // - ++it; - } + initializeStaticSize(particles,particles, convergenceOrder, rCut, supportSizeFactor); } private: - void initializeAdaptive(vector_type &particles, + void initializeAdaptive(vector_type &particlesFrom, + vector_type2 &particlesTo, unsigned int convergenceOrder, T rCut) { - SupportBuilder<vector_type> - supportBuilder(particles, differentialSignature, rCut); + SupportBuilder<vector_type,vector_type2> + supportBuilder(particlesFrom, particlesTo, differentialSignature, rCut, differentialOrder == 0); unsigned int requiredSupportSize = monomialBasis.size(); - localSupports.resize(particles.size_local_orig()); - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); - kerOffsets.resize(particles.size_local_orig()); + if (!isSharedLocalSupport) + localSupports.resize(particlesTo.size_local_orig()); + localEps.resize(particlesTo.size_local_orig()); + localEpsInvPow.resize(particlesTo.size_local_orig()); + kerOffsets.resize(particlesTo.size_local_orig()); kerOffsets.fill(-1); - auto it = particles.getDomainIterator(); + auto it = particlesTo.getDomainIterator(); while (it.isNext()) { const T condVTOL = 1e2; + auto key_o = particlesTo.getOriginKey(it.get()); + + if (!isSharedLocalSupport) + localSupports.get(key_o.getKey()) = supportBuilder.getSupport(it, requiredSupportSize,opt); + + Support& support = localSupports.get(key_o.getKey()); // Get the points in the support of the DCPSE kernel and store the support for reuse - Support support = supportBuilder.getSupport(it, requiredSupportSize,opt); EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> 
V(support.size(), monomialBasis.size()); // Vandermonde matrix computation Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> - vandermonde(support, monomialBasis, particles); + vandermonde(support, monomialBasis,particlesFrom, particlesTo,HOverEpsilon); vandermonde.getMatrix(V); - T condV = conditionNumber(V, condVTOL); T eps = vandermonde.getEps(); - if (condV > condVTOL) { - requiredSupportSize *= 2; - std::cout - << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize - << std::endl; // debug - continue; - } else { - requiredSupportSize = monomialBasis.size(); + if (!isSharedLocalSupport) { + T condV = conditionNumber(V, condVTOL); + + if (condV > condVTOL) { + requiredSupportSize *= 2; + std::cout + << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize + << std::endl; // debug + continue; + } else + requiredSupportSize = monomialBasis.size(); } - auto key_o = particles.getOriginKey(it.get()); - localSupports[key_o.getKey()] = support; - localEps[key_o.getKey()] = eps; - localEpsInvPow[key_o.getKey()] = 1.0 / openfpm::math::intpowlog(eps,differentialOrder); + localSupports.get(key_o.getKey()) = support; + localEps.get(key_o.getKey()) = eps; + localEpsInvPow.get(key_o.getKey()) = 1.0 / openfpm::math::intpowlog(eps,differentialOrder); // Compute the diagonal matrix E DcpseDiagonalScalingMatrix<dim> diagonalScalingMatrix(monomialBasis); EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> E(support.size(), support.size()); - diagonalScalingMatrix.buildMatrix(E, support, eps,particles); + diagonalScalingMatrix.buildMatrix(E, support, eps, particlesFrom, particlesTo); // Compute intermediate matrix B EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> B = E * V; // Compute matrix A @@ -571,11 +833,14 @@ private: // ...and store the solution for later reuse kerOffsets.get(key_o.getKey()) = calcKernels.size(); - Point<dim, T> xp = particles.getPosOrig(key_o); + Point<dim, T> xp = particlesTo.getPosOrig(key_o); - for (auto &xqK : support.getKeys()) + const auto& support_keys = support.getKeys(); + size_t N = support_keys.size(); + for (size_t i = 0; i < N; ++i) { - Point<dim, T> xq = particles.getPosOrig(xqK); + const auto& xqK = support_keys.get(i); + Point<dim, T> xq = particlesFrom.getPosOrig(xqK); Point<dim, T> normalizedArg = (xp - xq) / eps; calcKernels.add(computeKernel(normalizedArg, a)); @@ -586,47 +851,71 @@ private: } - void initializeStaticSize(vector_type &particles, + void initializeStaticSize(vector_type &particlesFrom,vector_type2 &particlesTo, unsigned int convergenceOrder, T rCut, T supportSizeFactor) { #ifdef SE_CLASS1 - this->update_ctr=particles.getMapCtr(); + this->update_ctr=particlesFrom.getMapCtr(); #endif this->rCut=rCut; this->supportSizeFactor=supportSizeFactor; this->convergenceOrder=convergenceOrder; - SupportBuilder<vector_type> - supportBuilder(particles, differentialSignature, rCut); + auto & v_cl=create_vcluster(); + if(this->opt==LOAD){ + if(v_cl.rank()==0) + {std::cout<<"Warning: Creating empty DC-PSE operator! 
Please use update or load to get kernels."<<std::endl;} + return; + } + SupportBuilder<vector_type,vector_type2> + supportBuilder(particlesFrom,particlesTo, differentialSignature, rCut, differentialOrder == 0); unsigned int requiredSupportSize = monomialBasis.size() * supportSizeFactor; + supportBuilder.setAdapFac(AdapFac); - localSupports.resize(particles.size_local_orig()); - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); - kerOffsets.resize(particles.size_local_orig()); - - auto it = particles.getDomainIterator(); + if (!isSharedLocalSupport) + localSupports.resize(particlesTo.size_local_orig()); + localEps.resize(particlesTo.size_local_orig()); + localEpsInvPow.resize(particlesTo.size_local_orig()); + kerOffsets.resize(particlesTo.size_local_orig()); + kerOffsets.fill(-1); + T avgSpacingGlobal=0,avgSpacingGlobal2=0,maxSpacingGlobal=0,minSpacingGlobal=std::numeric_limits<T>::max(); + size_t Counter=0; + auto it = particlesTo.getDomainIterator(); while (it.isNext()) { // Get the points in the support of the DCPSE kernel and store the support for reuse - Support support = supportBuilder.getSupport(it, requiredSupportSize,opt); - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); + auto key_o = particlesTo.getOriginKey(it.get()); + + if (!isSharedLocalSupport) + localSupports.get(key_o.getKey()) = supportBuilder.getSupport(it, requiredSupportSize,opt); - auto key_o = particles.getOriginKey(it.get()); + Support& support = localSupports.get(key_o.getKey()); + + EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); // Vandermonde matrix computation Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> - vandermonde(support, monomialBasis,particles); + vandermonde(support, monomialBasis,particlesFrom,particlesTo,HOverEpsilon); vandermonde.getMatrix(V); T eps = vandermonde.getEps(); + avgSpacingGlobal+=eps; + T tSpacing = vandermonde.getMinSpacing(); + avgSpacingGlobal2+=tSpacing; + if(tSpacing>maxSpacingGlobal) + { + maxSpacingGlobal=tSpacing; + } + if(tSpacing<minSpacingGlobal) + { + minSpacingGlobal=tSpacing; + } - localSupports[key_o.getKey()] = support; - localEps[key_o.getKey()] = eps; - localEpsInvPow[key_o.getKey()] = 1.0 / openfpm::math::intpowlog(eps,differentialOrder); + localEps.get(key_o.getKey()) = eps; + localEpsInvPow.get(key_o.getKey()) = 1.0 / openfpm::math::intpowlog(eps,differentialOrder); // Compute the diagonal matrix E DcpseDiagonalScalingMatrix<dim> diagonalScalingMatrix(monomialBasis); EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> E(support.size(), support.size()); - diagonalScalingMatrix.buildMatrix(E, support, eps, particles); + diagonalScalingMatrix.buildMatrix(E, support, eps, particlesFrom, particlesTo); // Compute intermediate matrix B EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> B = E * V; // Compute matrix A @@ -643,28 +932,42 @@ private: // ...and store the solution for later reuse kerOffsets.get(key_o.getKey()) = calcKernels.size(); - Point<dim, T> xp = particles.getPosOrig(key_o); + Point<dim, T> xp = particlesTo.getPosOrig(key_o); - for (auto &xqK : support.getKeys()) + const auto& support_keys = support.getKeys(); + size_t N = support_keys.size(); + for (size_t i = 0; i < N; ++i) { - Point<dim, T> xq = particles.getPosOrig(xqK); + const auto& xqK = support_keys.get(i); + Point<dim, T> xq = particlesFrom.getPosOrig(xqK); Point<dim, T> normalizedArg = (xp - xq) / eps; - calcKernels.add(computeKernel(normalizedArg, a)); } // ++it; + 
++Counter; } - } - - + v_cl.sum(avgSpacingGlobal); + v_cl.sum(avgSpacingGlobal2); + v_cl.max(maxSpacingGlobal); + v_cl.min(minSpacingGlobal); + v_cl.sum(Counter); + v_cl.execute(); + if(v_cl.rank()==0) + {std::cout<<"DCPSE Operator Construction Complete. The global avg spacing in the support <h> is: "<<HOverEpsilon*avgSpacingGlobal/(T(Counter))<<" (c="<<HOverEpsilon<<"). Avg:"<<avgSpacingGlobal2/(T(Counter))<<" Range:["<<minSpacingGlobal<<","<<maxSpacingGlobal<<"]."<<std::endl;} + } T computeKernel(Point<dim, T> x, EMatrix<T, Eigen::Dynamic, 1> & a) const { T res = 0; unsigned int counter = 0; T expFactor = exp(-norm2(x)); - for (const Monomial<dim> &m : monomialBasis.getElements()) { + + size_t N = monomialBasis.getElements().size(); + for (size_t i = 0; i < N; ++i) + { + const Monomial<dim> &m = monomialBasis.getElement(i); + T coeff = a(counter); T mbValue = m.evaluate(x); res += coeff * mbValue * expFactor; @@ -690,7 +993,6 @@ private: }; - #endif #endif //OPENFPM_PDATA_DCPSE_HPP diff --git a/src/DCPSE/DcpseDiagonalScalingMatrix.hpp b/src/DCPSE/DcpseDiagonalScalingMatrix.hpp index ef1dbd6f..5fa97b08 100644 --- a/src/DCPSE/DcpseDiagonalScalingMatrix.hpp +++ b/src/DCPSE/DcpseDiagonalScalingMatrix.hpp @@ -1,5 +1,6 @@ // // Created by tommaso on 29/03/19. +// Modified by Serhii // #ifndef OPENFPM_PDATA_DCPSEDIAGONALSCALINGMATRIX_HPP @@ -8,39 +9,39 @@ #include "MonomialBasis.hpp" #include "Support.hpp" -template <unsigned int dim> + +template <unsigned int dim, typename monomialBasis_type = MonomialBasis<dim>> class DcpseDiagonalScalingMatrix { private: - const MonomialBasis<dim> monomialBasis; + const monomialBasis_type& monomialBasis; public: + DcpseDiagonalScalingMatrix(const monomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} - DcpseDiagonalScalingMatrix(const MonomialBasis<dim> &monomialBasis) : monomialBasis(monomialBasis) {} - - template <typename T, typename MatrixType, typename vector_type> - void buildMatrix(MatrixType &M, Support support, T eps, vector_type & particles) + template <typename T, typename MatrixType, typename vector_type, typename vector_type2> + void buildMatrix(MatrixType &M, Support support, T eps, vector_type & particlesFrom , vector_type2 & particlesTo) { // Check that all the dimension constraints are met assert(support.size() >= monomialBasis.size()); assert(M.rows() == support.size()); assert(M.cols() == support.size()); - Point<dim,typename vector_type::stype> ref_p = particles.getPosOrig(support.getReferencePointKey()); + Point<dim,typename vector_type::stype> ref_p = particlesTo.getPosOrig(support.getReferencePointKey()); // Fill the diagonal matrix M.setZero(); // Make sure the rest of the matrix is zero! - int i = 0; - for (const auto& pt : support.getKeys()) + const auto& support_keys = support.getKeys(); + size_t N = support_keys.size(); + for (size_t i = 0; i < N; ++i) { + const auto& pt = support_keys.get(i); Point<dim,typename vector_type::stype> p = ref_p; - p -= particles.getPosOrig(pt); + p -= particlesFrom.getPosOrig(pt); M(i,i) = exp(- norm2(p) / (2.0 * eps * eps)); - ++i; } } - }; #endif //OPENFPM_PDATA_DCPSEDIAGONALSCALINGMATRIX_HPP diff --git a/src/DCPSE/DcpseInterpolation.hpp b/src/DCPSE/DcpseInterpolation.hpp new file mode 100644 index 00000000..50c01ae2 --- /dev/null +++ b/src/DCPSE/DcpseInterpolation.hpp @@ -0,0 +1,107 @@ +// +// Created by Abhinav Singh on 03.11.21. +// + +#ifndef OPENFPM_PDATA_DCPSEINTERPOLATION_HPP +#define OPENFPM_PDATA_DCPSEINTERPOLATION_HPP +#include "DCPSE/Dcpse.hpp" + +/*! 
\brief Class for Creating the DCPSE Operator For the function approximation objects and computes DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. + * + * \return Operator Dx which is a function on Vector_dist_Expressions + * + */ +template<typename particlesFrom_type, typename particlesTo_type> +class PPInterpolation +{ + + void *dcpse; + + particlesFrom_type & particlesFrom; + particlesTo_type & particlesTo; + +public: + /*! \brief Constructor for Creating the DCPSE Operator Dx and objects and computes DCPSE Kernels. + * + * + * \param parts particle set + * \param ord order of convergence of the operator + * \param rCut Argument for cell list construction + * \param oversampling_factor multiplier to the minimum no. of particles required by the operator in support + * \param support_options default:N_particles, Radius can be used to select all particles inside rCut. Overrides oversampling. + * + * \return Operator F which is a function on Vector_dist_Expressions + * + */ + PPInterpolation(particlesFrom_type &particlesFrom,particlesTo_type &particlesTo, unsigned int ord, typename particlesFrom_type::stype rCut, + double oversampling_factor = dcpse_oversampling_factor, + support_options opt = support_options::RADIUS) + :particlesFrom(particlesFrom),particlesTo(particlesTo) + { + Point<particlesFrom_type::dims, unsigned int> p; + p.zero(); + dcpse = new Dcpse<particlesFrom_type::dims, particlesFrom_type,particlesTo_type>(particlesFrom,particlesTo, p, ord, rCut, oversampling_factor, opt); + } + + void deallocate() { + delete (Dcpse<particlesFrom_type::dims, particlesFrom_type, particlesTo_type> *) dcpse; + } + + /* template<typename operand_type> + vector_dist_expression_op<operand_type, Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype>, VECT_DCPSE> + operator()(operand_type arg) { + typedef Dcpse_type<operand_type::vtype::dims, typename operand_type::vtype> dcpse_type; + return vector_dist_expression_op<operand_type, dcpse_type, VECT_DCPSE>(arg, *(dcpse_type *) dcpse); + }*/ + + template<unsigned int prp1,unsigned int prp2> + void p2p() { + auto dcpse_temp = (Dcpse<particlesFrom_type::dims, particlesFrom_type, particlesTo_type>*) dcpse; + dcpse_temp->template p2p<prp1,prp2>(); + + } + + // template<unsigned int prp, typename particles_type> + // void DrawKernel(particles_type &particles, int k) { + // auto dcpse_temp = (Dcpse_type<particlesFrom_type::dims, particlesFrom_type, particlesTo_type> *) dcpse; + // dcpse_temp->template DrawKernel<prp>(particles, k); + + // } + + // template<unsigned int prp, typename particles_type> + // void DrawKernelNN(particles_type &particles, int k) { + // auto dcpse_temp = (Dcpse_type<particlesFrom_type::dims, particlesFrom_type,particlesTo_type> *) dcpse; + // dcpse_temp->template DrawKernelNN<prp>(particles, k); + + // } + + // template<typename particles_type> + // void checkMomenta(particles_type &particles) { + // auto dcpse_temp = (Dcpse_type<particles_type::dims, particlesFrom_type, particlesTo_type> *) dcpse; + // dcpse_temp->checkMomenta(particles); + + // } + + /*! \brief Method for Updating the DCPSE Operator by recomputing DCPSE Kernels. 
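 *
 * A minimal usage sketch (the particle sets, convergence order, cut-off and the
 * property indices 0/1 below are illustrative assumptions, not fixed by this class):
 *
 * \code
 * PPInterpolation<decltype(partsFrom), decltype(partsTo)> interp(partsFrom, partsTo, 2, rCut);
 * interp.p2p<0, 1>();  // interpolate property 0 of partsFrom onto property 1 of partsTo
 * // ... after the particle sets have been modified or re-mapped ...
 * interp.update();     // recompute the DC-PSE kernels for the new configuration
 * \endcode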
+ * + * + * \param parts particle set + */ + void update() { + auto dcpse_temp = (Dcpse<particlesFrom_type::dims, particlesFrom_type, particlesTo_type> *) dcpse; + dcpse_temp->initializeUpdate(particlesFrom,particlesTo); + + } + +}; + + + +#endif //OPENFPM_PDATA_DCPSEINTERPOLATION_HPP diff --git a/src/DCPSE/Monomial.cuh b/src/DCPSE/Monomial.cuh new file mode 100644 index 00000000..f3b892e5 --- /dev/null +++ b/src/DCPSE/Monomial.cuh @@ -0,0 +1,204 @@ +// +// Created by Serhii +// + +#ifndef OPENFPM_PDATA_MONOMIALBASISELEMENT_CUH +#define OPENFPM_PDATA_MONOMIALBASISELEMENT_CUH + +#include "Space/Shape/Point.hpp" + + +template<unsigned int dim> +class Monomial_gpu +{ +private: + unsigned int sum = 0; + unsigned int exponents[dim]; + unsigned int scalar = 1; + +public: + __host__ __device__ Monomial_gpu(); + __host__ __device__ Monomial_gpu(const Monomial_gpu<dim> &other); + __host__ __device__ Monomial_gpu(const Monomial<dim> &other); + __host__ __device__ explicit Monomial_gpu(const Point<dim, unsigned int> &other, unsigned int scalar = 1); + __host__ __device__ explicit Monomial_gpu(const Point<dim, long int> &other, unsigned int scalar = 1); + __host__ __device__ explicit Monomial_gpu(const unsigned int other[dim]); + + __host__ __device__ Monomial_gpu<dim> &operator=(const Monomial_gpu<dim> &other); + __host__ __device__ Monomial_gpu<dim> &operator=(const Monomial<dim> &other); + __host__ __device__ bool operator==(const Monomial_gpu<dim> &other) const; + __host__ __device__ void swap(const Monomial_gpu<dim> &other); + + __host__ __device__ unsigned int order() const; + __host__ __device__ unsigned int getExponent(unsigned int i) const; + __host__ __device__ void setExponent(unsigned int i, unsigned int value); + __host__ __device__ Monomial_gpu<dim> getDerivative(const Point<dim, unsigned int> differentialOrder) const; + __host__ __device__ unsigned int getScalar() const { return scalar; } + + template<typename T> __host__ __device__ T evaluate(const Point<dim, T> x) const; + template<typename T> __host__ __device__ T evaluate(const T (&x)[dim]) const; + +private: + __host__ __device__ void updateSum(); +}; + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu() +{ + for (size_t i = 0; i < dim; ++i) exponents[i] = 0; + sum = 0; +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu(const Point<dim, unsigned int> &other, unsigned int scalar) : scalar(scalar) +{ + for (size_t i = 0; i < other.nvals; ++i) + exponents[i] = other.value(i); + updateSum(); +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu(const Point<dim, long int> &other, unsigned int scalar) : scalar(scalar) +{ + for (size_t i = 0; i < other.nvals; ++i) + exponents[i] = other.value(i); + updateSum(); +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu(const unsigned int other[dim]) : Monomial_gpu(Point<dim, unsigned int>(other)) +{ + for (size_t i = 0; i < dim; ++i) + exponents[i] = other[i]; + updateSum(); +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu(const Monomial_gpu<dim> &other) + : sum(other.sum), scalar(other.scalar) +{ + for (size_t i = 0; i < dim; ++i) + exponents[i] = other.exponents[i]; +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim>::Monomial_gpu(const Monomial<dim> &other) + : sum(other.order()), scalar(other.getScalar()) +{ + for (size_t i = 0; i < dim; ++i) + exponents[i] = other.getExponent(i); +} + +template<unsigned 
int dim> +__host__ __device__ Monomial_gpu<dim> &Monomial_gpu<dim>::operator=(const Monomial_gpu<dim> &other) +{ + for (size_t i = 0; i < dim; ++i) + exponents[i] = other.exponents[i]; + + sum = other.sum; + scalar = other.scalar; + return *this; +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim> &Monomial_gpu<dim>::operator=(const Monomial<dim> &other) +{ + for (size_t i = 0; i < dim; ++i) + exponents[i] = other.getExponent(i); + + sum = other.order(); + scalar = other.getScalar(); + return *this; +} + +template<unsigned int dim> +__host__ __device__ void Monomial_gpu<dim>::updateSum() +{ + sum = 0; + for (unsigned int i = 0; i < dim; ++i) + sum += exponents[i]; +} + +template<unsigned int dim> +__host__ __device__ unsigned int Monomial_gpu<dim>::order() const +{ + return sum; +} + +template<unsigned int dim> +__host__ __device__ unsigned int Monomial_gpu<dim>::getExponent(unsigned int i) const +{ + return exponents[i]; +} + +template<unsigned int dim> +__host__ __device__ void Monomial_gpu<dim>::setExponent(unsigned int i, unsigned int value) +{ + exponents[i] = value; + updateSum(); +} + +template<unsigned int dim> +__host__ __device__ bool Monomial_gpu<dim>::operator== + (const Monomial_gpu<dim> &other) const +{ + bool EQ = true; + + for (size_t i = 0; i < dim; ++i) + if (exponents[i] != other[i]) + EQ = false; + + return EQ && (scalar == other.scalar); +} + +template<unsigned int dim> +template<typename T> +__host__ __device__ T Monomial_gpu<dim>::evaluate(const Point<dim, T> x) const +{ + T res = scalar; + for (unsigned int i = 0; i < dim; ++i) + res *= pow(x[i], getExponent(i)); + + return res; +} + +template<unsigned int dim> +template<typename T> +__host__ __device__ T Monomial_gpu<dim>::evaluate(const T (& x) [dim]) const +{ + T res = scalar; + for (unsigned int i = 0; i < dim; ++i) + res *= pow(x[i], getExponent(i)); + + return res; +} + +template<unsigned int dim> +__host__ __device__ Monomial_gpu<dim> Monomial_gpu<dim>::getDerivative(const Point<dim, unsigned int> differentialOrder) const +{ + unsigned int s = scalar; + Point<dim, unsigned int> e(exponents); + for (unsigned int i = 0; i < dim; ++i) + { + unsigned int origExp = e.value(i); + int targetExp = static_cast<int>(origExp) - static_cast<int>(differentialOrder.value(i)); + for (int k = origExp; k > targetExp && k >= 0; --k) + { + s *= k; + } + e.get(i) = static_cast<unsigned int>((targetExp < 0) ? 
0 : targetExp); + } + return Monomial_gpu(e, s); +} + +template<unsigned int dim> +__host__ __device__ void Monomial_gpu<dim>::swap(const Monomial_gpu<dim> &other) +{ + sum = other.sum; + scalar = other.scalar; + for (size_t i = 0; i < dim; ++i) + exponents[i] = other.exponents[i]; +} + + +#endif //OPENFPM_PDATA_MONOMIALBASISELEMENT_CUH diff --git a/src/DCPSE/Monomial.hpp b/src/DCPSE/Monomial.hpp index 6d9b19ff..11370a5d 100644 --- a/src/DCPSE/Monomial.hpp +++ b/src/DCPSE/Monomial.hpp @@ -55,6 +55,7 @@ public: return lhs << rhs.scalar << " : " << rhs.exponents.toString(); } + __host__ __device__ unsigned int getScalar() const { return scalar; } private: void updateSum(); }; diff --git a/src/DCPSE/MonomialBasis.hpp b/src/DCPSE/MonomialBasis.hpp index c9390a9d..381b975a 100644 --- a/src/DCPSE/MonomialBasis.hpp +++ b/src/DCPSE/MonomialBasis.hpp @@ -5,45 +5,51 @@ #ifndef OPENFPM_PDATA_MONOMIALBASIS_H #define OPENFPM_PDATA_MONOMIALBASIS_H -#include <vector> +#include "Vector/map_vector.hpp" #include <Grid/grid_sm.hpp> #include <Grid/iterators/grid_key_dx_iterator_sub_bc.hpp> #include "Monomial.hpp" +#include "Monomial.cuh" -template<unsigned int dim> + +template<unsigned int dim, typename T = Monomial<dim>, template<typename, template<typename...> class...> class vector_type = openfpm::vector_std, template<typename...> class... Args> class MonomialBasis { private: - std::vector<Monomial<dim>> basis; + vector_type<T, Args...> basis; public: - MonomialBasis(const std::vector<unsigned int> °rees, unsigned int convergenceOrder); + MonomialBasis() {} + + MonomialBasis(const vector_type<unsigned int, Args...> °rees, unsigned int convergenceOrder); MonomialBasis(unsigned int degrees[dim], unsigned int convergenceOrder); // explicit MonomialBasis(Point<dim, unsigned int> degrees, unsigned int convergenceOrder); - explicit MonomialBasis(const std::vector<Monomial<dim>> &basis) : basis(basis) {} + __host__ __device__ explicit MonomialBasis(const vector_type<T, Args...> &basis) : basis(basis) {} - MonomialBasis(const MonomialBasis &other); + __host__ __device__ MonomialBasis(const MonomialBasis &other); - MonomialBasis &operator=(const MonomialBasis &other); + __host__ __device__ MonomialBasis &operator=(const MonomialBasis &other); - unsigned int size() const; + __host__ __device__ unsigned int size() const; - const Monomial<dim> &getElement(unsigned int i) const; + __host__ __device__ const T &getElement(size_t i) const; - Monomial<dim> &getElement(unsigned int i); + __host__ __device__ T &getElement(size_t i); - const std::vector<Monomial<dim>> &getElements() const; + __host__ __device__ const vector_type<T, Args...> &getElements() const; - MonomialBasis<dim> getDerivative(Point<dim, unsigned int> differentialOrder) const; + __host__ __device__ MonomialBasis<dim, T, vector_type, Args...> getDerivative(Point<dim, unsigned int> differentialOrder) const; - bool operator==(const MonomialBasis &other) const; + __host__ __device__ bool operator==(const MonomialBasis &other) const; + + __host__ __device__ vector_type<T, Args...>& getBasis() { return basis; } template<typename charT, typename traits> friend std::basic_ostream<charT, traits> & - operator<<(std::basic_ostream<charT, traits> &lhs, MonomialBasis<dim> const &rhs) + operator<<(std::basic_ostream<charT, traits> &lhs, MonomialBasis<dim, T, vector_type, Args...> const &rhs) { lhs << "MonomialBasis: size=" << rhs.size() << ", elements={ "; for (const auto &el : rhs.getElements()) @@ -55,58 +61,60 @@ public: } private: - void 
generateBasis(std::vector<unsigned int> m, unsigned int r); + void generateBasis(vector_type<unsigned int, Args...> m, unsigned int r); }; //// Definitions below -template<unsigned int dim> -MonomialBasis<dim>::MonomialBasis(const std::vector<unsigned int> °rees, unsigned int convergenceOrder) +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ MonomialBasis<dim, T, vector_type, Args...>::MonomialBasis(const vector_type<unsigned int, Args...> °rees, unsigned int convergenceOrder) { generateBasis(degrees, convergenceOrder); } -template<unsigned int dim> -MonomialBasis<dim>::MonomialBasis(unsigned int *degrees, unsigned int convergenceOrder) - : MonomialBasis(std::vector<unsigned int>(degrees, degrees + dim), convergenceOrder) {} +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ MonomialBasis<dim, T, vector_type, Args...>::MonomialBasis(unsigned int *degrees, unsigned int convergenceOrder) + : MonomialBasis(vector_type<unsigned int, Args...>(degrees, degrees + dim), convergenceOrder) {} -template<unsigned int dim> -MonomialBasis<dim>::MonomialBasis(const MonomialBasis &other) +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ MonomialBasis<dim, T, vector_type, Args...>::MonomialBasis(const MonomialBasis &other) { - basis = other.basis; // Here it works because both std::vector and Monomial perform a deep copy. + basis = other.basis; // Here it works because both vector_type and Monomial perform a deep copy. } -template<unsigned int dim> -MonomialBasis<dim> &MonomialBasis<dim>::operator=(const MonomialBasis &other) +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ MonomialBasis<dim, T, vector_type, Args...> &MonomialBasis<dim, T, vector_type, Args...>::operator=(const MonomialBasis &other) { - basis = other.basis; // Here it works because both std::vector and Monomial perform a deep copy. + basis = other.basis; // Here it works because both vector_type and Monomial perform a deep copy. return *this; } -template<unsigned int dim> -unsigned int MonomialBasis<dim>::size() const +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ unsigned int MonomialBasis<dim, T, vector_type, Args...>::size() const { return basis.size(); } -template<unsigned int dim> -const Monomial<dim> &MonomialBasis<dim>::getElement(unsigned int i) const +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ const T &MonomialBasis<dim, T, vector_type, Args...>::getElement(size_t i) const { - return basis[i]; + return basis.get(i); } -template<unsigned int dim> -Monomial<dim> &MonomialBasis<dim>::getElement(unsigned int i) +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... 
Args> +__host__ __device__ T &MonomialBasis<dim, T, vector_type, Args...>::getElement(size_t i) { - return basis[i]; + return basis.get(i); } -template<unsigned int dim> -void MonomialBasis<dim>::generateBasis(std::vector<unsigned int> m, unsigned int r) +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +void MonomialBasis<dim, T, vector_type, Args...>::generateBasis(vector_type<unsigned int, Args...> m, unsigned int r) { // Compute the vector of actual dimensions to iterate over // NOTE: each index can go up to sum(m)+r - unsigned int mSum = std::accumulate(m.begin(), m.end(), 0U); + unsigned int mSum = 0U; + for (size_t i = 0; i < m.size(); ++i) mSum += m.get(i); + unsigned int orderLimit = mSum + r; size_t dimensions[dim]; std::fill(dimensions, dimensions + dim, orderLimit); @@ -125,47 +133,55 @@ void MonomialBasis<dim>::generateBasis(std::vector<unsigned int> m, unsigned int // Finally compute alpha_min unsigned char alphaMin = static_cast<unsigned char>(!(mSum % 2)); // if mSum is even, alpha_min must be 1 + if(mSum==0) + { + alphaMin = 0; + } //std::cout<<"AlphaMin: "<<alphaMin<<std::endl; //unsigned char alphaMin = 0; // we want to always have 1 in the basis while (it.isNext()) { Point<dim, long int> p = it.get().get_k(); - Monomial<dim> candidateBasisElement(p); + T candidateBasisElement(p); // Filter out the elements which don't fullfil the theoretical condition for being in the vandermonde matrix if (candidateBasisElement.order() < orderLimit && candidateBasisElement.order() >= alphaMin) { - basis.push_back(candidateBasisElement); + basis.add(candidateBasisElement); } ++it; } } -template<unsigned int dim> -const std::vector<Monomial<dim>> &MonomialBasis<dim>::getElements() const +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ const vector_type<T, Args...> &MonomialBasis<dim, T, vector_type, Args...>::getElements() const { return basis; } -template<unsigned int dim> -MonomialBasis<dim> MonomialBasis<dim>::getDerivative(const Point<dim, unsigned int> differentialOrder) const +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +__host__ __device__ MonomialBasis<dim, T, vector_type, Args...> MonomialBasis<dim, T, vector_type, Args...>::getDerivative(const Point<dim, unsigned int> differentialOrder) const { - std::vector<Monomial<dim>> derivatives; - for (const auto &monomial : getElements()) + vector_type<T, Args...> derivatives; + + for (size_t i = 0; i < basis.size(); ++i) { - derivatives.push_back(monomial.getDerivative(differentialOrder)); + // used insted of rhs ref as it does swap internally (not supported by Monomial) + T d = basis.get(i).getDerivative(differentialOrder); + derivatives.add(d); } - return MonomialBasis<dim>(derivatives); + + return MonomialBasis<dim, T, vector_type, Args...>(derivatives); } -template<unsigned int dim> -bool MonomialBasis<dim>::operator==(const MonomialBasis &other) const +template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... 
Args> +__host__ __device__ bool MonomialBasis<dim, T, vector_type, Args...>::operator==(const MonomialBasis &other) const { return basis == other.basis; } -//template<unsigned int dim> -//MonomialBasis<dim>::MonomialBasis(Point<dim, unsigned int> degrees, unsigned int convergenceOrder) +//template<unsigned int dim, typename T, template<typename, template<typename...> class...> class vector_type, template<typename...> class... Args> +// __host__ __device__ //MonomialBasis<dim, T, vector_type, Args...>::MonomialBasis(Point<dim, unsigned int> degrees, unsigned int convergenceOrder) // : MonomialBasis(degrees.asArray(), convergenceOrder) {} #endif //OPENFPM_PDATA_MONOMIALBASIS_H diff --git a/src/DCPSE/Support.hpp b/src/DCPSE/Support.hpp index bcb5bc66..49282ab8 100644 --- a/src/DCPSE/Support.hpp +++ b/src/DCPSE/Support.hpp @@ -18,17 +18,22 @@ class Support private: size_t referencePointKey; - std::vector<size_t> keys; + openfpm::vector_std<size_t> keys; public: Support() {}; - Support(const size_t &referencePoint, const std::vector<size_t> &keys) + Support(const size_t &referencePoint, const openfpm::vector_std<size_t> &keys) :referencePointKey(referencePoint), keys(keys) {} + Support(const size_t &referencePoint, const std::vector<size_t> &keys) + :referencePointKey(referencePoint), + keys(keys.begin(), keys.end()) + {} + Support(const Support &other) : referencePointKey(other.referencePointKey), keys(other.keys) @@ -44,10 +49,44 @@ public: return referencePointKey; } - const std::vector<size_t> &getKeys() const + const openfpm::vector_std<size_t> &getKeys() const { return keys; } + + openfpm::vector_std<size_t> &getKeys() + { + return keys; + } + + static bool pack() + { + return true; + } + + static bool packRequest() + { + return true; + } + + template<int ... prp> inline void packRequest(size_t & req) const + { + req += sizeof(size_t); + keys.packRequest(req); + } + + template<int ... prp> inline void pack(ExtPreAlloc<HeapMemory> & mem, Pack_stat & sts) const + { + Packer<size_t,HeapMemory>::pack(mem,referencePointKey,sts); + keys.template pack<prp ...>(mem,sts); + } + + template<unsigned int ... 
prp, typename MemType> inline void unpack(ExtPreAlloc<MemType> & mem, Unpack_stat & ps) + { + Unpacker<size_t,MemType>::unpack(mem,referencePointKey,ps); + keys.template unpack<prp ...>(mem,ps); + } + }; diff --git a/src/DCPSE/SupportBuilder.cuh b/src/DCPSE/SupportBuilder.cuh new file mode 100644 index 00000000..363741b7 --- /dev/null +++ b/src/DCPSE/SupportBuilder.cuh @@ -0,0 +1,146 @@ +// +// Created by Serhii +// + +#ifndef OPENFPM_PDATA_SUPPORTBUILDER_CUH +#define OPENFPM_PDATA_SUPPORTBUILDER_CUH + +#include <Space/Shape/Point.hpp> +#include <Vector/vector_dist.hpp> +#include "Support.hpp" +#include <utility> +#include "SupportBuilder.hpp" + + +template <unsigned int dim> +__device__ __host__ bool nextCell(size_t (&offset)[dim], size_t maxOffset) { + size_t i = 0; + + while (i < dim) { + if ((++offset[i++])/maxOffset) + for (size_t j = 0; j < i; ++j) + offset[j] = 0; + else + return true; + } + return false; +} + +template<unsigned int dim, typename T, typename particles_type, typename CellList_type, typename supportSize_type> +__global__ void gatherSupportSize_gpu( + particles_type particles, CellList_type cl, supportSize_type supportSize, T rCut) { + auto p_key = GET_PARTICLE(particles); + Point<dim, T> pos = particles.getPos(p_key); + auto cell = cl.getCellGrid(pos); + + size_t grSize[dim]; cl.getGrid().getSize(grSize); + size_t offset[dim]; for (int i = 0; i < dim; ++i) offset[i] = 0; + grid_key_dx<dim> middle; for (int i = 0; i < dim; ++i) middle.set_d(i,1); + + size_t N = 0; + do { + auto key=grid_key_dx<dim>(offset); key=cell+key-middle; + + for (size_t i = 0; i < dim; ++i) + if (key.value(i) < 0 || key.value(i) >= grSize[i]) + continue; + + mem_id id = cl.getGrid().LinId(key); + const size_t cellLinId = static_cast<size_t>(id); + const size_t elemsInCell = cl.getNelements(cellLinId); + + for (size_t k = 0; k < elemsInCell; ++k) { + size_t el = cl.get(cellLinId, k); + + if (p_key == el) continue; + if (pos.distance(particles.getPosOrig(el)) < rCut) ++N; + } + + } while (nextCell<dim>(offset, 2+1)); + + supportSize.get(p_key) = N; +} + +template<unsigned int dim, typename T, typename particles_type, typename CellList_type, typename supportKey_type> +__global__ void assembleSupport_gpu(particles_type particles, CellList_type cl, supportKey_type supportSize, supportKey_type supportKeys1D, T rCut) { + auto p_key = GET_PARTICLE(particles); + Point<dim, T> pos = particles.getPos(p_key); + auto cell = cl.getCellGrid(pos); + + size_t supportKeysSize = supportSize.get(p_key+1)-supportSize.get(p_key); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[supportSize.get(p_key)]; + + size_t grSize[dim]; cl.getGrid().getSize(grSize); + size_t offset[dim]; for (int i = 0; i < dim; ++i) offset[i] = 0; + grid_key_dx<dim> middle; for (int i = 0; i < dim; ++i) middle.set_d(i,1); + + size_t N = 0; + do { + auto key=grid_key_dx<dim>(offset); key=cell+key-middle; + + for (size_t i = 0; i < dim; ++i) + if (key.value(i) < 0 || key.value(i) >= grSize[i]) + continue; + + mem_id id = cl.getGrid().LinId(key); + const size_t cellLinId = static_cast<size_t>(id); + const size_t elemsInCell = cl.getNelements(cellLinId); + + for (size_t k = 0; k < elemsInCell; ++k) { + size_t el = cl.get(cellLinId, k); + + if (p_key == el) continue; + if (pos.distance(particles.getPosOrig(el)) < rCut) supportKeys[N++] = el; + } + + } while (nextCell<dim>(offset, 2+1)); +} + + + +template<typename vector_type> +class SupportBuilderGPU +{ +private: + vector_type &domain; + typename vector_type::stype rCut; + +public: + 
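    // GPU support construction in two passes over the cell list: gatherSupportSize_gpu
    // counts the neighbours of every particle within rCut, the counts are converted to
    // prefix-sum offsets on the host, and assembleSupport_gpu then fills one flat array
    // of support keys at those offsets.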
SupportBuilderGPU(vector_type &domain, typename vector_type::stype rCut) + : domain(domain), rCut(rCut) {} + + void getSupport(size_t N, openfpm::vector_custd<size_t>& kerOffsets, openfpm::vector_custd<size_t>& supportKeys1D, + size_t& maxSupport, size_t& supportKeysTotalN) + { + domain.hostToDevicePos(); + auto it = domain.getDomainIteratorGPU(512); + typedef CellList_gen<vector_type::dims, typename vector_type::stype, Process_keys_lin, Mem_fast<CudaMemory>, shift<vector_type::dims, typename vector_type::stype>> params; + // auto NN = domain.getCellListGPU(rCut); + auto NN = domain.template getCellList<params>(rCut); + NN.hostToDevice(); + + + // +1 to allow getting size from cumulative sum: "size[i+1] - size[i]" + kerOffsets.resize(N+1); + gatherSupportSize_gpu<vector_type::dims><<<it.wthr,it.thr>>>(domain.toKernel(), NN.toKernel(), kerOffsets.toKernel(), rCut); + kerOffsets.template deviceToHost(); + + supportKeysTotalN = 0; maxSupport = 0; + + for (size_t i = 0; i < N; ++i) { + size_t sz = kerOffsets.get(i); + kerOffsets.get(i) = supportKeysTotalN; + supportKeysTotalN += sz; + if (maxSupport < sz) maxSupport = sz; + } + kerOffsets.get(N) = supportKeysTotalN; + + supportKeys1D.resize(supportKeysTotalN); + kerOffsets.template hostToDevice(); + assembleSupport_gpu<vector_type::dims><<<it.wthr,it.thr>>>(domain.toKernel(), NN.toKernel(), kerOffsets.toKernel(), supportKeys1D.toKernel(), rCut); + supportKeys1D.template deviceToHost(); + } +}; + + +#endif //OPENFPM_PDATA_SUPPORTBUILDER_CUH diff --git a/src/DCPSE/SupportBuilder.hpp b/src/DCPSE/SupportBuilder.hpp index 7702d434..8d83f7f8 100644 --- a/src/DCPSE/SupportBuilder.hpp +++ b/src/DCPSE/SupportBuilder.hpp @@ -16,240 +16,232 @@ enum support_options { - N_PARTICLES, - RADIUS + N_PARTICLES, + RADIUS, + LOAD, + ADAPTIVE }; -template<typename vector_type> -class SupportBuilder -{ + +template<typename vector_type,typename vector_type2> +class SupportBuilder { private: - vector_type &domain; + vector_type &domainFrom; + vector_type2 &domainTo; decltype(std::declval<vector_type>().getCellList(0.0)) cellList; const Point<vector_type::dims, unsigned int> differentialSignature; - typename vector_type::stype rCut; + typename vector_type::stype rCut, MinSpacing, AdapFac=1; + bool is_interpolation; public: - SupportBuilder(vector_type &domain, Point<vector_type::dims, unsigned int> differentialSignature, typename vector_type::stype rCut); - SupportBuilder(vector_type &domain, unsigned int differentialSignature[vector_type::dims], typename vector_type::stype rCut); + SupportBuilder(vector_type &domainFrom, vector_type2 &domainTo, + const Point<vector_type::dims, unsigned int> differentialSignature, + typename vector_type::stype rCut, + bool is_interpolation) + : domainFrom(domainFrom), + domainTo(domainTo), + differentialSignature(differentialSignature), + rCut(rCut), is_interpolation(is_interpolation) { + cellList = domainFrom.getCellList(rCut); + } + + SupportBuilder(vector_type &domainFrom, vector_type2 &domainTo, + unsigned int differentialSignature[vector_type::dims], typename vector_type::stype rCut, + bool is_interpolation) + : SupportBuilder(domainFrom, domainTo, Point<vector_type::dims, unsigned int>(differentialSignature), + rCut) {} template<typename iterator_type> - Support getSupport(iterator_type itPoint, unsigned int requiredSize, support_options opt) - { + Support getSupport(iterator_type itPoint, unsigned int requiredSize, support_options opt) { // Get spatial position from point iterator vect_dist_key_dx p = itPoint.get(); 
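        // p is the key in the iterator's current ordering, pOrig the key in the original
        // ordering; the assembled support is stored under the origin key further below.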
vect_dist_key_dx pOrig = itPoint.getOrig(); - Point<vector_type::dims, typename vector_type::stype> pos = domain.getPos(p.getKey()); + Point<vector_type::dims, typename vector_type::stype> pos = domainTo.getPos(p.getKey()); // Get cell containing current point and add it to the set of cell keys - grid_key_dx<vector_type::dims> curCellKey = cellList.getCellGrid(pos); // Here get the key of the cell where the current point is + grid_key_dx<vector_type::dims> curCellKey = cellList.getCellGrid( + pos); // Here get the key of the cell where the current point is std::set<grid_key_dx<vector_type::dims>> supportCells; supportCells.insert(curCellKey); // Make sure to consider a set of cells providing enough points for the support - enlargeSetOfCellsUntilSize(supportCells, requiredSize + 1,opt); // NOTE: this +1 is because we then remove the point itself + enlargeSetOfCellsUntilSize(supportCells, requiredSize + 1, + opt); // NOTE: this +1 is because we then remove the point itself // Now return all the points from the support into a vector - std::vector<size_t> supportKeys = getPointsInSetOfCells(supportCells,p,pOrig,requiredSize,opt); - - auto p_o = domain.getOriginKey(p.getKey()); - std::remove(supportKeys.begin(), supportKeys.end(), p_o.getKey()); - return Support(p_o.getKey(), supportKeys); - } - -private: - size_t getCellLinId(const grid_key_dx<vector_type::dims> &cellKey); - - size_t getNumElementsInCell(const grid_key_dx<vector_type::dims> &cellKey); + std::vector<size_t> supportKeys = getPointsInSetOfCells(supportCells, p, pOrig, requiredSize, opt); - size_t getNumElementsInSetOfCells(const std::set<grid_key_dx<vector_type::dims>> &set); - - void enlargeSetOfCellsUntilSize(std::set<grid_key_dx<vector_type::dims>> &set, unsigned int requiredSize,support_options opt); - - std::vector<size_t> getPointsInSetOfCells(std::set<grid_key_dx<vector_type::dims>> set, vect_dist_key_dx & p, vect_dist_key_dx & pOrig, size_t requiredSupportSize, support_options opt); + if (is_interpolation == false) { + auto p_o = domainFrom.getOriginKey(p.getKey()); + std::remove(supportKeys.begin(), supportKeys.end(), p_o.getKey()); + } - bool isCellKeyInBounds(grid_key_dx<vector_type::dims> key); -}; + auto p_o = domainTo.getOriginKey(p.getKey()); + return Support(p_o.getKey(), openfpm::vector_std<size_t>(supportKeys.begin(), supportKeys.end())); + } -// Method definitions below + typename vector_type::stype getLastMinspacing() { + return this->MinSpacing; + } -template<typename vector_type> -SupportBuilder<vector_type>::SupportBuilder(vector_type &domain, const Point<vector_type::dims, unsigned int> differentialSignature, - typename vector_type::stype rCut) -:domain(domain), - differentialSignature(differentialSignature), - rCut(rCut) -{ - cellList = domain.getCellList(rCut); -} + void setAdapFac(typename vector_type::stype fac) { + this->AdapFac=fac; + } +private: -template<typename vector_type> -size_t SupportBuilder<vector_type>::getNumElementsInCell(const grid_key_dx<vector_type::dims> &cellKey) -{ - const size_t curCellId = getCellLinId(cellKey); - size_t numElements = cellList.getNelements(curCellId); - return numElements; -} + size_t getCellLinId(const grid_key_dx<vector_type::dims> &cellKey) { + mem_id id = cellList.getGrid().LinId(cellKey); + return static_cast<size_t>(id); + } -template<typename vector_type> -size_t SupportBuilder<vector_type>::getNumElementsInSetOfCells(const std::set<grid_key_dx<vector_type::dims>> &set) -{ - size_t tot = 0; - for (const auto cell : set) - { - tot += 
getNumElementsInCell(cell); + size_t getNumElementsInCell(const grid_key_dx<vector_type::dims> &cellKey) { + const size_t curCellId = getCellLinId(cellKey); + size_t numElements = cellList.getNelements(curCellId); + return numElements; } - return tot; -} -template<typename vector_type> -void SupportBuilder<vector_type>::enlargeSetOfCellsUntilSize(std::set<grid_key_dx<vector_type::dims>> &set, unsigned int requiredSize, - support_options opt) -{ - if (opt==support_options::RADIUS){ - auto cell=*set.begin(); - grid_key_dx<vector_type::dims> middle; - int n=std::ceil(rCut/cellList.getCellBox().getHigh(0)); - size_t sz[vector_type::dims]; - for (int i=0;i<vector_type::dims;i++) - { - sz[i]=2*n+1; - middle.set_d(i,n); - } - grid_sm<vector_type::dims,void> g(sz); - grid_key_dx_iterator<vector_type::dims> g_k(g); - while(g_k.isNext()) - { - auto key=g_k.get(); - key=cell+key-middle; - if (isCellKeyInBounds(key)) - { - set.insert(key); - } - ++g_k; + size_t getNumElementsInSetOfCells(const std::set<grid_key_dx<vector_type::dims>> &set) { + size_t tot = 0; + for (const auto cell: set) { + tot += getNumElementsInCell(cell); } + return tot; } - else{ - while (getNumElementsInSetOfCells(set) < 5.0*requiredSize) //Why 5*requiredSize? Becasue it can help with adaptive resolutions. - { - auto tmpSet = set; - for (const auto el : tmpSet) + + void enlargeSetOfCellsUntilSize(std::set<grid_key_dx<vector_type::dims>> &set, unsigned int requiredSize, + support_options opt) { + if (opt == support_options::RADIUS || opt == support_options::ADAPTIVE) { + auto cell = *set.begin(); + grid_key_dx<vector_type::dims> middle; + int n = std::ceil(rCut / cellList.getCellBox().getHigh(0)); + size_t sz[vector_type::dims]; + for (int i = 0; i < vector_type::dims; i++) { + sz[i] = 2 * n + 1; + middle.set_d(i, n); + } + grid_sm<vector_type::dims, void> g(sz); + grid_key_dx_iterator<vector_type::dims> g_k(g); + while (g_k.isNext()) { + auto key = g_k.get(); + key = cell + key - middle; + if (isCellKeyInBounds(key)) { + set.insert(key); + } + ++g_k; + } + } else { + while (getNumElementsInSetOfCells(set) < + 5.0 * requiredSize) //Why 5*requiredSize? Becasue it can help with adaptive resolutions. 
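+            // Gathering roughly 5x the requested number of candidate points leaves enough
+            // neighbours to pick from after sorting, which helps with adaptive resolutions.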
{ - for (unsigned int i = 0; i < vector_type::dims; ++i) - { - const auto pOneEl = el.move(i, +1); - const auto mOneEl = el.move(i, -1); - if (isCellKeyInBounds(pOneEl)) - { - set.insert(pOneEl); - } - if (isCellKeyInBounds(mOneEl)) - { - set.insert(mOneEl); + auto tmpSet = set; + for (const auto el: tmpSet) { + for (unsigned int i = 0; i < vector_type::dims; ++i) { + const auto pOneEl = el.move(i, +1); + const auto mOneEl = el.move(i, -1); + if (isCellKeyInBounds(pOneEl)) { + set.insert(pOneEl); + } + if (isCellKeyInBounds(mOneEl)) { + set.insert(mOneEl); + } } } - } + } } } -} - - -template<typename vector_type> -size_t SupportBuilder<vector_type>::getCellLinId(const grid_key_dx<vector_type::dims> &cellKey) -{ - mem_id id = cellList.getGrid().LinId(cellKey); - return static_cast<size_t>(id); -} - -template<typename vector_type> -std::vector<size_t> SupportBuilder<vector_type>::getPointsInSetOfCells(std::set<grid_key_dx<vector_type::dims>> set, - vect_dist_key_dx & p, - vect_dist_key_dx & pOrig, - size_t requiredSupportSize, - support_options opt) -{ - struct reord - { - typename vector_type::stype dist; - size_t offset; - - bool operator<(const reord & p) const - {return this->dist < p.dist;} - }; - - openfpm::vector<reord> rp; - std::vector<size_t> points; - Point<vector_type::dims,typename vector_type::stype> xp = domain.getPos(p); - for (const auto cellKey : set) - { - const size_t cellLinId = getCellLinId(cellKey); - const size_t elemsInCell = getNumElementsInCell(cellKey); - for (size_t k = 0; k < elemsInCell; ++k) - { - size_t el = cellList.get(cellLinId, k); - if (pOrig.getKey() == el) {continue;} - - Point<vector_type::dims,typename vector_type::stype> xq = domain.getPosOrig(el); - //points.push_back(el); - - reord pr; - - pr.dist = xp.distance(xq); - pr.offset = el; - - rp.add(pr); + std::vector<size_t> getPointsInSetOfCells(std::set<grid_key_dx<vector_type::dims>> set, + vect_dist_key_dx &p, + vect_dist_key_dx &pOrig, + size_t requiredSupportSize, + support_options opt) { + struct reord { + typename vector_type::stype dist; + size_t offset; + + bool operator<(const reord &p) const { return this->dist < p.dist; } + }; + + openfpm::vector<reord> rp; + std::vector<size_t> points; + Point<vector_type::dims, typename vector_type::stype> xp = domainTo.getPos(p); + for (const auto cellKey: set) { + const size_t cellLinId = getCellLinId(cellKey); + const size_t elemsInCell = getNumElementsInCell(cellKey); + for (size_t k = 0; k < elemsInCell; ++k) { + size_t el = cellList.get(cellLinId, k); + + if (pOrig.getKey() == el && is_interpolation == false) { continue; } + + Point<vector_type::dims, typename vector_type::stype> xq = domainFrom.getPosOrig(el); + //points.push_back(el); + + reord pr; + + pr.dist = xp.distance(xq); + pr.offset = el; + rp.add(pr); + } } - } - if (opt == support_options::RADIUS) - { - for (int i = 0 ; i < rp.size() ; i++) - { - if (rp.get(i).dist < rCut) - { - points.push_back(rp.get(i).offset); - } - } -/* #ifdef SE_CLASS1 - if (points.size()<requiredSupportSize) - { - std::cerr<<__FILE__<<":"<<__LINE__<<"Note that the DCPSE neighbourhood doesn't have asked no. particles (Increase the rCut or reduce the over_sampling factor)"; - std::cout<<"Particels asked (minimum*oversampling_factor): "<<requiredSupportSize<<". 
Particles Possible with given options:"<<points.size()<<"."<<std::endl; + if (opt == support_options::RADIUS) { + for (int i = 0; i < rp.size(); i++) { + if (rp.get(i).dist < rCut) { + points.push_back(rp.get(i).offset); + } + } + /* #ifdef SE_CLASS1 + if (points.size()<requiredSupportSize) + { + std::cerr<<__FILE__<<":"<<__LINE__<<"Note that the DCPSE neighbourhood doesn't have asked no. particles (Increase the rCut or reduce the over_sampling factor)"; + std::cout<<"Particels asked (minimum*oversampling_factor): "<<requiredSupportSize<<". Particles Possible with given options:"<<points.size()<<"."<<std::endl; + } + #endif*/ } - #endif*/ - } - else - { rp.sort(); - for (int i = 0 ; i < requiredSupportSize ; i++) - { - points.push_back(rp.get(i).offset); - } - } - - return points; -} + else if(opt == support_options::ADAPTIVE) { + MinSpacing = std::numeric_limits<double>::max(); + for (int i = 0; i < rp.size(); i++) { + if (MinSpacing > rp.get(i).dist && rp.get(i).dist != 0) { + MinSpacing = rp.get(i).dist; + } + } +#ifdef SE_CLASS1 + assert(MinSpacing !=0 && "You have multiple particles on the same position."); +#endif + for (int i = 0; i < rp.size(); i++) { + if (rp.get(i).dist < AdapFac * MinSpacing) { + points.push_back(rp.get(i).offset); + } + } + } + else { + rp.sort(); + for (int i = 0; i < requiredSupportSize; i++) { + points.push_back(rp.get(i).offset); + } + } -template<typename vector_type> -SupportBuilder<vector_type>::SupportBuilder(vector_type &domain, unsigned int *differentialSignature, typename vector_type::stype rCut) - : SupportBuilder(domain, Point<vector_type::dims, unsigned int>(differentialSignature), rCut) {} + //MinSpacing=MinSpacing/requiredSupportSize + return points; + } -template<typename vector_type> -bool SupportBuilder<vector_type>::isCellKeyInBounds(grid_key_dx<vector_type::dims> key) -{ - const size_t *cellGridSize = cellList.getGrid().getSize(); - for (size_t i = 0; i < vector_type::dims; ++i) + bool isCellKeyInBounds(grid_key_dx<vector_type::dims> key) { - if (key.value(i) < 0 || key.value(i) >= cellGridSize[i]) + const size_t *cellGridSize = cellList.getGrid().getSize(); + for (size_t i = 0; i < vector_type::dims; ++i) { - return false; + if (key.value(i) < 0 || key.value(i) >= cellGridSize[i]) + { + return false; + } } + return true; } - return true; -} +}; + #endif //OPENFPM_PDATA_SUPPORTBUILDER_HPP diff --git a/src/DCPSE/Vandermonde.hpp b/src/DCPSE/Vandermonde.hpp index f88f6224..c527200f 100644 --- a/src/DCPSE/Vandermonde.hpp +++ b/src/DCPSE/Vandermonde.hpp @@ -1,6 +1,6 @@ // // Created by tommaso on 21/03/19. 
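+// Note: computeEps() now sets eps = avgNeighbourSpacing / HOverEpsilon (HOverEpsilon
+// defaults to 0.5), and the minimum neighbour spacing found in the support is tracked
+// and exposed through getMinSpacing().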
-// +// Edited by Abhinav Singh on 24/01/2022 #ifndef OPENFPM_PDATA_VANDERMONDE_HPP #define OPENFPM_PDATA_VANDERMONDE_HPP @@ -14,22 +14,24 @@ class Vandermonde { private: const Point<dim, T> point; - std::vector<Point<dim, T>> offsets; + openfpm::vector_std<Point<dim, T>> offsets; const MonomialBasis<dim> monomialBasis; - T eps; + T eps,HOverEpsilon,minSpacing; public: /* Vandermonde(const Point<dim, T> &point, const std::vector<Point<dim, T>> &neighbours, const MonomialBasis<dim> &monomialBasis);*/ - template<typename vector_type> + template<typename vector_type, + typename vector_type2> Vandermonde(const Support &support, const MonomialBasis<dim> &monomialBasis, - const vector_type & particles) - : point(particles.getPosOrig(support.getReferencePointKey())), - monomialBasis(monomialBasis) + const vector_type & particlesFrom, + const vector_type2 & particlesTo,T HOverEpsilon=0.5) //0.5 for the test + : point(particlesTo.getPosOrig(support.getReferencePointKey())), + monomialBasis(monomialBasis),HOverEpsilon(HOverEpsilon) { - initialize(support,particles); + initialize(support,particlesFrom,particlesTo); } @@ -38,8 +40,11 @@ public: // Build the Vandermonde matrix, row-by-row VandermondeRowBuilder<dim, T> vrb(monomialBasis); unsigned int row = 0; - for (auto &offset : offsets) + + size_t N = offsets.size(); + for (size_t i = 0; i < N; ++i) { + const auto& offset = offsets.get(i); vrb.buildRow(M, row, offset, eps); ++row; } @@ -50,6 +55,10 @@ public: { return eps; } + T getMinSpacing() + { + return minSpacing; + } private: @@ -57,12 +66,20 @@ private: void computeEps(T factor) { T avgNeighbourSpacing = 0; - for (auto &offset : offsets) + minSpacing=std::numeric_limits<T>::max(); + size_t N = offsets.size(); + for (size_t i = 0; i < N; ++i) { + const auto& offset = offsets.get(i); + double dist=norm(offset); avgNeighbourSpacing += computeAbsSum(offset); + if(minSpacing>dist) + { + minSpacing=dist; + } } avgNeighbourSpacing /= offsets.size(); - eps = factor * avgNeighbourSpacing; + eps = avgNeighbourSpacing/factor; assert(eps != 0); } @@ -76,16 +93,16 @@ private: return absSum; } - template<typename vector_type> - void initialize(const Support &sup, const vector_type & particles) + template<typename vector_type, typename vector_type2> + void initialize(const Support &sup, const vector_type & particlesFrom, vector_type2 &particlesTo) { auto & keys = sup.getKeys(); for (int i = 0 ; i < keys.size() ; i++) { - Point<dim,T> p = particles.getPosOrig(sup.getReferencePointKey()); - p -= particles.getPosOrig(keys[i]); - offsets.push_back(p); + Point<dim,T> p = particlesTo.getPosOrig(sup.getReferencePointKey()); + p -= particlesFrom.getPosOrig(keys.get(i)); + offsets.add(p); } // First check that the number of points given is enough for building the Vandermonde matrix @@ -95,7 +112,7 @@ private: } // Compute eps for this point //factor here. This is C factor. - computeEps(2); + computeEps(HOverEpsilon); } }; diff --git a/src/DCPSE/VandermondeRowBuilder.hpp b/src/DCPSE/VandermondeRowBuilder.hpp index 5b893aea..2de39fa0 100644 --- a/src/DCPSE/VandermondeRowBuilder.hpp +++ b/src/DCPSE/VandermondeRowBuilder.hpp @@ -1,5 +1,6 @@ // // Created by tommaso on 22/03/19. 
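+// The row builder is now templated on the monomial-basis type (defaulting to
+// MonomialBasis<dim>), so alternative basis containers can reuse the same code.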
+// Modified by Serhii // #ifndef OPENFPM_PDATA_VANDERMONDEROW_HPP @@ -7,31 +8,32 @@ #include "MonomialBasis.hpp" -template <unsigned int dim, typename T> +template <unsigned int dim, typename T, typename MonomialBasis_type = MonomialBasis<dim>> class VandermondeRowBuilder { private: - const MonomialBasis<dim> monomialBasis; + const MonomialBasis_type& monomialBasis; public: - VandermondeRowBuilder(const MonomialBasis<dim> &monomialBasis) : monomialBasis(monomialBasis) {} + VandermondeRowBuilder(const MonomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} template <typename MatrixType> void buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps); }; -template<unsigned int dim, typename T> +template<unsigned int dim, typename T,typename MonomialBasis_type> template <typename MatrixType> -void VandermondeRowBuilder<dim, T>::buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps) +void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps) { - unsigned int col = 0; - for (auto& basisElement : monomialBasis.getElements()) + auto& basisElements = monomialBasis.getElements(); + + for (size_t col = 0; col < basisElements.size(); ++col) { - Monomial<dim> m = monomialBasis.getElement(col); + Monomial<dim> m = basisElements.get(col); M(row, col) = m.evaluate(x); M(row, col) /= openfpm::math::intpowlog(eps, m.order()); - ++col; } } + #endif //OPENFPM_PDATA_VANDERMONDEROW_HPP diff --git a/src/DCPSE/tests/Support_unit_tests.cpp b/src/DCPSE/tests/Support_unit_tests.cpp index 86fc3a5c..cb892c0b 100644 --- a/src/DCPSE/tests/Support_unit_tests.cpp +++ b/src/DCPSE/tests/Support_unit_tests.cpp @@ -53,18 +53,12 @@ BOOST_AUTO_TEST_SUITE(Support_tests) // Get spatial position from point iterator vect_dist_key_dx p = itPoint.get(); const auto pos = domain.getPos(p.getKey()); - //std::cout << "p=(" << pos[0] << "," << pos[1] << ")" << std::endl; -// BOOST_REQUIRE_CLOSE(pos[0], 0, 1e-16); -// BOOST_REQUIRE_CLOSE(pos[1], 0, 1e-16); - // Now that domain is built and populated, let's test SupportBuilder - // We use (0,0) as initial point - SupportBuilder<vector_dist<2, double, aggregate<double>>> supportBuilder(domain, {1,0}, 2*spacing[0]); + typedef vector_dist<2, double, aggregate<double>> vector_dist_type; + + SupportBuilder<vector_dist_type,vector_dist_type> supportBuilder(domain, domain, {1,0}, 2*spacing[0],false); auto support = supportBuilder.getSupport(itPoint, 6, support_options::N_PARTICLES); -// for (const auto &off : support.getOffsets()) -// { -// std::cout << off.toString() << std::endl; -// } + BOOST_REQUIRE_GE(support.size(), 6); } @@ -113,25 +107,15 @@ BOOST_AUTO_TEST_SUITE(Support_tests) // Get spatial position from point iterator vect_dist_key_dx p = itPoint.get(); const auto pos = domain.getPos(p.getKey()); - //std::cout << "p=(" << pos[0] << "," << pos[1] << ")" << std::endl; -// BOOST_REQUIRE_CLOSE(pos[0], 0, 1e-16); -// BOOST_REQUIRE_CLOSE(pos[1], 0, 1e-16); - // Now that domain is built and populated, let's test SupportBuilder - // We use (0,0) as initial point - SupportBuilder<vector_dist<2, double, aggregate<double>>> supportBuilder(domain, {2,2}, 2*spacing[0]); + typedef vector_dist<2, double, aggregate<double>> vector_dist_type; + + SupportBuilder<vector_dist_type,vector_dist_type> supportBuilder(domain, domain, {2,2}, 2*spacing[0],false); auto supportPoints = supportBuilder.getSupport(itPoint, 20, support_options::N_PARTICLES); -// for (const auto &k : supportPoints) -// { -// Point<2, double> 
pt = domain.getPos(k); -// std::cout << pt.toString() << std::endl; -// } + BOOST_REQUIRE_GE(supportPoints.size(), 20); } -// BOOST_AUTO_TEST_CASE(Support_CopyConstructor_test) -// { -// -// } + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/DCPSE/tests/Vandermonde_unit_tests.cpp b/src/DCPSE/tests/Vandermonde_unit_tests.cpp index 295b5505..dd133d66 100644 --- a/src/DCPSE/tests/Vandermonde_unit_tests.cpp +++ b/src/DCPSE/tests/Vandermonde_unit_tests.cpp @@ -109,7 +109,7 @@ BOOST_AUTO_TEST_SUITE(Vandermonde_tests) Support s(0,keys); // ...and get the matrix V - Vandermonde<2, double, EMatrix<double, Eigen::Dynamic, Eigen::Dynamic>> vandermonde(s, mb, parts); + Vandermonde<2, double, EMatrix<double, Eigen::Dynamic, Eigen::Dynamic>> vandermonde(s, mb, parts,parts); vandermonde.getMatrix(V); // Now build the matrix of expected values @@ -201,7 +201,7 @@ BOOST_AUTO_TEST_SUITE(Vandermonde_tests) Support s(0,keys); // ...and get the matrix V - Vandermonde<2, double, EMatrix<double, Eigen::Dynamic, Eigen::Dynamic>> vandermonde(s, mb, parts); + Vandermonde<2, double, EMatrix<double, Eigen::Dynamic, Eigen::Dynamic>> vandermonde(s, mb, parts,parts); vandermonde.getMatrix(V); // Now build the matrix of expected values diff --git a/src/FiniteDifference/FD_expressions.hpp b/src/FiniteDifference/FD_expressions.hpp index 89b6b822..d3499a02 100644 --- a/src/FiniteDifference/FD_expressions.hpp +++ b/src/FiniteDifference/FD_expressions.hpp @@ -8,9 +8,34 @@ #ifndef FD_EXPRESSIONS_HPP_ #define FD_EXPRESSIONS_HPP_ +template<typename T, typename Sfinae = void> +struct has_getGrid: std::false_type {}; + +template<typename T> +struct has_getGrid<T, typename Void<decltype(std::declval<T>().getGrid())>::type > : std::true_type +{}; + namespace FD { + template<bool cond, typename exp1, typename exp2> + struct first_or_second + { + static auto getGrid(const exp1 & o1, const exp2 & o2) -> decltype(o2.getGrid()) + { + return o2.getGrid(); + } + }; + + template<typename exp1, typename exp2> + struct first_or_second<true,exp1,exp2> + { + static auto getGrid(const exp1 & o1, const exp2 & o2) -> decltype(o1.getGrid()) + { + return o1.getGrid(); + } + }; + constexpr int NORM_EXPRESSION = 0; constexpr int STAG_EXPRESSION = 1; constexpr int GRID_COMP = 2; @@ -26,22 +51,22 @@ namespace FD struct grid_dist_expression_value_impl_func_scal { template<unsigned int prp, typename base_type, typename gtype> - static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out, int & c, int comp) + static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out, int & c) { if (c_where[i] != c_o1[i]) { int sign = (c_where[i] > c_o1[i])?1:-1; - grid_dist_expression_value_impl_func_scal<i-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c,comp); + grid_dist_expression_value_impl_func_scal<i-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c); long int x0 = k.getKeyRef().get(i); k.getKeyRef().set_d(i, x0 + sign); - grid_dist_expression_value_impl_func_scal<i-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c,comp); + grid_dist_expression_value_impl_func_scal<i-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c); k.getKeyRef().set_d(i, x0); } else { - grid_dist_expression_value_impl_func_scal<i-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c,comp); + grid_dist_expression_value_impl_func_scal<i-1>::template 
inte<prp,base_type>(g,k,c_where,c_o1,inte_out,c); } } }; @@ -50,7 +75,7 @@ namespace FD struct grid_dist_expression_value_impl_func_scal<0> { template<unsigned int prp, typename base_type, typename gtype> - static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out , int & c , int comp) + static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out , int & c) { if (c_where[0] != c_o1[0]) { @@ -91,12 +116,37 @@ namespace FD return inte; } + template<unsigned int prp, typename gtype> + static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1) + { + int c = 0; + base_type inte = 0; + + grid_dist_expression_value_impl_func_scal<gtype::dims-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte,c); + + inte /= c; + + return inte; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k) + { + return g.template getProp<prp>(k); + } + template<unsigned int prp, typename gtype> static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) { return g.template getProp<prp>(k); } + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k) -> decltype(g.template getProp<prp>(k)) + { + return g.template getProp<prp>(k); + } + template<unsigned int prp, typename gtype> static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) -> decltype(g.template getProp<prp>(k)) { @@ -109,7 +159,7 @@ namespace FD struct grid_dist_expression_value_impl_func_vec { template<unsigned int prp, typename base_type, typename gtype> - static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out, int & c, int comp) + static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out, int & c, const int (& comp)[1]) { if (c_where[i] != c_o1[i]) { @@ -133,24 +183,24 @@ namespace FD struct grid_dist_expression_value_impl_func_vec<0> { template<unsigned int prp, typename base_type, typename gtype> - static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out , int & c , int comp) + static void inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, base_type & inte_out , int & c , const int (& comp)[1]) { if (c_where[0] != c_o1[0]) { int sign = (c_where[0] > c_o1[0])?1:-1; - inte_out += g.template getProp<prp>(k)[comp]; + inte_out += g.template getProp<prp>(k)[comp[0]]; long int x0 = k.getKeyRef().get(0); k.getKeyRef().set_d(0, x0 + sign); - inte_out += g.template getProp<prp>(k)[comp]; + inte_out += g.template getProp<prp>(k)[comp[0]]; k.getKeyRef().set_d(0, x0); c += 2; } else { - inte_out += g.template getProp<prp>(k)[comp]; + inte_out += g.template getProp<prp>(k)[comp[0]]; c += 1; } } @@ -162,7 +212,7 @@ namespace FD typedef base_type type; template<unsigned int prp, typename gtype> - static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, int comp) + static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, const int (& 
comp)[1]) { int c = 0; base_type inte = 0; @@ -178,15 +228,142 @@ namespace FD } template<unsigned int prp, typename gtype> - static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0]; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[1]) + { + return g.template getProp<prp>(k)[comp[0]]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k) -> decltype(g.template getProp<prp>(k)[0]) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[1]) -> decltype(g.template getProp<prp>(k)[comp[0]]) + { + return g.template getProp<prp>(k)[comp[0]]; + } + }; + + + + + template<typename base_type, unsigned int N1,unsigned int N2> + struct grid_dist_expression_value_impl<base_type[N1][N2]> + { + typedef base_type type; + + template<unsigned int prp, typename gtype> + static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, const int (& comp)[2]) + { + int c = 0; + base_type inte = 0; + + grid_dist_expression_value_impl_func_vec<gtype::dims-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte,c,comp); + + if (c != 0) + {inte /= c;} + else + {inte = g.template getProp<prp>(k)[comp[0]][comp[1]];} + + return inte; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][0]; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[2]) + { + return g.template getProp<prp>(k)[comp[0]][comp[1]]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k) -> decltype(g.template getProp<prp>(k)[0][0]) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][0]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[2]) -> decltype(g.template getProp<prp>(k)[0][0]) + { + return g.template getProp<prp>(k)[comp[0]][comp[1]]; + } + }; + + template<typename base_type, unsigned int N1,unsigned int N2, unsigned int N3> + struct grid_dist_expression_value_impl<base_type[N1][N2][N3]> + { + typedef base_type type; + + template<unsigned int prp, typename gtype> + static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, const int (& comp)[3]) + { + int c = 0; + base_type inte = 0; + + grid_dist_expression_value_impl_func_vec<gtype::dims-1>::template inte<prp,base_type>(g,k,c_where,c_o1,inte,c,comp); + + if (c != 0) + {inte /= c;} + else + {inte = g.template getProp<prp>(k)[comp[0]][comp[1]][comp[2]];} + + return inte; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k) + 
{ + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][0][0]; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[2]) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][comp[0]][comp[1]]; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[3]) + { + return g.template getProp<prp>(k)[comp[0]][comp[1]][comp[2]]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k) -> decltype(g.template getProp<prp>(k)[0][0][0]) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][0][0]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[2]) -> decltype(g.template getProp<prp>(k)[0][0][0]) { - return g.template getProp<prp>(k)[comp]; + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0][comp[1]][comp[0]]; } template<unsigned int prp, typename gtype> - static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) -> decltype(g.template getProp<prp>(k)[comp]) + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[3]) -> decltype(g.template getProp<prp>(k)[0][0][0]) { - return g.template getProp<prp>(k)[comp]; + return g.template getProp<prp>(k)[comp[0]][comp[1]][comp[2]]; } }; @@ -196,8 +373,10 @@ namespace FD typedef base_type type; template<unsigned int prp, typename gtype> - static base_type inte(gtype & g, grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, int comp) + static base_type inte(gtype & g, const grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1) { + int comp[1]; + printf("Error wrong expression please check the components"); int c = 0; base_type inte = 0; @@ -206,21 +385,53 @@ namespace FD if (c != 0) {inte /= c;} else - {inte = g.template getProp<prp>(k)[comp];} + {inte = g.template getProp<prp>(k)[0];} return inte; } template<unsigned int prp, typename gtype> - static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) + static base_type inte(gtype & g, const grid_dist_key_dx<gtype::dims> & k, comb<gtype::dims> & c_where, comb<gtype::dims> & c_o1, const int (& comp)[1]) { - return g.template getProp<prp>(k)[comp]; + int c = 0; + base_type inte = 0; + + grid_dist_key_dx<gtype::dims> k_ = k; + + grid_dist_expression_value_impl_func_vec<gtype::dims-1>::template inte<prp,base_type>(g,k_,c_where,c_o1,inte,c,comp); + + if (c != 0) + {inte /= c;} + else + {inte = g.template getProp<prp>(k)[comp[0]];} + + return inte; } template<unsigned int prp, typename gtype> - static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, int comp) -> decltype(g.template getProp<prp>(k)[comp]) + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k) { - return g.template getProp<prp>(k)[comp]; + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0]; + } + + template<unsigned int prp, typename gtype> + static base_type value_n(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[1]) + { + 
return g.template getProp<prp>(k)[comp[0]]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k, const int (& comp)[1]) -> decltype(g.template getProp<prp>(k)[comp[0]]) + { + return g.template getProp<prp>(k)[comp[0]]; + } + + template<unsigned int prp, typename gtype> + static auto value_ref(gtype & g, const grid_dist_key_dx<gtype::dims> & k) -> decltype(g.template getProp<prp>(k)[0]) + { + printf("Error wrong expression please check the components"); + return g.template getProp<prp>(k)[0]; } }; @@ -369,10 +580,22 @@ namespace FD * \return the result of the expression * */ - inline auto value(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, int comp = 0) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k,comp)) + inline auto value(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k)) + { + return grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + template<unsigned int nc> + inline auto value(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, const int (& comp)[nc]) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k,comp)) { return grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k,comp); -// return g.template getProp<prp>(k); } /*! \brief Evaluate the expression @@ -382,10 +605,22 @@ namespace FD * \return the result of the expression * */ - inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, int comp = 0) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp)) + inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k)) + { + return grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + template<unsigned int nc> + inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, const int (& comp)[nc]) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp)) { return grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp); -// return g.template getProp<prp>(k); } /*! 
\brief Fill the grid property with the evaluated expression @@ -395,12 +630,12 @@ namespace FD * \return itself * */ - template<unsigned int prp2> grid & operator=(const grid_dist_expression<prp2,grid,NORM_EXPRESSION> & g_exp) + template<unsigned int prp2,typename grid_type> grid & operator=(const grid_dist_expression<prp2,grid_type,NORM_EXPRESSION> & g_exp) { g_exp.init(); comb<grid::dims> s_pos; - s_pos.zero; + s_pos.zero(); auto it = g.getDomainIterator(); @@ -575,7 +810,21 @@ namespace FD * \return the result of the expression * */ - inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where,int comp = 0) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp)) + inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k)) + { + return grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k); + //return g.template getProp<prp>(k); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + template<unsigned int nc> + inline auto value_ref(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, const int (& comp)[nc]) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp)) { return grid_dist_expression_value_impl<type_proc>::template value_ref<prp>(g,k,comp); //return g.template getProp<prp>(k); @@ -588,11 +837,27 @@ namespace FD * \return the result of the expression * */ - inline auto value(grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, int comp = 0) const -> decltype(grid_dist_expression_value_impl<type_proc>::template inte<prp>(g,k,c_where,c_where,comp)) + inline auto value(grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where) const -> decltype(grid_dist_expression_value_impl<type_proc>::template inte<prp>(g,k,c_where,c_where)) { - comb<grid::dims> c_o1 = g.getStagPositions()[prp].get(comp); + comb<grid::dims> c_o1 = g.getStagPositions()[prp].get(0); + + return grid_dist_expression_value_impl<type_proc>::template inte<prp>(g,k,c_where,c_o1); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + template<unsigned int nc> + inline auto value(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, const int (& comp)[nc]) const -> decltype(grid_dist_expression_value_impl<type_proc>::template inte<prp>(g,k,c_where,c_where,comp)) + { + comb<grid::dims> c_o1 = g.getStagPositions()[prp].get(comp[0]); return grid_dist_expression_value_impl<type_proc>::template inte<prp>(g,k,c_where,c_o1,comp); +// return g.template getProp<prp>(k); } /*! \brief Fill the grid property with the evaluated expression @@ -1136,9 +1401,9 @@ namespace FD * \return the grid * */ - gtype & getGrid() + auto getGrid() -> decltype(first_or_second<has_getGrid<exp1>::value,exp1,exp2>::getGrid(o1,o2)) { - return o1.getGrid(); + return first_or_second<has_getGrid<exp1>::value,exp1,exp2>::getGrid(o1,o2); } /*! 
\brief Return the grid on which is acting @@ -1148,9 +1413,9 @@ namespace FD * \return the grid * */ - const gtype & getGrid() const + auto getGrid() const -> decltype(first_or_second<has_getGrid<exp1>::value,exp1,exp2>::getGrid(o1,o2)) { - return o1.getGrid(); + return first_or_second<has_getGrid<exp1>::value,exp1,exp2>::getGrid(o1,o2); } template<typename Sys_eqs, typename gmap_type, typename unordered_map_type> @@ -1220,17 +1485,27 @@ namespace FD template<typename exp_type> static int get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); return 0; } + template<typename exp_type> + static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) -> decltype(o1.value_ref(key,c_where)) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + return o1.value_ref(key,c_where); + } + template<unsigned int prop, typename exp_type, typename grid_type> inline static void assign(exp_type & o1, grid_type & g, const grid_dist_key_dx<exp_type::gtype::dims> & key) { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); } template<unsigned int prop, typename grid_type> inline static void assign_double(double d, grid_type & g, const grid_dist_key_dx<grid_type::dims> & key) { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); } }; @@ -1238,15 +1513,15 @@ namespace FD struct get_grid_dist_expression_op<1,true> { template<typename exp_type> - static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) -> decltype(o1.value(key,c_where,comp[0])) + static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) -> decltype(o1.value(key,c_where,comp) ) { - return o1.value(key,c_where,comp[0]); + return o1.value(key,c_where,comp); } template<typename exp_type> - static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) -> decltype(o1.value_ref(key,c_where,comp[0])) + static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[1]) -> decltype(o1.value_ref(key,c_where,comp) ) { - return o1.value_ref(key,c_where,comp[0]); + return o1.value_ref(key,c_where,comp); } template<unsigned int prop,typename exp_type, typename grid_type> @@ -1262,19 +1537,57 @@ namespace FD } }; + template<> + struct get_grid_dist_expression_op<2,false> + { + template<typename exp_type> + static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) -> decltype(o1.value(key,c_where,comp) ) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + return o1.value(key,c_where,comp); + } + + template<typename exp_type> + static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) -> decltype(o1.value_ref(key,c_where,comp) ) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + return o1.value_ref(key,c_where,comp); + } + + template<unsigned int prop,typename exp_type, typename grid_type> + inline static void assign(exp_type 
& o1, grid_type & g, grid_dist_key_dx<grid_type::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]] = o1.value(key,c_where); + } + + template<unsigned int prop, typename grid_type> + inline static void assign_double(double d, grid_type & g, const grid_dist_key_dx<grid_type::dims> & key, const int (& comp)[2]) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]] = d; + } + }; + template<> struct get_grid_dist_expression_op<2,true> { template<typename exp_type> - static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) -> decltype(o1.value(key,c_where)[0][0]) + static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) -> decltype(o1.value(key,c_where,comp) ) + { + return o1.value(key,c_where,comp); + } + + template<typename exp_type> + static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) -> decltype(o1.value_ref(key,c_where,comp) ) { - return o1.value(key)[comp[0]][comp[1]]; + return o1.value_ref(key,c_where,comp); } template<unsigned int prop,typename exp_type, typename grid_type> - inline static void assign(exp_type & o1, grid_type & g, const grid_dist_key_dx<grid_type::dims> & key, const int (& comp)[2]) + inline static void assign(exp_type & o1, grid_type & g, grid_dist_key_dx<grid_type::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[2]) { - pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]] = o1.value(key); + pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]] = o1.value(key,c_where); } template<unsigned int prop, typename grid_type> @@ -1284,6 +1597,34 @@ namespace FD } }; + template<> + struct get_grid_dist_expression_op<3,true> + { + template<typename exp_type> + static auto get(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[3]) -> decltype(o1.value(key,c_where,comp) ) + { + return o1.value(key,c_where,comp); + } + + template<typename exp_type> + static auto get_ref(exp_type & o1, grid_dist_key_dx<exp_type::gtype::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[3]) -> decltype(o1.value_ref(key,c_where,comp) ) + { + return o1.value_ref(key,c_where,comp); + } + + template<unsigned int prop,typename exp_type, typename grid_type> + inline static void assign(exp_type & o1, grid_type & g, grid_dist_key_dx<grid_type::dims> & key, comb<exp_type::gtype::dims> & c_where, const int (& comp)[3]) + { + pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]][comp[2]] = o1.value(key,c_where); + } + + template<unsigned int prop, typename grid_type> + inline static void assign_double(double d, grid_type & g, const grid_dist_key_dx<grid_type::dims> & key, const int (& comp)[3]) + { + pos_or_propL<grid_type,prop>::value(g,key)[comp[0]][comp[1]][comp[2]] = d; + } + }; + /*! 
\brief it take an expression and create the negatove of this expression * * @@ -1307,7 +1648,7 @@ namespace FD public: - typedef std::false_type is_ker; + typedef std::false_type is_ker; typedef typename exp1::gtype gtype; @@ -1423,7 +1764,7 @@ namespace FD o1.template value_nz<Sys_eqs>(g_map,key,gs,spacing,cols,coeff,comp_ + var_id + comp[0],c_where); } - inline grid_dist_expression_op<exp1,boost::mpl::int_<2>,g_comp> operator[](int comp_) + inline grid_dist_expression_op<exp1,boost::mpl::int_<n+1>,g_comp> operator[](int comp_) { int comp_n[n+1]; @@ -1431,7 +1772,7 @@ namespace FD {comp_n[i] = comp[i];} comp_n[n] = comp_; - grid_dist_expression_op<exp1,boost::mpl::int_<2>,g_comp> v_exp(o1,comp_n,var_id); + grid_dist_expression_op<exp1,boost::mpl::int_<n+1>,g_comp> v_exp(o1,comp_n,var_id); return v_exp; } @@ -1452,7 +1793,7 @@ namespace FD * \return itself * */ - template<unsigned int prp2, unsigned int impl> gtype & operator=(const grid_dist_expression<prp2,gtype,impl> & v_exp) + template<unsigned int prp2, typename gtype2, unsigned int impl> gtype & operator=(const grid_dist_expression<prp2,gtype2,impl> & v_exp) { v_exp.init(); @@ -1460,11 +1801,14 @@ namespace FD auto it = g.getDomainIterator(); + comb<gtype::dims> c_where; + c_where.zero(); + while (it.isNext()) { auto key = it.get(); - get_grid_dist_expression_op<n,n == rank_gen<property_act>::type::value>::template assign<exp1::prop>(v_exp,g,key,comp); + get_grid_dist_expression_op<n,n == rank_gen<property_act>::type::value>::template assign<exp1::prop>(v_exp,g,key,c_where,comp); ++it; } @@ -1561,8 +1905,368 @@ namespace FD return exp_g; } + +////// Specialization for temporal FD_expressions + + template<unsigned int dim> + struct gdb_ext_plus_g_info + { + grid_sm<dim,void> & ginfo_v; + + openfpm::vector<GBoxes<dim>> & gdb_ext; + + bool operator==(const gdb_ext_plus_g_info & tmp) + { + bool is_equal = gdb_ext.size() == tmp.gdb_ext.size(); + + for (int i = 0 ; i < gdb_ext.size() ; i++) + { + is_equal &= gdb_ext.get(i) == tmp.gdb_ext.get(i); + } + + is_equal &= ginfo_v == tmp.ginfo_v; + + return is_equal; + } + }; + + template<unsigned int dim> + class grid_dist_expression_iterator_to_make_algebra_work + { + //! Grid informations object without type + grid_sm<dim,void> & ginfo_v; + + //! The grid + openfpm::vector<grid_cpu<dim,aggregate<double>>> & loc_grid; + + openfpm::vector<GBoxes<dim>> & gdb_ext; + + typedef grid_cpu<dim,aggregate<double>> device_grid; + + public: + + static constexpr unsigned int dims = dim; + + grid_dist_expression_iterator_to_make_algebra_work(openfpm::vector<grid_cpu<dim,aggregate<double>>> & loc_grid, + openfpm::vector<GBoxes<dim>> & gdb_ext, + grid_sm<dim,void> & ginfo_v) + :loc_grid(loc_grid),gdb_ext(gdb_ext),ginfo_v(ginfo_v) + {} + + gdb_ext_plus_g_info<dim> size() + { + return gdb_ext_plus_g_info<dim>{ginfo_v,gdb_ext}; + } + + //Need more treatment for staggered (c_where based on exp) + template<unsigned int prp> + inline auto get(grid_dist_key_dx<dim> & key) -> decltype(loc_grid.get(key.getSub()).template get<0>(key.getKey())) + { + return loc_grid.get(key.getSub()).template get<0>(key.getKey()); + } + + + /*! \brief Return the number of local grid + * + * \return the number of local grid + * + */ + size_t getN_loc_grid() const + { + return loc_grid.size(); + } + + /*! \brief Get the i sub-domain grid + * + * \param i sub-domain + * + * \return local grid + * + */ + device_grid & get_loc_grid(size_t i) + { + return loc_grid.get(i); + } + + /*! 
\brief Get the i sub-domain grid + * + * \param i sub-domain + * + * \return local grid + * + */ + const device_grid & get_loc_grid(size_t i) const + { + return loc_grid.get(i); + } + + /*! \brief Get an object containing the grid informations without type + * + * \return an information object about this grid + * + */ + const grid_sm<dim,void> & getGridInfoVoid() const + { + return ginfo_v; + } + + /*! \brief It return the informations about the local grids + * + * \return The information about the local grids + * + */ + const openfpm::vector<GBoxes<device_grid::dims>> & getLocalGridsInfo() const + { + return gdb_ext; + } + + void resize(const gdb_ext_plus_g_info<dim> & input) + { + size_t Nloc_grid = input.gdb_ext.size(); + + loc_grid.resize(Nloc_grid); + + for (int i = 0 ; i < Nloc_grid; i++) + { + size_t sz[dim]; + + for (int j = 0 ; j < dim ; j++) {sz[j] = input.gdb_ext.get(i).GDbox.getKP2().get(j) + 1;} + + loc_grid.get(i).resize(sz); + } + + gdb_ext = input.gdb_ext; + ginfo_v = input.ginfo_v; + } + + grid_dist_iterator<dim,device_grid, + decltype(device_grid::type_of_subiterator()),FREE> getIterator() + { + grid_key_dx<dim> stop(ginfo_v.getSize()); + grid_key_dx<dim> one; + one.one(); + stop = stop - one; + + grid_dist_iterator<dim,device_grid, + decltype(device_grid::type_of_subiterator()), + FREE> it(loc_grid,gdb_ext,stop); + + return it; + } + }; + + template<typename patches> + struct grid_patches + { + static constexpr unsigned int dims = patches::dims; + + openfpm::vector<patches> loc_grid; + }; + + /*! \brief Main class that encapsulate a grid properties operand to be used for expressions construction + * + * \tparam prp property involved + * \tparam grid involved + * + */ + template<unsigned int dim> + class grid_dist_expression<0,grid_patches<grid_cpu<dim,aggregate<double>>>,NORM_EXPRESSION> + { + //! The grid + mutable grid_patches<grid_cpu<dim,aggregate<double>>> data; + + mutable openfpm::vector<GBoxes<dim>> gdb_ext; + + //! Grid informations object without type + mutable grid_sm<dim,void> ginfo_v; + + typedef double type_proc; + + template<typename super_general> + void operator_equal(super_general & g_exp) + { + g_exp.init(); + + resize(g_exp.getGrid()); + + comb<dim> s_pos; + s_pos.zero(); + + auto it = this->getVector().getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + data.loc_grid.get(key.getSub()).template get<0>(key.getKey()) = g_exp.value(key,s_pos); + + ++it; + } + } + + public: + + static constexpr unsigned int dims = dim; + + typedef grid_dist_key_dx<dim,grid_key_dx<dim>> index_type; + + //! The type of the internal grid + typedef grid_dist_expression_iterator_to_make_algebra_work<dim> gtype; + + //! Property id of the point + static const unsigned int prop = 0; + + grid_dist_expression() + {} + + gdb_ext_plus_g_info<dim> size() const + { + return gdb_ext_plus_g_info<dim>{ginfo_v,gdb_ext}; + } + + //! constructor for an external grid + template<typename grid> + grid_dist_expression(grid & g) + { + resize(g); + } + + template<typename grid> + void resize(grid & g) + { + size_t Nloc_grid = g.getN_loc_grid(); + + data.loc_grid.resize(Nloc_grid); + + for (int i = 0 ; i < Nloc_grid; i++) + { + data.loc_grid.get(i).resize(g.get_loc_grid(i).getGrid().getSize()); + } + + gdb_ext = g.getLocalGridsInfo(); + ginfo_v = g.getGridInfoVoid(); + } + + grid_dist_expression_iterator_to_make_algebra_work<dim> getVector() const + { + return grid_dist_expression_iterator_to_make_algebra_work<dim>(data.loc_grid,gdb_ext,ginfo_v); + } + + /*! 
\brief Return the grid on which is acting + * + * It return the grid used in getVExpr, to get this object + * + * \return the grid + * + */ + grid_dist_expression_iterator_to_make_algebra_work<dim> getGrid() + { + return getVector(); + } + + /*! \brief Return the grid on which is acting + * + * It return the grid used in getVExpr, to get this object + * + * \return the grid + * + */ + const grid_dist_expression_iterator_to_make_algebra_work<dim> getGrid() const + { + return getVector(); + } + + /*! \brief This function must be called before value + * + * it initialize the expression if needed + * + */ + inline void init() const + {} + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + inline double value(const grid_dist_key_dx<dim> & k, const comb<dim> & c_where = comb<dim>()) const + { + return data.loc_grid.get(k.getSub()).template get<0>(k.getKey()); + } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + // template<unsigned int nc> + // inline auto value(const grid_dist_key_dx<grid::dims> & k, comb<grid::dims> & c_where, const int (& comp)[nc]) const -> decltype(grid_dist_expression_value_impl<type_proc>::template value_n<prp>(g,k,comp)) + // { + // return loc_grid.get(k.getSub()).template get<0>(k.getKey()); + // } + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + inline double & value_ref(const grid_dist_key_dx<dim> & k, const comb<dim> & c_where = comb<dim>()) + { + return data.loc_grid.get(k.getSub()).template get<0>(k.getKey()); + } + + /*! \brief Fill the grid property with the evaluated expression + * + * \param v_exp expression to evaluate + * + * \return itself + * + */ + template<unsigned int prp2, typename grid> const grid & operator=(const grid_dist_expression<prp2,grid,NORM_EXPRESSION> & g_exp) + { + operator_equal(g_exp); + + return g_exp.getGrid(); + } + + /*! 
\brief Fill the grid property with the evaluated expression + * + * \param v_exp expression to evaluate + * + * \return itself + * + */ + template<typename exp1, typename exp2, typename op> auto operator=(const grid_dist_expression_op<exp1,exp2,op> & g_exp) -> decltype(g_exp.getGrid()) + { + operator_equal(g_exp); + + return g_exp.getGrid(); + } + + //Need more treatment for staggered (c_where based on exp) + inline double get(grid_dist_key_dx<dim> & key) + { + comb<dim> c_where; + c_where.zero(); + return this->value(key,c_where); + } + + int isConstant(){ + return false; + } + }; + }; + +template<unsigned int dim, typename T> using texp_g = FD::grid_dist_expression<0,FD::grid_patches<grid_cpu<dim,aggregate<T>>>,FD::NORM_EXPRESSION>; + /* \brief sum two distributed grid expression * * \param ga grid expression one diff --git a/src/FiniteDifference/FD_op_Tests.cpp b/src/FiniteDifference/FD_op_Tests.cpp index 1ca8c556..a5abf939 100644 --- a/src/FiniteDifference/FD_op_Tests.cpp +++ b/src/FiniteDifference/FD_op_Tests.cpp @@ -114,6 +114,105 @@ BOOST_AUTO_TEST_SUITE(fd_op_suite_tests) } + BOOST_AUTO_TEST_CASE(fd_op_tests_vec_mat) { + size_t edgeSemiSize = 80; + const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + periodicity<2> bc({NON_PERIODIC, NON_PERIODIC}); + double spacing[2]; + spacing[0] = 2 * M_PI / (sz[0] - 1); + spacing[1] = 2 * M_PI / (sz[1] - 1); + Ghost<2, long int> ghost(1); + + //std::cout << "Spacing: " << spacing[0] << " " << spacing[1] << std::endl; + + grid_dist_id<2, double, aggregate<double, double, double,double[2],double[2][2],double[2][2][2]>> domain(sz, box,ghost,bc); + + BOOST_TEST_MESSAGE("Init domain..."); + auto it = domain.getDomainIterator(); + while (it.isNext()) + { + auto key_l = it.get(); + auto key = it.getGKey(key_l); + mem_id i = key.get(0); + double x = i * spacing[0]; + mem_id j = key.get(1); + double y = j * spacing[1]; + // Here fill the function value P + domain.template getProp<3>(key_l)[0] = sin(x); + domain.template getProp<3>(key_l)[1] = sin(x); + domain.template getProp<1>(key_l) = 0; + // Here fill the validation value for Df/Dx in property 3 + + domain.template getProp<2>(key_l) = cos(x); + + ++it; + } + + domain.ghost_get<0,3>(); + + FD::Derivative_x Dx; + FD::Derivative_y Dy; + + auto v = FD::getV<1>(domain); + auto P = FD::getV<0>(domain); + + auto vec = FD::getV<3>(domain); + auto Mat = FD::getV<4>(domain); + auto Mat3 = FD::getV<5>(domain); + + Mat[0][1] = Dx(vec[0]); + + Mat[1][0] = vec[0]; + + domain.ghost_get<4>(); + + Mat[0][0] = Dx(Mat[1][0]); + Mat3[0][0][0] = Dx(vec[0]); + Mat3[0][1][0] = Dx(Mat[1][0]); + + auto it2 = domain.getDomainIterator(); + + double worst = 0.0; + + while (it2.isNext()) + { + auto p = it2.get(); + + if (fabs(domain.getProp<4>(p)[0][1] - domain.getProp<2>(p)) > worst) + { + worst = fabs(domain.getProp<4>(p)[0][1] - domain.getProp<2>(p)); + } + + if (fabs(domain.getProp<4>(p)[1][0] - domain.getProp<3>(p)[0]) > worst) + { + worst = fabs(domain.getProp<4>(p)[1][0] - domain.getProp<3>(p)[0]); + } + + if (fabs(domain.getProp<4>(p)[0][0] - domain.getProp<2>(p)) > worst) + { + worst = fabs(domain.getProp<4>(p)[0][0] - domain.getProp<2>(p)); + } + + + /////////////////////////// Mat 3 + + if (fabs(domain.getProp<5>(p)[0][0][0] - domain.getProp<2>(p)) > worst) + { + worst = fabs(domain.getProp<5>(p)[0][0][0] - domain.getProp<2>(p)); + } + + if (fabs(domain.getProp<5>(p)[0][1][0] - domain.getProp<2>(p)) > worst) + { + worst = 
fabs(domain.getProp<5>(p)[0][1][0] - domain.getProp<2>(p)); + } + + ++it2; + } + + BOOST_REQUIRE(worst < 0.003); + } + BOOST_AUTO_TEST_CASE(lalpacian_test) { size_t edgeSemiSize = 80; const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; diff --git a/src/Matrix/SparseMatrix_petsc.hpp b/src/Matrix/SparseMatrix_petsc.hpp index 55533924..24cba2a2 100644 --- a/src/Matrix/SparseMatrix_petsc.hpp +++ b/src/Matrix/SparseMatrix_petsc.hpp @@ -196,7 +196,7 @@ private: INSERT_VALUES)); } - PETSC_SAFE_CALL(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); + PETSC_SAFE_CALL(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); PETSC_SAFE_CALL(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); m_created = true; @@ -223,6 +223,7 @@ public: { PETSC_SAFE_CALL(MatCreate(PETSC_COMM_WORLD,&mat)); PETSC_SAFE_CALL(MatSetType(mat,MATMPIAIJ)); + PETSC_SAFE_CALL(MatSetFromOptions(mat)); PETSC_SAFE_CALL(MatSetSizes(mat,n_row_local,n_row_local,N1,N2)); Vcluster<> & v_cl = create_vcluster(); @@ -245,7 +246,9 @@ public: :g_row(0),g_col(0),l_row(0l),l_col(0),start_row(0) { PETSC_SAFE_CALL(MatCreate(PETSC_COMM_WORLD,&mat)); - PETSC_SAFE_CALL(MatSetType(mat,MATMPIAIJ)); + PETSC_SAFE_CALL(MatSetType(mat,MATMPIAIJ)); + PETSC_SAFE_CALL(MatSetFromOptions(mat)); + } ~SparseMatrix() diff --git a/src/OdeIntegrators/OdeIntegrators.hpp b/src/OdeIntegrators/OdeIntegrators.hpp index 1aad7092..29334842 100644 --- a/src/OdeIntegrators/OdeIntegrators.hpp +++ b/src/OdeIntegrators/OdeIntegrators.hpp @@ -16,10 +16,12 @@ struct has_state_vector: std::false_type {}; template<typename T> struct has_state_vector<T, typename Void< typename T::is_state_vector>::type> : std::true_type {}; + + namespace boost{ template<class T,class Enabler=typename std::enable_if<has_state_vector<T>::value>::type> - inline size_t - size(const T& rng) + inline auto + size(const T& rng) -> decltype(rng.size()) { return rng.size(); } @@ -27,7 +29,59 @@ namespace boost{ #include <boost/numeric/odeint.hpp> #include "Operators/Vector/vector_dist_operators.hpp" -#include "OdeIntegrators/boost_vector_algebra_ofp.hpp" +#include "FiniteDifference/FD_expressions.hpp" +#include "OdeIntegrators/vector_algebra_ofp.hpp" + +#ifdef __NVCC__ +#include "OdeIntegrators/vector_algebra_ofp_gpu.hpp" +/*! \brief A 1d Odeint and Openfpm compatible structure. + * + * Use the method this.data.get<d>() to refer to property of all the particles in the dimension d. + * + * d starts with 0. + * + */ +struct state_type_1d_ofp_ker{ + state_type_1d_ofp_ker(){ + } + typedef decltype(std::declval<texp_v_gpu<double>>().getVector().toKernel()) state_kernel; + typedef size_t size_type; + typedef int is_state_vector; + aggregate<state_kernel> data; + + __host__ __device__ size_t size() const + { return data.get<0>().size(); } + +}; +/*! \brief A 1d Odeint and Openfpm compatible structure. + * + * Use the method this.data.get<d>() to refer to property of all the particles in the dimension d. + * + * d starts with 0. 
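A minimal sketch of what the `has_state_vector` / `boost::size` machinery above expects from a state type (the struct name below is hypothetical and only illustrative): any type that exposes a nested `is_state_vector` typedef and a `size()` member is picked up by the SFINAE overload, and the deduced return type matters because the grid state types introduced further below report their size as an `FD::gdb_ext_plus_g_info` object rather than a plain `size_t`.

    // Hypothetical illustration: a type becomes an odeint-compatible state as soon as it
    // carries the is_state_vector marker and a size() member, like the state_type_*_ofp
    // structs in this header.
    struct my_toy_state {
        typedef int is_state_vector;          // marker detected by has_state_vector<T>
        size_t size() const { return 3; }     // forwarded by the boost::size() overload above
    };
    static_assert(has_state_vector<my_toy_state>::value, "detected through the Void<> SFINAE check");
    static_assert(!has_state_vector<double>::value, "plain types keep boost's default size()");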
+ * + */ +struct state_type_1d_ofp_gpu{ + state_type_1d_ofp_gpu(){ + } + typedef size_t size_type; + typedef int is_state_vector; + aggregate<texp_v_gpu<double>> data; + + size_t size() const + { return data.get<0>().size(); } + + void resize(size_t n) + { + data.get<0>().resize(n); + } + state_type_1d_ofp_ker toKernel() const + { + state_type_1d_ofp_ker s1_ker; + s1_ker.data.get<0>()=data.get<0>().getVector().toKernel(); + return s1_ker; + } +}; +#endif namespace boost { namespace numeric { namespace odeint { @@ -51,6 +105,7 @@ struct state_type_1d_ofp{ state_type_1d_ofp(){ } typedef size_t size_type; + typedef size_t index_type; typedef int is_state_vector; aggregate<texp_v<double>> data; @@ -74,6 +129,7 @@ struct state_type_2d_ofp{ state_type_2d_ofp(){ } typedef size_t size_type; + typedef size_t index_type; typedef int is_state_vector; aggregate<texp_v<double>,texp_v<double>> data; @@ -98,6 +154,7 @@ struct state_type_3d_ofp{ state_type_3d_ofp(){ } typedef size_t size_type; + typedef size_t index_type; typedef int is_state_vector; aggregate<texp_v<double>,texp_v<double>,texp_v<double>> data; @@ -123,6 +180,7 @@ struct state_type_4d_ofp{ state_type_4d_ofp(){ } typedef size_t size_type; + typedef size_t index_type; typedef int is_state_vector; aggregate<texp_v<double>,texp_v<double>,texp_v<double>,texp_v<double>> data; @@ -149,6 +207,7 @@ struct state_type_5d_ofp{ state_type_5d_ofp(){ } typedef size_t size_type; + typedef size_t index_type; typedef int is_state_vector; aggregate<texp_v<double>,texp_v<double>,texp_v<double>,texp_v<double>,texp_v<double>> data; @@ -166,15 +225,61 @@ struct state_type_5d_ofp{ } }; +template<int counter, typename state_type, typename ... list> +struct state_type_ofpm_add_elements +{ +// typedef aggregate<list ..., texp_v<double>> one_more; + typedef typename state_type_ofpm_add_elements<counter-1,state_type, state_type,list ...>::type type; +}; + +template<typename state_type, typename ... 
list> +struct state_type_ofpm_add_elements<0,state_type,list ...> +{ + typedef aggregate<list ...> type; +}; + +template<int n_state, typename state_type> +struct state_type_ofpm_impl +{ + typedef FD::gdb_ext_plus_g_info<state_type::dims> size_type; + typedef typename state_type::index_type index_type; + typedef int is_state_vector; + + typedef typename state_type_ofpm_add_elements<n_state-1,state_type, state_type>::type type_data; + + type_data data; + + FD::gdb_ext_plus_g_info<state_type::dims> size() const + { + return data.template get<0>().size(); + } + + + void resize(const FD::gdb_ext_plus_g_info<state_type::dims> & rsz_obj) + { + // to fill + } +}; + + namespace boost { namespace numeric { namespace odeint { + + // FOR particles + template<> struct is_resizeable<state_type_1d_ofp> { typedef boost::true_type type; static const bool value = type::value; }; - +#ifdef __NVCC__ + template<> + struct is_resizeable<state_type_1d_ofp_gpu> { + typedef boost::true_type type; + static const bool value = type::value; + }; +#endif template<> struct is_resizeable<state_type_2d_ofp> { typedef boost::true_type type; @@ -229,6 +334,98 @@ namespace boost { typedef double result_type; }; + // For GRIDs + + template<typename state_type> + struct is_resizeable<state_type_ofpm_impl<1,state_type> > { + typedef boost::true_type type; + static const bool value = type::value; + }; + + template<typename state_type> + struct is_resizeable<state_type_ofpm_impl<2,state_type> > { + typedef boost::true_type type; + static const bool value = type::value; + }; + + template<typename state_type> + struct is_resizeable<state_type_ofpm_impl<3,state_type> > { + typedef boost::true_type type; + static const bool value = type::value; + }; + + template<typename state_type> + struct is_resizeable<state_type_ofpm_impl<4,state_type> > { + typedef boost::true_type type; + static const bool value = type::value; + }; + + template<typename state_type> + struct is_resizeable<state_type_ofpm_impl<5,state_type> > { + typedef boost::true_type type; + static const bool value = type::value; + }; + +/* template<> + struct is_resizeable<state_type_2d_ofp> { + typedef boost::true_type type; + static const bool value = type::value; + }; + + template<> + struct is_resizeable<state_type_3d_ofp> { + typedef boost::true_type type; + static const bool value = type::value; + }; + template<> + struct is_resizeable<state_type_4d_ofp> { + typedef boost::true_type type; + static const bool value = type::value; + }; + template<> + struct is_resizeable<state_type_5d_ofp> { + typedef boost::true_type type; + static const bool value = type::value; + };*/ + + + +/* // template<unsigned int nprp, typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<nprp,state_type>> + { + typedef double result_type; + };*/ + + template<typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<1,state_type>> + { + typedef double result_type; + }; + + template<typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<2,state_type>> + { + typedef double result_type; + }; + + template<typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<3,state_type>> + { + typedef double result_type; + }; + + template<typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<4,state_type>> + { + typedef double result_type; + }; + + template<typename state_type> + struct vector_space_norm_inf<state_type_ofpm_impl<5,state_type>> + { + typedef double result_type; + }; + } } } diff --git 
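The recursive `state_type_ofpm_add_elements` helper above simply repeats the grid-expression type n times inside an `aggregate`, so `state_type_ofpm_impl<n,T>` carries n independent grid expressions. A purely illustrative sketch of the expansion, using the `texp_g` alias introduced earlier:

    using g_expr    = texp_g<2, double>;                  // one scalar field on a 2D grid
    using one_ode   = state_type_ofpm_impl<1, g_expr>;    // data is aggregate<g_expr>
    using three_ode = state_type_ofpm_impl<3, g_expr>;    // data is aggregate<g_expr,g_expr,g_expr>
    static_assert(std::is_same<three_ode::type_data,
                               aggregate<g_expr, g_expr, g_expr>>::value,
                  "the counter unrolls to n copies of the expression type");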
a/src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp b/src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp new file mode 100644 index 00000000..a365a46a --- /dev/null +++ b/src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp @@ -0,0 +1,576 @@ +// +// Created by foggia on 19th Jan 2022 +// It's a modification of Abhinav's test, adapted for grids +// + +#define BOOST_TEST_DYN_LINK + +#include <iostream> +#include <boost/test/unit_test.hpp> + +#include "config.h" +#include "Grid/grid_dist_id.hpp" +#include "OdeIntegrators/OdeIntegrators.hpp" +#include "FiniteDifference/FD_op.hpp" +#include "util/util_debug.hpp" +#include "util/common.hpp" + +const double a = 2.8e-4; +const double b = 5e-3; +const double tau = .1; +const double k = .005; +const int dim = 2; + +void *gridGlobal; +typedef grid_dist_id<2,double,aggregate<double,double,double,double,double,double>> grid_type; + +// State types for systems with different number of ODEs +typedef state_type_ofpm_impl<1,texp_g<dim,double>> state_type_1ode; +typedef state_type_ofpm_impl<2,texp_g<dim,double>> state_type_2ode; +typedef state_type_ofpm_impl<3,texp_g<dim,double>> state_type_3ode; + +template<typename DX, typename DY> +struct Fitz { + + DX & ddx; + DY & ddy; + + //Constructor + Fitz(DX & m_ddx, DY & m_ddy) : + ddx(m_ddx), + ddy(m_ddy) {} + + void operator()(const state_type_2ode & x, + state_type_2ode & dxdt, + const double t) const { + + grid_type & temp = *(grid_type *) gridGlobal; + auto u{FD::getV<4>(temp)}; + auto v{FD::getV<5>(temp)}; + u = x.data.get<0>(); + v = x.data.get<1>(); + + temp.ghost_get<4,5>(); + dxdt.data.get<0>() = ddx(u) + ddy(u) + (1.0); + dxdt.data.get<1>() = ddx(v) + ddy(v) + (2.0); + + // One point stay fixed + + auto key2 = temp.getDomainIterator().get(); + + if (create_vcluster().rank() == 0) { + dxdt.data.get<0>().value_ref(key2) = 0.0; + dxdt.data.get<1>().value_ref(key2) = 0.0; + } + + double u_max{0.0}; + double v_max{0.0}; + + auto it = temp.getDomainIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + if (u_max < dxdt.data.get<0>().value(key)) + u_max = dxdt.data.get<0>().value(key); + + if (v_max < dxdt.data.get<1>().value(key)) + v_max = dxdt.data.get<1>().value(key); + + ++it; + } + } +}; + +void Exponential_struct_ofp2(const state_type_3ode & x, + state_type_3ode & dxdt, + const double t) { + + // sytem: dx1/dt = x1 --> solution: x1(t) = exp(t) + // sytem: dx2/dt = 2*x2 --> solution: x2(t) = exp(2t) + dxdt.data.get<0>() = x.data.get<0>(); + dxdt.data.get<1>() = 2.0 * x.data.get<1>(); + dxdt.data.get<2>() = x.data.get<0>(); +} + + +void Exponential(const state_type_1ode & x, + state_type_1ode & dxdt, + const double t) { + + // sytem: dx/dt = x --> solution: x(t) = exp(t) + dxdt = x; +} + +// void sigmoid(const state_type_1ode & x, +// state_type_1ode & dxdt, +// const double t) { +// dxdt = x * (1.0 - x); +// } + +BOOST_AUTO_TEST_SUITE(odeInt_grid_tests) + +BOOST_AUTO_TEST_CASE(odeint_grid_test_exponential) { + + size_t edgeSemiSize{40}; + const size_t sz[dim] = {edgeSemiSize,edgeSemiSize}; + Box<dim,double> box{{0.0,0.0}, {1.0,1.0}}; + periodicity<dim> bc{{NON_PERIODIC,NON_PERIODIC}}; + double spacing[dim]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + Ghost<dim,long int> ghost{2}; + BOOST_TEST_MESSAGE("Test: exponential"); + BOOST_TEST_MESSAGE("Init grid_dist_id ..."); + + grid_dist_id<dim,double,aggregate<double,double,double>> grid{sz,box,ghost,bc}; + + auto it{grid.getDomainIterator()}; + while (it.isNext()) { + auto key = it.get(); + grid.template 
get<0>(key) = std::exp(0); // Initial state + grid.template get<1>(key) = std::exp(0.4); // Analytical solution + ++it; + } + grid.ghost_get<0>(); + + auto Init{FD::getV<0>(grid)}; // Initial state + auto Sol{FD::getV<1>(grid)}; // Analytical solution + auto OdeSol{FD::getV<2>(grid)}; // Numerical solution + + state_type_1ode x0; + x0.data.get<0>() = Init; + + double t{0.0}; + double tf{0.4}; + const double dt{0.1}; + + boost::numeric::odeint::runge_kutta4<state_type_1ode,double, + state_type_1ode,double, + boost::numeric::odeint::vector_space_algebra_ofp> rk4; // Time integrator + size_t steps{boost::numeric::odeint::integrate_const(rk4,Exponential,x0,0.0,tf,dt)}; + + OdeSol = x0.data.get<0>(); // Numerical solution + + // Error + auto it2{grid.getDomainIterator()}; + double worst{0.0}; + while (it2.isNext()) { + auto p{it2.get()}; + if (std::fabs(grid.template get<1>(p) - grid.template get<2>(p)) > worst) { + worst = std::fabs(grid.template get<1>(p) - grid.template get<2>(p)); + } + ++it2; + } + + std::cout << worst << std::endl; + BOOST_REQUIRE(worst < 1e-6); + + // Another way + x0.data.get<0>() = Init; + while (t < tf) { + rk4.do_step(Exponential,x0,t,dt); + OdeSol = x0.data.get<0>(); + t += dt; + } + + OdeSol = x0.data.get<0>(); + + // Error + auto it3{grid.getDomainIterator()}; + double worst2{0.0}; + while (it3.isNext()) { + auto p{it3.get()}; + if (std::fabs(grid.template get<1>(p) - grid.template get<2>(p)) > worst2) { + worst2 = fabs(grid.template get<1>(p) - grid.template get<2>(p)); + } + ++it3; + } + std::cout << worst2 << std::endl; + BOOST_REQUIRE(worst2 < 1e-6); + BOOST_REQUIRE_EQUAL(worst,worst2); +} + + + +BOOST_AUTO_TEST_CASE(odeint_grid_test_STRUCT_exponential) { + + size_t edgeSemiSize{40}; + const size_t sz[dim] = {edgeSemiSize,edgeSemiSize}; + Box<dim, double> box{{0.0,0.0},{1.0,1.0}}; + periodicity<dim> bc{{NON_PERIODIC,NON_PERIODIC}}; + double spacing[dim]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + Ghost<dim,long int> ghost{2}; + BOOST_TEST_MESSAGE("Test: exponential"); + BOOST_TEST_MESSAGE("Init grid_dist_id ..."); + + grid_dist_id<dim,double,aggregate<double,double,double,double,double,double>> grid{sz,box,ghost,bc}; + + auto it{grid.getDomainIterator()}; + while (it.isNext()) { + auto key = it.get(); + + grid.get<0>(key) = std::exp(0); + grid.template get<0>(key) = std::exp(0.0); // Initial state 1 + grid.template get<1>(key) = std::exp(0.4); // Analytical solution 1 + grid.template get<2>(key) = std::exp(0.0); // Initial state 2 + grid.template get<3>(key) = std::exp(0.8); // Analytical solution 2 + ++it; + } + grid.ghost_get<0>(); + + auto Init1{FD::getV<0>(grid)}; // Initial state 1 + auto Sol1{FD::getV<1>(grid)}; // Analytical solution 1 + + auto Init2{FD::getV<2>(grid)}; // Initial state 2 + auto Sol2{FD::getV<3>(grid)}; // Analytical solution 2 + + auto OdeSol1{FD::getV<4>(grid)}; // Numerical solution 1 + auto OdeSol2{FD::getV<5>(grid)}; // Numerical solution 2 + + state_type_3ode x0; + x0.data.get<0>() = Init1; + x0.data.get<1>() = Init2; + x0.data.get<2>() = Init1; + + double t{0}; + double tf{0.4}; + const double dt{0.1}; + + // size_t steps{boost::numeric::odeint::integrate(Exponential_struct,x0,0.0,tf,dt)}; + // size_t steps{boost::numeric::odeint::integrate_const(boost::numeric::odeint::runge_kutta4<state_type_3ode,double,state_type_3ode,double,boost::numeric::odeint::vector_space_algebra_ofp>(), + // Exponential_struct_ofp,x0,0.0,tf,dt)}; + + typedef 
boost::numeric::odeint::controlled_runge_kutta<boost::numeric::odeint::runge_kutta_cash_karp54<state_type_3ode,double,state_type_3ode,double,boost::numeric::odeint::vector_space_algebra_ofp>> stepper_type; + integrate_adaptive(stepper_type(),Exponential_struct_ofp2,x0,t,tf,dt); + + OdeSol1 = x0.data.get<0>(); + OdeSol2 = x0.data.get<1>(); + + // Error + auto it2{grid.getDomainIterator()}; + double worst{0.0}; + double worst2{0.0}; + while (it2.isNext()) { + auto p{it2.get()}; + if (std::fabs(grid.getProp<1>(p) - grid.getProp<4>(p)) > worst) + worst = std::fabs(grid.getProp<1>(p) - grid.getProp<4>(p)); + if (std::fabs(grid.getProp<3>(p) - grid.getProp<5>(p)) > worst2) + worst2 = std::fabs(grid.getProp<3>(p) - grid.getProp<5>(p)); + ++it2; + } + + std::cout << worst << " " << worst2 << std::endl; + BOOST_REQUIRE(worst < 1e-6); + BOOST_REQUIRE(worst2 < 1e-6); + + + // A different way + x0.data.get<0>() = Init1; + x0.data.get<1>() = Init2; + x0.data.get<2>() = Init1; + + boost::numeric::odeint::runge_kutta4<state_type_3ode,double,state_type_3ode,double,boost::numeric::odeint::vector_space_algebra_ofp> rk4; + while (t < tf) { + rk4.do_step(Exponential_struct_ofp2,x0,t,dt); + t+=dt; + } + + OdeSol1 = x0.data.get<0>(); + OdeSol2 = x0.data.get<1>(); + + // Error + auto it3{grid.getDomainIterator()}; + double worst3{0.0}; + double worst4{0.0}; + while (it3.isNext()) { + auto p{it3.get()}; + if (std::fabs(grid.getProp<1>(p) - grid.getProp<4>(p)) > worst3) + worst3 = std::fabs(grid.getProp<1>(p) - grid.getProp<4>(p)); + if (std::fabs(grid.getProp<3>(p) - grid.getProp<5>(p)) > worst4) + worst4 = std::fabs(grid.getProp<3>(p) - grid.getProp<5>(p)); + ++it3; + } + + std::cout << worst3 << " " << worst4 << std::endl; + BOOST_REQUIRE(worst3 < 1e-6); + BOOST_REQUIRE(worst4 < 5e-5); +} + + + + +BOOST_AUTO_TEST_CASE(odeint_grid_test2_exponential) { + + size_t edgeSemiSize{40}; + const size_t sz[dim] = {edgeSemiSize,edgeSemiSize}; + Box<dim,double> box{{0.0,0.0}, {1.0,1.0}}; + periodicity<dim> bc{{NON_PERIODIC,NON_PERIODIC}}; + double spacing[dim]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + Ghost<dim,long int> ghost{2}; + BOOST_TEST_MESSAGE("Test: exponential"); + BOOST_TEST_MESSAGE("Init grid_dist_id ..."); + + grid_dist_id<dim,double,aggregate<double,double,double>> grid{sz,box,ghost,bc}; + + double t{0.0}; + double tf{0.5}; + const double dt{0.1}; + + auto it{grid.getDomainIterator()}; + while (it.isNext()) { + auto key = it.get(); + grid.template get<0>(key) = std::exp(t); // Initial state + grid.template get<1>(key) = std::exp(tf); // Analytical solution + ++it; + } + grid.ghost_get<0>(); + + auto Init{FD::getV<0>(grid)}; // Initial state + auto Sol{FD::getV<1>(grid)}; // Analytical solution + auto OdeSol{FD::getV<2>(grid)}; // Numerical solution + + state_type_1ode x0; + x0.data.get<0>() = Init; + + typedef boost::numeric::odeint::controlled_runge_kutta<boost::numeric::odeint::runge_kutta_cash_karp54<state_type_1ode,double,state_type_1ode,double,boost::numeric::odeint::vector_space_algebra_ofp>> stepper_type; + + + integrate_adaptive(stepper_type(),Exponential,x0,t,tf,dt); + OdeSol = x0.data.get<0>(); // Numerical solution + + // Error + auto it2{grid.getDomainIterator()}; + double worst{0.0}; + while (it2.isNext()) { + auto p{it2.get()}; + if (std::fabs(grid.template get<1>(p) - grid.template get<2>(p)) > worst) { + worst = std::fabs(grid.template get<1>(p) - grid.template get<2>(p)); + } + ++it2; + } + + std::cout << worst << std::endl; + BOOST_REQUIRE(worst < 1e-6); + + // 
Another way + boost::numeric::odeint::runge_kutta4<state_type_1ode,double,state_type_1ode,double,boost::numeric::odeint::vector_space_algebra_ofp> rk4; + x0.data.get<0>() = Init; + for (size_t i = 0; i < int(tf/dt); ++i, t += dt) { + rk4.do_step(Exponential,x0,t,dt); + t += dt; + } + OdeSol = x0.data.get<0>(); + + // Error + auto it3{grid.getDomainIterator()}; + double worst2{0.0}; + while (it3.isNext()) { + auto p{it3.get()}; + if (std::fabs(grid.template get<1>(p) - grid.template get<2>(p)) > worst2) { + worst2 = fabs(grid.template get<1>(p) - grid.template get<2>(p)); + } + ++it3; + } + std::cout << worst2 << std::endl; + BOOST_REQUIRE(worst2 < 1e-6); + + // Yet another way + // x0.data.get<0>() = Init; + // integrate(rk4,Exponential,x0,t,tf,dt); + + // OdeSol = x0.data.get<0>(); + + // // Error + // auto it4{grid.getDomainIterator()}; + // double worst3{0.0}; + // while (it4.isNext()) { + // auto p{it4.get()}; + // if (std::fabs(grid.template get<1>(p) - grid.template get<2>(p)) > worst3) { + // worst3 = fabs(grid.template get<1>(p) - grid.template get<2>(p)); + // } + // ++it4; + // } + // std::cout << worst3 << std::endl; + // BOOST_REQUIRE(worst3 < 1e-6); + + // BOOST_REQUIRE_EQUAL(worst,worst2); + // BOOST_REQUIRE_EQUAL(worst2,worst3); +} + + + +// BOOST_AUTO_TEST_CASE(odeint_base_test3) +// { +// size_t edgeSemiSize = 40; +// const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; +// Box<2, double> box({ 0, 0 }, { 1.0, 1.0 }); +// size_t bc[2] = {NON_PERIODIC,NON_PERIODIC}; +// double spacing[2]; +// spacing[0] = 1.0 / (sz[0] - 1); +// spacing[1] = 1.0 / (sz[1] - 1); +// double rCut = 3.9 * spacing[0]; +// Ghost<2, double> ghost(rCut); +// BOOST_TEST_MESSAGE("Init vector_dist..."); + +// vector_dist<2, double, aggregate<double, double,double>> Particles(0, box, bc, ghost); + +// double t=0.0,tf=0.5; +// const double dt=0.1; + +// auto it = Particles.getGridIterator(sz); +// while (it.isNext()) +// { +// Particles.add(); +// auto key = it.get(); +// mem_id k0 = key.get(0); +// double xp0 = k0 * spacing[0]; +// Particles.getLastPos()[0] = xp0; +// mem_id k1 = key.get(1); +// double yp0 = k1 * spacing[1]; +// Particles.getLastPos()[1] = yp0; +// Particles.getLastProp<0>() = 1.0/(1.0+exp(-t)); // Carefull in putting the constant, f = A*sigmoid does not respect f' = f*(1.0-f) but f*(1.0-f/A), for simplicity I remove the constant +// Particles.getLastProp<1>() = 1.0/(1.0+exp(-tf)); // Carefull in putting the constant, f = A*sigmoid does not respect f' = f*(1.0-f) but f*(1.0-f/A), for simplicity I remove the constant +// ++it; +// } +// Particles.map(); +// Particles.ghost_get<0>(); +// auto Init = getV<0>(Particles); +// auto Sol = getV<1>(Particles); +// auto OdeSol = getV<2>(Particles); + +// state_type x0; +// x0=Init; +// // The rhs of x' = f(x) +// //size_t steps=boost::numeric::odeint::integrate(sigmoid,x0,0.0,tf,dt); +// //typedef boost::numeric::odeint::controlled_runge_kutta< boost::numeric::odeint::runge_kutta_cash_karp54< state_type > > stepper_type; +// //integrate_adaptive( stepper_type() , sigmoid , x0 , t , tf , dt); +// size_t steps=boost::numeric::odeint::integrate_const( boost::numeric::odeint::runge_kutta4< state_type >(),sigmoid,x0,0.0,tf,dt); + +// OdeSol=x0; +// auto it2 = Particles.getDomainIterator(); +// double worst = 0.0; +// while (it2.isNext()) { +// auto p = it2.get(); +// if (fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)) > worst) { +// worst = fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)); +// } +// ++it2; +// } + +// BOOST_REQUIRE(worst < 
1e-8); + +// x0=Init; +// boost::numeric::odeint::runge_kutta4< state_type > rk4; +// for( size_t i=0 ; i<int(tf/dt) ; ++i,t+=dt ) +// { +// rk4.do_step(sigmoid,x0,t,dt); +// t+=dt; +// } + +// OdeSol=x0; +// auto it3 = Particles.getDomainIterator(); +// double worst2 = 0.0; +// while (it3.isNext()) { +// auto p = it3.get(); +// if (fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)) > worst2) { +// worst2 = fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)); +// } +// ++it3; +// } +// //std::cout<<worst2<<std::endl; +// BOOST_REQUIRE(worst < 1e-6); +// BOOST_REQUIRE_EQUAL(worst,worst2); +// } + + + +#ifdef HAVE_EIGEN + +BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) { + + size_t edgeSemiSize{5}; + const size_t sz[dim] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<dim,double> box{{0.0, 0.0},{1.0, 1.0}}; + periodicity<dim> bc{{PERIODIC,PERIODIC}}; + double spacing[dim]; + spacing[0] = 1.0 / (sz[0]); + spacing[1] = 1.0 / (sz[1]); + Ghost<dim,double> ghost{spacing[0] * 3}; + + BOOST_TEST_MESSAGE("Test: reaction diffusion"); + BOOST_TEST_MESSAGE("Init grid_dist_id ..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + + // properties: u, v, du, dv + grid_dist_id<dim,double,aggregate<double,double,double,double,double,double>> domain{sz,box,ghost,bc}; + + auto it{domain.getDomainIterator()}; + while (it.isNext()) { + auto key{it.get()}; + domain.get<0>(key) = 0.0; // u + domain.get<1>(key) = 0.0; // v + domain.get<2>(key) = 0.0; // du/dt + domain.get<3>(key) = 0.0; // dv/dt + + auto gkey = it.getGKey(key); + + if (gkey.get(0)==sz[0] / 2 && gkey.get(1) == sz[1]/2) + { + domain.get<0>(key) = 1.0; + domain.get<1>(key) = 1.0; + } + ++it; + } + domain.ghost_get<0>(); + + FD::Derivative<0,2,2,FD::CENTRAL> ddx; + FD::Derivative<1,2,2,FD::CENTRAL> ddy; + + gridGlobal=(void *) & domain; + + auto u{FD::getV<0>(domain)}; + auto v{FD::getV<1>(domain)}; + auto fu{FD::getV<2>(domain)}; + auto fv{FD::getV<3>(domain)}; + + Fitz<decltype(ddx),decltype(ddy)> system(ddx,ddy); + state_type_2ode x0; + + x0.data.get<0>() = u; + x0.data.get<1>() = v; + + double dt{0.001}; + double t{0.0}; + double tf{10.5}; + + //typedef boost::numeric::odeint::controlled_runge_kutta< boost::numeric::odeint::runge_kutta_cash_karp54< state_type_2d_ofp,double,state_type_2d_ofp,double,boost::numeric::odeint::vector_space_algebra_ofp>> stepper_type; + typedef boost::numeric::odeint::runge_kutta4<state_type_2ode,double,state_type_2ode,double,boost::numeric::odeint::vector_space_algebra_ofp> stepper_type; + + integrate_adaptive(stepper_type(),system,x0,t,tf,dt); + fu = x0.data.get<0>(); + fv = x0.data.get<1>(); + + domain.ghost_get<2,3>(); + u = ddx(fu) + ddy(fu); + v = ddx(fv) + ddy(fv); + + auto it2{domain.getDomainIterator()}; + + if (create_vcluster().rank() == 0) + ++it2; + + while (it2.isNext()) { + auto p{it2.get()}; + BOOST_REQUIRE_CLOSE(domain.get<0>(p),-1.0,1); + ++it2; + } +} +#endif + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp b/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp index 9ff837a3..af12bcc6 100644 --- a/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp +++ b/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp @@ -15,8 +15,10 @@ #include "Vector/vector_dist_subset.hpp" #include "Decomposition/Distribution/SpaceDistribution.hpp" #include "OdeIntegrators/OdeIntegrators.hpp" +#ifdef HAVE_EIGEN #include "DCPSE/DCPSE_op/DCPSE_op.hpp" -#include "OdeIntegrators/boost_vector_algebra_ofp.hpp" +#endif 
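The grid tests above all follow the same recipe: load grid properties into a state type through `FD::getV`, hand the state to a stepper templated on `vector_space_algebra_ofp`, integrate, and write the result back into a grid property. A condensed sketch of that pattern, with `my_rhs` standing in for a user-supplied right-hand side with the usual odeint signature:

    // void my_rhs(const state_type_1ode & x, state_type_1ode & dxdt, double t);  // user-supplied
    auto Init   = FD::getV<0>(grid);     // initial condition stored in property 0
    auto OdeSol = FD::getV<2>(grid);     // numerical solution goes to property 2

    state_type_1ode x0;
    x0.data.get<0>() = Init;             // copy the field into the odeint state

    boost::numeric::odeint::runge_kutta4<
        state_type_1ode, double, state_type_1ode, double,
        boost::numeric::odeint::vector_space_algebra_ofp> rk4;
    boost::numeric::odeint::integrate_const(rk4, my_rhs, x0, 0.0, 0.4, 0.1);

    OdeSol = x0.data.get<0>();           // write the integrated field back to the grid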
+#include "OdeIntegrators/vector_algebra_ofp.hpp" typedef texp_v<double> state_type; const double a = 2.8e-4; @@ -93,7 +95,7 @@ void sigmoid( const state_type &x , state_type &dxdt , const double t ) BOOST_AUTO_TEST_SUITE(odeInt_BASE_tests) -BOOST_AUTO_TEST_CASE(odeint_base_test1) +BOOST_AUTO_TEST_CASE(odeint_base_test1) { size_t edgeSemiSize = 40; const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; @@ -180,7 +182,7 @@ BOOST_AUTO_TEST_CASE(odeint_base_test1) BOOST_REQUIRE_EQUAL(worst,worst2); } -BOOST_AUTO_TEST_CASE(odeint_base_test_STRUCT_ofp) +BOOST_AUTO_TEST_CASE(odeint_base_test_STRUCT_ofp) { size_t edgeSemiSize = 40; const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; @@ -293,7 +295,7 @@ BOOST_AUTO_TEST_CASE(odeint_base_test_STRUCT_ofp) } -BOOST_AUTO_TEST_CASE(odeint_base_test2) +BOOST_AUTO_TEST_CASE(odeint_base_test2) { size_t edgeSemiSize = 40; const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; @@ -381,7 +383,7 @@ BOOST_AUTO_TEST_CASE(odeint_base_test2) BOOST_REQUIRE(worst2 < 1e-6); } -BOOST_AUTO_TEST_CASE(odeint_base_test3) +BOOST_AUTO_TEST_CASE(odeint_base_test3) { size_t edgeSemiSize = 40; const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; @@ -464,9 +466,7 @@ BOOST_AUTO_TEST_CASE(odeint_base_test3) BOOST_REQUIRE_EQUAL(worst,worst2); } - #ifdef HAVE_EIGEN - BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) { size_t edgeSemiSize = 5; const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; @@ -495,7 +495,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) { size_t pointId = 0; size_t counter = 0; double minNormOne = 999; - while (it.isNext()) + while (it.isNext()) { domain.add(); auto key = it.get(); @@ -558,7 +558,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) { if (create_vcluster().rank() == 0) {++it2;} - while (it2.isNext()) + while (it2.isNext()) { auto p = it2.get(); @@ -568,5 +568,4 @@ BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) { } } #endif - BOOST_AUTO_TEST_SUITE_END() diff --git a/src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu b/src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu new file mode 100644 index 00000000..a1b2771d --- /dev/null +++ b/src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu @@ -0,0 +1,104 @@ +// +// Created by abhinav on 2/28/23. 
+// +#include "config.h" +#include <type_traits> +#include <cstring> +#include "util/common.hpp" + +#define BOOST_TEST_DYN_LINK + +#include "util/util_debug.hpp" +#include <boost/test/unit_test.hpp> +#include <iostream> +#include "Operators/Vector/vector_dist_operators.hpp" +#include "OdeIntegrators/OdeIntegrators.hpp" +//#include "DCPSE/DCPSE_op/DCPSE_op.hpp" +#ifdef __NVCC__ + +typedef state_type_1d_ofp_gpu state_type; +//const double a = 2.8e-4; +//const double b = 5e-3; +//const double tau = .1; +//const double k = .005; + +void ExponentialGPU( const state_type &x , state_type &dxdt , const double t ) +{ + dxdt.data.get<0>() = x.data.get<0>(); + //x.data.get<0>().getVector().deviceToHost<0>(); + //dxdt.data.get<0>().getVector().deviceToHost<0>(); +} + +BOOST_AUTO_TEST_SUITE(odeInt_BASE_tests) + +BOOST_AUTO_TEST_CASE(odeint_base_test_gpu) + { + size_t edgeSemiSize = 512; + const size_t sz[2] = {edgeSemiSize,edgeSemiSize }; + Box<2, double> box({ 0, 0 }, { 1.0, 1.0 }); + size_t bc[2] = { NON_PERIODIC, NON_PERIODIC }; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + + vector_dist_gpu<2, double, aggregate<double, double,double>> Particles(0, box, bc, ghost); + + auto it = Particles.getGridIterator(sz); + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double xp0 = k0 * spacing[0]; + Particles.getLastPos()[0] = xp0; + mem_id k1 = key.get(1); + double yp0 = k1 * spacing[1]; + Particles.getLastPos()[1] = yp0; + Particles.getLastProp<0>() = xp0*yp0*exp(-5); + Particles.getLastProp<1>() = xp0*yp0*exp(5); + ++it; + } + + Particles.map(); + Particles.ghost_get<0>(); + Particles.hostToDeviceProp<0,1,2>(); + auto Init = getV<0,comp_dev>(Particles); + auto Sol = getV<1,comp_dev>(Particles); + auto OdeSol = getV<2,comp_dev>(Particles); + + state_type x0; + x0.data.get<0>()=Init; + x0.data.get<0>().getVector().deviceToHost<0>(); + // The rhs of x' = f(x) + + double t0=-5,tf=5; + const double dt=0.01; + + + //This doesnt work Why? + //size_t steps=boost::numeric::odeint::integrate(Exponential,x0,0.0,tf,dt); + timer tt; + tt.start(); + size_t steps=boost::numeric::odeint::integrate_const( boost::numeric::odeint::runge_kutta4< state_type, double, state_type, double, boost::numeric::odeint::vector_space_algebra_ofp_gpu,boost::numeric::odeint::ofp_operations>(),ExponentialGPU,x0,t0,tf,dt); + tt.stop(); + OdeSol=x0.data.get<0>(); + Particles.deviceToHostProp<0,1,2>(); + auto it2 = Particles.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)) > worst) { + worst = fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)); + } + ++it2; + } + std::cout<<"WCT:"<<tt.getwct()<<std::endl; + std::cout<<"CPU:"<<tt.getcputime()<<std::endl; + std::cout<<worst<<std::endl; + BOOST_REQUIRE(worst < 1e-6); + } +BOOST_AUTO_TEST_SUITE_END() +#endif \ No newline at end of file diff --git a/src/OdeIntegrators/boost_vector_algebra_ofp.hpp b/src/OdeIntegrators/vector_algebra_ofp.hpp similarity index 86% rename from src/OdeIntegrators/boost_vector_algebra_ofp.hpp rename to src/OdeIntegrators/vector_algebra_ofp.hpp index 26447bea..5c01e81c 100644 --- a/src/OdeIntegrators/boost_vector_algebra_ofp.hpp +++ b/src/OdeIntegrators/vector_algebra_ofp.hpp @@ -2,126 +2,40 @@ // Created by Abhinav Singh on 18.02.21. 
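The GPU test above mirrors the CPU recipe with three extra steps: the properties are first copied to the device, the expressions are built with the `comp_dev` flag, and the stepper is parametrized on `vector_space_algebra_ofp_gpu` together with `ofp_operations`. A condensed sketch, with `my_rhs_gpu` as a placeholder right-hand side:

    Particles.hostToDeviceProp<0,1,2>();               // make the properties visible on the device
    auto Init   = getV<0,comp_dev>(Particles);         // device-side expressions
    auto OdeSol = getV<2,comp_dev>(Particles);

    state_type_1d_ofp_gpu x0;
    x0.data.get<0>() = Init;

    double t0 = -5.0, tf = 5.0;
    const double dt = 0.01;

    boost::numeric::odeint::runge_kutta4<
        state_type_1d_ofp_gpu, double, state_type_1d_ofp_gpu, double,
        boost::numeric::odeint::vector_space_algebra_ofp_gpu,
        boost::numeric::odeint::ofp_operations> rk4;
    boost::numeric::odeint::integrate_const(rk4, my_rhs_gpu, x0, t0, tf, dt);

    OdeSol = x0.data.get<0>();                          // still a device expression
    Particles.deviceToHostProp<0,1,2>();                // bring the result back for host-side checks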
// -#ifndef OPENFPM_PDATA_BOOST_VECTOR_ALGEBRA_OFP_HPP -#define OPENFPM_PDATA_BOOST_VECTOR_ALGEBRA_OFP_HPP +#ifndef OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_HPP +#define OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_HPP namespace boost { namespace numeric { namespace odeint { - -/* - * This class template has to be overload in order to call vector_space_algebra::norm_inf - */ - // template< class State, class Enabler = void > struct vector_space_norm_inf; - -/* - * Example: instantiation for sole doubles and complex - */ -/* template<> - struct vector_space_norm_inf< double > + /* It copy one element of the chunk for each property + * + */ + template<typename vector_type,typename index_type,typename op_type> + struct for_each_prop1 { - typedef double result_type; - double operator()( double x ) const - { - using std::abs; - return abs(x); - } - }; - template<> - struct vector_space_norm_inf< float > - { - typedef float result_type; - result_type operator()( float x ) const + vector_type &v; + index_type &p; + op_type &op; + /*! \brief constructor + * + * + * \param src source encapsulated object + * \param dst destination encapsulated object + * + */ + __device__ __host__ inline for_each_prop1(vector_type &v,index_type &p,op_type &op) + :v(v),p(p),op(op) + {}; + //! It call the copy function for each property + template<typename T> + __device__ __host__ inline void operator()(T& t) const { - using std::abs; - return abs(x); - } - }; - - template< typename T > - struct vector_space_norm_inf< std::complex<T> > - { - typedef T result_type; - result_type operator()( std::complex<T> x ) const - { - using std::abs; - return abs( x ); - } - };*/ - - template<typename S1,typename S2> - struct for_each_prop_resize{ - S1 &v1; - S2 &v2; - /*! \brief constructor - * - * - * \param src source encapsulated object - * \param dst destination encapsulated object - * - */ - inline for_each_prop_resize(S1 &v1,S2 &v2) - :v1(v1),v2(v2) - {}; - //! It call the copy function for each property - template<typename T> - inline void operator()(T& t) const - { - v1.data.template get<T::value>().getVector().resize(v2.data.template get<T::value>().getVector().size()); - } - - }; - - /* It copy one element of the chunk for each property - * - */ - template<typename vector_type,typename index_type,typename op_type> - struct for_each_prop1 - { - - vector_type &v; - index_type &p; - op_type &op; - /*! \brief constructor - * - * - * \param src source encapsulated object - * \param dst destination encapsulated object - * - */ - inline for_each_prop1(vector_type &v,index_type &p,op_type &op) - :v(v),p(p),op(op) - {}; - //! It call the copy function for each property - template<typename T> - inline void operator()(T& t) const - { - - op(v.data.template get<T::value>().getVector().template get<0>(p)); - } - }; - - struct vector_space_algebra_ofp - { - template< class S1 , class Op > - static void for_each1( S1 &s1 , Op op ) - { - - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. 
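The `for_each_propN` functors above are the glue between odeint's algebraic operations and OpenFPM's data layout: for a fixed element index `p` they visit every property of the state aggregate via `boost::mpl::for_each_ref` and apply the stepper's operation to the corresponding scalars. The `__device__ __host__` qualifiers added here allow the same functors to be invoked from the GPU algebra in `vector_algebra_ofp_gpu.hpp`. A sketch of the kind of operation that gets passed in, e.g. for an explicit Euler-style stage x_new = a1*x + a2*dxdt (the struct name is hypothetical; boost's `default_operations::scale_sum2` plays this role in practice):

    struct scale_sum2_like {
        double a1, a2;
        scale_sum2_like(double a1, double a2) : a1(a1), a2(a2) {}
        template<typename T0, typename T1, typename T2>
        __device__ __host__ void operator()(T0 &x_new, const T1 &x, const T2 &dxdt) const
        { x_new = a1 * x + a2 * dxdt; }        // applied per property, per element p
    };
    // for_each_prop3<S1,S2,S3,index,scale_sum2_like> forwards
    // (s1[p], s2[p], s3[p]) of every property into this operator().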
- for_each_prop1<S1,size_t,Op> cp(s1,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - ++it; + op(v.data.template get<T::value>().getVector().template get<0>(p)); } - } - - + }; template<typename S1,typename S2,typename index_type,typename op_type> struct for_each_prop2 { @@ -137,36 +51,16 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop2(S1 &v1,S2 &v2,index_type &p,op_type &op) - :v1(v1),v2(v2),p(p),op(op) + __device__ __host__ inline for_each_prop2(S1 &v1,S2 &v2,index_type &p,op_type &op) + :v1(v1),v2(v2),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class Op > - static void for_each2( S1 &s1 , S2 &s2 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - //s1.data.template get<0>().getVector().resize(s2.data.template get<0>().getVector().size()); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop2<S1,S2,size_t,Op> cp(s1,s2,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - - template<typename S1,typename S2,typename S3,typename index_type,typename op_type> struct for_each_prop3 @@ -184,40 +78,21 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop3(S1 &v1,S2 &v2,S3 &v3,index_type &p,op_type &op) - :v1(v1),v2(v2),v3(v3),p(p),op(op) + __device__ __host__ inline for_each_prop3(S1 &v1,S2 &v2,S3 &v3,index_type &p,op_type &op) + :v1(v1),v2(v2),v3(v3),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { //std::cout<<v1.data.template get<T::value>().getVector().size()<<":"<<v2.data.template get<T::value>().getVector().size()<<":"<<v3.data.template get<T::value>().getVector().size()<<std::endl; + //printf("v2:%f,v3:%f \n",v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p)); + //printf("2\n"); op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p)); - } - }; - - - template< class S1 , class S2 , class S3 , class Op > - static void for_each3( S1 &s1 , S2 &s2 , S3 &s3 , Op op ) - { - -// - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. 
- for_each_prop3<S1,S2,S3,size_t,Op> cp(s1,s2,s3,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + //printf("v1:%f, v2:%f,v3:%f \n",v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p)); - ++it; } - } - - + }; template<typename S1,typename S2,typename S3,typename S4,typename index_type,typename op_type> struct for_each_prop4 { @@ -236,36 +111,16 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop4(S1 &v1,S2 &v2,S3 &v3,S4 &v4,index_type &p,op_type &op) - :v1(v1),v2(v2),v3(v3),v4(v4),p(p),op(op) + __device__ __host__ inline for_each_prop4(S1 &v1,S2 &v2,S3 &v3,S4 &v4,index_type &p,op_type &op) + :v1(v1),v2(v2),v3(v3),v4(v4),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p)); } }; - - template< class S1 , class S2 , class S3 , class S4 , class Op > - static void for_each4( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop4<S1,S2,S3,S4,size_t,Op> cp(s1,s2,s3,s4,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename index_type,typename op_type> struct for_each_prop5 { @@ -285,36 +140,17 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop5(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop5(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5 , class Op > - static void for_each5( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. 
- for_each_prop5<S1,S2,S3,S4,S5,size_t,Op> cp(s1,s2,s3,s4,s5,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename index_type,typename op_type> struct for_each_prop6 { @@ -336,34 +172,16 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop6(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop6(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 , class Op > - static void for_each6( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop6<S1,S2,S3,S4,S5,S6,size_t,Op> cp(s1,s2,s3,s4,s5,s6,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename index_type,typename op_type> @@ -388,36 +206,17 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop7(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop7(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p)); } }; - - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7, class Op > - static void for_each7( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop7<S1,S2,S3,S4,S5,S6,S7,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename index_type,typename op_type> struct for_each_prop8 { @@ -441,36 +240,17 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop8(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop8(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p)); } }; - - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class Op > - static void for_each8( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. 
- for_each_prop8<S1,S2,S3,S4,S5,S6,S7,S8,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9,typename index_type,typename op_type> struct for_each_prop9 { @@ -495,38 +275,19 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop9(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop9(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),p(p),op(op) {}; //! It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p)); } }; - - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class Op > - static void for_each9( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop9<S1,S2,S3,S4,S5,S6,S7,S8,S9,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10,typename index_type,typename op_type> - struct for_each_prop10 + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10,typename index_type,typename op_type> + struct for_each_prop10 { S1 &v1; @@ -550,36 +311,17 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop10(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop10(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p)); } }; - - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class Op > - static void for_each10( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10, Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop10<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10, typename S11,typename index_type,typename op_type> struct for_each_prop11 { @@ -606,36 +348,18 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop11(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop11(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),v11(v11),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p),v11.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class Op > - static void for_each11( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11, Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop11<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10, typename S11, typename S12,typename index_type,typename op_type> struct for_each_prop12 { @@ -663,36 +387,18 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop12(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop12(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),v11(v11), v12(v12),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p),v11.data.template get<T::value>().getVector().template get<0>(p),v12.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class Op > - static void for_each12( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12, Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop12<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10, typename S11, typename S12, typename S13,typename index_type,typename op_type> struct for_each_prop13 { @@ -721,36 +427,18 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop13(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop13(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),v11(v11), v12(v12),v13(v13),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p),v11.data.template get<T::value>().getVector().template get<0>(p),v12.data.template get<T::value>().getVector().template get<0>(p),v13.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class Op > - static void for_each13( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12,S13 &s13, Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop13<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10, typename S11, typename S12, typename S13, typename S14,typename index_type,typename op_type> struct for_each_prop14 { @@ -780,36 +468,18 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop14(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,S14 &v14,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop14(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,S14 &v14,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),v11(v11), v12(v12),v13(v13),v14(v14),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p),v11.data.template get<T::value>().getVector().template get<0>(p),v12.data.template get<T::value>().getVector().template get<0>(p),v13.data.template get<T::value>().getVector().template get<0>(p),v14.data.template get<T::value>().getVector().template get<0>(p)); } }; - template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class S14, class Op > - static void for_each14( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9, S10 &s10,S11 &s11,S12 &s12,S13 &s13,S14 &s14, Op op ) - { - for_each_prop_resize<S1,S2> the_resize(s1,s2); - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); - // ToDo : build checks, that the +-*/ operators are well defined - auto it=s1.data.template get<0>().getVector().getIterator(); - while(it.isNext()){ - auto p=it.get(); - //converting to boost vector ids. - for_each_prop14<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,p,op); - //creating an iterator on v_ids[0] [1] [2] - boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); - - ++it; - } - } - template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename S9, typename S10, typename S11, typename S12, typename S13, typename S14, typename S15,typename index_type,typename op_type> struct for_each_prop15 { @@ -840,17 +510,349 @@ namespace boost { * \param dst destination encapsulated object * */ - inline for_each_prop15(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,S14 &v14,S15 &v15,index_type &p,op_type &op) + __device__ __host__ inline for_each_prop15(S1 &v1,S2 &v2,S3 &v3,S4 &v4,S5 &v5,S6 &v6,S7 &v7,S8 &v8,S9 &v9,S10 &v10, S11 &v11,S12 &v12,S13 &v13,S14 &v14,S15 &v15,index_type &p,op_type &op) :v1(v1),v2(v2),v3(v3),v4(v4),v5(v5),v6(v6),v7(v7),v8(v8),v9(v9),v10(v10),v11(v11), v12(v12),v13(v13),v14(v14),v15(v15),p(p),op(op) {}; //! 
It call the copy function for each property template<typename T> - inline void operator()(T& t) const + __device__ __host__ inline void operator()(T& t) const { op(v1.data.template get<T::value>().getVector().template get<0>(p),v2.data.template get<T::value>().getVector().template get<0>(p),v3.data.template get<T::value>().getVector().template get<0>(p),v4.data.template get<T::value>().getVector().template get<0>(p),v5.data.template get<T::value>().getVector().template get<0>(p),v6.data.template get<T::value>().getVector().template get<0>(p),v7.data.template get<T::value>().getVector().template get<0>(p),v8.data.template get<T::value>().getVector().template get<0>(p),v9.data.template get<T::value>().getVector().template get<0>(p),v10.data.template get<T::value>().getVector().template get<0>(p),v11.data.template get<T::value>().getVector().template get<0>(p),v12.data.template get<T::value>().getVector().template get<0>(p),v13.data.template get<T::value>().getVector().template get<0>(p),v14.data.template get<T::value>().getVector().template get<0>(p),v15.data.template get<T::value>().getVector().template get<0>(p)); } }; +/* + * This class template has to be overload in order to call vector_space_algebra::norm_inf + */ + // template< class State, class Enabler = void > struct vector_space_norm_inf; + +/* + * Example: instantiation for sole doubles and complex + */ +/* template<> + struct vector_space_norm_inf< double > + { + typedef double result_type; + double operator()( double x ) const + { + using std::abs; + return abs(x); + } + }; + + template<> + struct vector_space_norm_inf< float > + { + typedef float result_type; + result_type operator()( float x ) const + { + using std::abs; + return abs(x); + } + }; + + template< typename T > + struct vector_space_norm_inf< std::complex<T> > + { + typedef T result_type; + result_type operator()( std::complex<T> x ) const + { + using std::abs; + return abs( x ); + } + };*/ + + template<typename S1,typename S2> + struct for_each_prop_resize{ + S1 &v1; + S2 &v2; + /*! \brief constructor + * + * + * \param src source encapsulated object + * \param dst destination encapsulated object + * + */ + inline for_each_prop_resize(S1 &v1,S2 &v2) + :v1(v1),v2(v2) + {}; + //! It call the copy function for each property + template<typename T> + inline void operator()(T& t) const + { + v1.data.template get<T::value>().getVector().resize(v2.data.template get<T::value>().getVector().size()); + } + }; + + + + struct vector_space_algebra_ofp + { + template< class S1 , class Op > + static void for_each1( S1 &s1 , Op op ) + { + + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. 
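+                // apply op to every property of the element at index p; the boost::mpl loop
+                // below unrolls the call over all properties of the aggregate state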
+ for_each_prop1<S1,typename S1::index_type,Op> cp(s1,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + + template< class S1 , class S2 , class Op > + static void for_each2( S1 &s1 , S2 &s2 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + //s1.data.template get<0>().getVector().resize(s2.data.template get<0>().getVector().size()); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop2<S1,S2,typename S1::index_type,Op> cp(s1,s2,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + template< class S1 , class S2 , class S3 , class Op > + static void for_each3( S1 &s1 , S2 &s2 , S3 &s3 , Op op ) + { + +// + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop3<S1,S2,S3,typename S1::index_type,Op> cp(s1,s2,s3,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + + + template< class S1 , class S2 , class S3 , class S4 , class Op > + static void for_each4( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop4<S1,S2,S3,S4,typename S1::index_type,Op> cp(s1,s2,s3,s4,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + + + template< class S1 , class S2 , class S3 , class S4,class S5 , class Op > + static void for_each5( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. 
+ for_each_prop5<S1,S2,S3,S4,S5,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 , class Op > + static void for_each6( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop6<S1,S2,S3,S4,S5,S6,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7, class Op > + static void for_each7( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop7<S1,S2,S3,S4,S5,S6,S7,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class Op > + static void for_each8( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop8<S1,S2,S3,S4,S5,S6,S7,S8,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class Op > + static void for_each9( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. 
+ for_each_prop9<S1,S2,S3,S4,S5,S6,S7,S8,S9,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class Op > + static void for_each10( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop10<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class Op > + static void for_each11( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop11<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class Op > + static void for_each12( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. 
+ for_each_prop12<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class Op > + static void for_each13( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12,S13 &s13, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop13<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } + + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class S14, class Op > + static void for_each14( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9, S10 &s10,S11 &s11,S12 &s12,S13 &s13,S14 &s14, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_prop14<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + + ++it; + } + } template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class S14, class S15, class Op > static void for_each15( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9, S10 &s10,S11 &s11,S12 &s12,S13 &s13,S14 &s14,S15 &s15, Op op ) @@ -862,7 +864,7 @@ namespace boost { while(it.isNext()){ auto p=it.get(); //converting to boost vector ids. - for_each_prop15<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15,size_t,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,p,op); + for_each_prop15<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15,typename S1::index_type,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,p,op); //creating an iterator on v_ids[0] [1] [2] boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); @@ -911,7 +913,7 @@ namespace boost { while(it.isNext()){ auto p=it.get(); //converting to boost vector ids. 
- for_each_norm<S,size_t,typename boost::numeric::odeint::vector_space_norm_inf< S >::result_type> cp(s,p,n); + for_each_norm<S,typename S::index_type,typename boost::numeric::odeint::vector_space_norm_inf< S >::result_type> cp(s,p,n); //creating an iterator on v_ids[0] [1] [2] boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s.data)::max_prop>>(cp); @@ -939,4 +941,4 @@ namespace boost { -#endif //OPENFPM_PDATA_BOOST_VECTOR_ALGEBRA_OFP_HPP +#endif //OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_HPP diff --git a/src/OdeIntegrators/vector_algebra_ofp_gpu.hpp b/src/OdeIntegrators/vector_algebra_ofp_gpu.hpp new file mode 100644 index 00000000..5893bc3b --- /dev/null +++ b/src/OdeIntegrators/vector_algebra_ofp_gpu.hpp @@ -0,0 +1,993 @@ +// +// Created by Abhinav Singh on 1.03.23. +// + +#ifndef OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_GPU_HPP +#define OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_GPU_HPP + +namespace boost { + namespace numeric { + namespace odeint { + + template<typename S1, typename Op> + __global__ void for_each1_ker(S1 s1, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.size()) {return;} + + for_each_prop1<S1,size_t,Op> cp(s1,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + template<typename S1,typename S2, typename Op> + __global__ void for_each2_ker(S1 s1,S2 s2, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //printf("%f \n",s2.data.template get<0>().getVector().template get<0>(p)); + //converting to boost vector ids. + for_each_prop2<S1,S2,unsigned int,Op> cp(s1,s2,p,op); + //s1.data.template get<0>().getVector().template get<0>(p)=1.0*s2.data.template get<0>().getVector().template get<0>(p)+0.05*s3.data.template get<0>().getVector().template get<0>(p); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + template<typename S1,typename S2,typename S3, typename Op> + __global__ void for_each3_ker(S1 s1,S2 s2,S3 s3, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //printf("%f \n",s2.data.template get<0>().getVector().template get<0>(p)); + //converting to boost vector ids. + for_each_prop3<S1,S2,S3,unsigned int,Op> cp(s1,s2,s3,p,op); + //s1.data.template get<0>().getVector().template get<0>(p)=1.0*s2.data.template get<0>().getVector().template get<0>(p)+0.05*s3.data.template get<0>().getVector().template get<0>(p); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + template<typename S1,typename S2,typename S3,typename S4, typename Op> + __global__ void for_each4_ker(S1 s1,S2 s2,S3 s3,S4 s4, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //printf("%f \n",s2.data.template get<0>().getVector().template get<0>(p)); + //converting to boost vector ids. 
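+        // one GPU thread handles the element at index p; the functor applies op
+        // across all properties of the four state containers at that index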
+ for_each_prop4<S1,S2,S3,S4,unsigned int,Op> cp(s1,s2,s3,s4,p,op); + //s1.data.template get<0>().getVector().template get<0>(p)=1.0*s2.data.template get<0>().getVector().template get<0>(p)+0.05*s3.data.template get<0>().getVector().template get<0>(p); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + template<typename S1,typename S2,typename S3,typename S4,typename S5, typename Op> + __global__ void for_each5_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //printf("%f \n",s2.data.template get<0>().getVector().template get<0>(p)); + //converting to boost vector ids. + for_each_prop5<S1,S2,S3,S4,S5,unsigned int,Op> cp(s1,s2,s3,s4,s5,p,op); + //s1.data.template get<0>().getVector().template get<0>(p)=1.0*s2.data.template get<0>().getVector().template get<0>(p)+0.05*s3.data.template get<0>().getVector().template get<0>(p); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6, typename Op> + __global__ void for_each6_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop6<S1,S2,S3,S4,S5,S6,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7, typename Op> + __global__ void for_each7_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop7<S1,S2,S3,S4,S5,S6,S7,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8, typename Op> + __global__ void for_each8_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop8<S1,S2,S3,S4,S5,S6,S7,S8,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9, typename Op> + __global__ void for_each9_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. 
+ for_each_prop9<S1,S2,S3,S4,S5,S6,S7,S8,S9,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10, typename Op> + __global__ void for_each10_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop10<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10,typename S11, typename Op> + __global__ void for_each11_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10,S11 s11, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop11<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10,typename S11,typename S12, typename Op> + __global__ void for_each12_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10,S11 s11,S12 s12, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop12<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10,typename S11,typename S12,typename S13, typename Op> + __global__ void for_each13_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10,S11 s11,S12 s12,S13 s13, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop13<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10,typename S11,typename S12,typename S13,typename S14, typename Op> + __global__ void for_each14_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10,S11 s11,S12 s12,S13 s13,S14 s14, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. 
+ for_each_prop14<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + + template<typename S1,typename S2,typename S3,typename S4,typename S5,typename S6,typename S7,typename S8,typename S9,typename S10,typename S11,typename S12,typename S13,typename S14,typename S15, typename Op> + __global__ void for_each15_ker(S1 s1,S2 s2,S3 s3,S4 s4,S5 s5,S6 s6,S7 s7,S8 s8,S9 s9,S10 s10,S11 s11,S12 s12,S13 s13,S14 s14,S15 s15, Op op) + { + unsigned int p = threadIdx.x + blockIdx.x * blockDim.x; + + if (p >= s1.data.template get<0>().size()) {return;} + //converting to boost vector ids. + for_each_prop15<S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15,unsigned int,Op> cp(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,p,op); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(cp); + } + + struct vector_space_algebra_ofp_gpu + { + + template< class S1 , class Op > + static void for_each1( S1 &s1 , Op op ) + { + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getDomainIteratorGPU(); + CUDA_LAUNCH((for_each1_ker),it,s1,op); + } + template< class S1 , class S2 , class Op > + static void for_each2( S1 &s1 , S2 &s2 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + //s1.data.template get<0>().getVector().resize(s2.data.template get<0>().getVector().size()); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each2_ker),it,s1.toKernel(),s2.toKernel(),op); + } + + template< class S1 , class S2 , class S3 , class Op > + static void for_each3( S1 &s1 , S2 &s2 , S3 &s3 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype( s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each3_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),op); + + } + + template< class S1 , class S2 , class S3 , class S4 , class Op > + static void for_each4( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each4_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),op); + } + + + template< class S1 , class S2 , class S3 , class S4,class S5 , class Op > + static void for_each5( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each5_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),op); + } + + template< class S1 , class S2 , class S3 , class S4,class 
S5,class S6 , class Op > + static void for_each6( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each6_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),op); + } + + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7, class Op > + static void for_each7( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each7_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),op); + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class Op > + static void for_each8( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each8_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),op); + } + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class Op > + static void for_each9( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each9_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),op); + } + + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class Op > + static void for_each10( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize); + // ToDo : build checks, that the +-*/ operators are well defined + auto it=s1.data.template get<0>().getVector().getGPUIterator(); + + CUDA_LAUNCH((for_each10_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),op); + + } + + + template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class Op > + static void for_each11( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11, Op op ) + { + for_each_prop_resize<S1,S2> the_resize(s1,s2); + 
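+        // resize every property of the output state s1 to match s2 before the kernel writes into it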
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize);
+        // ToDo : build checks, that the +-*/ operators are well defined
+        auto it=s1.data.template get<0>().getVector().getGPUIterator();
+
+        CUDA_LAUNCH((for_each11_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),s11.toKernel(),op);
+    }
+
+
+    template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class Op >
+    static void for_each12( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12, Op op )
+    {
+        for_each_prop_resize<S1,S2> the_resize(s1,s2);
+        boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize);
+        // ToDo : build checks, that the +-*/ operators are well defined
+        auto it=s1.data.template get<0>().getVector().getGPUIterator();
+
+        CUDA_LAUNCH((for_each12_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),s11.toKernel(),s12.toKernel(),op);
+    }
+
+
+    template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class Op >
+    static void for_each13( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9 , S10 &s10,S11 &s11,S12 &s12,S13 &s13, Op op )
+    {
+        for_each_prop_resize<S1,S2> the_resize(s1,s2);
+        boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize);
+        // ToDo : build checks, that the +-*/ operators are well defined
+        auto it=s1.data.template get<0>().getVector().getGPUIterator();
+
+        CUDA_LAUNCH((for_each13_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),s11.toKernel(),s12.toKernel(),s13.toKernel(),op);
+    }
+
+    template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class S14, class Op >
+    static void for_each14( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9, S10 &s10,S11 &s11,S12 &s12,S13 &s13,S14 &s14, Op op )
+    {
+        for_each_prop_resize<S1,S2> the_resize(s1,s2);
+        boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize);
+        // ToDo : build checks, that the +-*/ operators are well defined
+        auto it=s1.data.template get<0>().getVector().getGPUIterator();
+
+        CUDA_LAUNCH((for_each14_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),s11.toKernel(),s12.toKernel(),s13.toKernel(),s14.toKernel(),op);
+    }
+
+    template< class S1 , class S2 , class S3 , class S4,class S5,class S6 ,class S7,class S8, class S9, class S10, class S11, class S12, class S13, class S14, class S15, class Op >
+    static void for_each15( S1 &s1 , S2 &s2 , S3 &s3 , S4 &s4,S5 &s5,S6 &s6,S7 &s7,S8 &s8, S9 &s9, S10 &s10,S11 &s11,S12 &s12,S13 &s13,S14 &s14,S15 &s15, Op op )
+    {
+        for_each_prop_resize<S1,S2> the_resize(s1,s2);
+        boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s1.data)::max_prop>>(the_resize);
+        auto it=s1.data.template get<0>().getVector().getGPUIterator();
+
+
CUDA_LAUNCH((for_each15_ker),it,s1.toKernel(),s2.toKernel(),s3.toKernel(),s4.toKernel(),s5.toKernel(),s6.toKernel(),s7.toKernel(),s8.toKernel(),s9.toKernel(),s10.toKernel(),s11.toKernel(),s12.toKernel(),s13.toKernel(),s14.toKernel(),s15.toKernel(),op); + } + + + + + + + template<typename vector_type,typename index_type,typename norm_result_type> + struct for_each_norm + { + const vector_type &v; + index_type &p; + norm_result_type &n; + /*! \brief constructor + * + * + * \param src source encapsulated object + * \param dst destination encapsulated object + * + */ + inline for_each_norm(const vector_type &v,index_type &p,norm_result_type &n) + :v(v),p(p),n(n) + {}; + //! It call the copy function for each property + template<typename T> + inline void operator()(T& t) const + { + if(fabs(v.data.template get<T::value>().getVector().template get<0>(p)) > n) + { + n=fabs(v.data.template get<T::value>().getVector().template get<0>(p)); + } + + } + }; + + template< class S > + static typename boost::numeric::odeint::vector_space_norm_inf< S >::result_type norm_inf( const S &s ) + { + typename boost::numeric::odeint::vector_space_norm_inf< S >::result_type n=0; + auto it=s.data.template get<0>().getVector().getIterator(); + while(it.isNext()){ + auto p=it.get(); + //converting to boost vector ids. + for_each_norm<S,size_t,typename boost::numeric::odeint::vector_space_norm_inf< S >::result_type> cp(s,p,n); + //creating an iterator on v_ids[0] [1] [2] + boost::mpl::for_each_ref<boost::mpl::range_c<int,0,decltype(s.data)::max_prop>>(cp); + + ++it; + } + auto &v_cl = create_vcluster(); + v_cl.max(n); + v_cl.execute(); + //std::max(); + //std::cout<<n<<std::endl; + return n; + } + }; + + +#include <algorithm> + +#include <boost/config.hpp> +#include <boost/array.hpp> +#include <boost/numeric/odeint/util/unit_helper.hpp> + + + + + +/* + * Notes: + * + * * the results structs are needed in order to work with fusion_algebra + */ +struct ofp_operations +{ + + template< class Fac1 = double > + struct scale + { + const Fac1 m_alpha1; + + scale( Fac1 alpha1 ) : m_alpha1( alpha1 ) { } + + template< class T1 > + __device__ __host__ void operator()( T1 &t1 ) const + { + t1 *= m_alpha1; + } + + typedef void result_type; + }; + + template< class Fac1 = double > + struct scale_sum1 + { + const Fac1 m_alpha1; + + scale_sum1( Fac1 alpha1 ) : m_alpha1( alpha1 ) { } + + template< class T1 , class T2 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 ) const + { + t1 = m_alpha1 * t2; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 > + struct scale_sum2 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + + scale_sum2( Fac1 alpha1 , Fac2 alpha2 ) : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) { } + + template< class T1 , class T2 , class T3 > + BOOST_FUSION_GPU_ENABLED + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 > + struct scale_sum3 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + + scale_sum3( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) { } + + template< class T1 , class T2 , class T3 , class T4 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4; + } + + typedef void result_type; + 
}; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 > + struct scale_sum4 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + + scale_sum4( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 > + struct scale_sum5 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + + scale_sum5( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , Fac5 alpha5 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 > + struct scale_sum6 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + + scale_sum6( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , Fac5 alpha5 , Fac6 alpha6 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ){ } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 ,const T7 &t7) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 > + struct scale_sum7 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + + scale_sum7( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class 
Fac7 = Fac6 , class Fac8 = Fac7 > + struct scale_sum8 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + + scale_sum8( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9; + } + + typedef void result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 = Fac8 > + struct scale_sum9 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + + scale_sum9( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10; + } + + typedef void result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 = Fac8 , class Fac10 = Fac9 > + struct scale_sum10 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + const Fac10 m_alpha10; + + scale_sum10( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 , Fac10 alpha10 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) , m_alpha10( alpha10 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 , class T11 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 , const T11 &t11 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * 
t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10 + m_alpha10 * t11; + } + + typedef void result_type; + }; + + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 = Fac8 , class Fac10 = Fac9 , class Fac11 = Fac10 > + struct scale_sum11 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + const Fac10 m_alpha10; + const Fac11 m_alpha11; + + scale_sum11( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 , + Fac10 alpha10 , Fac11 alpha11 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) , m_alpha10( alpha10 ) , m_alpha11( alpha11 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 , class T11 , class T12 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 , const T11 &t11 , const T12 &t12 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10 + m_alpha10 * t11 + m_alpha11 * t12; + } + + typedef void result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 = Fac8 , class Fac10 = Fac9 , class Fac11 = Fac10 , class Fac12 = Fac11 > + struct scale_sum12 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + const Fac10 m_alpha10; + const Fac11 m_alpha11; + const Fac12 m_alpha12; + + scale_sum12( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 , + Fac10 alpha10 , Fac11 alpha11 , Fac12 alpha12 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) , m_alpha10( alpha10 ) , m_alpha11( alpha11 ) , m_alpha12( alpha12 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 , class T11 , class T12 , class T13 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 , const T11 &t11 , const T12 &t12 , const T13 &t13 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10 + m_alpha10 * t11 + m_alpha11 * t12 + m_alpha12 * t13; + } + + typedef void result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 
= Fac8 , class Fac10 = Fac9 , class Fac11 = Fac10 , class Fac12 = Fac11 , class Fac13 = Fac12 > + struct scale_sum13 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + const Fac10 m_alpha10; + const Fac11 m_alpha11; + const Fac12 m_alpha12; + const Fac13 m_alpha13; + + scale_sum13( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 , + Fac10 alpha10 , Fac11 alpha11 , Fac12 alpha12 , Fac13 alpha13 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) , m_alpha10( alpha10 ) , m_alpha11( alpha11 ) , m_alpha12( alpha12 ) , m_alpha13( alpha13 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 , class T11 , class T12 , class T13 , class T14 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 , const T11 &t11 , const T12 &t12 , const T13 &t13 , const T14 &t14 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10 + m_alpha10 * t11 + m_alpha11 * t12 + m_alpha12 * t13 + m_alpha13 * t14; + } + + typedef void result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 , class Fac3 = Fac2 , class Fac4 = Fac3 , class Fac5 = Fac4 , class Fac6 = Fac5 , class Fac7 = Fac6 , class Fac8 = Fac7 , class Fac9 = Fac8 , class Fac10 = Fac9 , class Fac11 = Fac10 , class Fac12 = Fac11 , class Fac13 = Fac12 , class Fac14 = Fac13 > + struct scale_sum14 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + const Fac3 m_alpha3; + const Fac4 m_alpha4; + const Fac5 m_alpha5; + const Fac6 m_alpha6; + const Fac7 m_alpha7; + const Fac8 m_alpha8; + const Fac9 m_alpha9; + const Fac10 m_alpha10; + const Fac11 m_alpha11; + const Fac12 m_alpha12; + const Fac13 m_alpha13; + const Fac14 m_alpha14; + + scale_sum14( Fac1 alpha1 , Fac2 alpha2 , Fac3 alpha3 , Fac4 alpha4 , + Fac5 alpha5 , Fac6 alpha6 , Fac7 alpha7 , Fac8 alpha8 , Fac9 alpha9 , + Fac10 alpha10 , Fac11 alpha11 , Fac12 alpha12 , Fac13 alpha13 , Fac14 alpha14 ) + : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) , m_alpha3( alpha3 ) , m_alpha4( alpha4 ) , m_alpha5( alpha5 ) , m_alpha6( alpha6 ) , m_alpha7( alpha7 ) , m_alpha8( alpha8 ) , m_alpha9( alpha9 ) , m_alpha10( alpha10 ) , m_alpha11( alpha11 ) , m_alpha12( alpha12 ) , m_alpha13( alpha13 ) , m_alpha14( alpha14 ) { } + + template< class T1 , class T2 , class T3 , class T4 , class T5 , class T6 , class T7 , class T8 , class T9 , class T10 , class T11 , class T12 , class T13 , class T14 , class T15 > + __device__ __host__ void operator()( T1 &t1 , const T2 &t2 , const T3 &t3 , const T4 &t4 , const T5 &t5 , const T6 &t6 , const T7 &t7 , const T8 &t8 , const T9 &t9 , const T10 &t10 , const T11 &t11 , const T12 &t12 , const T13 &t13 , const T14 &t14 , const T15 &t15 ) const + { + t1 = m_alpha1 * t2 + m_alpha2 * t3 + m_alpha3 * t4 + m_alpha4 * t5 + m_alpha5 * t6 + m_alpha6 * t7 + m_alpha7 * t8 + m_alpha8 * t9 + m_alpha9 * t10 + m_alpha10 * t11 + m_alpha11 * t12 + m_alpha12 * t13 + m_alpha13 * t14 + m_alpha14 * t15; + } + + typedef void 
result_type; + }; + + template< class Fac1 = double , class Fac2 = Fac1 > + struct scale_sum_swap2 + { + const Fac1 m_alpha1; + const Fac2 m_alpha2; + + scale_sum_swap2( Fac1 alpha1 , Fac2 alpha2 ) : m_alpha1( alpha1 ) , m_alpha2( alpha2 ) { } + + template< class T1 , class T2 , class T3 > + __device__ __host__ void operator()( T1 &t1 , T2 &t2 , const T3 &t3) const + { + const T1 tmp( t1 ); + t1 = m_alpha1 * t2 + m_alpha2 * t3; + t2 = tmp; + } + + typedef void result_type; + }; + + /* + * for usage in for_each2 + * + * Works with boost::units by eliminating the unit + */ + template< class Fac1 = double > + struct rel_error + { + const Fac1 m_eps_abs , m_eps_rel , m_a_x , m_a_dxdt; + + rel_error( Fac1 eps_abs , Fac1 eps_rel , Fac1 a_x , Fac1 a_dxdt ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) , m_a_x( a_x ) , m_a_dxdt( a_dxdt ) { } + + + template< class T1 , class T2 , class T3 > + __device__ __host__ void operator()( T3 &t3 , const T1 &t1 , const T2 &t2 ) const + { + using std::abs; + set_unit_value( t3 , abs( get_unit_value( t3 ) ) / ( m_eps_abs + m_eps_rel * ( m_a_x * abs( get_unit_value( t1 ) ) + m_a_dxdt * abs( get_unit_value( t2 ) ) ) ) ); + } + + typedef void result_type; + }; + + + /* + * for usage in for_each3 + * + * used in the controller for the rosenbrock4 method + * + * Works with boost::units by eliminating the unit + */ + template< class Fac1 = double > + struct default_rel_error + { + const Fac1 m_eps_abs , m_eps_rel ; + + default_rel_error( Fac1 eps_abs , Fac1 eps_rel ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) { } + + + /* + * xerr = xerr / ( eps_abs + eps_rel * max( x , x_old ) ) + */ + template< class T1 , class T2 , class T3 > + __device__ __host__ void operator()( T3 &t3 , const T1 &t1 , const T2 &t2 ) const + { + BOOST_USING_STD_MAX(); + using std::abs; + Fac1 x1 = abs( get_unit_value( t1 ) ) , x2 = abs( get_unit_value( t2 ) ); + set_unit_value( t3 , abs( get_unit_value( t3 ) ) / ( m_eps_abs + m_eps_rel * max BOOST_PREVENT_MACRO_SUBSTITUTION ( x1 , x2 ) ) ); + } + + typedef void result_type; + }; + + + + /* + * for usage in reduce + */ + + template< class Value > + struct maximum + { + template< class Fac1 , class Fac2 > + __device__ __host__ Value operator()( Fac1 t1 , const Fac2 t2 ) const + { + using std::abs; + Value a1 = abs( get_unit_value( t1 ) ) , a2 = abs( get_unit_value( t2 ) ); + return ( a1 < a2 ) ? 
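rel_error performs the per-element scaling used by adaptive error control: each error component is divided by the mixed tolerance eps_abs + eps_rel*(a_x*|x| + a_dxdt*|dxdt|), so a scaled value at or below 1 marks the step as acceptable. A scalar sketch with hypothetical numbers (get_unit_value/set_unit_value are identities for plain doubles):

void rel_error_demo()
{
    double x = 2.0, dxdt = 5.0, xerr = 1e-7;
    rel_error<double> scale(1e-6 /*eps_abs*/, 1e-6 /*eps_rel*/, 1.0 /*a_x*/, 0.0 /*a_dxdt*/);
    scale(xerr, x, dxdt);
    // xerr is now 1e-7 / (1e-6 + 1e-6 * (1.0*|2.0| + 0.0*|5.0|)) = 1/30, well below 1
}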
a2 : a1 ; + } + + typedef Value result_type; + }; + + + template< class Fac1 = double > + struct rel_error_max + { + const Fac1 m_eps_abs , m_eps_rel; + + rel_error_max( Fac1 eps_abs , Fac1 eps_rel ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) + { } + + template< class Res , class T1 , class T2 , class T3 > +__device__ __host__ Res operator()( Res r , const T1 &x_old , const T2 &x , const T3 &x_err ) + { + BOOST_USING_STD_MAX(); + using std::abs; + Res tmp = abs( get_unit_value( x_err ) ) / ( m_eps_abs + m_eps_rel * max BOOST_PREVENT_MACRO_SUBSTITUTION ( abs( x_old ) , abs( x ) ) ); + return max BOOST_PREVENT_MACRO_SUBSTITUTION ( r , tmp ); + } + }; + + + template< class Fac1 = double > + struct rel_error_max2 + { + const Fac1 m_eps_abs , m_eps_rel , m_a_x , m_a_dxdt; + + rel_error_max2( Fac1 eps_abs , Fac1 eps_rel , Fac1 a_x , Fac1 a_dxdt ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) , m_a_x( a_x ) , m_a_dxdt( a_dxdt ) + { } + + template< class Res , class T1 , class T2 , class T3 , class T4 > +__device__ __host__ Res operator()( Res r , const T1 &x_old , const T2 &/*x*/ , const T3 &dxdt_old , const T4 &x_err ) + { + BOOST_USING_STD_MAX(); + using std::abs; + Res tmp = abs( get_unit_value( x_err ) ) / + ( m_eps_abs + m_eps_rel * ( m_a_x * abs( get_unit_value( x_old ) ) + m_a_dxdt * abs( get_unit_value( dxdt_old ) ) ) ); + return max BOOST_PREVENT_MACRO_SUBSTITUTION ( r , tmp ); + } + }; + + + + + template< class Fac1 = double > + struct rel_error_l2 + { + const Fac1 m_eps_abs , m_eps_rel; + + rel_error_l2( Fac1 eps_abs , Fac1 eps_rel ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) + { } + + template< class Res , class T1 , class T2 , class T3 > +__device__ __host__ Res operator()( Res r , const T1 &x_old , const T2 &x , const T3 &x_err ) + { + BOOST_USING_STD_MAX(); + using std::abs; + Res tmp = abs( get_unit_value( x_err ) ) / ( m_eps_abs + m_eps_rel * max BOOST_PREVENT_MACRO_SUBSTITUTION ( abs( x_old ) , abs( x ) ) ); + return r + tmp * tmp; + } + }; + + + + + template< class Fac1 = double > + struct rel_error_l2_2 + { + const Fac1 m_eps_abs , m_eps_rel , m_a_x , m_a_dxdt; + + rel_error_l2_2( Fac1 eps_abs , Fac1 eps_rel , Fac1 a_x , Fac1 a_dxdt ) + : m_eps_abs( eps_abs ) , m_eps_rel( eps_rel ) , m_a_x( a_x ) , m_a_dxdt( a_dxdt ) + { } + + template< class Res , class T1 , class T2 , class T3 , class T4 > +__device__ __host__ Res operator()( Res r , const T1 &x_old , const T2 &/*x*/ , const T3 &dxdt_old , const T4 &x_err ) + { + using std::abs; + Res tmp = abs( get_unit_value( x_err ) ) / + ( m_eps_abs + m_eps_rel * ( m_a_x * abs( get_unit_value( x_old ) ) + m_a_dxdt * abs( get_unit_value( dxdt_old ) ) ) ); + return r + tmp * tmp; + } + }; + + +}; + + + } // odeint +} // numeric +} // boost + + +#endif //OPENFPM_PDATA_VECTOR_ALGEBRA_OFP_HPP diff --git a/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh b/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh index 0060b134..cce632d6 100644 --- a/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh +++ b/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh @@ -26,11 +26,11 @@ struct SubsetSelector_impl<true> { template<typename particle_type,typename subset_type> static void check(particle_type &particles,subset_type &particle_subset){ - - if(particles.getMapCtr()!=particle_subset.getUpdateCtr()) + //This getMapCtr needs to be created or fixed for cuda! 
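The *_max and *_l2 functors above are meant for a reduce pass: every call folds one element's weighted error into a running maximum or sum of squares, and the step-size controller compares the resulting norm with 1. A host-side sketch of the pattern with hypothetical data (on the GPU the same functor is applied by a parallel reduction):

#include <cstddef>
#include <vector>

// assumes rel_error_max from the algebra header above is visible here
inline double weighted_max_error(const std::vector<double> &x_old,
                                 const std::vector<double> &x_new,
                                 const std::vector<double> &x_err)
{
    rel_error_max<double> fold(1e-6 /*eps_abs*/, 1e-6 /*eps_rel*/);
    double err_norm = 0.0;
    for (std::size_t i = 0; i < x_new.size(); ++i)
        err_norm = fold(err_norm, x_old[i], x_new[i], x_err[i]);
    // err_norm = max_i |x_err_i| / (eps_abs + eps_rel * max(|x_old_i|, |x_new_i|))
    return err_norm;
}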
+ /* if(particles.getMapCtr()!=particle_subset.getUpdateCtr()) { std::cerr<<__FILE__<<":"<<__LINE__<<" Error: You forgot a subset update after map."<<std::endl; - } + }*/ } }; #endif @@ -80,7 +80,6 @@ struct pos_or_propL_ker }; - /*! \brief selector for position or properties left side * * \tparam vector type of the original vector @@ -428,7 +427,7 @@ struct vector_dist_op_compute_op<prp,false,comp_host> } }; - +#define NVCC #ifdef __NVCC__ template<unsigned int prp, unsigned int dim ,typename vector, typename expr> diff --git a/src/Operators/Vector/vector_dist_operators.hpp b/src/Operators/Vector/vector_dist_operators.hpp index cb1eac3b..25905116 100644 --- a/src/Operators/Vector/vector_dist_operators.hpp +++ b/src/Operators/Vector/vector_dist_operators.hpp @@ -9,6 +9,7 @@ #define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_ #include "Vector/vector_dist.hpp" +#include "Vector/vector_dist_subset.hpp" #include "lib/pdata.hpp" #include "cuda/vector_dist_operators_cuda.cuh" @@ -781,7 +782,7 @@ public: //! return the result of the expression - template<typename r_type=typename std::remove_reference<decltype(-(o1.value(vect_dist_key_dx(0))))>::type > + template<typename r_type=typename std::remove_reference<decltype(-(o1.value(vect_dist_key_dx(0))))>::type > __device__ __host__ inline r_type value(const vect_dist_key_dx & key) const { return -(o1.value(key)); @@ -823,6 +824,27 @@ struct vector_dist_expression_comp_sel<comp_dev,false> typedef boost::mpl::int_<-1> type; }; +/*! \brief Expression implementation computation selector + * + */ +template<bool cond> +struct vector_dist_expression_comp_proxy_sel +{ + template<bool cond_, typename v_type, typename exp_type> + static void compute(v_type &v,exp_type &v_exp) + { vector_dist_op_compute_op<0,false,vector_dist_expression_comp_sel<comp_dev,cond_>::type::value> + ::compute_expr(v,v_exp);} +}; +template<> +struct vector_dist_expression_comp_proxy_sel<false> +{ + template<bool cond, typename v_type, typename exp_type> + static void compute(v_type &v, exp_type &v_exp) + { auto v_ker=v.toKernel(); + vector_dist_op_compute_op<0,false,vector_dist_expression_comp_sel<comp_dev,cond>::type::value> + ::compute_expr(v_ker,v_exp);} +}; + template<typename vector, bool is_ker = has_vector_kernel<vector>::type::value> struct vector_expression_transform { @@ -1040,15 +1062,37 @@ public: * \return itself * */ - template<typename T> vector & operator=(const vector_dist_expression<0,openfpm::vector<aggregate<T>>> & v_exp) + template<typename T,typename memory,template <typename> class layout_base > vector & operator=(const vector_dist_expression<0,openfpm::vector<aggregate<T>, memory, layout_base>> & v_exp) { - vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_host, - has_vector_kernel<vector>::type::value>::type::value> - ::compute_expr(v.v,v_exp); + //vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_host,has_vector_kernel<vector>::type::value>::type::value> + //::compute_expr(v.v,v_exp); + vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_host, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp); + return v.v; } + /*! 
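vector_dist_expression_comp_proxy_sel selects at compile time whether an expression is evaluated on the container itself or on its toKernel() view. A self-contained analogue of the idiom, reduced to the bare dispatch (all names hypothetical):

#include <iostream>

struct HostVec { void compute() { std::cout << "host path\n"; } };
struct DevVec
{
    struct Kernel { void compute() { std::cout << "device path\n"; } };
    Kernel toKernel() { return {}; }
};

template<bool direct>
struct compute_proxy                       // primary: evaluate on the container directly
{
    template<typename V> static void run(V &v) { v.compute(); }
};

template<>
struct compute_proxy<false>                // specialization: go through the kernel view
{
    template<typename V> static void run(V &v) { auto k = v.toKernel(); k.compute(); }
};

int main()
{
    HostVec h; DevVec d;
    compute_proxy<true>::run(h);   // host path
    compute_proxy<false>::run(d);  // device path
}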
\brief Fill the vector property with the evaluated expression + * + * \param v_exp expression to evaluate + * + * \return itself + * + */ + template<typename T> vector & operator=(const vector_dist_expression<0,openfpm::vector_gpu<aggregate<T>>> & v_exp) + { + vector_dist_op_compute_op<prp,false,vector_dist_expression_comp_sel<comp_dev, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v.v,v_exp.getVector().toKernel()); + //constexpr bool cond=has_vector_kernel<vector>::type::value || std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value; + //vector_dist_expression_comp_proxy_sel<!std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value>::template compute<cond>(v.v,v_exp); + + + return v.v; + } + /*! \brief Fill the vector property with the evaluated expression * * \param v_exp expression to evaluate @@ -1139,45 +1183,45 @@ public: * \tparam vector involved * */ -template<typename T> -class vector_dist_expression<0,openfpm::vector<aggregate<T>> > +template<typename vector_type> +class vector_dist_expression_impl { - //! Internal vector - typedef openfpm::vector<aggregate<T>> vector; - - //! The temporal vector - mutable vector v; + //! Internal vector + typedef vector_type vector; + typedef typename boost::mpl::at<typename vector_type::value_type::type,boost::mpl::int_<0>>::type T; + //! The temporal vector + mutable vector v; public: typedef T * iterator; typedef const T * const_iterator; - typedef typename has_vector_kernel<vector>::type is_ker; + typedef typename has_vector_kernel<vector>::type is_ker; - //! The type of the internal vector - typedef vector vtype; + //! The type of the internal vector + typedef vector vtype; //! The type of the internal value typedef T value_type; //! result for is sort - typedef boost::mpl::bool_<false> is_sort; + typedef boost::mpl::bool_<false> is_sort; - //! NN_type - typedef void NN_type; + //! NN_type + typedef void NN_type; - //! Property id of the point - static const unsigned int prop = 0; + //! Property id of the point + static const unsigned int prop = 0; - int var_id = 0; + int var_id = 0; - void setVarId(int var_id) - { - this->var_id = var_id; - } + void setVarId(int var_id) + { + this->var_id = var_id; + } - ///////// BOOST ODEINT interface + ///////// BOOST ODEINT interface iterator begin() { return &v.template get<0>(0); } @@ -1193,11 +1237,11 @@ public: size_t size() const { return v.size(); } - void resize(size_t n) + void resize(size_t n) { - // Here + // Here - v.resize(n); + v.resize(n); } /* T * begin() { @@ -1217,161 +1261,171 @@ public: //{ return m_v[n]; } - //////////////////////////////////// + //////////////////////////////////// - vector_dist_expression() - {} + vector_dist_expression_impl() + {} - template<typename exp1, typename exp2, unsigned int op> - vector_dist_expression(const vector_dist_expression_op<exp1,exp2,op> & v_exp) - { - this->operator=(v_exp); - } + template<unsigned int prp2, typename vector2> + vector_dist_expression_impl(const vector_dist_expression<prp2,vector2> & v_exp) + { + this->operator=(v_exp); + }; - /*! \brief get the NN object - * - * \return the NN object - * - */ - inline void * getNN() const - { - return NULL; - } + template<typename exp1, typename exp2, unsigned int op> + vector_dist_expression_impl(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + { + this->operator=(v_exp); + } - /*! 
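The begin()/end()/size()/resize() members exist so that boost::numeric::odeint can treat the temporal vector as a state type: resize() lets the stepper allocate internal temporaries of matching size and the iterators let a range-based algebra walk the data. A stand-in type showing the minimal contract (illustrative only, not an OpenFPM type):

#include <cstddef>
#include <vector>

struct toy_state
{
    std::vector<double> data;

    std::size_t size() const { return data.size(); }
    void resize(std::size_t n) { data.resize(n); }   // called through odeint's resizer
    double *begin() { return data.data(); }
    double *end()   { return data.data() + data.size(); }
};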
\brief Return the vector on which is acting - * - * It return the vector used in getVExpr, to get this object - * - * \return the vector - * - */ - __device__ __host__ const vector & getVector() const - { - return v; - } + /*! \brief get the NN object + * + * \return the NN object + * + */ + inline void * getNN() const + { + return NULL; + } - /*! \brief Return the vector on which is acting - * - * It return the vector used in getVExpr, to get this object - * - * \return the vector - * - */ - __device__ __host__ vector & getVector() - { - return v; - } + /*! \brief Return the vector on which is acting + * + * It return the vector used in getVExpr, to get this object + * + * \return the vector + * + */ + __device__ __host__ const vector & getVector() const + { + return v; + } - /*! \brief This function must be called before value - * - * it initialize the expression if needed - * - */ - inline void init() const - {} + /*! \brief Return the vector on which is acting + * + * It return the vector used in getVExpr, to get this object + * + * \return the vector + * + */ + __device__ __host__ vector & getVector() + { + return v; + } - /*! \brief Evaluate the expression - * - * \param k where to evaluate the expression - * - * \return the result of the expression - * - */ - __host__ inline auto value(const vect_dist_key_dx & k) const -> decltype(v.template get<0>(k.getKey())) - { - return v.template get<0>(k.getKey()); - } + /*! \brief This function must be called before value + * + * it initialize the expression if needed + * + */ + inline void init() const + {} + + /*! \brief Evaluate the expression + * + * \param k where to evaluate the expression + * + * \return the result of the expression + * + */ + __host__ __device__ inline auto value(const vect_dist_key_dx & k) const -> decltype(v.template get<0>(k.getKey())) + { + return v.template get<0>(k.getKey()); + } - /*! \brief Fill the vector property with the evaluated expression - * - * \param v_exp expression to evaluate - * - * \return itself - * - */ - template<unsigned int prp2, typename vector2> vector & operator=(const vector_dist_expression<prp2,vector2> & v_exp) - { + /*! 
\brief Fill the vector property with the evaluated expression + * + * \param v_exp expression to evaluate + * + * \return itself + * + */ + template<unsigned int prp2, typename vector2> vector & operator=(const vector_dist_expression<prp2,vector2> & v_exp) + { if (v_exp.getVector().isSubset() == true) { - std::cout << __FILE__ << ":" << __LINE__ << " error on the right hand side of the expression you have to use non-subset properties" << std::endl; - return v; + std::cout << __FILE__ << ":" << __LINE__ << " error on the right hand side of the expression you have to use non-subset properties" << std::endl; + return v; } v.resize(v_exp.getVector().size_local()); + constexpr bool cond=has_vector_kernel<vector>::type::value || std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value; + //std::cout<<cond<<std::endl; + //std::cout<< (vector_dist_expression_comp_sel<comp_host,has_vector_kernel<vector>::type::value>::type::value || std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value)<<std::endl; + //std::cout<<(vector_dist_expression_comp_sel<2, + // has_vector_kernel<vector>::type::value>::type::value || std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value)<<std::endl; + //std::cout<<has_vector_kernel<vector>::type::value<<std::endl; + //std::cout<<vector_dist_expression_comp_sel<2,false>::type::value<<std::endl; + //std::cout<<!std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value<<std::endl; + if (has_vector_kernel<vector>::type::value == false && !std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value) + { + vector_dist_op_compute_op<0,false,vector_dist_expression_comp_sel<comp_host,cond>::type::value> + ::compute_expr(v,v_exp); + } + else + { + vector_dist_expression_comp_proxy_sel<!std::is_same<vector,openfpm::vector<aggregate<T>,CudaMemory,memory_traits_inte>>::value>::template compute<cond>(v,v_exp); + } - if (has_vector_kernel<vector>::type::value == false) - { - vector_dist_op_compute_op<0,false,vector_dist_expression_comp_sel<comp_host, - has_vector_kernel<vector>::type::value>::type::value> - ::compute_expr(v,v_exp); - } - else - { - vector_dist_op_compute_op<0,false,vector_dist_expression_comp_sel<comp_dev, - has_vector_kernel<vector>::type::value>::type::value> - ::compute_expr(v,v_exp); - } - - return v; - } + return v; + } - /*! \brief Fill the vector property with the evaluated expression - * - * \param v_exp expression to evaluate - * - * \return itself - * - */ - template<typename exp1, typename exp2, unsigned int op> - vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) - { + /*! 
\brief Fill the vector property with the evaluated expression + * + * \param v_exp expression to evaluate + * + * \return itself + * + */ + template<typename exp1, typename exp2, unsigned int op> + vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + { if (v_exp.getVector().isSubset() == true) { - std::cout << __FILE__ << ":" << __LINE__ << " error on the right hand side of the expression you have to use non-subset properties" << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << " error on the right hand side of the expression you have to use non-subset properties" << std::endl; return v; } v.resize(v_exp.getVector().size_local()); - if (has_vector_kernel<vector>::type::value == false) - { - vector_dist_op_compute_op<0, - vector_dist_expression_op<exp1,exp2,op>::is_sort::value, - vector_dist_expression_comp_sel<comp_host, - has_vector_kernel<vector>::type::value>::type::value> - ::compute_expr(v,v_exp); - } - else - { - vector_dist_op_compute_op<0, - vector_dist_expression_op<exp1,exp2,op>::is_sort::value, - vector_dist_expression_comp_sel<comp_dev, - has_vector_kernel<vector>::type::value>::type::value> - ::compute_expr(v,v_exp); - } + if (has_vector_kernel<vector>::type::value == false) + { + vector_dist_op_compute_op<0, + vector_dist_expression_op<exp1,exp2,op>::is_sort::value, + vector_dist_expression_comp_sel<comp_host, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v,v_exp); + } + else + { + vector_dist_op_compute_op<0, + vector_dist_expression_op<exp1,exp2,op>::is_sort::value, + vector_dist_expression_comp_sel<comp_dev, + has_vector_kernel<vector>::type::value>::type::value> + ::compute_expr(v,v_exp); + } - return v; - } + return v; + } - /*! \brief Fill the vector property with the double - * - * \param d value to fill - * - * \return the internal vector - * - */ - vector & operator=(double d) - { - std::cout << __FILE__ << ":" << __LINE__ << " Error: temporal with constants is unsupported" << std::endl; - } + /*! \brief Fill the vector property with the double + * + * \param d value to fill + * + * \return the internal vector + * + */ + vector & operator=(double d) + { + std::cout << __FILE__ << ":" << __LINE__ << " Error: temporal with constants is unsupported" << std::endl; + } template<typename Sys_eqs, typename pmap_type, typename unordered_map_type, typename coeff_type> inline void value_nz(pmap_type & p_map, const vect_dist_key_dx & key, unordered_map_type & cols, coeff_type & coeff, unsigned int comp) const { - std::cout << __FILE__ << ":" << __LINE__ << " Error: use of temporal is not supported to construct equations"; + std::cout << __FILE__ << ":" << __LINE__ << " Error: use of temporal is not supported to construct equations"; } inline vector_dist_expression_op<vector_dist_expression<0,vector>,boost::mpl::int_<1>,VECT_COMP> operator[](int comp) @@ -1386,8 +1440,89 @@ public: } }; +/*! 
\brief Sub class that encapsulate a vector properties operand to be used for expressions construction + * Temporal Expressions + * \tparam prp property involved + * \tparam vector involved + * + */ +template<typename T, typename memory,template <typename> class layout_base > +class vector_dist_expression<0,openfpm::vector<aggregate<T>,memory, layout_base> > : public vector_dist_expression_impl<openfpm::vector<aggregate<T>,memory, layout_base>> +{ + typedef openfpm::vector<aggregate<T>,memory, layout_base> vector; + typedef vector_dist_expression_impl<vector> base; + +public: + vector_dist_expression() + { + } + + template<unsigned int prp2, typename vector2> + vector_dist_expression(const vector_dist_expression<prp2,vector2> & v_exp) + :base(v_exp) + { + } + + template<typename exp1, typename exp2, unsigned int op> + vector_dist_expression(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + : base(v_exp) + { + } + + template<unsigned int prp2, typename vector2> vector & operator=(const vector_dist_expression<prp2,vector2> & v_exp) + { + return base::operator=(v_exp); + } + template<typename exp1, typename exp2, unsigned int op> + vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + { + return base::operator=(v_exp); + } +}; + +/*! \brief Sub class that encapsulate a GPU vector properties operand to be used for expressions construction + * Temporal Expressions + * \tparam prp property involved + * \tparam vector involved + * + */ +template<typename T> +class vector_dist_expression<0,openfpm::vector_gpu<aggregate<T>>> : public vector_dist_expression_impl<openfpm::vector_gpu<aggregate<T>>> +{ + typedef openfpm::vector_gpu<aggregate<T>> vector; + typedef vector_dist_expression_impl<vector> base; +public: + vector_dist_expression() + { + } + + template<unsigned int prp2, typename vector2> + vector_dist_expression(const vector_dist_expression<prp2,vector2> & v_exp) + :base(v_exp) + { + } + + template<typename exp1, typename exp2, unsigned int op> + vector_dist_expression(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + : base(v_exp) + { + } + + template<unsigned int prp2, typename vector2> vector & operator=(const vector_dist_expression<prp2,vector2> & v_exp) + { + return base::operator=(v_exp); + } + template<typename exp1, typename exp2, unsigned int op> + vector & operator=(const vector_dist_expression_op<exp1,exp2,op> & v_exp) + { + return base::operator=(v_exp); + } + +}; + +template<typename T> using texp_v = vector_dist_expression<0,openfpm::vector<aggregate<T>>>; +template<typename T> using texp_v_gpu = vector_dist_expression<0,openfpm::vector_gpu<aggregate<T>>>; -template<typename T> using texp_v = vector_dist_expression<0,openfpm::vector<aggregate<T>> >; template<typename vector, unsigned int impl> struct switcher_get_v @@ -1416,7 +1551,97 @@ struct switcher_get_v<vector,comp_dev> } }; -/*! 
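texp_v and texp_v_gpu are the user-facing aliases for these temporaries: a texp_v<double> caches an intermediate expression once per particle so it can be reused in later expressions, and texp_v_gpu does the same on a vector_gpu backing. A hedged usage sketch (assumes a vector_dist named particles with a scalar property 0; not taken from a shipped example):

auto f = getV<0>(particles);   // expression over property 0

texp_v<double> tmp;            // host-side temporary
tmp = 2.0 * f + 3.0;           // evaluated once and stored
f   = tmp * tmp;               // reused like any other expression

// GPU variant of the same pattern:
// texp_v_gpu<double> tmp_g;
// tmp_g = 2.0 * f + 3.0;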
\brief it take an expression and create the negatove of this expression +/*template<unsigned int, bool is_valid> +struct get_vector_dist_expression_op +{ + template<typename exp_type> + inline static auto get(exp_type & o1, const vect_dist_key_dx & key) -> decltype(o1.value(vect_dist_key_dx(0))) + { + return o1.value(key); + } + + template<unsigned int prop, typename exp_type, typename vector_type> + inline static void assign(exp_type & o1, vector_type & v, const vect_dist_key_dx & key, const vect_dist_key_dx & key_orig) + { + pos_or_propL<vector_type,exp_type::prop>::value(v,key) = o1.value(key_orig); + } + + template<unsigned int prop, typename vector_type> + inline static void assign_double(double d, vector_type & v, const vect_dist_key_dx & key) + { + pos_or_propL<vector_type,prop>::value(v,key) = d; + } +}; + +template<> +struct get_vector_dist_expression_op<1,false> +{ + template<typename exp_type> + static int get(exp_type & o1, const vect_dist_key_dx & key, const int (& comp)[1]) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + return 0; + } + + template<unsigned int prop, typename exp_type, typename vector_type> + inline static void assign(exp_type & o1, vector_type & v, const vect_dist_key_dx & key) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + } + + template<unsigned int prop, typename vector_type> + inline static void assign_double(double d, vector_type & v, const vect_dist_key_dx & key) + { + printf("ERROR: Slicer, the expression is incorrect, please check it\n"); + } +}; + +template<> +struct get_vector_dist_expression_op<1,true> +{ + template<typename exp_type> + static auto get(exp_type & o1, const vect_dist_key_dx & key, const int (& comp)[1]) -> decltype(o1.value(vect_dist_key_dx(0))[0]) + { + return o1.value(key)[comp[0]]; + } + + template<unsigned int prop,typename exp_type, typename vector_type> + inline static void assign(exp_type & o1, vector_type & v, const vect_dist_key_dx & key, const vect_dist_key_dx & key_orig, const int (& comp)[1]) + { + pos_or_propL<vector_type,prop>::value(v,key)[comp[0]] = o1.value(key_orig); + } + + template<unsigned int prop, typename vector_type> + inline static void assign_double(double d, vector_type & v, const vect_dist_key_dx & key, const int (& comp)[1]) + { + pos_or_propL<vector_type,prop>::value(v,key)[comp[0]] = d; + } +}; + +template<> +struct get_vector_dist_expression_op<2,true> +{ + template<typename exp_type> + static auto get(exp_type & o1, const vect_dist_key_dx & key, const int (& comp)[2]) -> decltype(o1.value(vect_dist_key_dx(0))[0][0]) + { + return o1.value(key)[comp[0]][comp[1]]; + } + + template<unsigned int prop,typename exp_type, typename vector_type> + inline static void assign(exp_type & o1, vector_type & v, const vect_dist_key_dx & key, const vect_dist_key_dx & key_orig, const int (& comp)[2]) + { + pos_or_propL<vector_type,prop>::value(v,key)[comp[0]][comp[1]] = o1.value(key_orig); + } + + template<unsigned int prop, typename vector_type> + inline static void assign_double(double d, vector_type & v, const vect_dist_key_dx & key, const int (& comp)[2]) + { + pos_or_propL<vector_type,prop>::value(v,key)[comp[0]][comp[1]] = d; + } +};*/ + + +/*! 
\brief it take an expression and take the component * * */ @@ -1572,7 +1797,7 @@ public: * \return itself * */ - template<typename T> vtype & operator=(const vector_dist_expression<0,openfpm::vector<aggregate<T>>> & v_exp) + template<typename T, typename memory> vtype & operator=(const vector_dist_expression<0,openfpm::vector<aggregate<T>,memory>> & v_exp) { v_exp.init(); @@ -1949,9 +2174,9 @@ operator+(const vector_dist_expression<prp1,v1> & va, double d) * \return an object that encapsulate the expression * */ -template<unsigned int prp1 , typename v1> +template<typename T, unsigned int prp1, typename v1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_SUM> -operator+(const vector_dist_expression<prp1,v1> & va, float d) +operator+(const vector_dist_expression<prp1,v1> & va, T d) { vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_SUM> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2017,9 +2242,9 @@ operator+(const vector_dist_expression_op<exp1,exp2,op1> & va, double d) * \return an object that encapsulate the expression * */ -template<typename exp1 , typename exp2, unsigned int op1> +template<typename T, typename exp1 , typename exp2, unsigned int op1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_SUM> -operator+(const vector_dist_expression_op<exp1,exp2,op1> & va, float d) +operator+(const vector_dist_expression_op<exp1,exp2,op1> & va, T d) { vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_SUM> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2153,9 +2378,10 @@ operator-(const vector_dist_expression<prp1,v1> & va, double d) * \return an object that encapsulate the expression * */ -template<unsigned int prp1, typename v1> +//template<unsigned int prp1, typename v1> +template<typename T, unsigned int prp1,typename v1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_SUB> -operator-(const vector_dist_expression<prp1,v1> & va, float d) +operator-(const vector_dist_expression<prp1,v1> & va, T d) { vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_SUB> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2187,9 +2413,9 @@ operator-(double d, const vector_dist_expression<prp1,v1> & vb) * \return an object that encapsulate the expression * */ -template<unsigned int prp1, typename v1> +template<typename T, unsigned int prp1,typename v1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<prp1,v1>,VECT_SUB> -operator-(float d, const vector_dist_expression<prp1,v1> & vb) +operator-(T d, const vector_dist_expression<prp1,v1> & vb) { vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<prp1,v1>,VECT_SUB> exp_sum(vector_dist_expression<0,float>(d),vb); @@ -2221,9 +2447,9 @@ operator*(double d, const vector_dist_expression<p2,v2> & vb) * \return an object that encapsulate the expression * */ -template<unsigned int p2, typename v2> +template<typename T, unsigned int p2,typename v2, typename sfinae = 
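The float overloads of the scalar operators are rewritten as templates constrained with enable_if<is_same<T,float>>, so they take part in overload resolution only when the scalar really is a float; every other arithmetic type keeps binding to the plain double overload. A self-contained illustration of the idiom:

#include <iostream>
#include <type_traits>

struct Expr {};

Expr operator+(const Expr &e, double d)            // double path
{ std::cout << "double overload: " << d << "\n"; return e; }

template<typename T,
         typename = typename std::enable_if<std::is_same<T,float>::value>::type>
Expr operator+(const Expr &e, T d)                 // float-only path
{ std::cout << "float overload: " << d << "\n"; return e; }

int main()
{
    Expr e;
    e + 2.0;    // picks the double overload
    e + 2.0f;   // exact-match template beats the float->double promotion
    e + 2;      // int converts to double; the template is SFINAE'd away
}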
typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<p2,v2>,VECT_MUL> -operator*(float d, const vector_dist_expression<p2,v2> & vb) +operator*(T d, const vector_dist_expression<p2,v2> & vb) { vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<p2,v2>,VECT_MUL> exp_sum(vector_dist_expression<0,float>(d),vb); @@ -2255,9 +2481,9 @@ operator*(const vector_dist_expression<p2,v2> & va, double d) * \return an object that encapsulate the expression * */ -template<unsigned int p2, typename v2> +template<typename T, unsigned int p2,typename v2, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<p2,v2>,vector_dist_expression<0,float>,VECT_MUL> -operator*(const vector_dist_expression<p2,v2> & va, float d) +operator*(const vector_dist_expression<p2,v2> & va, T d) { vector_dist_expression_op<vector_dist_expression<p2,v2>,vector_dist_expression<0,float>,VECT_MUL> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2357,9 +2583,9 @@ operator*(const vector_dist_expression_op<exp1,exp2,op1> & va, double d) * \return an object that encapsulate the expression * */ -template<typename exp1 , typename exp2, unsigned int op1> +template<typename T, typename exp1 , typename exp2, unsigned int op1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_MUL> -operator*(const vector_dist_expression_op<exp1,exp2,op1> & va, float d) +operator*(const vector_dist_expression_op<exp1,exp2,op1> & va, T d) { vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_MUL> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2391,9 +2617,9 @@ operator*(double d, const vector_dist_expression_op<exp1,exp2,op1> & vb) * \return an object that encapsulate the expression * */ -template<typename exp1 , typename exp2, unsigned int op1> +template<typename T, typename exp1 , typename exp2, unsigned int op1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression_op<exp1,exp2,op1>,VECT_MUL> -operator*(float d, const vector_dist_expression_op<exp1,exp2,op1> & vb) +operator*(T d, const vector_dist_expression_op<exp1,exp2,op1> & vb) { vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression_op<exp1,exp2,op1>,VECT_MUL> exp_sum(vector_dist_expression<0,float>(d),vb); @@ -2425,9 +2651,9 @@ operator/(const vector_dist_expression_op<exp1,exp2,op1> & va, double d) * \return an object that encapsulate the expression * */ -template<typename exp1, typename exp2, unsigned int op1> +template<typename T, typename exp1 , typename exp2, unsigned int op1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_DIV> -operator/(const vector_dist_expression_op<exp1,exp2,op1> & va, float d) +operator/(const vector_dist_expression_op<exp1,exp2,op1> & va, T d) { vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_DIV> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2459,9 +2685,9 @@ operator/(double d, const vector_dist_expression_op<exp1,exp2,op1> & va) * 
\return an object that encapsulate the expression * */ -template<typename exp1, typename exp2, unsigned int op1> +template<typename T, typename exp1 , typename exp2, unsigned int op1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_DIV> -operator/(float d, const vector_dist_expression_op<exp1,exp2,op1> & va) +operator/(T d, const vector_dist_expression_op<exp1,exp2,op1> & va) { vector_dist_expression_op<vector_dist_expression_op<exp1,exp2,op1>,vector_dist_expression<0,float>,VECT_DIV> exp_sum(vector_dist_expression<0,float>(d),va); @@ -2493,9 +2719,9 @@ operator/(const vector_dist_expression<prp1,v1> & va, double d) * \return an object that encapsulate the expression * */ -template<unsigned int prp1, typename v1> +template<typename T, unsigned int prp1,typename v1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_DIV> -operator/(const vector_dist_expression<prp1,v1> & va, float d) +operator/(const vector_dist_expression<prp1,v1> & va, T d) { vector_dist_expression_op<vector_dist_expression<prp1,v1>,vector_dist_expression<0,float>,VECT_DIV> exp_sum(va,vector_dist_expression<0,float>(d)); @@ -2527,9 +2753,9 @@ operator/(double d, const vector_dist_expression<prp1,v1> & va) * \return an object that encapsulate the expression * */ -template<unsigned int prp1, typename v1> +template<typename T, unsigned int prp1,typename v1, typename sfinae = typename std::enable_if<std::is_same<T,float>::value>::type > inline vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<prp1,v1>,VECT_DIV> -operator/(float d, const vector_dist_expression<prp1,v1> & va) +operator/(T d, const vector_dist_expression<prp1,v1> & va) { vector_dist_expression_op<vector_dist_expression<0,float>,vector_dist_expression<prp1,v1>,VECT_DIV> exp_sum(vector_dist_expression<0,float>(d),va); diff --git a/src/Solvers/petsc_solver.hpp b/src/Solvers/petsc_solver.hpp index 74c60035..6bb051f3 100644 --- a/src/Solvers/petsc_solver.hpp +++ b/src/Solvers/petsc_solver.hpp @@ -653,12 +653,12 @@ class petsc_solver<double> PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc)); // We set the Matrix operators - PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_)); + PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_)); - // if we are on on best solve set-up a monitor function + // if we are on on best solve set-up a monitor function - PETSC_SAFE_CALL(KSPSetFromOptions(ksp)); - PETSC_SAFE_CALL(KSPSetUp(ksp)); + PETSC_SAFE_CALL(KSPSetFromOptions(ksp)); + //PETSC_SAFE_CALL(KSPSetUp(ksp)); // Solve the system PETSC_SAFE_CALL(KSPSolve(ksp,b_,x_)); @@ -1329,6 +1329,122 @@ public: return x; } + /*! 
\brief Here we invert the matrix and solve the system + * + * \param A sparse matrix + * \param b vector + * \param x solution and initial guess + * + * \return true if succeed + * + */ + Vector<double,PETSC_BASE> solve(SparseMatrix<double,int,PETSC_BASE> & A, Vector<double,PETSC_BASE> & x, const Vector<double,PETSC_BASE> & b) + { + Mat & A_ = A.getMat(); + const Vec & b_ = b.getVec(); + Vec & x_ = x.getVec(); + + /* // We set the size of x according to the Matrix A + PetscInt row; + PetscInt col; + PetscInt row_loc; + PetscInt col_loc;*/ + + PETSC_SAFE_CALL(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE)); + + /*PETSC_SAFE_CALL(MatGetSize(A_,&row,&col)); + PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc));*/ + + pre_solve_impl(A_,b_,x_); + solve_simple(A_,b_,x_); + + x.update(); + + return x; + + /*pre_solve_impl(A_,b_,x_); + solve_simple(A_,b_,x_); + x.update(); + + return true;*/ + } + + /*! \brief Here we invert the matrix and solve the system with previous operator + * + * \param A sparse matrix + * \param b vector + * \param x solution and initial guess + * + * \return true if succeed + * + */ + Vector<double,PETSC_BASE> solve_successive(const Vector<double,PETSC_BASE> & b,bool initial_guess = false) + { + const Vec & b_ = b.getVec(); + // We set the size of x according to the Matrix A + PetscInt row; + PetscInt row_loc; + + PETSC_SAFE_CALL(KSPSetInitialGuessNonzero(ksp,PETSC_FALSE)); + + PETSC_SAFE_CALL(VecGetSize(b_,&row)); + PETSC_SAFE_CALL(VecGetLocalSize(b_,&row_loc)); + Vector<double,PETSC_BASE> x(row,row_loc); + Vec & x_ = x.getVec(); + PETSC_SAFE_CALL(KSPSetNormType(ksp,KSP_NORM_UNPRECONDITIONED)); + solve_simple(b_,x_); + + x.update(); + + return x; + + /*pre_solve_impl(A_,b_,x_); + solve_simple(A_,b_,x_); + x.update(); + + return true;*/ + } + + /*! \brief Here we invert the matrix and solve the system with previous operator and initial guess + * + * \param A sparse matrix + * \param b vector + * \param x solution and initial guess + * + * \return true if succeed + * + */ + Vector<double,PETSC_BASE> solve_successive(Vector<double,PETSC_BASE> & x, const Vector<double,PETSC_BASE> & b) + { + const Vec & b_ = b.getVec(); + Vec & x_ = x.getVec(); + + /* // We set the size of x according to the Matrix A + PetscInt row; + PetscInt col; + PetscInt row_loc; + PetscInt col_loc;*/ + + PETSC_SAFE_CALL(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE)); + + /*PETSC_SAFE_CALL(MatGetSize(A_,&row,&col)); + PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc));*/ + + PETSC_SAFE_CALL(KSPSetNormType(ksp,KSP_NORM_UNPRECONDITIONED)); + + solve_simple(b_,x_); + + x.update(); + + return x; + + /*pre_solve_impl(A_,b_,x_); + solve_simple(A_,b_,x_); + x.update(); + + return true;*/ + } + /*! 
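solve(A, x, b) keeps x as the initial guess (KSPSetInitialGuessNonzero), while solve_successive() reuses the operators and preconditioner installed by a previous solve and only swaps the right-hand side, the intended pattern for time loops with a constant matrix. A hedged usage sketch (update_rhs and n_steps are hypothetical):

petsc_solver<double> solver;

// first solve: installs the operators in the KSP object
Vector<double, PETSC_BASE> x = solver.solve(A, b);

for (int step = 0; step < n_steps; ++step)
{
    update_rhs(b);                       // hypothetical helper: refresh the right-hand side only
    x = solver.solve_successive(x, b);   // reuse the KSP set-up, x doubles as the initial guess
}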
\brief Here we invert the matrix and solve the system using a Nullspace for Neumann BC * * \warning umfpack is not a parallel solver, this function work only with one processor @@ -1344,7 +1460,7 @@ public: * \return the solution * */ - Vector<double,PETSC_BASE> with_constant_nullspace_solve(SparseMatrix<double,int,PETSC_BASE> & A, const Vector<double,PETSC_BASE> & b, bool initial_guess = false) + Vector<double,PETSC_BASE> with_nullspace_solve(SparseMatrix<double,int,PETSC_BASE> & A, const Vector<double,PETSC_BASE> & b, bool initial_guess = false,bool symmetric = false) { Mat & A_ = A.getMat(); const Vec & b_ = b.getVec(); @@ -1354,9 +1470,6 @@ public: PetscInt col; PetscInt row_loc; PetscInt col_loc; - MatNullSpace nullspace; - - PETSC_SAFE_CALL(KSPSetInitialGuessNonzero(ksp,PETSC_FALSE)); PETSC_SAFE_CALL(MatGetSize(A_,&row,&col)); PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc)); @@ -1365,14 +1478,48 @@ public: Vector<double,PETSC_BASE> x(row,row_loc); Vec & x_ = x.getVec(); - //Removing Null Space from RHS - PETSC_SAFE_CALL(MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_TRUE,0,0,&nullspace)); - PETSC_SAFE_CALL(MatNullSpaceRemove(nullspace,b_)); - PETSC_SAFE_CALL(MatNullSpaceDestroy(&nullspace)); + PETSC_SAFE_CALL(KSPSetFromOptions(ksp)); + PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_)); + PETSC_SAFE_CALL(KSPSolve(ksp,b_,x_)); - pre_solve_impl(A_,b_,x_); - solve_simple(A_,b_,x_); + Mat F, work, V; + PetscInt N, rows; + + /* Determine factorability */ + PETSC_SAFE_CALL(MatGetFactor(A_, MATSOLVERMUMPS, MAT_FACTOR_LU, &F)); + PETSC_SAFE_CALL(MatGetLocalSize(A_, &rows, NULL)); + + /* Set MUMPS options, see MUMPS documentation for more information */ + PETSC_SAFE_CALL(MatMumpsSetIcntl(F, 24, 1)); + PETSC_SAFE_CALL(MatMumpsSetIcntl(F, 25, 1)); + /* Perform factorization */ + PETSC_SAFE_CALL(MatLUFactorSymbolic(F, A_, NULL, NULL, NULL)); + PETSC_SAFE_CALL(MatLUFactorNumeric(F, A_, NULL)); + + /* This is the dimension of the null space */ + PETSC_SAFE_CALL(MatMumpsGetInfog(F, 28, &N)); + /* This will contain the null space in the columns */ + PETSC_SAFE_CALL(MatCreateDense(PETSC_COMM_WORLD, rows, N, PETSC_DETERMINE, PETSC_DETERMINE, NULL, &V)); + PETSC_SAFE_CALL(MatDuplicate(V, MAT_DO_NOT_COPY_VALUES, &work)); + PETSC_SAFE_CALL(MatMatSolve(F, work, V)); + + std::cout<<"Dimension:" << N; + Vec nvec[N]; + for(int i=0;i<N;i++) + { + PETSC_SAFE_CALL(MatGetColumnVector(V,nvec[i],i)); + } + MatNullSpace nullspace; + + PETSC_SAFE_CALL(MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_TRUE,N,nvec,&nullspace)); + PETSC_SAFE_CALL(MatSetTransposeNullSpace(A_,nullspace)); + PETSC_SAFE_CALL(MatSetNullSpace(A_,nullspace)); + PETSC_SAFE_CALL(MatNullSpaceDestroy(&nullspace)); + + PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_)); + PETSC_SAFE_CALL(KSPSetFromOptions(ksp)); + PETSC_SAFE_CALL(KSPSolve(ksp,b_,x_)); x.update(); return x; @@ -1419,30 +1566,6 @@ public: return getSolNormError(b.getVec(),x.getVec(),ksp); } - /*! \brief Here we invert the matrix and solve the system - * - * \param A sparse matrix - * \param b vector - * \param x solution and initial guess - * - * \return true if succeed - * - */ - bool solve(SparseMatrix<double,int,PETSC_BASE> & A, Vector<double,PETSC_BASE> & x, const Vector<double,PETSC_BASE> & b) - { - Mat & A_ = A.getMat(); - const Vec & b_ = b.getVec(); - Vec & x_ = x.getVec(); - - PETSC_SAFE_CALL(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE)); - - pre_solve_impl(A_,b_,x_); - solve_simple(A_,b_,x_); - x.update(); - - return true; - } - /*! 
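In the MUMPS-based branch above, the null-space basis returned in the dense matrix V still has to be copied into PETSc Vec objects before it can be handed to MatNullSpaceCreate; MatGetColumnVector writes into an already created vector, so each basis vector needs to be allocated first. One possible way to fill the basis, sketched with the same names (N, V, A_) and assuming <vector> is available:

std::vector<Vec> ns_basis(N);
for (PetscInt i = 0; i < N; i++)
{
    PETSC_SAFE_CALL(MatCreateVecs(A_, &ns_basis[i], NULL));   // allocate a vector compatible with A_
    PETSC_SAFE_CALL(MatGetColumnVector(V, ns_basis[i], i));   // copy column i of the MUMPS basis
    PETSC_SAFE_CALL(VecNormalize(ns_basis[i], NULL));         // MatNullSpaceCreate expects an orthonormal set
}
// ns_basis.data() can then be passed to MatNullSpaceCreate as in the code above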
\brief Here we invert the matrix and solve the system * * \param b vector diff --git a/src/interpolation/interpolation_unit_tests.cpp b/src/interpolation/interpolation_unit_tests.cpp index 10844c4d..27038520 100644 --- a/src/interpolation/interpolation_unit_tests.cpp +++ b/src/interpolation/interpolation_unit_tests.cpp @@ -17,6 +17,8 @@ #include "interpolation.hpp" #include <boost/math/special_functions/pow.hpp> #include <Vector/vector_dist.hpp> +#include <Operators/Vector/vector_dist_operators.hpp> +#include <FiniteDifference/FD_op.hpp> #include <Grid/grid_dist_id.hpp> BOOST_AUTO_TEST_SUITE( interpolation_test ) @@ -801,6 +803,447 @@ BOOST_AUTO_TEST_CASE( int_kernel_test ) BOOST_REQUIRE_SMALL(tot,0.001f); } +BOOST_AUTO_TEST_CASE( int_kernel_test_double) +{ + mp4_kernel<double> mp4; + + double tot = 0.0; + + // Check momenta 0 + + tot += mp4.value(-1.3,0); + tot += mp4.value(-0.3,1); + tot += mp4.value(0.7,2); + tot += mp4.value(1.7,3); + + BOOST_REQUIRE_CLOSE(tot,1.0,0.001); + + // Check momenta 1 + + tot = 0.0; + + tot += -1.3*mp4.value(-1.3,0); + tot += -0.3*mp4.value(-0.3,1); + tot += 0.7*mp4.value(0.7,2); + tot += 1.7*mp4.value(1.7,3); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 2 + + tot = 0.0; + + tot += (1.3)*(1.3)*mp4.value(-1.3,0); + tot += (0.3)*(0.3)*mp4.value(-0.3,1); + tot += (0.7)*(0.7)*mp4.value(0.7,2); + tot += (1.7)*(1.7)*mp4.value(1.7,3); + + BOOST_REQUIRE_SMALL(tot,0.001); + + + //////// Check zeta 1 + + tot = 0.0; + + z_kernel<double,1> zk1; + + tot += zk1.value(-0.3,0); + tot += zk1.value(0.7,1); + + BOOST_REQUIRE_CLOSE(tot,1.0,0.001); + + //////// zeta 2 is equivalent to mp4 we do not test + + //////// zeta 3 + + z_kernel<double,3> zk3; + + tot = 0.0; + + // Check momenta 0 + + tot += zk3.value(-2.3,0); + tot += zk3.value(-1.3,1); + tot += zk3.value(-0.3,2); + tot += zk3.value(0.7,3); + tot += zk3.value(1.7,4); + tot += zk3.value(2.7,5); + + BOOST_REQUIRE_CLOSE(tot,1.0,0.001); + + // Check momenta 1 + + tot = 0.0; + + tot += -2.3*zk3.value(-2.3,0); + tot += -1.3*zk3.value(-1.3,1); + tot += -0.3*zk3.value(-0.3,2); + tot += 0.7*zk3.value(0.7,3); + tot += 1.7*zk3.value(1.7,4); + tot += 2.7*zk3.value(2.7,5); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 2 + + tot = 0.0; + + tot += 2.3*2.3*zk3.value(-2.3,0); + tot += 1.3*1.3*zk3.value(-1.3,1); + tot += 0.3*0.3*zk3.value(-0.3,2); + tot += 0.7*0.7*zk3.value(0.7,3); + tot += 1.7*1.7*zk3.value(1.7,4); + tot += 2.7*2.7*zk3.value(2.7,5); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 3 + + tot = 0.0; + + tot += -2.3*-2.3*-2.3*zk3.value(-2.3,0); + tot += -1.3*-1.3*-1.3*zk3.value(-1.3,1); + tot += -0.3*-0.3*-0.3*zk3.value(-0.3,2); + tot += 0.7*0.7*0.7*zk3.value(0.7,3); + tot += 1.7*1.7*1.7*zk3.value(1.7,4); + tot += 2.7*2.7*2.7*zk3.value(2.7,5); + + BOOST_REQUIRE_SMALL(tot,0.001); + + + // z4 + + z_kernel<double,4> zk4; + + // Check momenta 0 + + tot = 0.0; + + tot += zk4.value(-3.3,0); + tot += zk4.value(-2.3,1); + tot += zk4.value(-1.3,2); + tot += zk4.value(-0.3,3); + tot += zk4.value(0.7,4); + tot += zk4.value(1.7,5); + tot += zk4.value(2.7,6); + tot += zk4.value(3.7,7); + + BOOST_REQUIRE_CLOSE(tot,1.0,0.001); + + // Check momenta 1 + + tot = 0.0; + + tot += -3.3*zk4.value(-3.3,0); + tot += -2.3*zk4.value(-2.3,1); + tot += -1.3*zk4.value(-1.3,2); + tot += -0.3*zk4.value(-0.3,3); + tot += 0.7*zk4.value(0.7,4); + tot += 1.7*zk4.value(1.7,5); + tot += 2.7*zk4.value(2.7,6); + tot += 3.7*zk4.value(3.7,7); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 2 + + tot = 0.0; + + tot += 
3.3*3.3*zk4.value(-3.3,0); + tot += 2.3*2.3*zk4.value(-2.3,1); + tot += 1.3*1.3*zk4.value(-1.3,2); + tot += 0.3*0.3*zk4.value(-0.3,3); + tot += 0.7*0.7*zk4.value(0.7,4); + tot += 1.7*1.7*zk4.value(1.7,5); + tot += 2.7*2.7*zk4.value(2.7,6); + tot += 3.7*3.7*zk4.value(3.7,7); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 3 + + tot = 0.0; + + tot += -3.3*-3.3*-3.3*zk4.value(-3.3,0); + tot += -2.3*-2.3*-2.3*zk4.value(-2.3,1); + tot += -1.3*-1.3*-1.3*zk4.value(-1.3,2); + tot += -0.3*-0.3*-0.3*zk4.value(-0.3,3); + tot += 0.7*0.7*0.7*zk4.value(0.7,4); + tot += 1.7*1.7*1.7*zk4.value(1.7,5); + tot += 2.7*2.7*2.7*zk4.value(2.7,6); + tot += 3.7*3.7*3.7*zk4.value(3.7,7); + + BOOST_REQUIRE_SMALL(tot,0.001); + + // Check momenta 4 + + tot = 0.0; + + tot += -3.3*-3.3*-3.3*-3.3*zk4.value(-3.3,0); + tot += -2.3*-2.3*-2.3*-2.3*zk4.value(-2.3,1); + tot += -1.3*-1.3*-1.3*-1.3*zk4.value(-1.3,2); + tot += -0.3*-0.3*-0.3*-0.3*zk4.value(-0.3,3); + tot += 0.7*0.7*0.7*0.7*zk4.value(0.7,4); + tot += 1.7*1.7*1.7*1.7*zk4.value(1.7,5); + tot += 2.7*2.7*2.7*2.7*zk4.value(2.7,6); + tot += 3.7*3.7*3.7*3.7*zk4.value(3.7,7); + + BOOST_REQUIRE_SMALL(tot,0.001); +} + +/* +BOOST_AUTO_TEST_CASE(InterpolationConvergenceP2M) +{ + size_t res; + std::cout<<"Enter Res:"; + std::cin>>res; + const size_t sz[2] = {res,res}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {PERIODIC, PERIODIC}; + double spacing[2]; + spacing[0] = 2 *M_PI / (sz[0]); + spacing[1] = 2 *M_PI / (sz[1]); + Ghost<2,long int> gg(3); + double rCut = 3.0 * spacing[0]; + Ghost<2, double> ghost(rCut); + + vector_dist<2, double, aggregate<double, double>> particles(0, box,bc,ghost); + grid_dist_id<2, double, aggregate<double, double>> gd(particles.getDecomposition(),sz,gg); + double sigma2 = spacing[0] / (40.0); + std::normal_distribution<> gaussian{0, sigma2}; + std::mt19937 rng{6666666}; + auto it = particles.getGridIterator(sz); + while (it.isNext()) { + particles.add(); + auto key = it.get(); + double x=key.get(0) * spacing[0] + gaussian(rng); + double y=key.get(1) * spacing[1] + gaussian(rng); + particles.getLastPos()[0] = x; + particles.getLastPos()[1] = y; + // Here fill the function value + particles.template getLastProp<0>() = sin(particles.getLastPos()[0]) + sin(particles.getLastPos()[0]); + ++it; + } + particles.map(); + particles.ghost_get<0>(); + + auto itG=gd.getDomainIterator(); + while(itG.isNext()) + { + auto key=itG.get(); + gd.template getProp<1>(key) = sin(gd.getPos(key)[0]) + sin(gd.getPos(key)[0]); + ++itG; + } + + particles.write("InitP"); + gd.write("Grid"); + + auto Pu=getV<0>(particles); + auto Gu=FD::getV<0>(gd); + typedef vector_dist<2, double, aggregate<double, double>> particle_type; + typedef grid_dist_id<2, double, aggregate<double, double>> gd_type; + typedef z_kernel<double,4> kerneltype; //mp4_kernel<double> + typedef lambda4_4kernel<double> kerneltype2; + interpolate<particle_type,gd_type,kerneltype2> inte2m(particles,gd); + Gu=0; + gd.ghost_get<0>(); + inte2m.template p2m<0,0>(particles,gd); + gd.template ghost_put<add_,0>(); + gd.ghost_get<0>(); + particles.write("InitPAfter"); + gd.write("GridAfter"); + + + auto it2 = gd.getDomainIterator(); + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(gd.template getProp<1>(p) - gd.template getProp<0>(p)) > worst) { + worst = fabs(gd. 
template getProp<1>(p) - gd.template getProp<0>(p)); + } + ++it2; + } + std::cout<<worst<<std::endl; + //BOOST_REQUIRE(worst < 0.03); +} + +BOOST_AUTO_TEST_CASE(InterpolationConvergenceM2P) +{ + size_t res; + std::cout<<"Enter Res:"; + std::cin>>res; + const size_t sz[2] = {res,res}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {PERIODIC, PERIODIC}; + double spacing[2]; + spacing[0] = 2 *M_PI / (sz[0]); + spacing[1] = 2 *M_PI / (sz[1]); + Ghost<2,long int> gg(3); + double rCut = 3.0 * spacing[0]; + Ghost<2, double> ghost(rCut); + + vector_dist<2, double, aggregate<double, double>> particles(0, box,bc,ghost); + grid_dist_id<2, double, aggregate<double, double>> gd(particles.getDecomposition(),sz,gg); + double sigma2 = spacing[0] * spacing[1]; + std::normal_distribution<> gaussian{0, sigma2}; + std::mt19937 rng{6666666}; + auto it = particles.getGridIterator(sz); + while (it.isNext()) { + particles.add(); + auto key = it.get(); + double x=key.get(0) * spacing[0] + gaussian(rng); + double y=key.get(1) * spacing[1] + gaussian(rng); + particles.getLastPos()[0] = x; + particles.getLastPos()[1] = y; + // Here fill the function value + particles.template getLastProp<0>() = 0; + particles.template getLastProp<1>() = sin(particles.getLastPos()[0]) + sin(particles.getLastPos()[0]); + ++it; + } + particles.map(); + particles.ghost_get<0>(); + + auto itG=gd.getDomainIterator(); + while(itG.isNext()) + { + auto key=itG.get(); + gd.template getProp<1>(key) = sin(gd.getPos(key)[0]) + sin(gd.getPos(key)[0]); + ++itG; + } + + particles.write("InitP"); + gd.write("Grid"); + + auto Pu=getV<0>(particles); + auto Gu=FD::getV<0>(gd); + typedef vector_dist<2, double, aggregate<double, double>> particle_type; + typedef grid_dist_id<2, double, aggregate<double, double>> gd_type; + typedef z_kernel<double,4> kerneltype; //mp4_kernel<double> + typedef lambda4_4kernel<double> kerneltype2; + interpolate<particle_type,gd_type,kerneltype2> inte2m(particles,gd); + Gu=0; + gd.ghost_get<0>(); + inte2m.template m2p<1,0>(gd,particles); + particles.ghost_get<0>(); + particles.write("InitPAfter"); + gd.write("GridAfter"); + + auto it2 = particles.getDomainIterator(); + + double worst = 0.0; + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(particles.template getProp<1>(p) - particles.template getProp<0>(p)) > worst) { + worst = fabs(particles. 
template getProp<1>(p) - particles.template getProp<0>(p)); + } + ++it2; + } + std::cout<<worst<<std::endl; + //BOOST_REQUIRE(worst < 0.03); + +} + + + +BOOST_AUTO_TEST_CASE(InterpolationMoving) +{ + + size_t res; + std::cin>>res; + const size_t sz[2] = {res,res}; + Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI}); + size_t bc[2] = {PERIODIC, PERIODIC}; + double spacing[2]; + spacing[0] = 2 *M_PI / (sz[0]); + spacing[1] = 2 *M_PI / (sz[1]); + Ghost<2,long int> gg(3); + double rCut = 3.0 * spacing[0]; + Ghost<2, double> ghost(rCut); + + vector_dist<2, double, aggregate<double, VectorS<2, double>>> particles(0, box,bc,ghost),particlesMoved(0, box,bc,ghost); + grid_dist_id<2, double, aggregate<double, VectorS<2, double>>> gd(particles.getDecomposition(),sz,gg); + + auto it = particles.getGridIterator(sz); + while (it.isNext()) { + particles.add(); + auto key = it.get(); + double x=key.get(0) * spacing[0]; + double y=key.get(1) * spacing[1]; + particles.getLastPos()[0] = x; + particles.getLastPos()[1] = y; + // Here fill the function value + particles.template getLastProp<1>() = 1.0; + particles.template getLastProp<1>() = 0; + particles.template getLastProp<0>() = 0; + if((x-3.14)*(x-3.14)+(y-3.14)*(y-3.14)<1) + { + particles.template getLastProp<0>() = 1; + } + ++it; + } + particles.map(); + particles.ghost_get<0>(); + + particles.write("InitP"); + gd.write("Grid"); + + auto Pu=getV<0>(particles); + auto Pmu=getV<0>(particlesMoved); + auto Gu=FD::getV<0>(gd); + typedef vector_dist<2, double, aggregate<double, VectorS<2, double>>> vd; + typedef grid_dist_id<2, double, aggregate<double, VectorS<2, double>>> gd_type; + interpolate<vd,gd_type,mp4_kernel<double>> inte2m(particlesMoved,gd); + interpolate<vd,gd_type,mp4_kernel<double>> inte2p(particles,gd); + double t=0,dt=0.5; + int ctr=0; + while(t<10) + { + particlesMoved.clear(); + auto it=particles.getDomainIterator(); + while(it.isNext()) + { + auto p=it.get(); + double xp=particles.getPos(p)[0],yp=particles.getPos(p)[1]; + particlesMoved.add(); + particlesMoved.getLastPos()[0] = xp+dt*particles.getProp<1>(p)[0]; + particlesMoved.getLastPos()[1] = yp+dt*particles.getProp<1>(p)[1]; + particlesMoved.getLastProp<0>() = particles.getProp<0>(p); + ++it; + } + particlesMoved.map(); + particlesMoved.ghost_get<0>(); + Gu=0; + gd.ghost_get<0>(); + inte2m.template p2m<0,0>(particlesMoved,gd); + gd.template ghost_put<add_,0>(); + gd.ghost_get<0>(); + Pu=0; + inte2p.template m2p<0,0>(gd,particles); + particles.write_frame("InitP",ctr); + gd.write_frame("Grid",ctr); + ctr++; + t+=dt; + }*/ + +/* + + auto it2 = domain.getDomainIterator(); + + double worst = 0.0; + + while (it2.isNext()) { + auto p = it2.get(); + if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) { + worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p)); + } + ++it2; + } +*/ + + // domain.deleteGhost(); + // BOOST_REQUIRE(worst < 0.03); + +//} BOOST_AUTO_TEST_SUITE_END() diff --git a/src/interpolation/lambda_kernel.hpp b/src/interpolation/lambda_kernel.hpp index 1460d641..de447199 100644 --- a/src/interpolation/lambda_kernel.hpp +++ b/src/interpolation/lambda_kernel.hpp @@ -8,17 +8,17 @@ #include <iostream> template<typename st> -double horner(const std::array<double,10> &v, st x) +double horner(const double *v, st x) { st s = 0; for(int i=9; i>=0; i--) s = v[i] + (s * x); return s; } - -constexpr std::array<double,10> c1={(1.0 * 12.0) , (0.0 * 12.0) , -(5.0 * 3.0) , (0.0 * 12.0) , (1.0 * 3.0) , -(100.0 * 4.0) , (455.0 * 3.0) , -(295.0 * 6.0) , (345.0 * 3.0) , -(115.0 * 2.0) 
}; -constexpr std::array<double,10> c2={-(199.0 * 24.0) , (5485.0 * 6.0) , -(32975.0 * 3.0) , (28425.0 * 6.0) , -(61953.0 * 3.0) , (33175.0 * 4.0) , -(20685.0 * 3.0) , (3055.0 * 6.0) , -(1035.0 * 3.0) , (115.0 * 2.0) }; -constexpr std::array<double,10> c3={(5913.0 * 24.0) , -(89235.0 * 6.0) , (297585.0 * 3.0) , -(143895.0 * 6.0) , (177871.0 * 3.0) , -(54641.0 * 4.0) , (19775.0 * 3.0) , -(1715.0 * 6.0) , (345.0 * 3.0) , -(23.0 * 2.0)}; +//These needs to be Checked +double c1[10]={(1.0 * 12.0) , (0.0 * 12.0) , -(5.0 * 3.0) , (0.0 * 12.0) , (1.0 * 3.0) , -(100.0 * 4.0) , (455.0 * 3.0) , -(295.0 * 6.0) , (345.0 * 3.0) , -(115.0 * 2.0) }; +double c2[10]={-(199.0 * 24.0) , (5485.0 * 6.0) , -(32975.0 * 3.0) , (28425.0 * 6.0) , -(61953.0 * 3.0) , (33175.0 * 4.0) , -(20685.0 * 3.0) , (3055.0 * 6.0) , -(1035.0 * 3.0) , (115.0 * 2.0) }; +double c3[10]={(5913.0 * 24.0) , -(89235.0 * 6.0) , (297585.0 * 3.0) , -(143895.0 * 6.0) , (177871.0 * 3.0) , -(54641.0 * 4.0) , (19775.0 * 3.0) , -(1715.0 * 6.0) , (345.0 * 3.0) , -(23.0 * 2.0)}; template<typename st> @@ -44,4 +44,33 @@ public: } }; + +template<typename st> +double horner22(const double *v, st x) +{ + st s = 0; + for(int i=5; i>=0; i--) + s = v[i] + (s * x); + return s; +} + +double c221[6]={1.0,0.0,-1.0,-4.5,7.5,-3.0}; +double c222[6]={-4.0,18.0,-29.0,21.5,-7.5,1.0}; + + +template<typename st> +class lambda2_2kernel +{ +public: + static const int np = 6; + static inline st value(st x, size_t i) + { + if (i == 0) + return horner22(c221, -x); + else if (i == 1) + return horner22(c222, -x); + return 0.0; + } +}; + #endif //OPENFPM_PDATA_LAMBDAKERNEL_HPP diff --git a/src/level_set/closest_point/closest_point.hpp b/src/level_set/closest_point/closest_point.hpp index 293ae493..f99f4e2a 100644 --- a/src/level_set/closest_point/closest_point.hpp +++ b/src/level_set/closest_point/closest_point.hpp @@ -17,6 +17,7 @@ #ifndef __CLOSEST_POINT_HPP__ #define __CLOSEST_POINT_HPP__ +#include "Grid/grid_dist_key.hpp" #include "algoim_hocp.hpp" // Width of extra padding around each grid patch needed to correctly construct kDTree in Algoim. @@ -26,21 +27,20 @@ constexpr int algoim_padding = 4; * * @file closest_point.hpp * @struct AlgoimWrapper - * @tparam grid_type Type of the grid container - * @tparam grid_key_type Type of the key for the grid container - * @tparam dim Dimension of the space * @tparam wrapping_field Property id on the grid for the field to be wrapped + * @tparam grid_type Type of the grid container + * */ - -template<typename grid_type, typename grid_key_type, unsigned int dim, size_t wrapping_field> +template<size_t wrapping_field, typename grid_type, typename wrapping_field_type = typename boost::mpl::at<typename grid_type::value_type::type,boost::mpl::int_<wrapping_field>>::type> struct AlgoimWrapper { + const static unsigned int dim = grid_type::dims; grid_type &gd; int patch_id; AlgoimWrapper(grid_type& ls_grid, const int pid) : gd(ls_grid), patch_id(pid) {} //! Call operator for the wrapper. 
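The lambda interpolation kernels evaluate their piecewise polynomial weights with Horner's rule: horner(v, x) and horner22(v, x) compute v[0] + x*(v[1] + x*(...)), folding from the highest-order coefficient down. A self-contained illustration with a short hypothetical coefficient set:

#include <cstdio>

static double horner_demo(const double *c, int n, double x)
{
    double s = 0.0;
    for (int i = n - 1; i >= 0; i--)
        s = c[i] + s * x;                        // fold from the highest coefficient down
    return s;
}

int main()
{
    const double c[4] = {1.0, -2.0, 0.0, 3.0};   // 1 - 2x + 3x^3
    std::printf("%f\n", horner_demo(c, 4, 0.5)); // prints 0.375
}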
- double operator() (const blitz::TinyVector<int,dim> idx) const + double operator() (const blitz::TinyVector<int, dim> idx) const { long int local_key[dim]; @@ -50,28 +50,168 @@ struct AlgoimWrapper local_key[d] = idx(d) - algoim_padding; // Generate OpenFPM grid_key object from local grid indices - grid_key_type grid_key(patch_id, grid_key_dx<dim> (local_key) + ghost_offset); + grid_dist_key_dx<dim> grid_key(patch_id, grid_key_dx<dim> (local_key) + ghost_offset); return gd.template get<wrapping_field>(grid_key); } + + template<size_t extend_field_temp, int poly_order, typename coord_type, typename dx_type, typename pos_type, typename key_type> + void extend(coord_type coord, dx_type dx, pos_type pos, key_type key) { + + using Poly = typename Algoim::StencilPoly<dim, poly_order>::T_Poly; + + Poly field_poly = Poly(coord, *this, dx); + // Extension is first done to the temporary field. Otherwise interpolation will be affected. + gd.template get<extend_field_temp>(key) = field_poly(pos); + } + +}; + +template<size_t wrapping_field, typename grid_type, typename wrapping_field_type, size_t N1> +struct AlgoimWrapper<wrapping_field,grid_type,wrapping_field_type[N1]> +{ + const static unsigned int dim = grid_type::dims; + grid_type &gd; + int patch_id; + size_t comp_i; + AlgoimWrapper(grid_type& ls_grid, const int pid) : gd(ls_grid), patch_id(pid) {} + + //! Call operator for the wrapper. + double operator() (const blitz::TinyVector<int, dim> idx) const + { + long int local_key[dim]; + + auto ghost_offset = gd.getLocalGridsInfo().get(patch_id).Dbox.getKP1(); + + for (int d = 0; d < dim; ++d) + local_key[d] = idx(d) - algoim_padding; + + // Generate OpenFPM grid_key object from local grid indices + grid_dist_key_dx<dim> grid_key(patch_id, grid_key_dx<dim> (local_key) + ghost_offset); + + return gd.template get<wrapping_field>(grid_key)[comp_i]; + } + + template<size_t extend_field_temp, int poly_order, typename coord_type, typename dx_type, typename pos_type, typename key_type> + void extend(coord_type coord, dx_type dx, pos_type pos, key_type key) { + + using Poly = typename Algoim::StencilPoly<dim, poly_order>::T_Poly; + + for (int i = 0; i < N1; ++i) { + comp_i = i; + Poly field_poly = Poly(coord, *this, dx); + // Extension is first done to the temporary field. Otherwise interpolation will be affected. + gd.template get<extend_field_temp>(key)[i] = field_poly(pos); + } + } +}; + +template<size_t wrapping_field, typename grid_type, typename wrapping_field_type, size_t N1, size_t N2> +struct AlgoimWrapper<wrapping_field,grid_type,wrapping_field_type[N1][N2]> +{ + const static unsigned int dim = grid_type::dims; + grid_type &gd; + int patch_id; + size_t comp_i, comp_j; + AlgoimWrapper(grid_type& ls_grid, const int pid) : gd(ls_grid), patch_id(pid) {} + + //! Call operator for the wrapper. 
+ double operator() (const blitz::TinyVector<int, dim> idx) const + { + long int local_key[dim]; + + auto ghost_offset = gd.getLocalGridsInfo().get(patch_id).Dbox.getKP1(); + + for (int d = 0; d < dim; ++d) + local_key[d] = idx(d) - algoim_padding; + + // Generate OpenFPM grid_key object from local grid indices + grid_dist_key_dx<dim> grid_key(patch_id, grid_key_dx<dim> (local_key) + ghost_offset); + + return gd.template get<wrapping_field>(grid_key)[comp_i][comp_j]; + } + + template<size_t extend_field_temp, int poly_order, typename coord_type, typename dx_type, typename pos_type, typename key_type> + void extend(coord_type coord, dx_type dx, pos_type pos, key_type key) { + + using Poly = typename Algoim::StencilPoly<grid_type::dims, poly_order>::T_Poly; + + for (int i = 0; i < N1; ++i) { + for (int j = 0; j < N2; ++j) { + comp_i = i; + comp_j = j; + Poly field_poly = Poly(coord, *this, dx); + // Extension is first done to the temporary field. Otherwise interpolation will be affected. + gd.template get<extend_field_temp>(key)[i][j] = field_poly(pos); + } + } + } +}; + +template<size_t wrapping_field, typename grid_type, typename wrapping_field_type, size_t N1, size_t N2, size_t N3> +struct AlgoimWrapper<wrapping_field,grid_type,wrapping_field_type[N1][N2][N3]> +{ + const static unsigned int dim = grid_type::dims; + grid_type &gd; + int patch_id; + size_t comp_i, comp_j, comp_k; + AlgoimWrapper(grid_type& ls_grid, const int pid) : gd(ls_grid), patch_id(pid) {} + + //! Call operator for the wrapper. + double operator() (const blitz::TinyVector<int, dim> idx) const + { + long int local_key[dim]; + + auto ghost_offset = gd.getLocalGridsInfo().get(patch_id).Dbox.getKP1(); + + for (int d = 0; d < dim; ++d) + local_key[d] = idx(d) - algoim_padding; + + // Generate OpenFPM grid_key object from local grid indices + grid_dist_key_dx<dim> grid_key(patch_id, grid_key_dx<dim> (local_key) + ghost_offset); + + return gd.template get<wrapping_field>(grid_key)[comp_i][comp_j][comp_k]; + } + + template<size_t extend_field_temp, int poly_order, typename coord_type, typename dx_type, typename pos_type, typename key_type> + void extend(coord_type coord, dx_type dx, pos_type pos, key_type key) { + + using Poly = typename Algoim::StencilPoly<grid_type::dims, poly_order>::T_Poly; + + for (int i = 0; i < N1; ++i) { + for (int j = 0; j < N2; ++j) { + for (int k = 0; k < N3; ++k) { + comp_i = i; + comp_j = j; + comp_k = k; + Poly field_poly = Poly(coord, *this, dx); + // Extension is first done to the temporary field. Otherwise interpolation will be affected. + gd.template get<extend_field_temp>(key)[i][j][k] = field_poly(pos); + } + } + } + } }; + /**@brief Computes the closest point coordinate for each grid point within nb_gamma from interface. 
* + * @tparam phi_field Property id on grid for the level set SDF (input) + * @tparam cp_field Property id on grid for storing closest point coordinates (output) + * @tparam poly_order Type of stencil interpolation (Taylor poly orders between 2 to 5 and Tri/bicubic through -1 is supported) * @tparam grid_type Type of the grid container - * @tparam grid_key_type Type of the key for the grid container - * @tparam dim Dimension of the space - * @tparam poly_order Order of the polynomial for stencil interpolation (orders between 2 to 5 is supported) - * @tparam phi_field Property id on grid for the level set SDF - * @tparam cp_field Property id on grid for storing closest point coordinates * * @param gd The distributed grid containing at least level set SDF field and placeholder for closest point coordinates * @param nb_gamma The width of the narrow band within which closest point estimation is to be done + * */ -template<typename grid_type, typename grid_key_type, unsigned int poly_order, size_t phi_field, size_t cp_field> +template<size_t phi_field, size_t cp_field, int poly_order, typename grid_type> void estimateClosestPoint(grid_type &gd, const double nb_gamma) { const unsigned int dim = grid_type::dims; + // Update the phi field in ghosts + gd.template ghost_get<phi_field>(KEEP_PROPERTIES); + // Stencil polynomial type using Poly = typename Algoim::StencilPoly<dim, poly_order>::T_Poly; @@ -93,7 +233,7 @@ void estimateClosestPoint(grid_type &gd, const double nb_gamma) p_hi.set_d(d, patches.get(i).Dbox.getHigh(d) + patches.get(i).origin[d]); } - AlgoimWrapper<grid_type, grid_key_type, dim, phi_field> phiwrap(gd, i); + AlgoimWrapper<phi_field, grid_type> phiwrap(gd, i); // Find all cells containing the interface and construct the high-order polynomials std::vector<Algoim::detail::CellPoly<dim,Poly>> cells; @@ -111,8 +251,10 @@ void estimateClosestPoint(grid_type &gd, const double nb_gamma) Algoim::KDTree<double,dim> kdtree(points); + // In order to ensure that CP is estimated for all points in the narrowband, we add a buffer to the distance check. + double nb_gamma_plus_dx = nb_gamma + gd.spacing(0); // Pass everything to the closest point computation engine - Algoim::ComputeHighOrderCP<dim,Poly> hocp(nb_gamma < std::numeric_limits<double>::max() ? nb_gamma*nb_gamma : std::numeric_limits<double>::max(), // squared bandradius + Algoim::ComputeHighOrderCP<dim,Poly> hocp(nb_gamma_plus_dx < std::numeric_limits<double>::max() ? 
nb_gamma_plus_dx*nb_gamma_plus_dx : std::numeric_limits<double>::max(), // squared bandradius 0.5*blitz::max(dx), // amount that each polynomial overlaps / size of the bounding ball in Newton's method Algoim::sqr(std::max(1.0e-14, std::pow(blitz::max(dx), Poly::order))), // tolerance to determine convergence cells, kdtree, points, pointcells, dx, 0.0); @@ -121,7 +263,7 @@ void estimateClosestPoint(grid_type &gd, const double nb_gamma) while(it.isNext()) { auto key = it.get(); - if(std::abs(gd.template get<phi_field>(key)) <= nb_gamma) + if(std::abs(gd.template get<phi_field>(key)) < nb_gamma) { auto key_g = gd.getGKey(key); // NOTE: This is not the real grid coordinates, but internal coordinates for algoim @@ -136,8 +278,13 @@ void estimateClosestPoint(grid_type &gd, const double nb_gamma) } else { + std::cout<<"WARN: Closest point computation fails at : "; for(int d = 0; d < dim; ++d) + { + std::cout<<key_g.get(d)<<" "; gd.template get<cp_field>(key)[d] = -100.0; + } + std::cout<<"\n"; } } ++it; @@ -148,22 +295,23 @@ void estimateClosestPoint(grid_type &gd, const double nb_gamma) /**@brief Extends a (scalar) field to within nb_gamma from interface. The grid should have level set SDF and closest point field. * - * @tparam grid_type Type of the grid container - * @tparam grid_key_type Type of the key for the grid container - * @tparam dim Dimension of the space - * @tparam poly_order Order of the polynomial for stencil interpolation * @tparam phi_field Property id on grid for the level set SDF * @tparam cp_field Property id on grid for storing closest point coordinates * @tparam extend_field Property id on grid where the field to be extended resides * @tparam extend_field_temp Property id on grid for storing temporary intermediate values + * @tparam poly_order Type of stencil interpolation (Taylor poly orders between 2 to 5 and Tri/bicubic through -1 is supported) + * @tparam grid_type Type of the grid container * * @param gd The distributed grid containing atleast level set SDF field and closest point coordinates - * @param nb_gamma The width of the narrow band within which extension is required + * @param nb_gamma The width of the narrow band within which extension is required (half band) */ -template<typename grid_type, typename grid_key_type, unsigned int poly_order, size_t phi_field, size_t cp_field, size_t extend_field, size_t extend_field_temp> +template<size_t phi_field, size_t cp_field, size_t extend_field, size_t extend_field_temp, int poly_order, typename grid_type> void extendLSField(grid_type &gd, const double nb_gamma) { const unsigned int dim = grid_type::dims; + // Update the phi and cp fields in ghost + gd.template ghost_get<phi_field, cp_field, extend_field>(KEEP_PROPERTIES); + // Stencil polynomial object using Poly = typename Algoim::StencilPoly<dim, poly_order>::T_Poly; auto &patches = gd.getLocalGridsInfo(); @@ -181,7 +329,7 @@ void extendLSField(grid_type &gd, const double nb_gamma) p_lo.set_d(d, patches.get(i).Dbox.getLow(d) + patches.get(i).origin[d]); p_hi.set_d(d, patches.get(i).Dbox.getHigh(d) + patches.get(i).origin[d]); } - + auto it = gd.getSubDomainIterator(p_lo, p_hi); while(it.isNext()) @@ -199,44 +347,45 @@ void extendLSField(grid_type &gd, const double nb_gamma) pos(d) = cp_d - coord(d)*gd.spacing(d); } - AlgoimWrapper<grid_type, grid_key_type, dim, extend_field> fieldwrap(gd,i); - Poly field_poly = Poly(coord, fieldwrap, dx); - // Extension is first done to the temporary field. Otherwise interpolation will be affected. 
- gd.template get<extend_field_temp>(key) = field_poly(pos); + AlgoimWrapper<extend_field, grid_type> fieldwrap(gd,i); + fieldwrap.template extend<extend_field_temp,poly_order>(coord,dx,pos,key); + // Poly field_poly = Poly(coord, fieldwrap, dx); + // // Extension is first done to the temporary field. Otherwise interpolation will be affected. + // gd.template get<extend_field_temp>(key) = field_poly(pos); } ++it; } } - + // Copy the results to the actual variable + typedef typename boost::mpl::at<typename grid_type::value_type::type,boost::mpl::int_<extend_field>>::type type_to_copy; auto it = gd.getDomainIterator(); while(it.isNext()) { auto key = it.get(); if(std::abs(gd.template get<phi_field>(key)) < nb_gamma) - gd.template get<extend_field>(key) = gd.template get<extend_field_temp>(key); + meta_copy<type_to_copy>::meta_copy_(gd.template get<extend_field_temp>(key),gd.template get<extend_field>(key)); ++it; } } /**@brief Reinitializes the level set Phi field on a grid. The grid should have level set SDF and closest point field. * - * @tparam grid_type Type of the grid container - * @tparam grid_key_type Type of the key for the grid container - * @tparam dim Dimension of the space - * @tparam poly_order Order of the polynomial for stencil interpolation * @tparam phi_field Property id on grid for the level set SDF * @tparam cp_field Property id on grid for storing closest point coordinates - * + * @tparam grid_type Type of the grid container + * @param gd The distributed grid containing atleast level set SDF field and closest point coordinates * @param nb_gamma The width of the narrow band for reinitialization */ -template<typename grid_type, typename grid_key_type, unsigned int poly_order, size_t phi_field, size_t cp_field> +template<size_t phi_field, size_t cp_field, typename grid_type> void reinitializeLS(grid_type &gd, const double nb_gamma) { const unsigned int dim = grid_type::dims; - // Stencil polynomial object - using Poly = typename Algoim::StencilPoly<dim, poly_order>::T_Poly; + + // Update the cp_field in ghost + gd.template ghost_get<cp_field>(KEEP_PROPERTIES); + auto &patches = gd.getLocalGridsInfo(); blitz::TinyVector<double,dim> dx; for(int d = 0; d < dim; ++d) @@ -271,6 +420,9 @@ void reinitializeLS(grid_type &gd, const double nb_gamma) // NOTE: This is not the real grid coordinates, but internal coordinates used for algoim double patch_pos = (key_g.get(d) - p_lo.get(d) + algoim_padding) * gd.spacing(d); double cp_d = gd.template get<cp_field>(key)[d]; + if(cp_d == -100.0) + std::cout<<"WARNING: Requesting closest point on nodes where it was not computed."<<std::endl; + distance += ((patch_pos - cp_d)*(patch_pos - cp_d)); } distance = sqrt(distance); diff --git a/src/level_set/closest_point/closest_point_unit_tests.cpp b/src/level_set/closest_point/closest_point_unit_tests.cpp index 30bcd7f6..9e2f10b2 100644 --- a/src/level_set/closest_point/closest_point_unit_tests.cpp +++ b/src/level_set/closest_point/closest_point_unit_tests.cpp @@ -5,6 +5,7 @@ #include<iostream> #include <boost/test/unit_test_log.hpp> +#include <cmath> #define BOOST_TEST_DYN_LINK #include <boost/test/unit_test.hpp> #include <iostream> @@ -28,21 +29,17 @@ typedef struct EllipseParameters{ } EllipseParams; // Generate an ellipsoid initial levelset signed distance function -template<typename grid_type, typename domain_type, size_t phi_field> -void initializeLSEllipsoid(grid_type &gd, const domain_type &domain, const EllipseParams ¶ms) +template<size_t phi_field, typename grid_type> +void 
initializeLSEllipsoid(grid_type &gd, const EllipseParams ¶ms) { auto it = gd.getDomainIterator(); - double dx = gd.getSpacing()[0]; - double dy = gd.getSpacing()[1]; - double dz = gd.getSpacing()[2]; while(it.isNext()) { auto key = it.get(); - auto key_g = gd.getGKey(key); - - double posx = key_g.get(0)*dx + domain.getLow(0); - double posy = key_g.get(1)*dy + domain.getLow(1); - double posz = key_g.get(2)*dz + domain.getLow(2); + Point<grid_type::dims, double> coords = gd.getPos(key); + double posx = coords.get(0); + double posy = coords.get(1); + double posz = coords.get(2); // NOTE: Except for a sphere, this is not the SDF. It is just an implicit function whose zero contour is an ellipsoid. double phi_val = 1.0 - sqrt(((posx - params.origin[0])/params.radiusA)*((posx - params.origin[0])/params.radiusA) + ((posy - params.origin[1])/params.radiusB)*((posy - params.origin[1])/params.radiusB) + ((posz - params.origin[2])/params.radiusC)*((posz - params.origin[2])/params.radiusC)); @@ -51,6 +48,36 @@ void initializeLSEllipsoid(grid_type &gd, const domain_type &domain, const Elli } } +// Initialize a scalar field or grid points near the interface +template<const unsigned int phi, const unsigned int field, typename grid_type> +void initializeScalarField3D(grid_type &gd, double init_width) +{ + auto it = gd.getDomainIterator(); + + // Trying with a L_1 and L_2 spherical harmonics as initial condition for scalar_field + double prefactor_l1 = std::sqrt(2.0/(4.0*M_PI)); + //double prefactor_l2 = std::sqrt(5.0/(16.0*M_PI)); + + while(it.isNext()) + { + auto key = it.get(); + if(gd.template get<phi>(key) < init_width) + { + auto coords = gd.getPos(key); + double posx = coords.get(0); + double posy = coords.get(1); + double posz = coords.get(2); + + double theta = std::atan2(std::sqrt(posx*posx + posy*posy), posz); + + gd.template get<field>(key) = prefactor_l1 * std::cos(theta); + //gd.template get<field>(key) = prefactor_l2 * (3.0 * std::cos(theta) * std::cos(theta) - 1.0); + } + ++it; + } + +} + BOOST_AUTO_TEST_SUITE( closest_point_test ) @@ -100,12 +127,10 @@ BOOST_AUTO_TEST_CASE( closest_point_unit_sphere ) nb_gamma = narrow_band_half_width * gdist.spacing(0); // Initializes the grid property 'phi' whose zero contour represents the ellipsoid - initializeLSEllipsoid<GridDist, Box<SIM_DIM,double>, phi>(gdist, domain, params); - gdist.template ghost_get<phi>(); + initializeLSEllipsoid<phi>(gdist, params); // Updates the property 'cp' of the grid to the closest point coords (only done in the narrowband). - estimateClosestPoint<GridDist, GridKey, POLY_ORDER, phi, cp>(gdist, nb_gamma); - gdist.template ghost_get<cp>(); + estimateClosestPoint<phi, cp, POLY_ORDER>(gdist, nb_gamma); // Estimate error in closest point estimation auto &patches = gdist.getLocalGridsInfo(); @@ -127,7 +152,6 @@ BOOST_AUTO_TEST_CASE( closest_point_unit_sphere ) if(std::abs(gdist.template get<phi>(key)) < nb_gamma) { - auto key_g = gdist.getGKey(key); // Computed closest point coordinates. // Note: This is patch coordinates not the real one. double cpx = gdist.template get<cp>(key)[x]; @@ -143,9 +167,10 @@ BOOST_AUTO_TEST_CASE( closest_point_unit_sphere ) double estim_pz = domain.getLow(z) + (p_zlo - algoim_padding)*gdist.spacing(z) + cpz; // Global coordinate of the selected grid point. 
- double posx = key_g.get(0)*gdist.spacing(0) + domain.getLow(0); - double posy = key_g.get(1)*gdist.spacing(1) + domain.getLow(1); - double posz = key_g.get(2)*gdist.spacing(2) + domain.getLow(2); + Point<GridDist::dims, double> coords = gdist.getPos(key); + double posx = coords.get(0); + double posy = coords.get(1); + double posz = coords.get(2); double norm = sqrt(posx*posx + posy*posy + posz*posz); // Analytically known closest point coordinate for unit sphere. @@ -213,15 +238,12 @@ BOOST_AUTO_TEST_CASE( reinitialization_unit_sphere ) nb_gamma = narrow_band_half_width * gdist.spacing(0); - initializeLSEllipsoid<GridDist, Box<SIM_DIM,double>, phi>(gdist, domain, params); - gdist.template ghost_get<phi>(); + initializeLSEllipsoid<phi>(gdist, params); - estimateClosestPoint<GridDist, GridKey, POLY_ORDER, phi, cp>(gdist, nb_gamma); - gdist.template ghost_get<cp>(); + estimateClosestPoint<phi, cp, POLY_ORDER>(gdist, nb_gamma); // Reinitialize the level set function stored in property 'phi' based on closest points in 'cp' - reinitializeLS<GridDist, GridKey, POLY_ORDER, phi, cp>(gdist, nb_gamma); - gdist.template ghost_get<phi>(); + reinitializeLS<phi, cp>(gdist, nb_gamma); // Estimate error in closest point estimation auto &patches = gdist.getLocalGridsInfo(); @@ -242,11 +264,11 @@ BOOST_AUTO_TEST_CASE( reinitialization_unit_sphere ) if(std::abs(gdist.template get<phi>(key)) < nb_gamma) { - auto key_g = gdist.getGKey(key); // Global grid coordinate - double posx = key_g.get(0)*gdist.spacing(0) + domain.getLow(0); - double posy = key_g.get(1)*gdist.spacing(1) + domain.getLow(1); - double posz = key_g.get(2)*gdist.spacing(2) + domain.getLow(2); + Point<GridDist::dims, double> coords = gdist.getPos(key); + double posx = coords.get(0); + double posy = coords.get(1); + double posz = coords.get(2); // Analytically computed signed distance // NOTE: SDF convention here is positive inside and negative outside the sphere @@ -269,4 +291,111 @@ BOOST_AUTO_TEST_CASE( reinitialization_unit_sphere ) } + +BOOST_AUTO_TEST_CASE( extension_unit_sphere ) +{ + + constexpr int SIM_DIM = 3; + constexpr int POLY_ORDER = 5; + constexpr int SIM_GRID_SIZE = 128; + + // Fields - phi, cp, scalar_field, scalar_field_temp + using GridDist = grid_dist_id<SIM_DIM,double,aggregate<double,double[SIM_DIM],double,double>>; + using GridKey = grid_dist_key_dx<SIM_DIM>; + + // Grid size on each dimension + const long int sz[SIM_DIM] = {SIM_GRID_SIZE, SIM_GRID_SIZE, SIM_GRID_SIZE}; + const size_t szu[SIM_DIM] = {(size_t) sz[0], (size_t) sz[1], (size_t) sz[2]}; + + // 3D physical domain + Box<SIM_DIM,double> domain({-1.5,-1.5,-1.5},{1.5,1.5,1.5}); + + constexpr int x = 0; + constexpr int y = 1; + constexpr int z = 2; + + // Alias for properties on the grid + constexpr int phi = 0; + constexpr int cp = 1; + constexpr int scalar_field = 2; + constexpr int scalar_field_temp = 3; + + double nb_gamma = 0.0; + + periodicity<SIM_DIM> grid_bc = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC}; + // Ghost in grid units + Ghost <SIM_DIM, long int> grid_ghost(2*narrow_band_half_width); + GridDist gdist(szu, domain, grid_ghost, grid_bc); + + EllipseParams params; + params.origin[x] = 0.0; + params.origin[y] = 0.0; + params.origin[z] = 0.0; + params.radiusA = 1.0; + params.radiusB = 1.0; + params.radiusC = 1.0; + + nb_gamma = narrow_band_half_width * gdist.spacing(0); + + initializeLSEllipsoid<phi>(gdist, params); + + estimateClosestPoint<phi, cp, POLY_ORDER>(gdist, nb_gamma); + + // Reinitialize the level set function stored in property 'phi' based 
on closest points in 'cp' + reinitializeLS<phi, cp>(gdist, nb_gamma); + + // Initialize a scalar field close to interface + initializeScalarField3D<phi,scalar_field>(gdist, 4*gdist.spacing(0)); + + // Extension to the full narrow band + extendLSField<phi, cp, scalar_field, scalar_field_temp, -1>(gdist, nb_gamma); + double prefactor_l1 = std::sqrt(2.0/(4.0*M_PI)); + + // Estimate error in closest point estimation + auto &patches = gdist.getLocalGridsInfo(); + double max_error = -1.0; + for(int i = 0; i < patches.size();i++) + { + auto p_xlo = patches.get(i).Dbox.getLow(0) + patches.get(i).origin[0]; + auto p_xhi = patches.get(i).Dbox.getHigh(0) + patches.get(i).origin[0]; + auto p_ylo = patches.get(i).Dbox.getLow(1) + patches.get(i).origin[1]; + auto p_yhi = patches.get(i).Dbox.getHigh(1) + patches.get(i).origin[1]; + auto p_zlo = patches.get(i).Dbox.getLow(2) + patches.get(i).origin[2]; + auto p_zhi = patches.get(i).Dbox.getHigh(2) + patches.get(i).origin[2]; + + auto it = gdist.getSubDomainIterator({p_xlo, p_ylo, p_zlo}, {p_xhi, p_yhi, p_zhi}); + while(it.isNext()) + { + auto key = it.get(); + + if(std::abs(gdist.template get<phi>(key)) < nb_gamma) + { + // Global grid coordinate + auto coords = gdist.getPos(key); + double posx = coords.get(0); + double posy = coords.get(1); + double posz = coords.get(2); + + double theta = std::atan2(std::sqrt(posx*posx + posy*posy), posz); + // Analytically computed signed distance + // NOTE: SDF convention here is positive inside and negative outside the sphere + double exact_val = prefactor_l1 * std::cos(theta); + + max_error = std::max({std::abs(exact_val - gdist.template get<scalar_field>(key)), max_error}); + } + ++it; + } + } + std::cout<<"Extension error : "<<max_error<<std::endl; + double tolerance = 1e-5; + bool check; + if (std::abs(max_error) < tolerance) + check = true; + else + check = false; + + BOOST_TEST( check ); + +} + BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/util/SphericalHarmonics.hpp b/src/util/SphericalHarmonics.hpp index b900dbb3..eed0ae43 100644 --- a/src/util/SphericalHarmonics.hpp +++ b/src/util/SphericalHarmonics.hpp @@ -2,7 +2,8 @@ // Created by Abhinav Singh on 03.11.20. // - +#ifndef SPHERICALHARMONICS_HPP_ +#define SPHERICALHARMONICS_HPP_ //#include "util/util_debug.hpp" #include <boost/math/special_functions/spherical_harmonic.hpp> @@ -204,19 +205,19 @@ namespace openfpm { return openfpm::math::DYdPhi(n, m, theta, phi, boost::math::policies::policy<>()); } - double sph_A1(int l,int m,double v1, double vr) { + inline double sph_A1(int l,int m,double v1, double vr) { return 0.5 * (1 + l) * (l * v1 - vr); } - double sph_A2(int l,int m,double v1, double vr) { + inline double sph_A2(int l,int m,double v1, double vr) { return 0.5 * ((1 + l) * (-l) * v1 + (l + 3) * vr); } - double sph_B(int l, int m,double v2) { + inline double sph_B(int l, int m,double v2) { return v2; } - double sph_A3(int l,int m,double v1, double vr) { + inline double sph_A3(int l,int m,double v1, double vr) { if (m == 1){ return 0.5 *l* ((1 + l)*v1 - vr)-1.5*sph_A2(l,m,v1,vr); } @@ -225,7 +226,7 @@ namespace openfpm { } } - double sph_A4(int l,int m,double v1, double vr) { + inline double sph_A4(int l,int m,double v1, double vr) { if (m == 1){ return 0.5* (-l*(1 + l)*v1 + (2-l)*vr)+0.5*sph_A2(l,m,v1,vr); } @@ -246,7 +247,7 @@ namespace openfpm { * \return std::vector containing the spherical harmonic amplitudes (ur,u1,u2,p) for the solution at r for mode l,m. 
* */ - std::vector<double> sph_anasol_u(double nu,int l,int m,double vr,double v1,double v2,double r) { + inline std::vector<double> sph_anasol_u(double nu,int l,int m,double vr,double v1,double v2,double r) { double ur,u1,u2,p; if(l==0) { @@ -362,4 +363,6 @@ namespace openfpm { } -} \ No newline at end of file +} + +#endif /* SPHERICALHARMONICS_HPP_ */ -- GitLab
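Note on the kernel evaluation in lambda_kernel.hpp: both horner() and horner22() evaluate their coefficient arrays with Horner's rule, iterating from the highest index down, so v[0] is the constant term and the last entry is the leading coefficient. The following standalone sketch only illustrates that evaluation order; the cubic and its coefficients are made up for the example and are not one of the kernel coefficient sets in the patch.

#include <cassert>
#include <cmath>

// Same recurrence as in lambda_kernel.hpp: s = v[i] + s*x for i = n-1 .. 0,
// which evaluates p(x) = v[0] + v[1]*x + ... + v[n-1]*x^(n-1).
template<typename st>
st horner_n(const double *v, int n, st x)
{
    st s = 0;
    for (int i = n - 1; i >= 0; i--)
        s = v[i] + (s * x);
    return s;
}

int main()
{
    // Illustrative cubic p(x) = 1 + 2x - 3x^2 + 0.5x^3 (assumption, not a patch kernel)
    const double coeffs[4] = {1.0, 2.0, -3.0, 0.5};
    double x = 0.75;
    double direct = 1.0 + 2.0*x - 3.0*x*x + 0.5*x*x*x;
    assert(std::fabs(horner_n(coeffs, 4, x) - direct) < 1e-12);
    return 0;
}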
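Note on the reworked closest-point interface in closest_point.hpp: the property ids and the polynomial order now lead the template parameter lists, and the required ghost_get updates are performed inside estimateClosestPoint, reinitializeLS and extendLSField. The sketch below only shows the call sequence, mirroring the extension_unit_sphere test above; the include paths, the aggregate property layout (phi, cp, field, temporary buffer) and the choice POLY_ORDER = 5 are illustrative assumptions, not fixed by the patch.

#include "Grid/grid_dist_id.hpp"
#include "level_set/closest_point/closest_point.hpp"

// Assumed property layout: 0 = level-set phi, 1 = closest-point coords, 2 = field, 3 = temp buffer
using GridDist = grid_dist_id<3, double, aggregate<double, double[3], double, double>>;

constexpr int phi = 0, cp = 1, field = 2, field_temp = 3;
constexpr int POLY_ORDER = 5;   // Taylor orders 2 to 5, or -1 for tri/bicubic stencils

void closest_point_workflow(GridDist &gdist, double nb_gamma)
{
    // 1. Closest-point coordinates for every node with |phi| < nb_gamma
    //    (the phi ghost layer is updated inside the call)
    estimateClosestPoint<phi, cp, POLY_ORDER>(gdist, nb_gamma);

    // 2. Rebuild phi as a signed distance from the closest points
    reinitializeLS<phi, cp>(gdist, nb_gamma);

    // 3. Extend a surface field into the narrow band via the temporary property
    extendLSField<phi, cp, field, field_temp, POLY_ORDER>(gdist, nb_gamma);
}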