diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 06d55055afd565161b6677a0651a00d9df266da2..01440817d846274da44339cdbd3a5116cc77843b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ endif() if (NOT CUDA_ON_BACKEND STREQUAL "None") set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu + DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu) endif() @@ -286,7 +287,6 @@ install(FILES DCPSE/Dcpse.hpp DCPSE/DCPSE_op/DCPSE_Solver.cuh DCPSE/DcpseDiagonalScalingMatrix.hpp DCPSE/DcpseRhs.hpp - DCPSE/DcpseRhs.cuh DCPSE/Monomial.hpp DCPSE/Monomial.cuh DCPSE/MonomialBasis.hpp @@ -294,7 +294,6 @@ install(FILES DCPSE/Dcpse.hpp DCPSE/SupportBuilder.cuh DCPSE/SupportBuilder.hpp DCPSE/Vandermonde.hpp - DCPSE/Vandermonde.cuh DCPSE/VandermondeRowBuilder.hpp DCPSE/DcpseInterpolation.hpp DESTINATION openfpm_numerics/include/DCPSE diff --git a/src/DCPSE/DCPSE_op/EqnsStruct.hpp b/src/DCPSE/DCPSE_op/EqnsStruct.hpp index 8540537b536c3eb6dc1dc309407781054af9dc77..aa55df23158bd74dc2a98ceefefcd3c19b99c6bf 100644 --- a/src/DCPSE/DCPSE_op/EqnsStruct.hpp +++ b/src/DCPSE/DCPSE_op/EqnsStruct.hpp @@ -36,18 +36,524 @@ struct equations2d1 { typedef petsc_solver<double> solver_type; }; -//! Specify the general characteristic of system to solve +struct equations2d2 { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d1p { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d2p { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + + +struct equations2d3p { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d3 { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d4 { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 4; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3 { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d1 { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pyz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pxz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d1Pz { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + + +#ifdef __NVCC__ struct equations2d1_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims=2; //! number of fields in the system - static const unsigned int nvar=1; + static const unsigned int nvar=1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d2_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d1p_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d2p_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 2; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d3p_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d3_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations2d4_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 2; + //! number of fields in the system + static const unsigned int nvar = 4; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, .. + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d1_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 1; + + //! boundary at X and Y + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double, PETSC_BASE> Vector_type; + + typedef petsc_solver<double> solver_type; +}; + +struct equations3d3Pz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; - //! type of space float, double, ... + //! type of space float, double, .. typedef double stype; //! type of base particles @@ -62,20 +568,20 @@ struct equations2d1_gpu { typedef petsc_solver<double> solver_type; }; -struct equations2d2 { +struct equations3d3Pyz_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 2; + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, PERIODIC,PERIODIC}; - //! type of space float, double, ... + //! type of space float, double, .. typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; @@ -86,20 +592,20 @@ struct equations2d2 { typedef petsc_solver<double> solver_type; }; -struct equations2d2_gpu { +struct equations3d3Pxz_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 2; + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; - //! type of space float, double, ... + //! type of space float, double, .. typedef double stype; //! type of base particles - typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; @@ -110,14 +616,14 @@ struct equations2d2_gpu { typedef petsc_solver<double> solver_type; }; -struct equations2d1p { +struct equations3d1Pz_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims = 3; //! number of fields in the system static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -133,39 +639,45 @@ struct equations2d1p { typedef petsc_solver<double> solver_type; }; +#endif //__NVCC__ + +#endif //HAVE_PETSC + + +//! Specify the general characteristic of system to solve +struct equations2d1E { -struct equations2d1p_gpu { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 2; + static const unsigned int dims=2; //! number of fields in the system - static const unsigned int nvar = 1; + static const unsigned int nvar=1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; //! type of base particles - typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations2d2p { +struct equations2d2E { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system static const unsigned int nvar = 2; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -174,23 +686,22 @@ struct equations2d2p { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; - -struct equations2d3p { +struct equations2d3E { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -199,19 +710,19 @@ struct equations2d3p { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations2d3 { +struct equations2d4E { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 4; //! boundary at X and Y static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; @@ -223,22 +734,23 @@ struct equations2d3 { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations2d4 { + +struct equations2d1pE { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 4; + static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -247,47 +759,46 @@ struct equations2d4 { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; - -struct equations3d3 { +struct equations2d2pE { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 3; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 2; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC}; - //! type of space float, double, .. + //! type of space float, double, ... typedef double stype; //! type of base particles typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations3d1 { +struct equations2d3pE { //! dimensionaly of the equation ( 3D problem ...) - static const unsigned int dims = 3; + static const unsigned int dims = 2; //! number of fields in the system - static const unsigned int nvar = 1; + static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -296,22 +807,22 @@ struct equations3d1 { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations3d3Pz { +struct equations3d3E { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system static const unsigned int nvar = 3; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; //! type of space float, double, .. typedef double stype; @@ -320,38 +831,39 @@ struct equations3d3Pz { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations3d3Pyz { +struct equations3d1E { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, PERIODIC,PERIODIC}; + static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,NON_PERIODIC}; - //! type of space float, double, .. + //! type of space float, double, ... typedef double stype; //! type of base particles typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; + struct equations3d3EPxz { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; @@ -400,14 +912,14 @@ struct equations3d3EPz { typedef umfpack_solver<double> solver_type; }; -struct equations3d3Pxz { +struct equations3d1Pxy { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system - static const unsigned int nvar = 3; + static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC,NON_PERIODIC}; //! type of space float, double, .. typedef double stype; @@ -424,14 +936,14 @@ struct equations3d3Pxz { typedef petsc_solver<double> solver_type; }; -struct equations3d1Pz { +struct equations3d1EPxy { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system static const unsigned int nvar = 1; //! boundary at X and Y - static constexpr bool boundary[]={NON_PERIODIC, NON_PERIODIC,PERIODIC}; + static constexpr bool boundary[]={PERIODIC, PERIODIC,NON_PERIODIC}; //! type of space float, double, ... typedef double stype; @@ -440,18 +952,17 @@ struct equations3d1Pz { typedef vector_dist<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -#endif -//! Specify the general characteristic of system to solve -struct equations2d1E { +#ifdef __NVCC__ +struct equations2d1E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims=2; @@ -465,7 +976,7 @@ struct equations2d1E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -476,7 +987,7 @@ struct equations2d1E { typedef umfpack_solver<double> solver_type; }; -struct equations2d2E { +struct equations2d2E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -489,7 +1000,7 @@ struct equations2d2E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -500,7 +1011,7 @@ struct equations2d2E { typedef umfpack_solver<double> solver_type; }; -struct equations2d3E { +struct equations2d3E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -513,7 +1024,7 @@ struct equations2d3E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -524,7 +1035,7 @@ struct equations2d3E { typedef umfpack_solver<double> solver_type; }; -struct equations2d4E { +struct equations2d4E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -537,7 +1048,7 @@ struct equations2d4E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -549,7 +1060,7 @@ struct equations2d4E { }; -struct equations2d1pE { +struct equations2d1pE_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -562,7 +1073,7 @@ struct equations2d1pE { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -573,7 +1084,7 @@ struct equations2d1pE { typedef umfpack_solver<double> solver_type; }; -struct equations2d2pE { +struct equations2d2pE_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -586,7 +1097,7 @@ struct equations2d2pE { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -597,7 +1108,7 @@ struct equations2d2pE { typedef umfpack_solver<double> solver_type; }; -struct equations2d3pE { +struct equations2d3pE_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 2; //! number of fields in the system @@ -610,7 +1121,7 @@ struct equations2d3pE { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -621,7 +1132,7 @@ struct equations2d3pE { typedef umfpack_solver<double> solver_type; }; -struct equations3d3E { +struct equations3d3E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system @@ -634,7 +1145,7 @@ struct equations3d3E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -645,7 +1156,7 @@ struct equations3d3E { typedef umfpack_solver<double> solver_type; }; -struct equations3d1E { +struct equations3d1E_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system @@ -658,7 +1169,7 @@ struct equations3d1E { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -669,7 +1180,55 @@ struct equations3d1E { typedef umfpack_solver<double> solver_type; }; -struct equations3d1Pxy { +struct equations3d3EPxz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, NON_PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d3EPz_gpu { + //! dimensionaly of the equation ( 3D problem ...) + static const unsigned int dims = 3; + //! number of fields in the system + static const unsigned int nvar = 3; + + //! boundary at X and Y + static constexpr bool boundary[]={PERIODIC, PERIODIC,PERIODIC}; + + //! type of space float, double, ... + typedef double stype; + + //! type of base particles + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; + + //! type of SparseMatrix for the linear solver + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; + + //! type of Vector for the linear solver + typedef Vector<double> Vector_type; + + typedef umfpack_solver<double> solver_type; +}; + +struct equations3d1Pxy_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system @@ -682,18 +1241,18 @@ struct equations3d1Pxy { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver - typedef SparseMatrix<double, int, PETSC_BASE> SparseMatrix_type; + typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; //! type of Vector for the linear solver - typedef Vector<double, PETSC_BASE> Vector_type; + typedef Vector<double> Vector_type; - typedef petsc_solver<double> solver_type; + typedef umfpack_solver<double> solver_type; }; -struct equations3d1EPxy { +struct equations3d1EPxy_gpu { //! dimensionaly of the equation ( 3D problem ...) static const unsigned int dims = 3; //! number of fields in the system @@ -706,7 +1265,7 @@ struct equations3d1EPxy { typedef double stype; //! type of base particles - typedef vector_dist<dims, double, aggregate<double>> b_part; + typedef vector_dist_gpu<dims, double, aggregate<double>> b_part; //! type of SparseMatrix for the linear solver typedef SparseMatrix<double, int, EIGEN_BASE> SparseMatrix_type; @@ -716,4 +1275,6 @@ struct equations3d1EPxy { typedef umfpack_solver<double> solver_type; }; +#endif //__NVCC__ + #endif //OPENFPM_PDATA_EQNSSTRUCT_HPP diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..7903f53e6fe798d3f446cc066c24ecc1aa68992b --- /dev/null +++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu @@ -0,0 +1,617 @@ +/* + * DCPSE_op_test.cpp + * + * Created on: May 15, 2020 + * Author: Abhinav Singh + * + */ +#include "config.h" +#ifdef HAVE_EIGEN +#ifdef HAVE_PETSC + + +#define BOOST_TEST_DYN_LINK + +#include "util/util_debug.hpp" +#include <boost/test/unit_test.hpp> +#include <iostream> +#include "../DCPSE_op.hpp" +#include "../DCPSE_Solver.hpp" +#include "../DCPSE_Solver.cuh" +#include "Operators/Vector/vector_dist_operators.hpp" +#include "Vector/vector_dist_subset.hpp" +#include "../EqnsStruct.hpp" + +//template<typename T> +//struct Debug; + + + + +BOOST_AUTO_TEST_SUITE(dcpse_op_subset_suite_tests_cu) + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_tests) { + size_t edgeSemiSize = 40; + const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize}; + Box<2, double> box({0, 0}, {1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + + vector_dist_ws_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles(0, box, + bc, + ghost); + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + std::mt19937 rng{6666666}; + + std::normal_distribution<> gaussian{0, sigma2}; + +// openfpm::vector<aggregate<int>> bulk; +// openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + size_t pointId = 0; + size_t counter = 0; + double minNormOne = 999; + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double x = k0 * spacing[0]; + Particles.getLastPos()[0] = x;//+ gaussian(rng); + mem_id k1 = key.get(1); + double y = k1 * spacing[1]; + Particles.getLastPos()[1] = y;//+gaussian(rng); + // Here fill the function value + Particles.template getLastProp<0>() = sin(Particles.getLastPos()[0]) + sin(Particles.getLastPos()[1]); + + if (k0 != 0 && k1 != 0 && k0 != sz[0] -1 && k1 != sz[1] - 1) + { +// bulk.add(); +// bulk.template get<0>(bulk.size()-1) = Particles.size_local() - 1; + + Particles.getLastSubset(0); + } + else + { +// boundary.add(); +// boundary.template get<0>(boundary.size()-1) = Particles.size_local() - 1; + Particles.getLastSubset(1); + } + + + ++counter; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + + Particles.map(); + Particles.ghost_get<0>(); + + auto git = Particles.getGhostIterator(); + + while (git.isNext()) + { + auto p = git.get(); + + Particles.template getProp<0>(p) = std::numeric_limits<double>::quiet_NaN(); + + ++git; + } + + vector_dist_subset_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles_bulk(Particles,0); + vector_dist_subset_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double>> Particles_boundary(Particles,1); + auto & boundary = Particles_boundary.getIds(); + + // move particles + auto P = getV<0>(Particles); + auto Out = getV<1>(Particles); + auto Pb = getV<2>(Particles); + auto Out_V = getV<3>(Particles); + + + auto P_bulk = getV<2>(Particles_bulk); + auto Out_bulk = getV<1>(Particles_bulk); + auto Out_V_bulk = getV<3>(Particles_bulk); + Out=10; + P_bulk = 5; + + P_bulk=Pb+Out; +// Particles.write("Test_output_subset"); + + // Create the subset + + /* Derivative_x Dx(Particles, 2, rCut); + Derivative_y Dy(Particles, 2, rCut); + Derivative_x Dx_bulk(Particles_bulk, 2, rCut); +*/ + Derivative_x_gpu Dx_bulk(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Dy_bulk(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + + Out_bulk = Dx_bulk(P); + Out_V_bulk[0] = P + Dx_bulk(P); + Out_V_bulk[1] = Out_V[0] +Dy_bulk(P); + + // Check + bool is_nan = false; + + auto & v_cl = create_vcluster(); + if (v_cl.size() > 1) + { + auto it2 = Particles_bulk.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + /* BOOST_REQUIRE_EQUAL(Particles_bulk.getProp<2>(p),15.0); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.005 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[0] - Particles_bulk.getProp<0>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.001 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[1] - Particles_bulk.getProp<3>(p)[0] - cos(Particles_bulk.getPos(p)[1])) < 0.001 );*/ + + is_nan |= std::isnan(Particles_bulk.template getProp<1>(p)); + + // Particles_bulk.template getProp<0>(p) = fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])); + + ++it2; + } + + BOOST_REQUIRE_EQUAL(is_nan,true); + } + +// P_bulk = Dx_bulk(P_bulk); <------------ Incorrect produce error message +// P = Dx_bulk(P); <------- Incorrect produce overflow + + Particles.ghost_get<0>(); + + for (int i = 0 ; i < boundary.size() ; i++) + { + Particles.template getProp<0>(boundary.template get<0>(i)) = std::numeric_limits<double>::quiet_NaN(); + } + + Particles.ghost_get<0>(); + + Out_bulk = Dx_bulk(P); + Out_V_bulk[0] = P + Dx_bulk(P); + Out_V_bulk[1] = Out_V[0] +Dy_bulk(P); + + auto it2 = Particles_bulk.getDomainIterator(); + while (it2.isNext()) + { + auto p = it2.get(); + + BOOST_REQUIRE_EQUAL(Particles_bulk.getProp<2>(p),15.0); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<1>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.005 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[0] - Particles_bulk.getProp<0>(p) - cos(Particles_bulk.getPos(p)[0])) < 0.001 ); + BOOST_REQUIRE(fabs(Particles_bulk.getProp<3>(p)[1] - Particles_bulk.getProp<3>(p)[0] - cos(Particles_bulk.getPos(p)[1])) < 0.001 ); + + ++it2; + } + + + + } + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_PC_lid) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + constexpr int x = 0; + constexpr int y = 1; + size_t edgeSemiSize = 20; + const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<2, double> box({0, 0}, {1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + double sigma2 = spacing[0] * spacing[1] / (2 * 4); + auto &v_cl = create_vcluster(); + + typedef aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double> particle_type; + + vector_dist_ws_gpu<2, double, particle_type> Particles(0, box,bc,ghost); + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + +// openfpm::vector<aggregate<int>> bulk; +// openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double xp0 = k0 * spacing[0]; + Particles.getLastPos()[0] = xp0; + mem_id k1 = key.get(1); + double yp0 = k1 * spacing[1]; + Particles.getLastPos()[1] = yp0; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + Particles.map(); + Particles.ghost_get<0>(); + + auto it2 = Particles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = Particles.getPos(p); + if (xp[0] != 0 && xp[1] != 0 && xp[0] != 1.0 && xp[1] != 1.0) { +// bulk.add(); +// bulk.last().get<0>() = p.getKey(); + Particles.setSubset(p,0); + Particles.getProp<3>(p)[x] = 3.0; + Particles.getProp<3>(p)[y] = 3.0; + } else { +// boundary.add(); +// boundary.last().get<0>() = p.getKey(); + Particles.setSubset(p,1); + Particles.getProp<3>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<3>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + } + Particles.getProp<6>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<6>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + Particles.getProp<7>(p) = xp[0]+xp[1]-1.0; + + ++it2; + } + + vector_dist_subset_gpu<2, double, particle_type> Particles_bulk(Particles,0); + vector_dist_subset_gpu<2, double, particle_type> Particles_boundary(Particles,1); + auto & bulk = Particles_bulk.getIds(); + auto & boundary = Particles_boundary.getIds(); + + auto P = getV<0>(Particles); + auto V = getV<1>(Particles); + auto RHS = getV<2>(Particles); + auto dV = getV<3>(Particles); + auto div = getV<4>(Particles); + auto V_star = getV<5>(Particles); + + + auto P_bulk = getV<0>(Particles_bulk); + auto RHS_bulk =getV<2>(Particles_bulk); + + P_bulk = 0; + + Derivative_x_gpu Dx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_xx_gpu Dxx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_yy_gpu Dyy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Dy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_x_gpu Bulk_Dx(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y_gpu Bulk_Dy(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS); + + int n = 0, nmax = 5, ctr = 0, errctr=1, Vreset = 0; + double V_err=1; + if (Vreset == 1) { + P_bulk = 0; + P = 0; + Vreset = 0; + } + P=0; + eq_id vx,vy; + vx.setId(0); + vy.setId(1); + double sum, sum1, sum_k,V_err_eps=1e-3,V_err_old; + auto Stokes1=Dxx(V[x])+Dyy(V[x]); + auto Stokes2=Dxx(V[y])+Dyy(V[y]); + + petsc_solver<double> solverPetsc; + //solverPetsc.setSolver(KSPGMRES); + //solverPetsc.setRestart(250); + //solverPetsc.setPreconditioner(PCJACOBI); + V_star=0; + RHS[x] = dV[x]; + RHS[y] = dV[y]; + while (V_err >= V_err_eps && n <= nmax) { + Particles.ghost_get<0>(SKIP_LABELLING); + RHS_bulk[x] = dV[x] + Bulk_Dx(P); + RHS_bulk[y] = dV[y] + Bulk_Dy(P); + DCPSE_scheme_gpu<equations2d2_gpu, decltype(Particles)> Solver(Particles); + Solver.impose(Stokes1, bulk, RHS[0], vx); + Solver.impose(Stokes2, bulk, RHS[1], vy); + Solver.impose(V[x], boundary, RHS[0], vx); + Solver.impose(V[y], boundary, RHS[1], vy); + + /*auto A=Solver.getA(options_solver::STANDARD); + //A.getMatrixTriplets().save("Tripletes"); + A.write("Mat_lid");*/ + + Solver.solve_with_solver(solverPetsc, V[x], V[y]); + Particles.ghost_get<1>(SKIP_LABELLING); + div = -(Dx(V[x]) + Dy(V[y])); + P_bulk = P + div; + sum = 0; + sum1 = 0; + + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + sum += (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) * + (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) + + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]) * + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]); + sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] + + Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1]; + } + + sum = sqrt(sum); + sum1 = sqrt(sum1); + V_star = V; + v_cl.sum(sum); + v_cl.sum(sum1); + v_cl.execute(); + V_err_old = V_err; + V_err = sum / sum1; + if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) { + errctr++; + //alpha_P -= 0.1; + } else { + errctr = 0; + } + if (n > 3) { + if (errctr > 3) { + std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN ERROR" << std::endl; + Vreset = 1; + break; + } else { + Vreset = 0; + } + } + n++; + if (v_cl.rank() == 0) { + std::cout << "Rel l2 cgs err in V = " << V_err << " at " << n << std::endl; + } + } + double worst1 = 0.0; + double worst2 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]) >= worst1) { + worst1 = fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]); + } + } + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]) >= worst2) { + worst2 = fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]); + } + } + //Particles.deleteGhost(); + //Particles.write("PC_subset_lid"); + std::cout << "Maximum Analytic Error in Vx: " << worst1 << std::endl; + std::cout << "Maximum Analytic Error in Vy: " << worst2 << std::endl; + BOOST_REQUIRE(worst1 < 0.03); + BOOST_REQUIRE(worst2 < 0.03); + + } + + + BOOST_AUTO_TEST_CASE(dcpse_op_subset_PC_lid2) { +// int rank; +// MPI_Comm_rank(MPI_COMM_WORLD, &rank); + constexpr int x = 0; + constexpr int y = 1; + size_t edgeSemiSize = 20; + const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1}; + Box<2, double> box({0, 0}, {1.0,1.0}); + size_t bc[2] = {NON_PERIODIC, NON_PERIODIC}; + double spacing[2]; + spacing[0] = 1.0 / (sz[0] - 1); + spacing[1] = 1.0 / (sz[1] - 1); + double rCut = 3.9 * spacing[0]; + int ord = 2; + double sampling_factor = 4.0; + Ghost<2, double> ghost(rCut); + BOOST_TEST_MESSAGE("Init vector_dist..."); + auto &v_cl = create_vcluster(); + + vector_dist<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double>> Particles(0, box, + bc, + ghost); + vector_dist<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,VectorS<2, double>,double>> Particles_subset(Particles.getDecomposition(), 0); + + + //Init_DCPSE(Particles) + BOOST_TEST_MESSAGE("Init Particles..."); + + openfpm::vector<aggregate<int>> bulk; + openfpm::vector<aggregate<int>> boundary; + + auto it = Particles.getGridIterator(sz); + size_t pointId = 0; + double minNormOne = 999; + while (it.isNext()) + { + Particles.add(); + auto key = it.get(); + mem_id k0 = key.get(0); + double xp0 = k0 * spacing[0]; + Particles.getLastPos()[0] = xp0; + mem_id k1 = key.get(1); + double yp0 = k1 * spacing[1]; + Particles.getLastPos()[1] = yp0; + ++it; + } + BOOST_TEST_MESSAGE("Sync Particles across processors..."); + Particles.map(); + Particles.ghost_get<0>(); + auto it2 = Particles.getDomainIterator(); + while (it2.isNext()) { + auto p = it2.get(); + Point<2, double> xp = Particles.getPos(p); + if (xp[0] != 0 && xp[1] != 0 && xp[0] != 1.0 && xp[1] != 1.0) { + bulk.add(); + bulk.last().get<0>() = p.getKey(); + Particles.getProp<3>(p)[x] = 3.0; + Particles.getProp<3>(p)[y] = 3.0; + } else { + boundary.add(); + boundary.last().get<0>() = p.getKey(); + Particles.getProp<3>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<3>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + } + Particles.getProp<6>(p)[x] = xp[0]*xp[0]+xp[1]*xp[1]; + Particles.getProp<6>(p)[y] = xp[0]*xp[0]-2*xp[0]*xp[1]; + Particles.getProp<7>(p) = xp[0]+xp[1]-1.0; + + ++it2; + } + + for (int i = 0; i < bulk.size(); i++) { + Particles_subset.add(); + Particles_subset.getLastPos()[0] = Particles.getPos(bulk.template get<0>(i))[0]; + Particles_subset.getLastPos()[1] = Particles.getPos(bulk.template get<0>(i))[1]; + } + Particles_subset.map(); + Particles_subset.ghost_get<0>(); + + + + auto P = getV<0>(Particles); + auto V = getV<1>(Particles); + auto RHS = getV<2>(Particles); + auto dV = getV<3>(Particles); + auto div = getV<4>(Particles); + auto V_star = getV<5>(Particles); + + auto P_bulk = getV<0>(Particles_subset); + auto Grad_bulk= getV<2>(Particles_subset); + + P_bulk = 0; + + Derivative_x Dx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_x Bulk_Dx(Particles_subset, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_xx Dxx(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_yy Dyy(Particles, 2, rCut,sampling_factor, support_options::RADIUS); + Derivative_y Dy(Particles, 2, rCut,sampling_factor, support_options::RADIUS),Bulk_Dy(Particles_subset, 2, rCut,sampling_factor, support_options::RADIUS);; + + int n = 0, nmax = 5, ctr = 0, errctr=0, Vreset = 0; + double V_err=1; + if (Vreset == 1) { + P_bulk = 0; + P = 0; + Vreset = 0; + } + P=0; + eq_id vx,vy; + vx.setId(0); + vy.setId(1); + double sum, sum1, sum_k,V_err_eps=1e-3,V_err_old; + auto Stokes1=Dxx(V[x])+Dyy(V[x]); + auto Stokes2=Dxx(V[y])+Dyy(V[y]); + + petsc_solver<double> solverPetsc; + //solverPetsc.setSolver(KSPGMRES); + //solverPetsc.setRestart(250); + //solverPetsc.setPreconditioner(PCJACOBI); + V_star=0; + while (V_err >= V_err_eps && n <= nmax) { + RHS[x] = dV[x]; + RHS[y] = dV[y]; + Particles_subset.ghost_get<0>(SKIP_LABELLING); + + Grad_bulk[x] = Bulk_Dx(P_bulk); + Grad_bulk[y] = Bulk_Dy(P_bulk); + for (int i = 0; i < bulk.size(); i++) { + Particles.template getProp<2>(bulk.template get<0>(i))[x] += Particles_subset.getProp<2>(i)[x]; + Particles.template getProp<2>(bulk.template get<0>(i))[y] += Particles_subset.getProp<2>(i)[y]; + } + + DCPSE_scheme<equations2d2_gpu, decltype(Particles)> Solver(Particles); + Solver.impose(Stokes1, bulk, RHS[0], vx); + Solver.impose(Stokes2, bulk, RHS[1], vy); + Solver.impose(V[x], boundary, RHS[0], vx); + Solver.impose(V[y], boundary, RHS[1], vy); + Solver.solve_with_solver(solverPetsc, V[x], V[y]); + Particles.ghost_get<1>(SKIP_LABELLING); + div = -(Dx(V[x]) + Dy(V[y])); + P = P + div; + for (int i = 0; i < bulk.size(); i++) { + Particles_subset.getProp<0>(i) = Particles.template getProp<0>(bulk.template get<0>(i)); + } + sum = 0; + sum1 = 0; + + for (int j = 0; j < bulk.size(); j++) { + auto p = bulk.get<0>(j); + sum += (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) * + (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) + + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]) * + (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]); + sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] + + Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1]; + } + + sum = sqrt(sum); + sum1 = sqrt(sum1); + V_star=V; + v_cl.sum(sum); + v_cl.sum(sum1); + v_cl.execute(); + V_err_old = V_err; + V_err = sum / sum1; + if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) { + errctr++; + //alpha_P -= 0.1; + } else { + errctr = 0; + } + if (n > 3) { + if (errctr > 3) { + std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN ERROR" << std::endl; + Vreset = 1; + break; + } else { + Vreset = 0; + } + } + n++; + if (v_cl.rank() == 0) { + std::cout << "Rel l2 cgs err in V = " << V_err << " at " << n << std::endl; + + } + } + double worst1 = 0.0; + double worst2 = 0.0; + + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]) >= worst1) { + worst1 = fabs(Particles.getProp<6>(p)[0] - Particles.getProp<1>(p)[0]); + } + } + for(int j=0;j<bulk.size();j++) + { auto p=bulk.get<0>(j); + if (fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]) >= worst2) { + worst2 = fabs(Particles.getProp<6>(p)[1] - Particles.getProp<1>(p)[1]); + } + } + + std::cout << "Maximum Analytic Error in slice x: " << worst1 << std::endl; + std::cout << "Maximum Analytic Error in slice y: " << worst2 << std::endl; + BOOST_REQUIRE(worst1 < 0.03); + BOOST_REQUIRE(worst2 < 0.03); + + //Particles.write("PC_subset_lid2"); + } + +BOOST_AUTO_TEST_SUITE_END() +#endif +#endif \ No newline at end of file diff --git a/src/DCPSE/Dcpse.cuh b/src/DCPSE/Dcpse.cuh index 01ef4828ecdeb085657d1c3761439d4c20657376..327265041093a92c577c2b48b3c3e957aba47bda 100644 --- a/src/DCPSE/Dcpse.cuh +++ b/src/DCPSE/Dcpse.cuh @@ -13,10 +13,8 @@ #include "SupportBuilder.cuh" #include "Support.hpp" #include "Vandermonde.hpp" -#include "Vandermonde.cuh" #include "DcpseDiagonalScalingMatrix.hpp" #include "DcpseRhs.hpp" -#include "DcpseRhs.cuh" #include <chrono> @@ -31,10 +29,7 @@ __global__ void calcKernels_gpu(particles_type, monomialBasis_type, supportKey_t template<unsigned int dim, typename T, typename particles_type, typename monomialBasis_type, typename supportKey_type, typename localEps_type, typename matrix_type> __global__ void assembleLocalMatrices_gpu( particles_type, Point<dim, unsigned int>, unsigned int, monomialBasis_type, supportKey_type, supportKey_type, supportKey_type, - T**, T**, localEps_type, localEps_type, matrix_type, matrix_type, matrix_type, size_t, size_t); - -template <unsigned int dim, typename T, typename monomialBasis_type> -__device__ T computeKernel_gpu(Point<dim, T>&, const T*, const monomialBasis_type&); + T**, T**, localEps_type, localEps_type, matrix_type, size_t, size_t); template<unsigned int dim, typename vector_type, class T = typename vector_type::stype> @@ -69,6 +64,8 @@ private: openfpm::vector_custd<T> localEpsInvPow; // Each MPI rank has just access to the local ones openfpm::vector_custd<T> calcKernels; + openfpm::vector<size_t> subsetKeyPid; + vector_type & particles; double rCut; unsigned int convergenceOrder; @@ -122,6 +119,7 @@ public: differentialSignature(differentialSignature), differentialOrder(Monomial<dim>(differentialSignature).order()), monomialBasis(differentialSignature.asArray(), convergenceOrder), + subsetKeyPid(other.subsetKeyPid), supportRefs(other.supportRefs), supportKeys1D(other.supportKeys1D), kerOffsets(other.kerOffsets), @@ -200,7 +198,7 @@ public: momenta.template get<1>(i) = -3000000000.0; } - size_t N = particles.size_local_orig(); + size_t N = particles.size_local(); for (size_t j = 0; j < N; ++j) { double eps = localEps.get(j); @@ -220,7 +218,7 @@ public: for (int i = 0; i < supportKeysSize; i++) { size_t xqK = supportKeys[i]; - Point<dim, T> xq = particles.getPosOrig(xqK); + Point<dim, T> xq = particles.getPos(xqK); Point<dim, T> normalizedArg = (xp - xq) / eps; auto ker = calcKernels.get(kerOff+i); @@ -274,7 +272,7 @@ public: sign = -1; } - size_t N = particles.size_local_orig(); + size_t N = particles.size_local(); for (size_t j = 0; j < N; ++j) { double epsInvPow = localEpsInvPow.get(j); @@ -373,8 +371,9 @@ public: sign = -1; } - double eps = localEps.get(key.getKey()); - double epsInvPow = localEpsInvPow.get(key.getKey()); + size_t localKey = subsetKeyPid.get(key.getKey()); + double eps = localEps.get(localKey); + double epsInvPow = localEpsInvPow.get(localKey); auto &particles = o1.getVector(); @@ -386,13 +385,13 @@ public: #endif expr_type Dfxp = 0; - size_t xpK = supportRefs.get(key.getKey()); + size_t xpK = supportRefs.get(localKey); Point<dim, T> xp = particles.getPos(xpK); expr_type fxp = sign * o1.value(key); size_t kerOff = kerOffsets.get(xpK); - size_t supportKeysSize = kerOffsets.get(key.getKey()+1)-kerOffsets.get(key.getKey()); - size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.getKey())]; + size_t supportKeysSize = kerOffsets.get(localKey+1)-kerOffsets.get(localKey); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(localKey)]; for (int i = 0; i < supportKeysSize; i++) { @@ -401,8 +400,8 @@ public: Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+i); } Dfxp = Dfxp * epsInvPow; - // - //T trueDfxp = particles.template getProp<2>(xpK); + + // T trueDfxp = particles.template getProp<2>(xpK); // Store Dfxp in the right position return Dfxp; } @@ -429,8 +428,9 @@ public: sign = -1; } - double eps = localEps.get(key.getKey()); - double epsInvPow = localEpsInvPow(key.getKey()); + size_t localKey = subsetKeyPid.get(key.getKey()); + double eps = localEps.get(localKey); + double epsInvPow = localEpsInvPow(localKey); auto &particles = o1.getVector(); @@ -442,13 +442,13 @@ public: #endif expr_type Dfxp = 0; - size_t xpK = supportRefs.get(key.getKey()); + size_t xpK = supportRefs.get(localKey); Point<dim, T> xp = particles.getPos(xpK); expr_type fxp = sign * o1.value(key)[i]; size_t kerOff = kerOffsets.get(xpK); - size_t supportKeysSize = kerOffsets.get(key.getKey()+1)-kerOffsets.get(key.getKey()); - size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.getKey())]; + size_t supportKeysSize = kerOffsets.get(localKey+1)-kerOffsets.get(localKey); + size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(localKey)]; for (int j = 0; j < supportKeysSize; j++) { @@ -475,6 +475,7 @@ public: localEps.clear(); localEpsInvPow.clear(); calcKernels.clear(); + subsetKeyPid.clear(); initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor); } @@ -488,38 +489,38 @@ private: #ifdef SE_CLASS1 this->update_ctr=particles.getMapCtr(); #endif - SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); - unsigned int requiredSupportSize = monomialBasis.size(); - - if (!isSharedSupport) - supportRefs.resize(particles.size_local_orig()); - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); - kerOffsets.resize(particles.size_local_orig()+1); - - // need to resize supportKeys1D to yet unknown supportKeysTotalN - // add() takes too long - openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); - const T condVTOL = 1e2; - auto it = particles.getDomainIterator(); - while (it.isNext()) { - size_t sz; - auto key_o = particles.getOriginKey(it.get()); + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + kerOffsets.resize(particles.size_local()+1); + const T condVTOL = 1e2; - if (!isSharedSupport){ + if (!isSharedSupport) { + SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); + unsigned int requiredSupportSize = monomialBasis.size(); + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + + auto it = particles.getDomainIterator(); + while (it.isNext()) { auto key_o = particles.getOriginKey(it.get()); + subsetKeyPid.get(key_o.getKey()) = it.get().getKey(); Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); - supportRefs.get(key_o.getKey()) = support.getReferencePointKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); tempSupportKeys.get(key_o.getKey()) = support.getKeys(); kerOffsets.get(key_o.getKey()) = supportKeysTotalN; - if (maxSupportSize < support.size()) maxSupportSize = support.size(); + if (maxSupportSize < support.size()) + maxSupportSize = support.size(); supportKeysTotalN += support.size(); - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); // Vandermonde matrix computation Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> @@ -534,11 +535,10 @@ private: std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug continue; } else requiredSupportSize = monomialBasis.size(); + + ++it; } - ++it; - } - if (!isSharedSupport){ kerOffsets.get(supportRefs.size()) = supportKeysTotalN; supportKeys1D.resize(supportKeysTotalN); @@ -559,38 +559,38 @@ private: #ifdef SE_CLASS1 this->update_ctr=particles.getMapCtr(); #endif - SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); - unsigned int requiredSupportSize = monomialBasis.size(); - - if (!isSharedSupport) - supportRefs.resize(particles.size_local_orig()); - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); - kerOffsets.resize(particles.size_local_orig()+1); - - // need to resize supportKeys1D to yet unknown supportKeysTotalN - // add() takes too long - openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); - const T condVTOL = 1e2; - auto it = particles.getDomainIterator(); - while (it.isNext()) { - size_t sz; - auto key_o = particles.getOriginKey(it.get()); + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); + kerOffsets.resize(particles.size_local()+1); + const T condVTOL = 1e2; - if (!isSharedSupport){ + if (!isSharedSupport) { + SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); + unsigned int requiredSupportSize = monomialBasis.size(); + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + + auto it = particles.getDomainIterator(); + while (it.isNext()) { auto key_o = particles.getOriginKey(it.get()); + subsetKeyPid.get(key_o.getKey()) = it.get().getKey(); Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); - supportRefs.get(key_o.getKey()) = support.getReferencePointKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); tempSupportKeys.get(key_o.getKey()) = support.getKeys(); kerOffsets.get(key_o.getKey()) = supportKeysTotalN; - if (maxSupportSize < support.size()) maxSupportSize = support.size(); + if (maxSupportSize < support.size()) + maxSupportSize = support.size(); supportKeysTotalN += support.size(); - EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size()); // Vandermonde matrix computation Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>> @@ -605,11 +605,10 @@ private: std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug continue; } else requiredSupportSize = monomialBasis.size(); + + ++it; } - ++it; - } - if (!isSharedSupport){ kerOffsets.get(supportRefs.size()) = supportKeysTotalN; supportKeys1D.resize(supportKeysTotalN); @@ -634,51 +633,49 @@ private: this->supportSizeFactor=supportSizeFactor; this->convergenceOrder=convergenceOrder; - if (!isSharedSupport) - supportRefs.resize(particles.size_local_orig()); - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); auto it = particles.getDomainIterator(); if (opt==support_options::RADIUS) { - while (it.isNext()) { - auto key_o = particles.getOriginKey(it.get()); - - if (!isSharedSupport) + if (!isSharedSupport) { + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); supportRefs.get(key_o.getKey()) = key_o.getKey(); - ++it; - } - - SupportBuilderGPU<vector_type> supportBuilder(particles, differentialSignature, rCut); - supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + ++it; + } + SupportBuilderGPU<vector_type> supportBuilder(particles, rCut); + supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + } } else { - size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; - SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); - - kerOffsets.resize(supportRefs.size()+1); - // need to resize supportKeys1D to yet unknown supportKeysTotalN - // add() takes too long - openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + if (!isSharedSupport){ + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); + kerOffsets.resize(supportRefs.size()+1); - while (it.isNext()) { - if (!isSharedSupport){ - auto key_o = particles.getOriginKey(it.get()); + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); - supportRefs.get(key_o.getKey()) = support.getReferencePointKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); tempSupportKeys.get(key_o.getKey()) = support.getKeys(); kerOffsets.get(key_o.getKey()) = supportKeysTotalN; if (maxSupportSize < support.size()) maxSupportSize = support.size(); supportKeysTotalN += support.size(); + ++it; } - ++it; - } - if (!isSharedSupport){ kerOffsets.get(supportRefs.size()) = supportKeysTotalN; supportKeys1D.resize(supportKeysTotalN); @@ -710,52 +707,49 @@ std::cout << "Support building took " << time_span2.count() * 1000. << " millise this->supportSizeFactor=supportSizeFactor; this->convergenceOrder=convergenceOrder; - if (!isSharedSupport) - supportRefs.resize(particles.size_local_orig()); - - localEps.resize(particles.size_local_orig()); - localEpsInvPow.resize(particles.size_local_orig()); + if (!isSharedSupport) { + subsetKeyPid.resize(particles.size_local_orig()); + supportRefs.resize(particles.size_local()); + } + localEps.resize(particles.size_local()); + localEpsInvPow.resize(particles.size_local()); std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); auto it = particles.getDomainIterator(); if (opt==support_options::RADIUS) { - while (it.isNext()) { - auto key_o = particles.getOriginKey(it.get()); - - if (!isSharedSupport) + if (!isSharedSupport) { + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); supportRefs.get(key_o.getKey()) = key_o.getKey(); - ++it; - } - - SupportBuilderGPU<vector_type> supportBuilder(particles, differentialSignature, rCut); - supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + ++it; + } + SupportBuilderGPU<vector_type> supportBuilder(particles, rCut); + supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN); + } } else { - size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; - SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); - - kerOffsets.resize(supportRefs.size()+1); - // need to resize supportKeys1D to yet unknown supportKeysTotalN - // add() takes too long - openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + if (!isSharedSupport){ + openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size()); + size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor; + // need to resize supportKeys1D to yet unknown supportKeysTotalN + // add() takes too long + SupportBuilder<vector_type> supportBuilder(particles, differentialSignature, rCut); + kerOffsets.resize(supportRefs.size()+1); - while (it.isNext()) { - if (!isSharedSupport){ - auto key_o = particles.getOriginKey(it.get()); + while (it.isNext()) { + auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey(); Support support = supportBuilder.getSupport(it, requiredSupportSize, opt); - supportRefs.get(key_o.getKey()) = support.getReferencePointKey(); + supportRefs.get(key_o.getKey()) = key_o.getKey(); tempSupportKeys.get(key_o.getKey()) = support.getKeys(); kerOffsets.get(key_o.getKey()) = supportKeysTotalN; if (maxSupportSize < support.size()) maxSupportSize = support.size(); supportKeysTotalN += support.size(); + ++it; } - ++it; - } - if (!isSharedSupport){ kerOffsets.get(supportRefs.size()) = supportKeysTotalN; supportKeys1D.resize(supportKeysTotalN); @@ -793,9 +787,7 @@ std::cout << "Support building took " << time_span2.count() * 1000. << " millise size_t numThreads = numSMs*numSMsMult*256; std::cout << "numThreads " << numThreads << " numMatrices " << numMatrices << std::endl; - openfpm::vector_custd<T> EMat(numThreads * maxSupportSize * dim); - openfpm::vector_custd<T> VMat(numThreads * maxSupportSize * monomialBasisSize); - // B has the same dimensions as V + // B is an intermediate matrix openfpm::vector_custd<T> BMat(numThreads * maxSupportSize * monomialBasisSize); // allocate device space for A, b openfpm::vector_custd<T> AMat(numMatrices*monomialBasisSize*monomialBasisSize); @@ -826,7 +818,7 @@ std::cout << "Support building took " << time_span2.count() * 1000. << " millise auto bVecPointersKernel = bVecPointers.toKernel(); T** bVecPointersKernelPointer = (T**) bVecPointersKernel.getPointer(); assembleLocalMatrices_gpu<<<numSMsMult*numSMs, 256>>>(particles.toKernel(), differentialSignature, differentialOrder, monomialBasisKernel, supportRefs.toKernel(), kerOffsets.toKernel(), supportKeys1D.toKernel(), - AMatPointersKernelPointer, bVecPointersKernelPointer, localEps.toKernel(), localEpsInvPow.toKernel(), EMat.toKernel(), VMat.toKernel(), BMat.toKernel(), numMatrices, maxSupportSize); + AMatPointersKernelPointer, bVecPointersKernelPointer, localEps.toKernel(), localEpsInvPow.toKernel(), BMat.toKernel(), numMatrices, maxSupportSize); localEps.template deviceToHost(); localEpsInvPow.template deviceToHost(); @@ -877,9 +869,8 @@ std::cout << "Support building took " << time_span2.count() * 1000. << " millise } T computeKernel(Point<dim, T> x, EMatrix<T, Eigen::Dynamic, 1> & a) const { - T res = 0; unsigned int counter = 0; - T expFactor = exp(-norm2(x)); + T res = 0, expFactor = exp(-norm2(x)); size_t N = monomialBasis.getElements().size(); for (size_t i = 0; i < N; ++i) @@ -898,9 +889,8 @@ std::cout << "Support building took " << time_span2.count() * 1000. << " millise // template <unsigned int a_dim> // T computeKernel(Point<dim, T> x, const T (& a) [a_dim]) const { T computeKernel(Point<dim, T> x, const T* a) const { - T res = 0; unsigned int counter = 0; - T expFactor = exp(-norm2(x)); + T res = 0, expFactor = exp(-norm2(x)); size_t N = monomialBasis.getElements().size(); for (size_t i = 0; i < N; ++i) @@ -938,21 +928,13 @@ template<unsigned int dim, typename T, typename particles_type, typename monomia __global__ void assembleLocalMatrices_gpu( particles_type particles, Point<dim, unsigned int> differentialSignature, unsigned int differentialOrder, monomialBasis_type monomialBasis, supportKey_type supportRefs, supportKey_type kerOffsets, supportKey_type supportKeys1D, T** h_A, T** h_b, localEps_type localEps, localEps_type localEpsInvPow, - matrix_type EMat, matrix_type VMat, matrix_type BMat, size_t numMatrices, size_t maxSupportSize) + matrix_type BMat, size_t numMatrices, size_t maxSupportSize) { auto p_key = GET_PARTICLE(particles); size_t monomialBasisSize = monomialBasis.size(); - - size_t EStartPos = maxSupportSize * dim * p_key; - size_t VStartPos = maxSupportSize * monomialBasisSize * p_key; - size_t BStartPos = maxSupportSize * monomialBasisSize * p_key; - - T* V = &((T*)VMat.getPointer())[VStartPos]; - T* E = &((T*)EMat.getPointer())[EStartPos]; - T* B = &((T*)BMat.getPointer())[BStartPos]; - - DcpseDiagonalScalingMatrix<dim, MonomialBasis<dim, aggregate<Monomial_gpu<dim>>, openfpm::vector_custd_ker, memory_traits_inte>> diagonalScalingMatrix(monomialBasis); - DcpseRhs_gpu<dim, MonomialBasis<dim, aggregate<Monomial_gpu<dim>>, openfpm::vector_custd_ker, memory_traits_inte>> rhs(monomialBasis, differentialSignature); + size_t BStartPos = maxSupportSize * monomialBasisSize * p_key; T* B = &((T*)BMat.getPointer())[BStartPos]; + const auto& basisElements = monomialBasis.getElements(); + int rhsSign = (Monomial_gpu<dim>(differentialSignature).order() % 2 == 0) ? 1 : -1; for (; p_key < numMatrices; @@ -964,22 +946,33 @@ __global__ void assembleLocalMatrices_gpu( size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)]; size_t xpK = supportRefs.get(p_key); - // Vandermonde matrix computation - // Pointer to E is passed to reuse memory for offset construction inside Vandermonde. - Vandermonde_gpu<dim, T, MonomialBasis<dim, aggregate<Monomial_gpu<dim>>, openfpm::vector_custd_ker, memory_traits_inte>> - vandermonde(E, xpK, supportKeysSize, supportKeys, monomialBasis, particles); - vandermonde.getMatrix(V); + assert(supportKeysSize >= monomialBasis.size()); - T eps = vandermonde.getEps(); localEps.get(p_key) = eps; - localEpsInvPow.get(p_key) = 1.0 / pow(eps,differentialOrder); + T FACTOR = 2, avgNeighbourSpacing = 0; + for (int i = 0 ; i < supportKeysSize; i++) { + Point<dim,T> off = xa; off -= particles.getPosOrig(supportKeys[i]); + for (size_t j = 0; j < dim; ++j) + avgNeighbourSpacing += fabs(off.value(j)); + } + + avgNeighbourSpacing /= supportKeysSize; + T eps = FACTOR * avgNeighbourSpacing; - diagonalScalingMatrix.buildMatrix(E, xpK, supportKeysSize, supportKeys, eps, particles); + assert(eps != 0); + + localEps.get(p_key) = eps; + localEpsInvPow.get(p_key) = 1.0 / pow(eps,differentialOrder); // EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> B = E * V; for (int i = 0; i < supportKeysSize; ++i) - for (int j = 0; j < monomialBasisSize; ++j) - // E is a diagonal matrix - B[i*monomialBasisSize+j] = E[i] * V[i*monomialBasisSize+j]; + for (int j = 0; j < monomialBasisSize; ++j) { + Point<dim,T> off = xa; off -= particles.getPosOrig(supportKeys[i]); + const Monomial_gpu<dim>& m = basisElements.get(j); + + T V_ij = m.evaluate(off) / pow(eps, m.order()); + T E_ii = exp(- norm2(off) / (2.0 * eps * eps)); + B[i*monomialBasisSize+j] = E_ii * V_ij; + } T sum = 0.0; // EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> A = B.transpose() * B; @@ -992,7 +985,10 @@ __global__ void assembleLocalMatrices_gpu( } // Compute RHS vector b - rhs.template getVector<T>(h_b[p_key]); + for (size_t i = 0; i < monomialBasisSize; ++i) { + const Monomial_gpu<dim>& dm = basisElements.get(i).getDerivative(differentialSignature); + h_b[p_key][i] = rhsSign * dm.evaluate(Point<dim, T>(0)); + } } } @@ -1004,43 +1000,32 @@ __global__ void calcKernels_gpu(particles_type particles, monomialBasis_type mon Point<dim, T> xa = particles.getPos(p_key); size_t monomialBasisSize = monomialBasis.size(); + const auto& basisElements = monomialBasis.getElements(); size_t supportKeysSize = kerOffsets.get(p_key+1)-kerOffsets.get(p_key); size_t* supportKeys = &((size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)]; + T* calcKernelsLocal = &((T*)calcKernels.getPointer())[kerOffsets.get(p_key)]; T eps = localEps.get(p_key); for (size_t j = 0; j < supportKeysSize; ++j) { size_t xqK = supportKeys[j]; - Point<dim, T> xq = particles.getPos(xqK); - Point<dim, T> normalizedArg = (xa - xq) / eps; + Point<dim, T> xq = particles.getPosOrig(xqK); + Point<dim, T> offNorm = (xa - xq) / eps; + T expFactor = exp(-norm2(offNorm)); - calcKernelsLocal[j] = computeKernel_gpu(normalizedArg, h_b[p_key], monomialBasis); - } -} - -template <unsigned int dim, typename T, typename monomialBasis_type> -__device__ T computeKernel_gpu(Point<dim, T>& x, const T* a, const monomialBasis_type& monomialBasis) { - T res = 0; - unsigned int counter = 0; - T expFactor = exp(-norm2(x)); - - const auto& basisElements = monomialBasis.getElements(); - - size_t N = basisElements.size(); - for (size_t i = 0; i < N; ++i) - { - const Monomial_gpu<dim> &m = basisElements.get(i); + T res = 0; + for (size_t i = 0; i < monomialBasisSize; ++i) { + const Monomial_gpu<dim> &m = basisElements.get(i); + T mbValue = m.evaluate(offNorm); + T coeff = h_b[p_key][i]; - T coeff = a[counter]; - T mbValue = m.evaluate(x); - res += coeff * mbValue * expFactor; - ++counter; + res += coeff * mbValue * expFactor; + } + calcKernelsLocal[j] = res; } - return res; } - #endif #endif //OPENFPM_PDATA_DCPSE_CUH diff --git a/src/DCPSE/DcpseDiagonalScalingMatrix.hpp b/src/DCPSE/DcpseDiagonalScalingMatrix.hpp index afa4a13a43861065d5c911266d91dfc24738ef6d..eefe15b091cfb8f869168410bfe933cb38afdf5b 100644 --- a/src/DCPSE/DcpseDiagonalScalingMatrix.hpp +++ b/src/DCPSE/DcpseDiagonalScalingMatrix.hpp @@ -17,7 +17,7 @@ private: const monomialBasis_type& monomialBasis; public: - __host__ __device__ DcpseDiagonalScalingMatrix(const monomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} + DcpseDiagonalScalingMatrix(const monomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} template <typename T, typename MatrixType, typename vector_type, typename vector_type2> void buildMatrix(MatrixType &M, Support support, T eps, vector_type & particlesFrom , vector_type2 & particlesTo) diff --git a/src/DCPSE/DcpseRhs.cuh b/src/DCPSE/DcpseRhs.cuh deleted file mode 100644 index 68dbdb93098951246ca6ca4bc7d4de2c511cf749..0000000000000000000000000000000000000000 --- a/src/DCPSE/DcpseRhs.cuh +++ /dev/null @@ -1,49 +0,0 @@ -// -// Created by Serhii -// - -#ifndef OPENFPM_PDATA_DCPSERHS_CUH -#define OPENFPM_PDATA_DCPSERHS_CUH - -#include "MonomialBasis.hpp" - - -template<unsigned int dim, typename MonomialBasis_type> -class DcpseRhs_gpu -{ -private: - const Point<dim, unsigned int>& differentialSignature; - const MonomialBasis_type& monomialBasis; - int sign; -public: - __host__ __device__ DcpseRhs_gpu(const MonomialBasis_type &monomialBasis, const Point<dim, unsigned int> &differentialSignature); - - template<typename T> - __host__ __device__ void getVector(T* b); -}; - - -template<unsigned int dim, typename MonomialBasis_type> -__host__ __device__ DcpseRhs_gpu<dim, MonomialBasis_type>::DcpseRhs_gpu(const MonomialBasis_type &monomialBasis, - const Point<dim, unsigned int> &differentialSignature) - : monomialBasis(monomialBasis), differentialSignature(differentialSignature) { - unsigned int order = (Monomial_gpu<dim>(differentialSignature)).order(); - if (order % 2 == 0) - sign = 1; - else - sign = -1; -} - -template<unsigned int dim, typename MonomialBasis_type> -template<typename T> -__host__ __device__ void DcpseRhs_gpu<dim, MonomialBasis_type>::getVector(T* b) -{ - const auto& basisElements = monomialBasis.getElements(); - - for (size_t i = 0; i < basisElements.size(); ++i) { - const Monomial_gpu<dim>& dm = basisElements.get(i).getDerivative(differentialSignature); - b[i] = sign * dm.evaluate(Point<dim, T>(0)); - } -} - -#endif //OPENFPM_PDATA_DCPSERHS_CUH diff --git a/src/DCPSE/SupportBuilder.cuh b/src/DCPSE/SupportBuilder.cuh index 19d007658d44b92af0c8c6cd1581c375b37c8ed0..363741b7bb7b0d2c49fd4b0305198263ffb44e1b 100644 --- a/src/DCPSE/SupportBuilder.cuh +++ b/src/DCPSE/SupportBuilder.cuh @@ -53,7 +53,7 @@ __global__ void gatherSupportSize_gpu( size_t el = cl.get(cellLinId, k); if (p_key == el) continue; - if (pos.distance(particles.getPos(el)) < rCut) ++N; + if (pos.distance(particles.getPosOrig(el)) < rCut) ++N; } } while (nextCell<dim>(offset, 2+1)); @@ -90,7 +90,7 @@ __global__ void assembleSupport_gpu(particles_type particles, CellList_type cl, size_t el = cl.get(cellLinId, k); if (p_key == el) continue; - if (pos.distance(particles.getPos(el)) < rCut) supportKeys[N++] = el; + if (pos.distance(particles.getPosOrig(el)) < rCut) supportKeys[N++] = el; } } while (nextCell<dim>(offset, 2+1)); @@ -103,22 +103,22 @@ class SupportBuilderGPU { private: vector_type &domain; - const Point<vector_type::dims, unsigned int> differentialSignature; typename vector_type::stype rCut; public: - SupportBuilderGPU(vector_type &domain, Point<vector_type::dims, unsigned int> differentialSignature, typename vector_type::stype rCut) - : domain(domain), differentialSignature(differentialSignature), rCut(rCut) {} - - SupportBuilderGPU(vector_type &domain, unsigned int differentialSignature[vector_type::dims], typename vector_type::stype rCut) - : SupportBuilderGPU(domain, Point<vector_type::dims, unsigned int>(differentialSignature), rCut) {} + SupportBuilderGPU(vector_type &domain, typename vector_type::stype rCut) + : domain(domain), rCut(rCut) {} void getSupport(size_t N, openfpm::vector_custd<size_t>& kerOffsets, openfpm::vector_custd<size_t>& supportKeys1D, size_t& maxSupport, size_t& supportKeysTotalN) { domain.hostToDevicePos(); auto it = domain.getDomainIteratorGPU(512); - auto NN = domain.getCellListGPU(rCut); + typedef CellList_gen<vector_type::dims, typename vector_type::stype, Process_keys_lin, Mem_fast<CudaMemory>, shift<vector_type::dims, typename vector_type::stype>> params; + // auto NN = domain.getCellListGPU(rCut); + auto NN = domain.template getCellList<params>(rCut); + NN.hostToDevice(); + // +1 to allow getting size from cumulative sum: "size[i+1] - size[i]" kerOffsets.resize(N+1); diff --git a/src/DCPSE/Vandermonde.cuh b/src/DCPSE/Vandermonde.cuh deleted file mode 100644 index fed9a8f602b1d90f13a3b4996839fdbbed35bd02..0000000000000000000000000000000000000000 --- a/src/DCPSE/Vandermonde.cuh +++ /dev/null @@ -1,119 +0,0 @@ -// -// Created by Serhii on 15/05/21 -// - - -#ifndef OPENFPM_PDATA_VANDERMONDE_CUH -#define OPENFPM_PDATA_VANDERMONDE_CUH - -#include "MonomialBasis.hpp" -#include "VandermondeRowBuilder.hpp" -#include "Support.hpp" - -template<unsigned int dim, typename T, typename MonomialBasis_type = MonomialBasis<dim>> -class Vandermonde_gpu -{ -private: - size_t N; - T* offsets; - const Point<dim, T> point; - const MonomialBasis_type& monomialBasis; - T eps; - -public: - template<typename vector_type> - __host__ __device__ Vandermonde_gpu( T* mem, size_t supportRefKey, size_t supportKeysSize, - const size_t* supportKeys, const MonomialBasis_type &monomialBasis, const vector_type & particles) - : offsets(mem), monomialBasis(monomialBasis), point(particles.getPos(supportRefKey)) - { - initialize(supportRefKey, supportKeysSize, supportKeys, particles); - } - - __host__ __device__ void getMatrix(T *M) - { - // Build the Vandermonde_gpu matrix, row-by-row - VandermondeRowBuilder<dim, T, MonomialBasis_type> vrb(monomialBasis); - unsigned int row = 0; - - for (size_t i = 0; i < N; ++i) - { - const T(*offset) [dim] = reinterpret_cast<const T(*) [dim]>(&offsets[i*dim]); - vrb.buildRow_gpu(&M[i*monomialBasis.size()], row, *offset, eps); - ++row; - } - } - - __host__ __device__ T getEps() - { - return eps; - } - - - __host__ __device__ ~Vandermonde_gpu() - { - } - -private: - - - __host__ __device__ void computeEps(T factor) - { - T avgNeighbourSpacing = 0; - for (size_t i = 0; i < N; ++i) - { - const T(*offset) [dim] = reinterpret_cast<const T(*) [dim]>(&offsets[i*dim]); - avgNeighbourSpacing += computeAbsSum(*offset); - - } - avgNeighbourSpacing /= N; - eps = factor * avgNeighbourSpacing; - assert(eps != 0); - } - - __host__ __device__ static T computeAbsSum(const Point<dim, T> &x) - { - T absSum = 0; - for (unsigned int i = 0; i < dim; ++i) - { - absSum += fabs(x.value(i)); - } - return absSum; - } - - __host__ __device__ static T computeAbsSum(const T (& x) [dim]) - { - T absSum = 0; - for (unsigned int i = 0; i < dim; ++i) - { - absSum += fabs(x[i]); - } - return absSum; - } - - - template<typename vector_type> - __host__ __device__ void initialize(size_t supportRefKey, size_t supportKeysSize, const size_t* supportKeys, const vector_type & particles) - { - N = supportKeysSize; - - for (int i = 0 ; i < N ; i++) - { - size_t otherKey = supportKeys[i]; - Point<dim,T> p = particles.getPos(supportRefKey); - const auto& p2 = particles.getPos(otherKey); - - p -= p2; - - for (size_t j = 0; j < dim; j++) { - offsets[i*dim+j] = p[j]; - auto diff = p[j]; - } - } - - computeEps(2); - } - -}; - - -#endif //OPENFPM_PDATA_VANDERMONDE_CUH diff --git a/src/DCPSE/VandermondeRowBuilder.hpp b/src/DCPSE/VandermondeRowBuilder.hpp index c4da47fcd22d6392393ae6f77d87806c244981ea..2de39fa06e9a9e8e68d54c438350865dd206af46 100644 --- a/src/DCPSE/VandermondeRowBuilder.hpp +++ b/src/DCPSE/VandermondeRowBuilder.hpp @@ -15,12 +15,10 @@ private: const MonomialBasis_type& monomialBasis; public: - __host__ __device__ VandermondeRowBuilder(const MonomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} + VandermondeRowBuilder(const MonomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} template <typename MatrixType> void buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps); - - __host__ __device__ void buildRow_gpu(T *M, unsigned int row, const T (& x) [dim], T eps); }; template<unsigned int dim, typename T,typename MonomialBasis_type> @@ -37,18 +35,5 @@ void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow(MatrixType &M, } } -template<unsigned int dim, typename T, typename MonomialBasis_type> -__host__ __device__ void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow_gpu(T* M, unsigned int row, const T (& x) [dim], T eps) -{ - // M is a pointer to the row being filled - const auto& basisElements = monomialBasis.getElements(); - - for (size_t col = 0; col < basisElements.size(); ++col) - { - const Monomial_gpu<dim>& m = basisElements.get(col); - M[col] = m.evaluate(x) / pow(eps, m.order()); - } -} - #endif //OPENFPM_PDATA_VANDERMONDEROW_HPP diff --git a/src/Operators/Vector/vector_dist_operators.hpp b/src/Operators/Vector/vector_dist_operators.hpp index 0eb79d6af216fa33036970269397106b55288916..7c3519222e74447142b2a49230d96de0fa3e0abc 100644 --- a/src/Operators/Vector/vector_dist_operators.hpp +++ b/src/Operators/Vector/vector_dist_operators.hpp @@ -9,6 +9,7 @@ #define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_ #include "Vector/vector_dist.hpp" +#include "Vector/vector_dist_subset.hpp" #include "lib/pdata.hpp" #include "cuda/vector_dist_operators_cuda.cuh" @@ -980,7 +981,7 @@ public: * \return the result of the expression * */ - __device__ __host__ inline auto value(const unsigned int & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) + __host__ __device__ inline auto value(const unsigned int & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) { return pos_or_propR<vector,prp>::value(v.v,k); } @@ -992,7 +993,7 @@ public: * \return the result of the expression * */ - __device__ __host__ inline auto value(const unsigned int & k) -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) + __host__ __device__ inline auto value(const unsigned int & k) -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) { return pos_or_propR<vector,prp>::value(v.v,k); }