Compare revisions

jstark · jstark · jstark · jstark · jstark · Pietro Incardona
--- a/run.sh
+++ b/run.sh
@@ -5,7 +5,7 @@
 #cd openfpm_numerics

 workspace=$1
-hostname=$2
+hostname=$(cat hostname)
 nproc=$3
 ntask_per_node=$5
 nodes=$4
@@ -15,6 +15,10 @@ echo "Directory: $workspace"
 echo "Machine: $hostname"
 echo "Branch name: $branch"

+if [ x"$hostname" == x"cifarm-centos-node"  ]; then
+        mpi_options="--allow-run-as-root"
+fi
+
 # Get the branch of pdata

 echo "RUN numerics test"
@@ -24,7 +28,7 @@ source $HOME/openfpm_vars_$branch

 cd openfpm_numerics

-mpirun -np $3 ../build/openfpm_numerics/src/numerics
+mpirun $mpi_options -np $3 ../build/openfpm_numerics/src/numerics
 if [ $? -ne 0 ]; then
   curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_numerics test \" }] }" https://hooks.slack.com/services/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
   exit 1 ;

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,6 +18,8 @@ endif()
 if (NOT CUDA_ON_BACKEND STREQUAL "None")
 	set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu
 					 DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+			 	 	 OdeIntegrators/tests/Odeintegrators_test_gpu.cu
+					 #DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
 					 Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu)
 endif()

@@ -50,7 +52,7 @@ if ( CUDA_ON_BACKEND STREQUAL "HIP" AND HIP_FOUND )
 	        DCPSE/tests/Vandermonde_unit_tests.cpp
                                                main.cpp
                                                Matrix/SparseMatrix_unit_tests.cpp
-                                                interpolation/interpolation_unit_tests.cpp
+                                                #interpolation/interpolation_unit_tests.cpp
                                                Vector/Vector_unit_tests.cpp
                                                Solvers/petsc_solver_unit_tests.cpp
                                                FiniteDifference/FDScheme_unit_tests.cpp
@@ -91,7 +93,7 @@ else()
        DCPSE/tests/Vandermonde_unit_tests.cpp
 						main.cpp
 						Matrix/SparseMatrix_unit_tests.cpp
-						interpolation/interpolation_unit_tests.cpp
+						#interpolation/interpolation_unit_tests.cpp
 						Vector/Vector_unit_tests.cpp
 						Solvers/petsc_solver_unit_tests.cpp
 						FiniteDifference/FDScheme_unit_tests.cpp
@@ -250,7 +252,7 @@ install(FILES FiniteDifference/Average.hpp
      	      FiniteDifference/FDScheme.hpp 
      	      FiniteDifference/Laplacian.hpp 
      	      FiniteDifference/mul.hpp 
-      	      FiniteDifference/sum.hpp 
+      	      FiniteDifference/sum.hpp
 	      FiniteDifference/Upwind_gradient.hpp
 	      FiniteDifference/Eno_Weno.hpp
              FiniteDifference/FD_op.hpp
@@ -278,7 +280,7 @@ install(FILES Operators/Vector/vector_dist_operators_extensions.hpp
 	       COMPONENT OpenFPM)

 install(FILES Operators/Vector/cuda/vector_dist_operators_cuda.cuh
-	      DESTINATION openfpm_numerics/include/Operators/Vector/cuda 
+	      DESTINATION openfpm_numerics/include/Operators/Vector/cuda
 	      COMPONENT OpenFPM)

 install(FILES DCPSE/Dcpse.hpp
@@ -313,7 +315,8 @@ install(FILES DCPSE/DCPSE_op/DCPSE_op.hpp
 		COMPONENT OpenFPM)

 install(FILES OdeIntegrators/OdeIntegrators.hpp
-		OdeIntegrators/boost_vector_algebra_ofp.hpp
+        OdeIntegrators/vector_algebra_ofp.hpp
+		OdeIntegrators/vector_algebra_ofp_gpu.hpp
 		DESTINATION openfpm_numerics/include/OdeIntegrators
 		COMPONENT OpenFPM)

@@ -322,7 +325,7 @@ install(FILES Draw/DrawParticles.hpp
      	      Draw/PointIteratorSkin.hpp
 	      Draw/DrawDisk.hpp
 	      Draw/DrawSphere.hpp
-      	      DESTINATION openfpm_numerics/include/Draw 
+      	      DESTINATION openfpm_numerics/include/Draw
 	      COMPONENT OpenFPM)

 install(FILES interpolation/interpolation.hpp 

--- a/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp
+++ b/src/DCPSE/DCPSE_op/DCPSE_Solver.hpp
@@ -125,11 +125,11 @@ class DCPSE_scheme {

        } else if (opt == options_solver::LAGRANGE_MULTIPLIER) {
            if (v_cl.rank() == v_cl.size() - 1) {
-                b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz + 1);
-                x_ig.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz + 1);
+                b.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (sz + 1));
+                x_ig.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (sz + 1));
            } else {
-                b.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz);
-                x_ig.resize(Sys_eqs::nvar * tot + 1, Sys_eqs::nvar * sz);
+                b.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * sz);
+                x_ig.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * sz);
            }
        }
            //Use Custom number of constraints using opt as an integer
@@ -252,8 +252,9 @@ class DCPSE_scheme {
        // Indicate all the non zero rows
        openfpm::vector<unsigned char> nz_rows;

+
        if (v_cl.rank() == v_cl.size()-1 && opt == options_solver::LAGRANGE_MULTIPLIER) {
-            nz_rows.resize(row_b+1);
+            nz_rows.resize(row_b+Sys_eqs::nvar);
            }
        else{
            nz_rows.resize(row_b);
@@ -273,7 +274,7 @@ class DCPSE_scheme {
        if (v_cl.getProcessingUnits() == 1) {
            openfpm::vector<unsigned> nz_cols;
            if (v_cl.rank() == v_cl.size()-1 && opt == options_solver::LAGRANGE_MULTIPLIER) {
-                nz_cols.resize(row_b+1);
+                nz_cols.resize(row_b+Sys_eqs::nvar);
            }
            else{
                nz_cols.resize(row_b);
@@ -438,6 +439,54 @@ public:
        copy_nested(x, comp, exps ...);
    }

+        /*! \brief Successive Solve an equation
+ *
+ *  \warning exp must be a scalar type
+ *
+ * \param Solver Manually created Solver instead from the Equation structure
+ * \param exp where to store the result
+ *
+ */
+    template<typename SolverType, typename ... expr_type>
+    void solve_with_solver_successive(SolverType &solver,expr_type ... exps) {
+#ifdef SE_CLASS1
+
+        if (sizeof...(exps) != Sys_eqs::nvar) {
+            std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\
+    													dimensionality, I am expecting " << Sys_eqs::nvar <<
+                      " properties " << std::endl;
+        };
+#endif
+        auto x = solver.solve_successive(getB(opt));
+
+        unsigned int comp = 0;
+        copy_nested(x, comp, exps ...);
+    }
+
+    /*! \brief Successive Solve an equation with inital guess
+ *
+ *  \warning exp must be a scalar type
+ *
+ * \param Solver Manually created Solver instead from the Equation structure
+ * \param exp where to store the result
+ *
+ */
+    template<typename SolverType, typename ... expr_type>
+    void solve_with_solver_ig_successive(SolverType &solver,expr_type ... exps) {
+#ifdef SE_CLASS1
+
+        if (sizeof...(exps) != Sys_eqs::nvar) {
+            std::cerr << __FILE__ << ":" << __LINE__ << " Error the number of properties you gave does not match the solution in\
+    													dimensionality, I am expecting " << Sys_eqs::nvar <<
+                      " properties " << std::endl;
+        };
+#endif
+        auto x = solver.solve_successive(get_x_ig(opt),getB(opt));
+
+        unsigned int comp = 0;
+        copy_nested(x, comp, exps ...);
+    }
+
    /*! \brief Solve an equation with a given Nullspace
     *
     *  \warning exp must be a scalar type
@@ -480,7 +529,7 @@ public:
                      " properties " << std::endl;
        };
 #endif
-        auto x = solver.with_constant_nullspace_solve(getA(opt), getB(opt));
+        auto x = solver.with_nullspace_solve(getA(opt), getB(opt));

        unsigned int comp = 0;
        copy_nested(x, comp, exps ...);
@@ -758,6 +807,7 @@ public:
 */
    template<typename options>
    typename Sys_eqs::SparseMatrix_type &getA(options opt) {
+        if (A.isMatrixFilled()) return A;
        if (opt == options_solver::STANDARD) {
            A.resize(tot * Sys_eqs::nvar, tot * Sys_eqs::nvar,
                     p_map.size_local() * Sys_eqs::nvar,
@@ -768,60 +818,46 @@ public:
            openfpm::vector<triplet> &trpl = A.getMatrixTriplets();

            if (v_cl.rank() == v_cl.size() - 1) {
-                A.resize(tot * Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1,
-                         p_map.size_local() * Sys_eqs::nvar + 1,
-                         p_map.size_local() * Sys_eqs::nvar + 1);
-
-                for (int i = 0; i < tot * Sys_eqs::nvar; i++) {
-                    triplet t1;
-
-                    t1.row() = tot * Sys_eqs::nvar;
-                    t1.col() = i;
-                    t1.value() = 1;
-
-                    trpl.add(t1);
-                }
-
-                for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) {
-                    triplet t2;
-
-                    t2.row() = i + s_pnt * Sys_eqs::nvar;
-                    t2.col() = tot * Sys_eqs::nvar;
-                    t2.value() = 1;
-
-                    trpl.add(t2);
+                A.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (tot + 1),
+                         Sys_eqs::nvar * (p_map.size_local() + 1),
+                         Sys_eqs::nvar * (p_map.size_local() + 1));
+
+                for (int j = 0; j < Sys_eqs::nvar; j++) {
+                    for (int i = 0; i < tot; i++) {
+                        triplet t1;
+                        t1.row() = tot * Sys_eqs::nvar + j;
+                        t1.col() = i * Sys_eqs::nvar + j;
+                        t1.value() = 1;
+                        trpl.add(t1);
+                    }
+                    for (int i = 0; i < p_map.size_local(); i++) {
+                        triplet t2;
+                        t2.row() = s_pnt * Sys_eqs::nvar + i * Sys_eqs::nvar + j;
+                        t2.col() = tot * Sys_eqs::nvar + j;
+                        t2.value() = 1;
+                        trpl.add(t2);
+                    }
+                    triplet t3;
+                    t3.col() = tot * Sys_eqs::nvar + j;
+                    t3.row() = tot * Sys_eqs::nvar + j;
+                    t3.value() = 0;
+                    trpl.add(t3);
                }
-
-                triplet t3;
-
-                t3.col() = tot * Sys_eqs::nvar;
-                t3.row() = tot * Sys_eqs::nvar;
-                t3.value() = 0;
-
-                trpl.add(t3);
-
-                //row_b++;
-                //row++;
-            }
-            else {
-                A.resize(tot * Sys_eqs::nvar + 1, tot * Sys_eqs::nvar + 1,
+            } else {
+                A.resize(Sys_eqs::nvar * (tot + 1), Sys_eqs::nvar * (tot + 1),
                         p_map.size_local() * Sys_eqs::nvar,
                         p_map.size_local() * Sys_eqs::nvar);
-
-                for (int i = 0; i < p_map.size_local() * Sys_eqs::nvar; i++) {
-                    triplet t2;
-
-                    t2.row() = i + s_pnt * Sys_eqs::nvar;
-                    t2.col() = tot * Sys_eqs::nvar;
-                    t2.value() = 1;
-
-                    trpl.add(t2);
+                for (int j = 0; j < Sys_eqs::nvar; j++) {
+                    for (int i = 0; i < p_map.size_local(); i++) {
+                        triplet t2;
+                        t2.row() = s_pnt * Sys_eqs::nvar + i * Sys_eqs::nvar + j;
+                        t2.col() = tot * Sys_eqs::nvar + j;
+                        t2.value() = 1;
+                        trpl.add(t2);
+                    }
                }
            }
-
-
        }
-
        else{
            auto &v_cl = create_vcluster();
            if (v_cl.rank() == v_cl.size() - 1) {
@@ -833,8 +869,8 @@ public:
                A.resize(tot * Sys_eqs::nvar - offset, tot * Sys_eqs::nvar - offset,
                         p_map.size_local() * Sys_eqs::nvar,
                         p_map.size_local() * Sys_eqs::nvar);
-                }
            }
+        }
 #ifdef SE_CLASS1
        consistency(opt);
 #endif
@@ -854,8 +890,8 @@ public:
        if (opt == options_solver::LAGRANGE_MULTIPLIER) {
            auto &v_cl = create_vcluster();
            if (v_cl.rank() == v_cl.size() - 1) {
-
-                b(tot * Sys_eqs::nvar) = 0;
+                for(int j=0;j<Sys_eqs::nvar;j++)
+                {b(tot * Sys_eqs::nvar+j) = 0;}
            }
        }
        return b;
@@ -873,8 +909,8 @@ public:
        if (opt == options_solver::LAGRANGE_MULTIPLIER) {
            auto &v_cl = create_vcluster();
            if (v_cl.rank() == v_cl.size() - 1) {
-
-                x_ig(tot * Sys_eqs::nvar) = 0;
+                for(int j=0;j<Sys_eqs::nvar;j++)
+                    {x_ig(tot * Sys_eqs::nvar+j) = 0;}
            }
        }
        return x_ig;
@@ -959,14 +995,6 @@ public:
            // get the particle
            auto key = it.get();

-/*
-            if (key == 298 && create_vcluster().rank() == 1)
-            {
-            	int debug = 0;
-            	debug++;
-            }
-*/
-
            // Calculate the non-zero colums
            typename Sys_eqs::stype coeff = 1.0;
            op.template value_nz<Sys_eqs>(p_map, key, cols, coeff, 0);
@@ -975,11 +1003,11 @@ public:
            bool is_diag = false;

            // create the triplet
-            for (auto it = cols.begin(); it != cols.end(); ++it) {
+            for (auto it2 = cols.begin(); it2 != cols.end(); ++it2) {
                trpl.add();
                trpl.last().row() = p_map.template getProp<0>(key) * Sys_eqs::nvar + id;
-                trpl.last().col() = it->first;
-                trpl.last().value() = it->second;
+                trpl.last().col() = it2->first;
+                trpl.last().value() = it2->second;
                if (trpl.last().row() == trpl.last().col())
                {is_diag = true;}
            }

--- a/src/DCPSE/DCPSE_op/DCPSE_op.hpp
+++ b/src/DCPSE/DCPSE_op/DCPSE_op.hpp
@@ -7,6 +7,7 @@

 #ifndef DCPSE_OP_HPP_
 #define DCPSE_OP_HPP_
+#ifdef HAVE_EIGEN

 #include "Decomposition/CartDecomposition.hpp"
 #include "DCPSE/Dcpse.hpp"
@@ -2664,5 +2665,5 @@ typedef Derivative_yyy_T<Dcpse_gpu> Derivative_yyy_gpu;
 typedef Derivative_G_T<Dcpse_gpu> Derivative_G_gpu;
 #endif

-
+#endif /*EIGEN */
 #endif /* DCPSE_OP_HPP_ */
--- a/src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp
+++ b/src/DCPSE/DCPSE_op/DCPSE_surface_op.hpp
@@ -4,6 +4,7 @@

 #ifndef OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP
 #define OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP
+#ifdef HAVE_EIGEN

 #include "DCPSE/DCPSE_op/DCPSE_op.hpp"

@@ -91,7 +92,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
    }
@@ -181,7 +182,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -271,7 +272,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -363,8 +364,8 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
    }
@@ -453,8 +454,8 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);
    }
@@ -544,7 +545,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -726,7 +727,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -818,7 +819,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -910,7 +911,7 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

@@ -997,11 +998,11 @@ public:
    template<typename particles_type>
    void update(particles_type &particles) {
        auto dcpse_temp = (Dcpse<particles_type::dims, particles_type> *) dcpse;
-        dcpse_temp->createNormalParticles<NORMAL_ID>(particles);
+        dcpse_temp->template createNormalParticles<NORMAL_ID>(particles);
        dcpse_temp->initializeUpdate(particles);
        dcpse_temp->accumulateAndDeleteNormalParticles(particles);

    }
 };
-
+#endif //Eigen
 #endif //OPENFPM_PDATA_DCPSE_SURFACE_OP_HPP
--- a/src/DCPSE/DCPSE_op/EqnsStruct.hpp
+++ b/src/DCPSE/DCPSE_op/EqnsStruct.hpp
@@ -1228,6 +1228,7 @@ struct equations3d3EPz_gpu {
    typedef umfpack_solver<double> solver_type;
 };

+
 struct equations3d1Pxy_gpu {
    //! dimensionaly of the equation ( 3D problem ...)
    static const unsigned int dims = 3;

--- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cpp
@@ -1011,7 +1011,6 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests)

        Solver.solve_with_solver(solver,sol);

-
        Solver.reset_b();
        Solver.impose_b(bulk, prop_id<1>());
        Solver.impose_b(up_p, prop_id<1>());
@@ -1040,6 +1039,170 @@ BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests)
        //domain.write("Neumann");
    }

+        BOOST_AUTO_TEST_CASE(dcpse_poisson_Neumann2d) {
+        const size_t sz[2] = {31,31};
+        Box<2, double> box({0, 0}, {1.0, 1.0});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing = box.getHigh(0) / (sz[0] - 1);
+        double rCut = 3.1 * spacing;
+        Ghost<2, double> ghost(spacing * 3.1);
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+
+        vector_dist<2, double, aggregate<double[2],double[2],double[2],double[2],double[2]>> domain(0, box, bc, ghost);
+
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        while (it.isNext()) {
+
+            domain.add();
+
+            auto key = it.get();
+            double x = key.get(0) * it.getSpacing(0);
+            domain.getLastPos()[0] = x;
+            double y = key.get(1) * it.getSpacing(1);
+            domain.getLastPos()[1] = y;
+
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+
+        Derivative_x Dx(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Derivative_y Dy(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Laplacian Lap(domain, 2, rCut, 1.9,support_options::N_PARTICLES);
+        petsc_solver<double> solver;
+        solver.setRestart(500);
+        solver.setSolver(KSPGMRES);
+        solver.setPreconditioner(PCSVD);
+
+        openfpm::vector<aggregate<int>> bulk;
+        openfpm::vector<aggregate<int>> up_p;
+        openfpm::vector<aggregate<int>> dw_p;
+        openfpm::vector<aggregate<int>> l_p;
+        openfpm::vector<aggregate<int>> r_p;
+
+        auto v = getV<0>(domain);
+        auto RHS=getV<1>(domain);
+        auto sol = getV<2>(domain);
+        auto anasol = getV<3>(domain);
+        auto err = getV<4>(domain);
+
+        // Here fill me
+
+        Box<2, double> up({box.getLow(0) - spacing / 2.0, box.getHigh(1) - spacing / 2.0},
+                          {box.getHigh(0) + spacing / 2.0, box.getHigh(1) + spacing / 2.0});
+
+        Box<2, double> down({box.getLow(0) - spacing / 2.0, box.getLow(1) - spacing / 2.0},
+                            {box.getHigh(0) + spacing / 2.0, box.getLow(1) + spacing / 2.0});
+
+        Box<2, double> left({box.getLow(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0},
+                            {box.getLow(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0});
+
+        Box<2, double> right({box.getHigh(0) - spacing / 2.0, box.getLow(1) + spacing / 2.0},
+                             {box.getHigh(0) + spacing / 2.0, box.getHigh(1) - spacing / 2.0});
+
+        openfpm::vector<Box<2, double>> boxes;
+        boxes.add(up);
+        boxes.add(down);
+        boxes.add(left);
+        boxes.add(right);
+
+        // Create a writer and write
+        VTKWriter<openfpm::vector<Box<2, double>>, VECTOR_BOX> vtk_box;
+        vtk_box.add(boxes);
+        //vtk_box.write("vtk_box.vtk");
+
+
+        auto it2 = domain.getDomainIterator();
+
+        while (it2.isNext()) {
+            auto p = it2.get();
+            Point<2, double> xp = domain.getPos(p);
+            //domain.getProp<3>(p)=1+xp[0]*xp[0]+2*xp[1]*xp[1];
+            if (up.isInside(xp) == true) {
+                up_p.add();
+                up_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] =  sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] =  sin(5*xp.get(0));
+            } else if (down.isInside(xp) == true) {
+                dw_p.add();
+                dw_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] =  sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] =  sin(5*xp.get(0));
+            } else if (left.isInside(xp) == true) {
+                l_p.add();
+                l_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] =  sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] =  sin(5*xp.get(0));
+            } else if (right.isInside(xp) == true) {
+                r_p.add();
+                r_p.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] =  sin(5*xp.get(0));
+                domain.getProp<1>(p)[1] =  sin(5*xp.get(0));
+            } else {
+                bulk.add();
+                bulk.last().get<0>() = p.getKey();
+                domain.getProp<1>(p)[0] =  -10*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02);
+                domain.getProp<1>(p)[1] =  -10*exp(-((xp.get(0)-0.5)*(xp.get(0)-0.5)+(xp.get(1)-0.5)*(xp.get(1)-0.5))/0.02);
+            }
+
+            ++it2;
+        }
+
+        DCPSE_scheme<equations2d2,decltype(domain)> Solver(domain,options_solver::LAGRANGE_MULTIPLIER);
+        eq_id vx,vy;
+        vx.setId(0);
+        vy.setId(1);
+        auto Poisson0 = -Lap(v[0]);
+        auto D_x0 = Dx(v[0]);
+        auto D_y0 = Dy(v[0]);
+        auto Poisson1 = -Lap(v[1]);
+        auto D_x1 = Dx(v[1]);
+        auto D_y1 = Dy(v[1]);
+
+        Solver.impose(Poisson0, bulk, RHS[0],vx);
+        Solver.impose(Poisson1, bulk, RHS[1],vy);
+        Solver.impose(D_y0, up_p, RHS[0],vx);
+        Solver.impose(-D_y0, dw_p, RHS[0],vx);
+        Solver.impose(-D_x0, l_p, RHS[0],vx);
+        Solver.impose(D_x0, r_p, RHS[0],vx);
+
+        Solver.impose(D_y1, up_p, RHS[1],vy);
+        Solver.impose(-D_y1, dw_p, RHS[1],vy);
+        Solver.impose(-D_x1, l_p, RHS[1],vy);
+        Solver.impose(D_x1, r_p, RHS[1],vy);
+        Solver.solve_with_solver(solver,sol[0],sol[1]);
+
+//       Solver.solve(sol);
+        domain.ghost_get<2>();
+        anasol[0]=-Lap(sol[0]);
+        anasol[1]=-Lap(sol[1]);
+        double worst1 = 0.0,worst2 = 0.0;
+
+        for(int j=0;j<bulk.size();j++)
+        {   auto p=bulk.get<0>(j);
+            if (fabs(domain.getProp<3>(p)[0]- domain.getProp<1>(p)[0]) >= worst1) {
+                worst1 = fabs(domain.getProp<3>(p)[0] - domain.getProp<1>(p)[0]);
+            }
+            if (fabs(domain.getProp<3>(p)[1]- domain.getProp<1>(p)[1]) >= worst2) {
+                worst2 = fabs(domain.getProp<3>(p)[1] - domain.getProp<1>(p)[1]);
+            }
+            domain.getProp<4>(p)[0] = fabs(domain.getProp<1>(p)[0] - domain.getProp<3>(p)[0]);
+            domain.getProp<4>(p)[1] = fabs(domain.getProp<1>(p)[1] - domain.getProp<3>(p)[1]);
+
+        }
+        //Auto Error
+        BOOST_REQUIRE(worst1 < 1.0);
+        BOOST_REQUIRE(worst2 < 1.0);
+
+        domain.write("Neumann2d");
+    }
+

    BOOST_AUTO_TEST_CASE(dcpse_slice_solver) {
 //  int rank;

--- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
@@ -17,7 +17,6 @@
 #include <iostream>
 #include "../DCPSE_op.hpp"
 #include "../DCPSE_Solver.hpp"
-#include "../DCPSE_Solver.cuh"
 #include "Operators/Vector/vector_dist_operators.hpp"
 #include "Vector/vector_dist_subset.hpp"
 #include "../EqnsStruct.hpp"
@@ -154,7 +153,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
        Ghost<2, double> ghost(spacing * 3);
-        double rCut = 3.1 * spacing;
+        double rCut = 2.0 * spacing;
        BOOST_TEST_MESSAGE("Init vector_dist...");

        vector_dist_gpu<2, double, aggregate<double,double,double,double>> domain(0, box, bc, ghost);
@@ -180,7 +179,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Laplacian_gpu Lap(domain, 2, rCut);
+        Laplacian_gpu Lap(domain, 2, rCut, 2,support_options::N_PARTICLES);

        DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain);

@@ -415,10 +414,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-        Derivative_y_gpu Dy(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-        Laplacian_gpu Lap(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-
+        Derivative_x_gpu Dx(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);
+        Derivative_y_gpu Dy(domain, 2, rCut/3.0,1.9,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);

        openfpm::vector<aggregate<int>> bulk;
        openfpm::vector<aggregate<int>> up_p;
@@ -552,7 +550,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
    BOOST_AUTO_TEST_CASE(dcpse_poisson_Dirichlet_anal) {
 //  int rank;
 //  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        const size_t sz[2] = {200,200};
+        const size_t sz[2] = {81,81};
        Box<2, double> box({0, 0}, {1, 1});
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
@@ -860,7 +858,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
        Ghost<2, double> ghost(spacing * 3);
-        double rCut = 3.1 * spacing;
+        double rCut = 2.0 * spacing;
        BOOST_TEST_MESSAGE("Init vector_dist...");

        vector_dist_gpu<2, double, aggregate<double,double,double,double,double,VectorS<2, double>>> domain(0, box, bc, ghost);
@@ -886,8 +884,8 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_y_gpu Dy(domain, 2, rCut);
-        Laplacian_gpu Lap(domain, 2, rCut);
+        Derivative_y_gpu Dy(domain, 2, rCut,2,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut, 3,support_options::N_PARTICLES);

        DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain);

@@ -1033,9 +1031,10 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut);
-        Derivative_y_gpu Dy(domain, 2, rCut);
-        Laplacian_gpu Lap(domain, 2, rCut);
+        Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut, 1.9,support_options::N_PARTICLES);
+
        petsc_solver<double> solver;
        solver.setRestart(500);
        solver.setSolver(KSPGMRES);
@@ -1175,10 +1174,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::RADIUS);
-        Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::RADIUS);
-        Laplacian_gpu Lap(domain, 2, rCut,1.9,support_options::RADIUS);
-
+        Derivative_x Dx(domain, 2, rCut,1.9,support_options::RADIUS);
+        Derivative_y Dy(domain, 2, rCut,1.9,support_options::RADIUS);
+        Laplacian Lap(domain, 2, rCut,1.9,support_options::RADIUS);

        openfpm::vector<aggregate<int>> bulk;
        openfpm::vector<aggregate<int>> up_p;

--- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
--- a/src/DCPSE/Dcpse.cuh
+++ b/src/DCPSE/Dcpse.cuh
@@ -484,9 +484,10 @@ public:

 private:

+    template <typename U>
    void initializeAdaptive(vector_type &particles,
                            unsigned int convergenceOrder,
-                            double rCut) {
+                            U rCut) {
        // Still need to be tested
 #ifdef SE_CLASS1
        this->update_ctr=particles.getMapCtr();
@@ -505,77 +506,7 @@ private:
        if (!isSharedSupport) {
            SupportBuilder<vector_type,vector_type>
                supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
-            unsigned int requiredSupportSize = monomialBasis.size();
-            // need to resize supportKeys1D to yet unknown supportKeysTotalN
-            // add() takes too long
-            openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
-
-            auto it = particles.getDomainIterator();
-            while (it.isNext()) {
-                auto key_o = particles.getOriginKey(it.get());
-                subsetKeyPid.get(key_o.getKey()) = it.get().getKey();
-
-                Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
-                supportRefs.get(key_o.getKey()) = key_o.getKey();
-                tempSupportKeys.get(key_o.getKey()) = support.getKeys();
-                kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
-
-                if (maxSupportSize < support.size())
-                    maxSupportSize = support.size();
-                supportKeysTotalN += support.size();

-                EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
-                // Vandermonde matrix computation
-                Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-                        vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
-
-                vandermonde.getMatrix(V);
-
-                T condV = conditionNumber(V, condVTOL);
-                T eps = vandermonde.getEps();
-                if (condV > condVTOL) {
-                    requiredSupportSize *= 2;
-                    std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
-                    continue;
-                } else requiredSupportSize = monomialBasis.size();
-
-                ++it;
-            }
-
-            kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
-            supportKeys1D.resize(supportKeysTotalN);
-
-            size_t offset = 0;
-            for (size_t i = 0; i < tempSupportKeys.size(); ++i)
-                for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
-                    supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
-        }
-
-        kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
-    }
-
-    void initializeAdaptive(vector_type &particles,
-                            unsigned int convergenceOrder,
-                            float rCut) {
-        // Still need to be tested
-#ifdef SE_CLASS1
-        this->update_ctr=particles.getMapCtr();
-#endif
-
-        if (!isSharedSupport) {
-            subsetKeyPid.resize(particles.size_local_orig());
-            supportRefs.resize(particles.size_local());
-        }
-        localEps.resize(particles.size_local());
-        localEpsInvPow.resize(particles.size_local());
-        kerOffsets.resize(particles.size_local()+1);
-
-        const T condVTOL = 1e2;
-
-        if (!isSharedSupport) {
-            SupportBuilder<vector_type,vector_type>
-                supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
            unsigned int requiredSupportSize = monomialBasis.size();
            // need to resize supportKeys1D to yet unknown supportKeysTotalN
            // add() takes too long
@@ -598,13 +529,11 @@ private:
                EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
                // Vandermonde matrix computation
                Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-                        vandermonde(support, monomialBasis, particles, particles, HOverEpsilon);
-
+                        vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
                vandermonde.getMatrix(V);

                T condV = conditionNumber(V, condVTOL);
                T eps = vandermonde.getEps();
-
                if (condV > condVTOL) {
                    requiredSupportSize *= 2;
                    std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
@@ -624,13 +553,14 @@ private:
        }

        kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
+        assembleLocalMatrices_t(rCut);
    }

+    template <typename U>
    void initializeStaticSize(vector_type &particles,
                              unsigned int convergenceOrder,
-                              double rCut,
-                              double supportSizeFactor) {
+                              U rCut,
+                              U supportSizeFactor) {
 #ifdef SE_CLASS1
        this->update_ctr=particles.getMapCtr();
 #endif
@@ -697,82 +627,19 @@ std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution
 std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
 std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;

-        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
+        assembleLocalMatrices_t(rCut);
    }

-    // ad hoc solution to template specialization for float/double
-    void initializeStaticSize(vector_type &particles,
-                              unsigned int convergenceOrder,
-                              float rCut,
-                              float supportSizeFactor) {
-#ifdef SE_CLASS1
-        this->update_ctr=particles.getMapCtr();
-#endif
-        this->rCut=rCut;
-        this->supportSizeFactor=supportSizeFactor;
-        this->convergenceOrder=convergenceOrder;
+    // Cublas subroutine selector: float or double
+    // rCut is needed to overcome limitation of nested template class specialization
+    template <typename U> void assembleLocalMatrices_t(U rCut) {
+        throw std::invalid_argument("DCPSE operator error: CUBLAS supports only float or double"); }

-        if (!isSharedSupport) {
-            subsetKeyPid.resize(particles.size_local_orig());
-            supportRefs.resize(particles.size_local());
-        }
-        localEps.resize(particles.size_local());
-        localEpsInvPow.resize(particles.size_local());
+    void assembleLocalMatrices_t(float rCut) {
+        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched); }

-std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
-        auto it = particles.getDomainIterator();
-
-        if (opt==support_options::RADIUS) {
-            if (!isSharedSupport) {
-                while (it.isNext()) {
-                    auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
-                    supportRefs.get(key_o.getKey()) = key_o.getKey();
-                    ++it;
-                }
-
-                SupportBuilderGPU<vector_type> supportBuilder(particles, rCut);
-                supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN);
-            }
-        } else {
-            if (!isSharedSupport){
-                openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
-                size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor;
-                // need to resize supportKeys1D to yet unknown supportKeysTotalN
-                // add() takes too long
-                SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
-                kerOffsets.resize(supportRefs.size()+1);
-
-                while (it.isNext()) {
-                    auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
-
-                    Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
-                    supportRefs.get(key_o.getKey()) = key_o.getKey();
-                    tempSupportKeys.get(key_o.getKey()) = support.getKeys();
-                    kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
-
-                    if (maxSupportSize < support.size()) maxSupportSize = support.size();
-                    supportKeysTotalN += support.size();
-                    ++it;
-                }
-
-                kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
-                supportKeys1D.resize(supportKeysTotalN);
-
-                size_t offset = 0;
-                for (size_t i = 0; i < tempSupportKeys.size(); ++i)
-                    for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
-                        supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
-            }
-
-            kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        }
-
-std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
-std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
-std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;
-
-        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
-    }
+    void assembleLocalMatrices_t(double rCut) {
+        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched); }

    template<typename cublasLUDec_type, typename cublasTriangSolve_type>
    void assembleLocalMatrices(cublasLUDec_type cublasLUDecFunc, cublasTriangSolve_type cublasTriangSolveFunc) {

--- a/src/DCPSE/Dcpse.hpp
+++ b/src/DCPSE/Dcpse.hpp
@@ -878,7 +878,7 @@ private:
        localEpsInvPow.resize(particlesTo.size_local_orig());
        kerOffsets.resize(particlesTo.size_local_orig());
        kerOffsets.fill(-1);
-        T avgSpacingGlobal=0,maxSpacingGlobal=0,minSpacingGlobal=std::numeric_limits<T>::max();
+        T avgSpacingGlobal=0,avgSpacingGlobal2=0,maxSpacingGlobal=0,minSpacingGlobal=std::numeric_limits<T>::max();
        size_t Counter=0;
        auto it = particlesTo.getDomainIterator();
        while (it.isNext()) {
@@ -900,6 +900,7 @@ private:
            T eps = vandermonde.getEps();
            avgSpacingGlobal+=eps;
            T tSpacing = vandermonde.getMinSpacing();
+            avgSpacingGlobal2+=tSpacing;
            if(tSpacing>maxSpacingGlobal)
            {
                maxSpacingGlobal=tSpacing;
@@ -948,12 +949,13 @@ private:
        }

        v_cl.sum(avgSpacingGlobal);
+        v_cl.sum(avgSpacingGlobal2);
        v_cl.max(maxSpacingGlobal);
        v_cl.min(minSpacingGlobal);
        v_cl.sum(Counter);
        v_cl.execute();
        if(v_cl.rank()==0)
-        {std::cout<<"DCPSE Operator Construction Complete. The global avg spacing in the support <h> is: "<<HOverEpsilon*avgSpacingGlobal/(T(Counter))<<" (c="<<HOverEpsilon<<"). Min Spacing Range=["<<minSpacingGlobal<<","<<maxSpacingGlobal<<"]."<<std::endl;}
+        {std::cout<<"DCPSE Operator Construction Complete. The global avg spacing in the support <h> is: "<<HOverEpsilon*avgSpacingGlobal/(T(Counter))<<" (c="<<HOverEpsilon<<"). Avg:"<<avgSpacingGlobal2/(T(Counter))<<" Range:["<<minSpacingGlobal<<","<<maxSpacingGlobal<<"]."<<std::endl;}
    }

    T computeKernel(Point<dim, T> x, EMatrix<T, Eigen::Dynamic, 1> & a) const {

--- a/src/DCPSE/SupportBuilder.hpp
+++ b/src/DCPSE/SupportBuilder.hpp
@@ -209,6 +209,9 @@ private:
                    MinSpacing = rp.get(i).dist;
                }
            }
+#ifdef SE_CLASS1
+        assert(MinSpacing !=0 && "You have multiple particles on the same position.");
+#endif
            for (int i = 0; i < rp.size(); i++) {
                if (rp.get(i).dist < AdapFac * MinSpacing) {
                    points.push_back(rp.get(i).offset);
@@ -221,6 +224,7 @@ private:
                    points.push_back(rp.get(i).offset);
                }
            }
+
        //MinSpacing=MinSpacing/requiredSupportSize
        return points;
    }

--- a/src/Matrix/SparseMatrix_petsc.hpp
+++ b/src/Matrix/SparseMatrix_petsc.hpp
@@ -372,6 +372,17 @@ public:
 		return 0;
 	}

+	/*! \brief Return the state of matrix
+	 *
+	 * Returns a bool flag that indicated whether the matrix
+	 * has already been filled via MatSetValues
+	 *
+	 */
+	bool isMatrixFilled()
+	{
+		return m_created;
+	}
+
 	/* Write matrix on vtk
 	 *
 	 * \param out file to write into

--- a/src/OdeIntegrators/OdeIntegrators.hpp
+++ b/src/OdeIntegrators/OdeIntegrators.hpp
@@ -30,7 +30,58 @@ namespace boost{
 #include <boost/numeric/odeint.hpp>
 #include "Operators/Vector/vector_dist_operators.hpp"
 #include "FiniteDifference/FD_expressions.hpp"
-#include "OdeIntegrators/boost_vector_algebra_ofp.hpp"
+#include "OdeIntegrators/vector_algebra_ofp.hpp"
+
+#ifdef __NVCC__
+#include "OdeIntegrators/vector_algebra_ofp_gpu.hpp"
+/*! \brief A 1d Odeint and Openfpm compatible structure.
+ *
+ *  Use the method this.data.get<d>() to refer to property of all the particles in the dimension d.
+ *
+ * d starts with 0.
+ *
+ */
+struct state_type_1d_ofp_ker{
+    state_type_1d_ofp_ker(){
+    }
+    typedef decltype(std::declval<texp_v_gpu<double>>().getVector().toKernel()) state_kernel;
+    typedef size_t size_type;
+    typedef int is_state_vector;
+    aggregate<state_kernel> data;
+
+    __host__ __device__ size_t size() const
+    { return data.get<0>().size(); }
+
+};
+/*! \brief A 1d Odeint and Openfpm compatible structure.
+ *
+ *  Use the method this.data.get<d>() to refer to property of all the particles in the dimension d.
+ *
+ * d starts with 0.
+ *
+ */
+struct state_type_1d_ofp_gpu{
+    state_type_1d_ofp_gpu(){
+    }
+    typedef size_t size_type;
+    typedef int is_state_vector;
+    aggregate<texp_v_gpu<double>> data;
+
+    size_t size() const
+    { return data.get<0>().size(); }
+
+    void resize(size_t n)
+    {
+        data.get<0>().resize(n);
+    }
+    state_type_1d_ofp_ker toKernel() const
+    {
+        state_type_1d_ofp_ker s1_ker;
+        s1_ker.data.get<0>()=data.get<0>().getVector().toKernel();
+        return s1_ker;
+    }
+};
+#endif

 namespace boost { namespace numeric { namespace odeint {

@@ -184,7 +235,7 @@ struct state_type_ofpm_add_elements
 template<typename state_type, typename ... list>
 struct state_type_ofpm_add_elements<0,state_type,list ...>
 {
-   typedef aggregate<list ...> type; 
+   typedef aggregate<list ...> type;
 };

 template<int n_state, typename state_type>
@@ -199,8 +250,8 @@ struct state_type_ofpm_impl
    type_data data;

    FD::gdb_ext_plus_g_info<state_type::dims> size() const
-    { 
-        return data.template get<0>().size(); 
+    {
+        return data.template get<0>().size();
    }


@@ -212,19 +263,6 @@ struct state_type_ofpm_impl


 namespace boost {
-
-    // template<typename state_type>
-    // struct range_size<const state_type_ofpm_impl<3,state_type>>
-    // {
-    //     typedef FD::gdb_ext_plus_g_info<state_type::dims> type;
-    // };
-
-    // template<typename state_type>
-    // struct range_size<const state_type_ofpm_impl<1,state_type>>
-    // {
-    //     typedef FD::gdb_ext_plus_g_info<state_type::dims> type;
-    // };
-
    namespace numeric {
        namespace odeint {

@@ -235,7 +273,13 @@ namespace boost {
            typedef boost::true_type type;
            static const bool value = type::value;
            };
-
+#ifdef __NVCC__
+            template<>
+            struct is_resizeable<state_type_1d_ofp_gpu> {
+                typedef boost::true_type type;
+                static const bool value = type::value;
+            };
+#endif
            template<>
            struct is_resizeable<state_type_2d_ofp> {
                typedef boost::true_type type;

--- a/src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp
+++ b/src/OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp
@@ -11,7 +11,6 @@
 #include "config.h"
 #include "Grid/grid_dist_id.hpp"
 #include "OdeIntegrators/OdeIntegrators.hpp"
-#include "OdeIntegrators/boost_vector_algebra_ofp.hpp"
 #include "FiniteDifference/FD_op.hpp"
 #include "util/util_debug.hpp"
 #include "util/common.hpp"

--- a/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp
+++ b/src/OdeIntegrators/tests/OdeIntegratores_base_tests.cpp
@@ -15,8 +15,10 @@
 #include "Vector/vector_dist_subset.hpp"
 #include "Decomposition/Distribution/SpaceDistribution.hpp"
 #include "OdeIntegrators/OdeIntegrators.hpp"
+#ifdef HAVE_EIGEN
 #include "DCPSE/DCPSE_op/DCPSE_op.hpp"
-#include "OdeIntegrators/boost_vector_algebra_ofp.hpp"
+#endif
+#include "OdeIntegrators/vector_algebra_ofp.hpp"

 typedef texp_v<double> state_type;
 const double a = 2.8e-4;
@@ -465,7 +467,6 @@ BOOST_AUTO_TEST_CASE(odeint_base_test3)
 }

 #ifdef HAVE_EIGEN
-
 BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) {
        size_t edgeSemiSize = 5;
        const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1};
@@ -567,5 +568,4 @@ BOOST_AUTO_TEST_CASE(dcpse_op_react_diff_test) {
        }
 }
 #endif
-
 BOOST_AUTO_TEST_SUITE_END()
--- a/src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu
+++ b/src/OdeIntegrators/tests/Odeintegrators_test_gpu.cu
+//
+// Created by abhinav on 2/28/23.
+//
+#include "config.h"
+#include <type_traits>
+#include <cstring>
+#include "util/common.hpp"
+
+#define BOOST_TEST_DYN_LINK
+
+#include "util/util_debug.hpp"
+#include <boost/test/unit_test.hpp>
+#include <iostream>
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "OdeIntegrators/OdeIntegrators.hpp"
+//#include "DCPSE/DCPSE_op/DCPSE_op.hpp"
+#ifdef __NVCC__
+
+typedef state_type_1d_ofp_gpu state_type;
+//const double a = 2.8e-4;
+//const double b = 5e-3;
+//const double tau = .1;
+//const double k = .005;
+
+void ExponentialGPU( const state_type &x , state_type &dxdt , const double t )
+{
+    dxdt.data.get<0>() = x.data.get<0>();
+    //x.data.get<0>().getVector().deviceToHost<0>();
+    //dxdt.data.get<0>().getVector().deviceToHost<0>();
+}
+
+BOOST_AUTO_TEST_SUITE(odeInt_BASE_tests)
+
+BOOST_AUTO_TEST_CASE(odeint_base_test_gpu)
+        {
+        size_t edgeSemiSize = 512;
+        const size_t sz[2] = {edgeSemiSize,edgeSemiSize };
+        Box<2, double> box({ 0, 0 }, { 1.0, 1.0 });
+        size_t bc[2] = { NON_PERIODIC, NON_PERIODIC };
+        double spacing[2];
+        spacing[0] = 1.0 / (sz[0] - 1);
+        spacing[1] = 1.0 / (sz[1] - 1);
+        double rCut = 3.9 * spacing[0];
+        Ghost<2, double> ghost(rCut);
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+
+        vector_dist_gpu<2, double, aggregate<double, double,double>> Particles(0, box, bc, ghost);
+
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext())
+        {
+            Particles.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double xp0 = k0 * spacing[0];
+            Particles.getLastPos()[0] = xp0;
+            mem_id k1 = key.get(1);
+            double yp0 = k1 * spacing[1];
+            Particles.getLastPos()[1] = yp0;
+            Particles.getLastProp<0>() = xp0*yp0*exp(-5);
+            Particles.getLastProp<1>() = xp0*yp0*exp(5);
+            ++it;
+        }
+
+        Particles.map();
+        Particles.ghost_get<0>();
+        Particles.hostToDeviceProp<0,1,2>();
+        auto Init = getV<0,comp_dev>(Particles);
+        auto Sol = getV<1,comp_dev>(Particles);
+        auto OdeSol = getV<2,comp_dev>(Particles);
+
+        state_type x0;
+        x0.data.get<0>()=Init;
+        x0.data.get<0>().getVector().deviceToHost<0>();
+        // The rhs of x' = f(x)
+
+        double t0=-5,tf=5;
+        const double dt=0.01;
+
+
+        //This doesnt work Why?
+        //size_t steps=boost::numeric::odeint::integrate(Exponential,x0,0.0,tf,dt);
+        timer tt;
+        tt.start();
+        size_t steps=boost::numeric::odeint::integrate_const( boost::numeric::odeint::runge_kutta4< state_type, double, state_type, double, boost::numeric::odeint::vector_space_algebra_ofp_gpu,boost::numeric::odeint::ofp_operations>(),ExponentialGPU,x0,t0,tf,dt);
+        tt.stop();
+        OdeSol=x0.data.get<0>();
+        Particles.deviceToHostProp<0,1,2>();
+        auto it2 = Particles.getDomainIterator();
+        double worst = 0.0;
+        while (it2.isNext()) {
+            auto p = it2.get();
+            if (fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p)) > worst) {
+                worst = fabs(Particles.getProp<1>(p) - Particles.getProp<2>(p));
+            }
+            ++it2;
+        }
+        std::cout<<"WCT:"<<tt.getwct()<<std::endl;
+        std::cout<<"CPU:"<<tt.getcputime()<<std::endl;
+        std::cout<<worst<<std::endl;
+        BOOST_REQUIRE(worst < 1e-6);
+        }
+BOOST_AUTO_TEST_SUITE_END()
+#endif
\ No newline at end of file
--- a/src/OdeIntegrators/boost_vector_algebra_ofp.hpp
+++ b/src/OdeIntegrators/boost_vector_algebra_ofp.hpp
--- a/src/OdeIntegrators/vector_algebra_ofp_gpu.hpp
+++ b/src/OdeIntegrators/vector_algebra_ofp_gpu.hpp
--- a/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh
+++ b/src/Operators/Vector/cuda/vector_dist_operators_cuda.cuh
@@ -80,7 +80,6 @@ struct pos_or_propL_ker
 };


-
 /*! \brief selector for position or properties left side
 *
 * \tparam vector type of the original vector
@@ -428,7 +427,7 @@ struct vector_dist_op_compute_op<prp,false,comp_host>
 	}
 };

-
+#define NVCC
 #ifdef __NVCC__

 template<unsigned int prp, unsigned int dim ,typename vector, typename expr>
No results found