diff --git a/.gitignore b/.gitignore
index 66ab1b076f8438d370f47c61a67234071f542e03..aa3c28ead49b91822a4924d3810d945ef458e8a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,4 @@ projectId.sh
 
 **/.gitignore
 .gitignore
+/doxygen/
diff --git a/example/Numerics/2D_ActiveFluid/Makefile b/example/Numerics/2D_ActiveFluid/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..f312eaf60f78cd63eb20f716345c68d89e0273fc
--- /dev/null
+++ b/example/Numerics/2D_ActiveFluid/Makefile
@@ -0,0 +1,23 @@
+include ../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O0 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+Active2d: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: Active2d
+
+run: all
+	mpirun -np 4 /Active2d
+
+.PHONY: clean all run
+
+clean:
+	rm -f *.o *~ core Active2d
diff --git a/example/Numerics/2D_ActiveFluid/main.cpp b/example/Numerics/2D_ActiveFluid/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..44b0c6f49ef32a6a7c87d17faee8f3709ab5610a
--- /dev/null
+++ b/example/Numerics/2D_ActiveFluid/main.cpp
@@ -0,0 +1,530 @@
+//
+// Created by Abhinav Singh on 15.03.20.
+//
+//#define SE_CLASS1
+#include "config.h"
+#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS
+#define BOOST_MPL_LIMIT_VECTOR_SIZE 40
+#include <iostream>
+#include "DCPSE/DCPSE_op/DCPSE_op.hpp"
+#include "DCPSE/DCPSE_op/DCPSE_Solver.hpp"
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "Vector/vector_dist_subset.hpp"
+#include "DCPSE/DCPSE_op/EqnsStruct.hpp"
+#include "OdeIntegrators/OdeIntegrators.hpp"
+
+constexpr int x = 0;
+constexpr int y = 1;
+constexpr int POLARIZATION= 0,VELOCITY = 1, VORTICITY = 2, EXTFORCE = 3,PRESSURE = 4, STRAIN_RATE = 5, STRESS = 6, MOLFIELD = 7, DPOL = 8, DV = 9, VRHS = 10, F1 = 11, F2 = 12, F3 = 13, F4 = 14, F5 = 15, F6 = 16, V_T = 17, DIV = 18, DELMU = 19, HPB = 20, FE = 21, R = 22;
+
+double eta = 1.0;
+double nu = -0.5;
+double gama = 0.1;
+double zeta = 0.07;
+double Ks = 1.0;
+double Kb = 1.0;
+double lambda = 0.1;
+
+int wr_f;
+int wr_at;
+double V_err_eps;
+
+void *vectorGlobal=nullptr,*vectorGlobal_bulk=nullptr,*vectorGlobal_boundary=nullptr;
+const openfpm::vector<std::string>
+PropNAMES={"00-Polarization","01-Velocity","02-Vorticity","03-ExternalForce","04-Pressure","05-StrainRate","06-Stress","07-MolecularField","08-DPOL","09-DV","10-VRHS","11-f1","12-f2","13-f3","14-f4","15-f5","16-f6","17-V_T","18-DIV","19-DELMU","20-HPB","21-FrankEnergy","22-R"};
+typedef aggregate<VectorS<2, double>,VectorS<2, double>,double[2][2],VectorS<2, double>,double,double[2][2],double[2][2],VectorS<2, double>,VectorS<2, double>,VectorS<2, double>,VectorS<2, double>,double,double,double,double,double,double,VectorS<2, double>,double,double,double,double,double> Activegels;
+typedef vector_dist_ws<2, double,Activegels> vector_type;
+typedef vector_dist_subset<2, double, Activegels> vector_type2;
+
+//Functor to Compute RHS of the time derivative of the polarity
+template<typename DX,typename DY,typename DXX,typename DXY,typename DYY>
+struct PolarEv
+{
+    DX &Dx;
+    DY &Dy;
+    DXX &Dxx;
+    DXY &Dxy;
+    DYY &Dyy;
+    //Constructor
+    PolarEv(DX &Dx,DY &Dy,DXX &Dxx,DXY &Dxy,DYY &Dyy):Dx(Dx),Dy(Dy),Dxx(Dxx),Dxy(Dxy),Dyy(Dyy)
+    {}
+
+    void operator()( const state_type_2d_ofp &X , state_type_2d_ofp &dxdt , const double t ) const
+    {
+
+        vector_type &Particles= *(vector_type *) vectorGlobal;
+        vector_type2 &Particles_bulk= *(vector_type2 *) vectorGlobal_bulk;
+
+        auto Pol=getV<POLARIZATION>(Particles);
+        auto Pol_bulk=getV<POLARIZATION>(Particles_bulk);
+        auto h = getV<MOLFIELD>(Particles);
+        auto u = getV<STRAIN_RATE>(Particles);
+        auto dPol = getV<DPOL>(Particles);
+        auto W = getV<VORTICITY>(Particles);
+        auto delmu = getV<DELMU>(Particles);
+        auto H_p_b = getV<HPB>(Particles);
+        auto r = getV<R>(Particles);
+        auto dPol_bulk = getV<DPOL>(Particles_bulk);
+        Pol[x]=X.data.get<0>();
+        Pol[y]=X.data.get<1>();
+        Particles.ghost_get<POLARIZATION>(SKIP_LABELLING);
+        H_p_b = Pol[x] * Pol[x] + Pol[y] * Pol[y];
+
+        h[y] = (Pol[x] * (Ks * Dyy(Pol[y]) + Kb * Dxx(Pol[y]) + (Ks - Kb) * Dxy(Pol[x])) -
+                    Pol[y] * (Ks * Dxx(Pol[x]) + Kb * Dyy(Pol[x]) + (Ks - Kb) * Dxy(Pol[y])));
+
+        h[x] = -gama * (lambda * delmu - nu * (u[x][x] * Pol[x] * Pol[x] + u[y][y] * Pol[y] * Pol[y] + 2 * u[x][y] * Pol[x] * Pol[y]) / (H_p_b));
+ 
+        dPol_bulk[x] = ((h[x] * Pol[x] - h[y] * Pol[y]) / gama + lambda * delmu * Pol[x] -
+                     nu * (u[x][x] * Pol[x] + u[x][y] * Pol[y]) + W[x][x] * Pol[x] +
+                     W[x][y] * Pol[y]);
+        dPol_bulk[y] = ((h[x] * Pol[y] + h[y] * Pol[x]) / gama + lambda * delmu * Pol[y] -
+                     nu * (u[y][x] * Pol[x] + u[y][y] * Pol[y]) + W[y][x] * Pol[x] +
+                     W[y][y] * Pol[y]);
+        dxdt.data.get<0>()=dPol[x];
+        dxdt.data.get<1>()=dPol[y];
+    }
+};
+
+// Functor to calculate velocity and move particles with explicit euler
+template<typename DX,typename DY,typename DXX,typename DXY,typename DYY>
+struct CalcVelocity
+{
+
+    DX &Dx, &Bulk_Dx;
+    DY &Dy, &Bulk_Dy;
+    DXX &Dxx;
+    DXY &Dxy;
+    DYY &Dyy;
+
+    double t_old;
+    int ctr;
+
+    //Constructor
+    CalcVelocity(DX &Dx,DY &Dy,DXX &Dxx,DXY &Dxy,DYY &Dyy,DX &Bulk_Dx,DY &Bulk_Dy):Dx(Dx),Dy(Dy),Dxx(Dxx),Dxy(Dxy),Dyy(Dyy),Bulk_Dx(Bulk_Dx),Bulk_Dy(Bulk_Dy)
+    {
+        t_old = 0.0;
+        ctr = 0;
+    }
+
+    void operator() (state_type_2d_ofp &state, double t)
+    {
+
+        double dt = t - t_old;
+        vector_type &Particles= *(vector_type *) vectorGlobal;
+        vector_type2 &Particles_bulk= *(vector_type2 *) vectorGlobal_bulk;
+        vector_type2 &Particles_boundary= *(vector_type2 *) vectorGlobal_boundary;
+        auto &v_cl = create_vcluster();
+
+        timer tt;
+        
+        auto Pos = getV<PROP_POS>(Particles);
+        auto Pol=getV<POLARIZATION>(Particles);
+        auto V = getV<VELOCITY>(Particles);
+        auto H_p_b = getV<HPB>(Particles);
+
+
+        // skip in first time step
+        if (dt != 0) {
+            tt.start();
+            //normalize polarization
+            H_p_b = sqrt(Pol[x] * Pol[x] + Pol[y] * Pol[y]);
+            Pol = Pol / H_p_b;
+            Pos = Pos + (t-t_old)*V;
+            Particles.map();
+            Particles.ghost_get<POLARIZATION, EXTFORCE, DELMU>();
+            Particles_bulk.update();
+            Particles_boundary.update();
+            tt.start();
+            Dx.update(Particles);
+            Dy.update(Particles);
+            Dxy.update(Particles);
+            Dxx.update(Particles);
+            Dyy.update(Particles);
+
+            Bulk_Dx.update(Particles_bulk);
+            Bulk_Dy.update(Particles_bulk);
+
+            state.data.get<0>()=Pol[x];
+            state.data.get<1>()=Pol[y];
+
+            tt.stop();
+            if (v_cl.rank() == 0) {
+                std::cout << "Updating operators took " << tt.getwct() << " seconds." << std::endl;
+                std::cout << "Time step " << ctr << " : " << t << " over." << std::endl;
+                std::cout << "----------------------------------------------------------" << std::endl;
+            }
+            ctr++;
+
+        }
+        auto Dyx = Dxy;
+        t_old = t;
+        tt.start();
+
+        auto & bulk = Particles_bulk.getIds();
+        auto & boundary = Particles_boundary.getIds();
+        auto Pol_bulk=getV<POLARIZATION>(Particles_bulk);
+        auto sigma = getV<STRESS>(Particles);
+        auto r = getV<R>(Particles);
+        auto h = getV<MOLFIELD>(Particles);
+        auto FranckEnergyDensity = getV<FE>(Particles);
+        auto f1 = getV<F1>(Particles);
+        auto f2 = getV<F2>(Particles);
+        auto f3 = getV<F3>(Particles);
+        auto f4 = getV<F4>(Particles);
+        auto f5 = getV<F5>(Particles);
+        auto f6 = getV<F6>(Particles);
+        auto dV = getV<DV>(Particles);
+        auto delmu = getV<DELMU>(Particles); // why is delmu a property, not a constant?
+        auto g = getV<EXTFORCE>(Particles);
+        auto P = getV<PRESSURE>(Particles);
+        auto P_bulk = getV<PRESSURE>(Particles_bulk); //Pressure only on inside
+        auto RHS = getV<VRHS>(Particles);
+        auto RHS_bulk = getV<VRHS>(Particles_bulk);
+        auto div = getV<DIV>(Particles);
+        auto V_t = getV<V_T>(Particles);
+        auto u = getV<STRAIN_RATE>(Particles);
+        auto W = getV<VORTICITY>(Particles);
+
+        Pol_bulk[x]=state.data.get<0>();
+        Pol_bulk[y]=state.data.get<1>();
+        Particles.ghost_get<POLARIZATION>(SKIP_LABELLING);
+
+        eq_id x_comp, y_comp;
+        x_comp.setId(0);
+        y_comp.setId(1);
+
+        int n = 0,nmax = 300,errctr = 0, Vreset = 0;
+        double V_err = 1,V_err_eps = 5 * 1e-3, V_err_old,sum, sum1;
+        std::cout << "Calculate velocity (step t=" << t << ")" << std::endl;
+        tt.start();
+        petsc_solver<double> solverPetsc;
+        solverPetsc.setSolver(KSPGMRES);
+        solverPetsc.setPreconditioner(PCJACOBI);
+        Particles.ghost_get<POLARIZATION>(SKIP_LABELLING);
+        // calculate stress
+        sigma[x][x] =
+                -Ks * Dx(Pol[x]) * Dx(Pol[x]) - Kb * Dx(Pol[y]) * Dx(Pol[y]) + (Kb - Ks) * Dy(Pol[x]) * Dx(Pol[y]);
+        sigma[x][y] =
+                -Ks * Dy(Pol[y]) * Dx(Pol[y]) - Kb * Dy(Pol[x]) * Dx(Pol[x]) + (Kb - Ks) * Dx(Pol[y]) * Dx(Pol[x]);
+        sigma[y][x] =
+                -Ks * Dx(Pol[x]) * Dy(Pol[x]) - Kb * Dx(Pol[y]) * Dy(Pol[y]) + (Kb - Ks) * Dy(Pol[x]) * Dy(Pol[y]);
+        sigma[y][y] =
+                -Ks * Dy(Pol[y]) * Dy(Pol[y]) - Kb * Dy(Pol[x]) * Dy(Pol[x]) + (Kb - Ks) * Dx(Pol[y]) * Dy(Pol[x]);
+        Particles.ghost_get<STRESS>(SKIP_LABELLING);
+
+        // if R == 0 then set to 1 to avoid division by zero for defects
+        r = Pol[x] * Pol[x] + Pol[y] * Pol[y];
+        for (int j = 0; j < bulk.size(); j++) {
+            auto p = bulk.get<0>(j);
+            Particles.getProp<R>(p) = (Particles.getProp<R>(p) == 0) ? 1 : Particles.getProp<R>(p);
+        }
+        for (int j = 0; j < boundary.size(); j++) {
+            auto p = boundary.get<0>(j);
+            Particles.getProp<R>(p) = (Particles.getProp<R>(p) == 0) ? 1 : Particles.getProp<R>(p);
+        }
+
+        // calculate traversal molecular field (H_perpendicular)
+        h[y] = (Pol[x] * (Ks * Dyy(Pol[y]) + Kb * Dxx(Pol[y]) + (Ks - Kb) * Dxy(Pol[x])) -
+                Pol[y] * (Ks * Dxx(Pol[x]) + Kb * Dyy(Pol[x]) + (Ks - Kb) * Dxy(Pol[y])));
+        Particles.ghost_get<MOLFIELD>(SKIP_LABELLING);
+
+        // calulate FranckEnergyDensity
+        FranckEnergyDensity = (Ks / 2.0) *
+                              ((Dx(Pol[x]) * Dx(Pol[x])) + (Dy(Pol[x]) * Dy(Pol[x])) +
+                               (Dx(Pol[y]) * Dx(Pol[y])) +
+                               (Dy(Pol[y]) * Dy(Pol[y]))) +
+                              ((Kb - Ks) / 2.0) * ((Dx(Pol[y]) - Dy(Pol[x])) * (Dx(Pol[y]) - Dy(Pol[x])));
+        Particles.ghost_get<FE>(SKIP_LABELLING);
+
+        // calculate preactors for LHS of Stokes Equation.
+        f1 = gama * nu * Pol[x] * Pol[x] * (Pol[x] * Pol[x] - Pol[y] * Pol[y]) / (r);
+        f2 = 2.0 * gama * nu * Pol[x] * Pol[y] * (Pol[x] * Pol[x] - Pol[y] * Pol[y]) / (r);
+        f3 = gama * nu * Pol[y] * Pol[y] * (Pol[x] * Pol[x] - Pol[y] * Pol[y]) / (r);
+        f4 = 2.0 * gama * nu * Pol[x] * Pol[x] * Pol[x] * Pol[y] / (r);
+        f5 = 4.0 * gama * nu * Pol[x] * Pol[x] * Pol[y] * Pol[y] / (r);
+        f6 = 2.0 * gama * nu * Pol[x] * Pol[y] * Pol[y] * Pol[y] / (r);
+        Particles.ghost_get<F1, F2, F3, F4, F5, F6>(SKIP_LABELLING);
+        texp_v<double> Dxf1 = Dx(f1),Dxf2 = Dx(f2),Dxf3 = Dx(f3),Dxf4 = Dx(f4),Dxf5 = Dx(f5),Dxf6 = Dx(f6),
+                        Dyf1 = Dy(f1),Dyf2 = Dy(f2),Dyf3 = Dy(f3),Dyf4 = Dy(f4),Dyf5 = Dy(f5),Dyf6 = Dy(f6);
+
+        // calculate RHS of Stokes Equation without pressure
+        dV[x] = -0.5 * Dy(h[y]) + zeta * Dx(delmu * Pol[x] * Pol[x]) + zeta * Dy(delmu * Pol[x] * Pol[y]) -
+                zeta * Dx(0.5 * delmu * (Pol[x] * Pol[x] + Pol[y] * Pol[y])) -
+                0.5 * nu * Dx(-2.0 * h[y] * Pol[x] * Pol[y])
+                - 0.5 * nu * Dy(h[y] * (Pol[x] * Pol[x] - Pol[y] * Pol[y])) - Dx(sigma[x][x]) -
+                Dy(sigma[x][y]) -
+                g[x]
+                - 0.5 * nu * Dx(-gama * lambda * delmu * (Pol[x] * Pol[x] - Pol[y] * Pol[y]))
+                - 0.5 * Dy(-2.0 * gama * lambda * delmu * (Pol[x] * Pol[y]));
+
+        dV[y] = -0.5 * Dx(-h[y]) + zeta * Dy(delmu * Pol[y] * Pol[y]) + zeta * Dx(delmu * Pol[x] * Pol[y]) -
+                zeta * Dy(0.5 * delmu * (Pol[x] * Pol[x] + Pol[y] * Pol[y])) -
+                0.5 * nu * Dy(2.0 * h[y] * Pol[x] * Pol[y])
+                - 0.5 * nu * Dx(h[y] * (Pol[x] * Pol[x] - Pol[y] * Pol[y])) - Dx(sigma[y][x]) -
+                Dy(sigma[y][y]) -
+                g[y]
+                - 0.5 * nu * Dy(gama * lambda * delmu * (Pol[x] * Pol[x] - Pol[y] * Pol[y]))
+                - 0.5 * Dx(-2.0 * gama * lambda * delmu * (Pol[x] * Pol[y]));
+        Particles.ghost_get<DV>(SKIP_LABELLING);
+
+        // Encode LHS of the Stokes Equations
+        auto Stokes1 = eta * (Dxx(V[x]) + Dyy(V[x]))
+                       + 0.5 * nu * (Dxf1 * Dx(V[x]) + f1 * Dxx(V[x]))
+                       + 0.5 * nu * (Dxf2 * 0.5 * (Dx(V[y]) + Dy(V[x])) + f2 * 0.5 * (Dxx(V[y]) + Dyx(V[x])))
+                       + 0.5 * nu * (Dxf3 * Dy(V[y]) + f3 * Dyx(V[y]))
+                       + 0.5 * nu * (Dyf4 * Dx(V[x]) + f4 * Dxy(V[x]))
+                       + 0.5 * nu * (Dyf5 * 0.5 * (Dx(V[y]) + Dy(V[x])) + f5 * 0.5 * (Dxy(V[y]) + Dyy(V[x])))
+                       + 0.5 * nu * (Dyf6 * Dy(V[y]) + f6 * Dyy(V[y]));
+        auto Stokes2 = eta * (Dxx(V[y]) + Dyy(V[y]))
+                       - 0.5 * nu * (Dyf1 * Dx(V[x]) + f1 * Dxy(V[x]))
+                       - 0.5 * nu * (Dyf2 * 0.5 * (Dx(V[y]) + Dy(V[x])) + f2 * 0.5 * (Dxy(V[y]) + Dyy(V[x])))
+                       - 0.5 * nu * (Dyf3 * Dy(V[y]) + f3 * Dyy(V[y]))
+                       + 0.5 * nu * (Dxf4 * Dx(V[x]) + f4 * Dxx(V[x]))
+                       + 0.5 * nu * (Dxf5 * 0.5 * (Dx(V[y]) + Dy(V[x])) + f5 * 0.5 * (Dxx(V[y]) + Dyx(V[x])))
+                       + 0.5 * nu * (Dxf6 * Dy(V[y]) + f6 * Dyx(V[y]));
+
+        tt.stop();
+
+        std::cout << "Init of Velocity took " << tt.getwct() << " seconds." << std::endl;
+
+        tt.start();
+        V_err = 1;
+        n = 0;
+        errctr = 0;
+        if (Vreset == 1) {
+            P = 0;
+            Vreset = 0;
+        }
+        P=0;
+
+        // integrate velocity
+        Particles.ghost_get<PRESSURE>(SKIP_LABELLING);
+        RHS_bulk[x] = dV[x] + Bulk_Dx(P);
+        RHS_bulk[y] = dV[y] + Bulk_Dy(P);
+        Particles.ghost_get<VRHS>(SKIP_LABELLING);
+
+        // prepare solver
+        DCPSE_scheme<equations2d2, vector_type> Solver(Particles);
+        Solver.impose(Stokes1, bulk, RHS[0], x_comp);
+        Solver.impose(Stokes2, bulk, RHS[1], y_comp);
+        Solver.impose(V[x], boundary, 0, x_comp);
+        Solver.impose(V[y], boundary, 0, y_comp);
+        Solver.solve_with_solver(solverPetsc, V[x], V[y]);
+        Particles.ghost_get<VELOCITY>(SKIP_LABELLING);
+        div = -(Dx(V[x]) + Dy(V[y]));
+        P_bulk = P + div;
+
+        // approximate velocity
+        while (V_err >= V_err_eps && n <= nmax) {
+            Particles.ghost_get<PRESSURE>(SKIP_LABELLING);
+            RHS_bulk[x] = dV[x] + Bulk_Dx(P);
+            RHS_bulk[y] = dV[y] + Bulk_Dy(P);
+            Particles.ghost_get<VRHS>(SKIP_LABELLING);
+            Solver.reset_b();
+            Solver.impose_b(bulk, RHS[0], x_comp);
+            Solver.impose_b(bulk, RHS[1], y_comp);
+            Solver.impose_b(boundary, 0, x_comp);
+            Solver.impose_b(boundary, 0, y_comp);
+            Solver.solve_with_solver(solverPetsc, V[x], V[y]);
+            Particles.ghost_get<VELOCITY>(SKIP_LABELLING);
+            div = -(Dx(V[x]) + Dy(V[y]));
+            P_bulk = P + div;
+            // calculate error
+            sum = 0;
+            sum1 = 0;
+            for (int j = 0; j < bulk.size(); j++) {
+                auto p = bulk.get<0>(j);
+                sum += (Particles.getProp<V_T>(p)[0] - Particles.getProp<VELOCITY>(p)[0]) *
+                       (Particles.getProp<V_T>(p)[0] - Particles.getProp<VELOCITY>(p)[0]) +
+                       (Particles.getProp<V_T>(p)[1] - Particles.getProp<VELOCITY>(p)[1]) *
+                       (Particles.getProp<V_T>(p)[1] - Particles.getProp<VELOCITY>(p)[1]);
+                sum1 += Particles.getProp<VELOCITY>(p)[0] * Particles.getProp<VELOCITY>(p)[0] +
+                        Particles.getProp<VELOCITY>(p)[1] * Particles.getProp<VELOCITY>(p)[1];
+            }
+            V_t = V;
+            v_cl.sum(sum);
+            v_cl.sum(sum1);
+            v_cl.execute();
+            sum = sqrt(sum);
+            sum1 = sqrt(sum1);
+            V_err_old = V_err;
+            V_err = sum / sum1;
+            if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) {
+                errctr++;
+                //alpha_P -= 0.1;
+            } else {
+                errctr = 0;
+            }
+            if (n > 3) {
+                if (errctr > 3) {
+                    std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN DIVERGENCE" << std::endl;
+                    Vreset = 1;
+                    break;
+                } else {
+                    Vreset = 0;
+                }
+            }
+            n++;
+
+        }
+        tt.stop();
+
+        Particles.ghost_get<VELOCITY>(SKIP_LABELLING);
+        // calculate strain rate
+        u[x][x] = Dx(V[x]);
+        u[x][y] = 0.5 * (Dx(V[y]) + Dy(V[x]));
+        u[y][x] = 0.5 * (Dy(V[x]) + Dx(V[y]));
+        u[y][y] = Dy(V[y]);
+
+        if (v_cl.rank() == 0) {
+            std::cout << "Rel l2 cgs err in V = " << V_err << " and took " << tt.getwct() << " seconds with " << n
+                      << " iterations. dt for the stepper is " << dt
+                      << std::endl;
+        }
+
+        // calculate vorticity
+        W[x][x] = 0;
+        W[x][y] = 0.5 * (Dy(V[x]) - Dx(V[y]));
+        W[y][x] = 0.5 * (Dx(V[y]) - Dy(V[x]));
+        W[y][y] = 0;
+
+        if (ctr%wr_at==0 || ctr==wr_f){
+            Particles.deleteGhost();
+            Particles.write_frame("Polar",ctr);
+            Particles.ghost_get<POLARIZATION>();
+        }
+    }
+};
+
+int main(int argc, char* argv[])
+{
+    {   openfpm_init(&argc,&argv);
+        timer tt2;
+        tt2.start();
+        size_t Gd = int(std::atof(argv[1]));
+        double tf = std::atof(argv[2]);
+        double dt = tf/std::atof(argv[3]);
+        wr_f=int(std::atof(argv[3]));
+        wr_at=1;
+        V_err_eps = 5e-4;
+
+        double boxsize = 10;
+        const size_t sz[2] = {Gd, Gd};
+        Box<2, double> box({0, 0}, {boxsize, boxsize});
+        double Lx = box.getHigh(0),Ly = box.getHigh(1);
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing = box.getHigh(0) / (sz[0] - 1),rCut = 3.9 * spacing;
+        int ord = 2;
+        Ghost<2, double> ghost(rCut);
+        auto &v_cl = create_vcluster();
+        vector_dist_ws<2, double,Activegels> Particles(0, box, bc, ghost);
+        Particles.setPropNames(PropNAMES);
+
+        double x0=box.getLow(0), y0=box.getLow(1), x1=box.getHigh(0), y1=box.getHigh(1);
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext()) {
+            Particles.add();
+            auto key = it.get();
+            double xp = key.get(0) * it.getSpacing(0),yp = key.get(1) * it.getSpacing(1);
+            Particles.getLastPos()[x] = xp;
+            Particles.getLastPos()[y] = yp;
+            if (xp != x0 && yp != y0 && xp != x1 && yp != y1)
+                Particles.getLastSubset(0);
+            else
+                Particles.getLastSubset(1);
+            ++it;
+        }
+        Particles.map();
+        Particles.ghost_get<POLARIZATION>();
+
+        //auto Pos = getV<PROP_POS>(Particles);
+
+        auto Pol = getV<POLARIZATION>(Particles);
+        auto V = getV<VELOCITY>(Particles);
+        auto g = getV<EXTFORCE>(Particles);
+        auto P = getV<PRESSURE>(Particles);
+        auto delmu = getV<DELMU>(Particles);
+        auto dPol = getV<DPOL>(Particles);
+
+        g = 0;delmu = -1.0;P = 0;V = 0;
+        auto it2 = Particles.getDomainIterator();
+        while (it2.isNext()) {
+            auto p = it2.get();
+            Point<2, double> xp = Particles.getPos(p);
+            Particles.getProp<POLARIZATION>(p)[x] = sin(2 * M_PI * (cos((2 * xp[x] - Lx) / Lx) - sin((2 * xp[y] - Ly) / Ly)));
+            Particles.getProp<POLARIZATION>(p)[y] = cos(2 * M_PI * (cos((2 * xp[x] - Lx) / Lx) - sin((2 * xp[y] - Ly) / Ly)));
+            ++it2;
+        }
+        Particles.ghost_get<POLARIZATION,EXTFORCE,DELMU>(SKIP_LABELLING);
+
+        vector_dist_subset<2, double, Activegels> Particles_bulk(Particles,0);
+        vector_dist_subset<2, double, Activegels> Particles_boundary(Particles,1);
+        auto & bulk = Particles_bulk.getIds();
+        auto & boundary = Particles_boundary.getIds();
+
+        auto P_bulk = getV<PRESSURE>(Particles_bulk);//Pressure only on inside
+        auto Pol_bulk = getV<POLARIZATION>(Particles_bulk);;
+        auto dPol_bulk = getV<DPOL>(Particles_bulk);
+        auto dV_bulk = getV<DV>(Particles_bulk);
+        auto RHS_bulk = getV<VRHS>(Particles_bulk);
+        auto div_bulk = getV<DIV>(Particles_bulk);
+
+        Derivative_x Dx(Particles,ord,rCut), Bulk_Dx(Particles_bulk,ord,rCut);
+        Derivative_y Dy(Particles, ord, rCut), Bulk_Dy(Particles_bulk, ord,rCut);
+        Derivative_xy Dxy(Particles, ord, rCut);
+        auto Dyx = Dxy;
+        Derivative_xx Dxx(Particles, ord, rCut);
+        Derivative_yy Dyy(Particles, ord, rCut);
+
+        boost::numeric::odeint::runge_kutta4< state_type_2d_ofp,double,state_type_2d_ofp,double,boost::numeric::odeint::vector_space_algebra_ofp> rk4;
+
+        vectorGlobal=(void *) &Particles;
+        vectorGlobal_bulk=(void *) &Particles_bulk;
+        vectorGlobal_boundary=(void *) &Particles_boundary;
+
+        PolarEv<Derivative_x,Derivative_y,Derivative_xx,Derivative_xy,Derivative_yy> System(Dx,Dy,Dxx,Dxy,Dyy);
+        CalcVelocity<Derivative_x,Derivative_y,Derivative_xx,Derivative_xy,Derivative_yy> CalcVelocityObserver(Dx,Dy,Dxx,Dxy,Dyy,Bulk_Dx,Bulk_Dy);
+
+        state_type_2d_ofp tPol;
+        tPol.data.get<0>()=Pol[x];
+        tPol.data.get<1>()=Pol[y];
+
+        eq_id vx, vy;
+        vx.setId(0);
+        vy.setId(1);
+        timer tt;
+        timer tt3;
+        dPol = Pol;
+        double V_err = 1, V_err_old;
+        double tim=0;
+
+        // intermediate time steps
+        std::vector<double> inter_times;
+
+        size_t steps = integrate_const(rk4 , System , tPol , tim , tf , dt, CalcVelocityObserver);
+
+
+        std::cout << "Time steps: " << steps << std::endl;
+
+        Pol_bulk[x]=tPol.data.get<0>();
+        Pol_bulk[y]=tPol.data.get<1>();
+
+        Particles.deleteGhost();
+        Particles.write("Polar_Last");
+        Dx.deallocate(Particles);
+        Dy.deallocate(Particles);
+        Dxy.deallocate(Particles);
+        Dxx.deallocate(Particles);
+        Dyy.deallocate(Particles);
+        Bulk_Dx.deallocate(Particles_bulk);
+        Bulk_Dy.deallocate(Particles_bulk);
+        std::cout.precision(17);
+        tt2.stop();
+        if (v_cl.rank() == 0) {
+            std::cout << "The simulation took " << tt2.getcputime() << "(CPU) ------ " << tt2.getwct()
+                      << "(Wall) Seconds.";
+        }
+    }
+    openfpm_finalize();
+}
diff --git a/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/Makefile b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..e5cf8d6249d8f249c08bc01cd75c42fe1cf24e36
--- /dev/null
+++ b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/Makefile
@@ -0,0 +1,26 @@
+include ../../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = mainDCPSE.o
+OBJ2 = mainFD.o
+
+%.o: %.cpp
+	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+DcpseLid: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+FDLid: $(OBJ2)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: DcpseLid FDLid
+
+run: all
+	mpirun -np 1 /DcpseLid
+
+.PHONY: clean all run
+
+clean:
+	rm -f *.o *~ core DcpseLid FDLid
diff --git a/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainDCPSE.cpp b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainDCPSE.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce6347ae6386a1c0593f87726bc575924d0bcbad
--- /dev/null
+++ b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainDCPSE.cpp
@@ -0,0 +1,246 @@
+//
+// Created by Abhinav Singh on 15.06.20.
+//
+
+#include "config.h"
+#include <iostream>
+#include "DCPSE/DCPSE_OP/DCPSE_op.hpp"
+#include "DCPSE/DCPSE_OP/DCPSE_Solver.hpp"
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "Vector/vector_dist_subset.hpp"
+
+int main(int argc, char* argv[])
+{
+
+    {   openfpm_init(&argc,&argv);
+        timer tt2;
+        tt2.start();
+        size_t gd_sz = 81;
+        double Re=100;
+        double V_err_eps = 1e-2;
+        double alpha=0.0125;
+
+        constexpr int x = 0;
+        constexpr int y = 1;
+        const size_t sz[2] = {gd_sz,gd_sz};
+        Box<2, double> box({0, 0}, {1,1});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing;
+        spacing = 1.0 / (sz[0] - 1);
+        double rCut = 3.1 * spacing;
+        int ord = 2;
+        double sampling_factor = 3.1;
+        double sampling_factor2 = 1.9;
+        
+        Ghost<2, double> ghost(rCut);
+        auto &v_cl = create_vcluster();
+        typedef aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,double,double> LidCavity;
+
+        vector_dist_ws<2, double, LidCavity> Particles(0, box,bc,ghost);
+        Particles.setPropNames({"00-Pressure","01-Velocity","02-RHS","03-dV","04-Divergence","05-Divergence","06-H","07-dP"});
+
+        double x0, y0, x1, y1;
+        x0 = box.getLow(0);
+        y0 = box.getLow(1);
+        x1 = box.getHigh(0);
+        y1 = box.getHigh(1);
+
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext())
+        {
+            Particles.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double xp = k0 * spacing;
+            Particles.getLastPos()[0] = xp;
+            mem_id k1 = key.get(1);
+            double yp = k1 * spacing;
+            Particles.getLastPos()[1] = yp;
+            Particles.getLastProp<1>()[0]=0.0;
+            Particles.getLastProp<1>()[1]=0.0;
+            if (xp != x0 && yp != y0 && xp != x1 && yp != y1)
+            {
+                Particles.getLastSubset(0);
+            }
+            else if(yp==y1 && xp>x0 && xp<x1)
+            {
+                Particles.getLastSubset(1);
+                Particles.getLastProp<1>()[0]=1.0;
+            }
+            else if(xp==0)
+            {
+                Particles.getLastSubset(3);
+            }
+            else if(xp==x1)
+            {
+                Particles.getLastSubset(4);
+            }
+            else
+            {
+                Particles.getLastSubset(2);
+            }
+            ++it;
+        }
+
+        Particles.map();
+        Particles.ghost_get<0>();
+
+
+        vector_dist_subset<2, double, LidCavity> Particles_bulk(Particles,0);
+        vector_dist_subset<2, double, LidCavity> Particles_up(Particles,1);
+        vector_dist_subset<2, double, LidCavity> Particles_down(Particles,2);
+        vector_dist_subset<2, double, LidCavity> Particles_left(Particles,3);
+        vector_dist_subset<2, double, LidCavity> Particles_right(Particles,4);
+        auto &bulk=Particles_bulk.getIds();
+        auto &up_p=Particles_up.getIds();
+        auto &dw_p=Particles_down.getIds();
+        auto &l_p=Particles_left.getIds();
+        auto &r_p=Particles_right.getIds();
+
+
+        auto P = getV<0>(Particles);
+        auto V = getV<1>(Particles);
+        auto RHS = getV<2>(Particles);
+        auto dV = getV<3>(Particles);
+        auto div = getV<4>(Particles);
+        auto V_star = getV<5>(Particles);
+        auto H = getV<6>(Particles);
+        auto dP = getV<7>(Particles);
+
+
+
+        auto P_bulk = getV<0>(Particles_bulk);
+        auto V_bulk = getV<1>(Particles_bulk);
+        auto RHS_bulk =getV<2>(Particles_bulk);
+        auto V_star_bulk = getV<5>(Particles_bulk);
+        auto dP_bulk = getV<7>(Particles_bulk);
+
+
+        P_bulk = 0;
+        Derivative_x Dx(Particles, 2, rCut,sampling_factor, support_options::RADIUS);
+        Derivative_xx Dxx(Particles, 2, rCut,sampling_factor2, support_options::RADIUS);
+        Derivative_yy Dyy(Particles, 2, rCut,sampling_factor2, support_options::RADIUS);
+        Derivative_y Dy(Particles, 2, rCut,sampling_factor, support_options::RADIUS);
+        Derivative_x Bulk_Dx(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS);
+        Derivative_y Bulk_Dy(Particles_bulk, 2, rCut,sampling_factor, support_options::RADIUS);
+
+        int n = 0, nmax = 300, ctr = 0, errctr=1, Vreset = 0;
+        double V_err=1;
+        if (Vreset == 1) {
+            P_bulk = 0;
+            P = 0;
+            Vreset = 0;
+        }
+        P=0;
+
+        eq_id vx,vy;
+        vx.setId(0);
+        vy.setId(1);
+/*          W.setVarId(0);
+        Sf.setVarId(1);
+*/     
+
+        double sum, sum1, sum_k,V_err_old;
+        auto StokesX=(V[x]*Dx(V_star[x])+V[y]*Dy(V_star[x]))-(1.0/Re)*(Dxx(V_star[x])+Dyy(V_star[x]));
+        auto StokesY=(V[x]*Dx(V_star[y])+V[y]*Dy(V_star[y]))-(1.0/Re)*(Dxx(V_star[y])+Dyy(V_star[y]));
+        petsc_solver<double> solverPetsc;
+        solverPetsc.setSolver(KSPGMRES);
+        RHS[x] = 0.0;
+        RHS[y] = 0.0;
+        dV=0;
+        P=0;
+        timer tt;
+        while (V_err >= V_err_eps && n <= nmax) {
+            if (n%5==0){
+                Particles.ghost_get<0,1,2,3,4,5,6,7>(SKIP_LABELLING);
+                Particles.deleteGhost();
+                Particles.write_frame("LID",n,BINARY);
+                Particles.ghost_get<0>();
+                }
+            tt.start();
+            Particles.ghost_get<0>(SKIP_LABELLING);
+            RHS_bulk[x] = -Bulk_Dx(P);
+            RHS_bulk[y] = -Bulk_Dy(P);
+            DCPSE_scheme<equations2d2, decltype(Particles)> Solver(Particles);
+            Solver.impose(StokesX, bulk, RHS[x], vx);
+            Solver.impose(StokesY, bulk, RHS[y], vy);
+            Solver.impose(V_star[x], up_p, 1.0, vx);
+            Solver.impose(V_star[y], up_p, 0.0, vy);
+            Solver.impose(V_star[x], l_p, 0.0, vx);
+            Solver.impose(V_star[y], l_p, 0.0, vy);
+            Solver.impose(V_star[x], r_p, 0.0, vx);
+            Solver.impose(V_star[y], r_p, 0.0, vy);
+            Solver.impose(V_star[x], dw_p, 0.0, vx);
+            Solver.impose(V_star[y], dw_p, 0.0, vy);
+            Solver.solve_with_solver(solverPetsc,V_star[x], V_star[y]);
+    
+            Particles.ghost_get<5>(SKIP_LABELLING);
+            div = (Dx(V_star[x]) + Dy(V_star[y]));
+
+           DCPSE_scheme<equations2d1E,decltype(Particles)> SolverH(Particles,options_solver::LAGRANGE_MULTIPLIER);
+            auto Helmholtz = Dxx(H)+Dyy(H);
+            SolverH.impose(Helmholtz,bulk,prop_id<4>());
+            SolverH.impose(Dy(H), up_p,0);
+            SolverH.impose(Dx(H), l_p,0);
+            SolverH.impose(Dx(H), r_p,0);
+            SolverH.impose(Dy(H), dw_p,0);
+            //SolverH.solve_with_solver(solverPetsc2,H);
+            SolverH.solve(H);
+            Particles.ghost_get<6>(SKIP_LABELLING);
+            Particles.ghost_get<6>(SKIP_LABELLING);
+            P_bulk = P - alpha*(div-0.5*(V[x]*Bulk_Dx(H)+V[y]*Bulk_Dy(H)));
+            V_star_bulk[0] = V_star[0] - Bulk_Dx(H);
+            V_star_bulk[1] = V_star[1] - Bulk_Dy(H);
+            sum = 0;
+            sum1 = 0;
+
+            for (int j = 0; j < bulk.size(); j++) {
+                auto p = bulk.get<0>(j);
+                sum += (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) *
+                       (Particles.getProp<5>(p)[0] - Particles.getProp<1>(p)[0]) +
+                       (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]) *
+                       (Particles.getProp<5>(p)[1] - Particles.getProp<1>(p)[1]);
+                sum1 += Particles.getProp<5>(p)[0] * Particles.getProp<5>(p)[0] +
+                        Particles.getProp<5>(p)[1] * Particles.getProp<5>(p)[1];
+            }
+
+            V = V_star;
+            v_cl.sum(sum);
+            v_cl.sum(sum1);
+            v_cl.execute();
+            sum = sqrt(sum);
+            sum1 = sqrt(sum1);
+            V_err_old = V_err;
+            V_err = sum / sum1;
+            if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-8) {
+                errctr++;
+                //alpha_P -= 0.1;
+            } else {
+                errctr = 0;
+            }
+            if (n > 3) {
+                if (errctr > 5) {
+                    Vreset = 1;
+                    errctr=0;
+                    //break;
+                } else {
+                    Vreset = 0;
+                }
+            }
+            n++;
+            tt.stop();
+            if (v_cl.rank() == 0) {
+                std::cout << "Rel l2 cgs err in V = " << V_err << " at " << n << " and took " <<tt.getwct() <<"("<<tt.getcputime()<<") seconds(CPU)." << std::endl;
+            }
+        }
+        Particles.deleteGhost();
+        Particles.write("LID");
+        tt2.stop();
+        if (v_cl.rank() == 0) {
+            std::cout << "The simulation took " << tt2.getcputime() << "(CPU) ------ " << tt2.getwct()
+                      << "(Wall) Seconds.";
+        }
+    }
+    openfpm_finalize();
+
+}
\ No newline at end of file
diff --git a/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainFD.cpp b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainFD.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f95ced7fcaa3a580540cd0c2ea54adcc79406699
--- /dev/null
+++ b/example/Numerics/Stoke_flow/2_2D_LidDrivenCavity_PC/mainFD.cpp
@@ -0,0 +1,142 @@
+#include "config.h"
+#include <iostream>
+#include "FiniteDifference/FD_Solver.hpp"
+#include "FiniteDifference/FD_op.hpp"
+#include "FiniteDifference/FD_expressions.hpp"
+#include "FiniteDifference/util/EqnsStructFD.hpp"
+int main(int argc, char* argv[])
+{
+    {   openfpm_init(&argc,&argv);
+        using namespace FD;
+        timer tt2;
+        tt2.start();
+        size_t gd_sz = 81;
+        constexpr int x = 0;
+        constexpr int y = 1;
+        const size_t szu[2] = {gd_sz,gd_sz};
+        int sz[2]={int(gd_sz),int(gd_sz)};
+        Box<2, double> box({0, 0}, {1,1});
+        periodicity<2> bc = {NON_PERIODIC, NON_PERIODIC};
+        double spacing;
+        spacing = 1.0 / (sz[0] - 1);
+        Ghost<2,long int> ghost(1);
+        auto &v_cl = create_vcluster();
+        typedef aggregate<double, VectorS<2, double>, VectorS<2, double>,VectorS<2, double>,double,VectorS<2, double>,double,double> LidCavity;
+        grid_dist_id<2, double, LidCavity> domain(szu, box,ghost,bc);
+        double x0, y0, x1, y1;
+        x0 = box.getLow(0);
+        y0 = box.getLow(1);
+        x1 = box.getHigh(0);
+        y1 = box.getHigh(1);
+        auto it = domain.getDomainIterator();
+        while (it.isNext())
+        {
+            auto key = it.get();
+            auto gkey = it.getGKey(key);
+            double x = gkey.get(0) * domain.spacing(0);
+            double y = gkey.get(1) * domain.spacing(1);
+            if (y==1)
+            {domain.get<1>(key) = 1.0;}
+            else
+            {domain.get<1>(key) = 0.0;}
+
+            ++it;
+        }
+        domain.ghost_get<0>();
+        Derivative_x Dx;
+        Derivative_y Dy;
+        Derivative_xx Dxx;
+        Derivative_yy Dyy;
+        auto P = getV<0>(domain);
+        auto V = getV<1>(domain);
+        auto RHS = getV<2>(domain);
+        auto dV = getV<3>(domain);
+        auto div = getV<4>(domain);
+        auto V_star = getV<5>(domain);
+        auto H = getV<6>(domain);
+        auto dP = getV<7>(domain);        
+        eq_id x_comp,y_comp;
+        x_comp.setId(0);
+        y_comp.setId(1);
+        double sum, sum1, sum_k,V_err_old;
+        auto StokesX=(V[x]*Dx(V_star[x])+V[y]*Dy(V_star[x]))-(1.0/100.0)*(Dxx(V_star[x])+Dyy(V_star[x]));
+        auto StokesY=(V[x]*Dx(V_star[y])+V[y]*Dy(V_star[y]))-(1.0/100.0)*(Dxx(V_star[y])+Dyy(V_star[y]));
+        RHS=0;
+        dV=0;
+        double V_err=1;
+        int n;
+        while (V_err >= 0.05 && n <= 50) {
+            if (n%5==0){
+                domain.ghost_get<0,1>(SKIP_LABELLING);
+                domain.write_frame("LID",n,BINARY);
+                domain.ghost_get<0>();
+            }
+            domain.ghost_get<0>(SKIP_LABELLING);
+            RHS[x] = -Dx(P);
+            RHS[y] = -Dy(P);
+            FD_scheme<equations2d2,decltype(domain)> Solver(ghost,domain);
+            Solver.impose(StokesX, {1,1},{sz[0]-2,sz[1]-2}, RHS[x], x_comp);
+            Solver.impose(StokesY, {1,1},{sz[0]-2,sz[1]-2}, RHS[y], y_comp);
+            Solver.impose(V_star[x], {0,sz[1]-1},{sz[0]-1,sz[1]-1}, 1.0, x_comp);
+            Solver.impose(V_star[y], {0,sz[1]-1},{sz[0]-1,sz[1]-1}, 0.0, y_comp);
+            Solver.impose(V_star[x], {0,1},{0,sz[1]-2}, 0.0, x_comp);
+            Solver.impose(V_star[y], {0,1},{0,sz[1]-2}, 0.0, y_comp);
+            Solver.impose(V_star[x], {sz[0]-1,1},{sz[0]-1,sz[1]-2}, 0.0, x_comp);
+            Solver.impose(V_star[y], {sz[0]-1,1},{sz[0]-1,sz[1]-2}, 0.0, y_comp);
+            Solver.impose(V_star[x], {0,0},{sz[0]-1,0}, 0.0, x_comp);
+            Solver.impose(V_star[y], {0,0},{sz[0]-1,0}, 0.0, y_comp);
+            Solver.solve(V_star[x], V_star[y]);
+
+            domain.ghost_get<5>(SKIP_LABELLING);
+            div = (Dx(V_star[x]) + Dy(V_star[y]));
+
+            FD_scheme<equations2d1E,decltype(domain)> SolverH(ghost,domain);
+            auto Helmholtz = Dxx(H)+Dyy(H);
+            SolverH.impose(Helmholtz,{1,1},{sz[0]-2,sz[1]-2},div);
+            SolverH.impose(Dy(H), {0,sz[0]-1},{sz[0]-1,sz[1]-1},0);
+            SolverH.impose(Dx(H), {sz[0]-1,1},{sz[0]-1,sz[1]-2},0);
+            SolverH.impose(Dx(H), {0,0},{sz[0]-1,0},0,x_comp,true);
+            SolverH.impose(H, {0,0},{0,0},0);
+            SolverH.impose(Dy(H), {0,1},{0,sz[1]-2},0);
+            SolverH.solve(H);
+            domain.ghost_get<1,4,6>(SKIP_LABELLING);
+            P = P - 0.01*(div-0.5*(V[x]*Dx(H)+V[y]*Dy(H)));
+            V_star[0] = V_star[0] - Dx(H);
+            V_star[1] = V_star[1] - Dy(H);
+            sum = 0;
+            sum1 = 0;
+            auto it2 = domain.getDomainIterator();
+            while (it2.isNext()) {
+                auto p = it2.get();
+                sum += (domain.getProp<5>(p)[0] - domain.getProp<1>(p)[0]) *
+                       (domain.getProp<5>(p)[0] - domain.getProp<1>(p)[0]) +
+                       (domain.getProp<5>(p)[1] - domain.getProp<1>(p)[1]) *
+                       (domain.getProp<5>(p)[1] - domain.getProp<1>(p)[1]);
+                sum1 += domain.getProp<5>(p)[0] * domain.getProp<5>(p)[0] +
+                        domain.getProp<5>(p)[1] * domain.getProp<5>(p)[1];
+                ++it2;
+            }
+            V[x] = V_star[x];
+            V[y] = V_star[y];
+            v_cl.sum(sum);
+            v_cl.sum(sum1);
+            v_cl.execute();
+            sum = sqrt(sum);
+            sum1 = sqrt(sum1);
+            V_err_old = V_err;
+            V_err = sum / sum1;
+            n++;
+            if (v_cl.rank() == 0) {
+                std::cout << "Rel l2 cgs err in V = " << V_err << "."<< std::endl;
+            }
+        }
+        domain.write("LID_final");
+        tt2.stop();
+        if (v_cl.rank() == 0) {
+            std::cout << "The entire solver  took " << tt2.getcputime() << "(CPU) ------ " << tt2.getwct()
+                      << "(Wall) Seconds.";
+        }
+    }   
+    openfpm_finalize();
+
+}
\ No newline at end of file
diff --git a/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/Makefile b/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..8b95719642c59d0d1d26ac12f9aba7cb47c82cf2
--- /dev/null
+++ b/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/Makefile
@@ -0,0 +1,23 @@
+include ../../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+Stokes3dBall: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: Stokes3dBall
+
+run: all
+	mpirun -np 4 /Stokes3dBall
+
+.PHONY: clean all run
+
+clean:
+	rm -f *.o *~ core Stokes3dBall
diff --git a/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/main.cpp b/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dc1b5788c16581bd57579892f941fbf505315960
--- /dev/null
+++ b/example/Numerics/Stoke_flow/3_3D_StokesFlowBall/main.cpp
@@ -0,0 +1,291 @@
+//
+// Created by Abhinav Singh on 15.06.20.
+//
+
+#include "config.h"
+#include <iostream>
+#include "DCPSE/DCPSE_OP/DCPSE_op.hpp"
+#include "DCPSE/DCPSE_OP/DCPSE_Solver.hpp"
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "Vector/vector_dist_subset.hpp"
+#include "util/SphericalHarmonics.hpp"
+
+
+int main(int argc, char* argv[])
+{
+
+    {   openfpm_init(&argc,&argv);
+        timer tt2;
+        tt2.start();
+        size_t grd_sz = 18;
+        double V_err_eps = 5e-4;
+
+        double nu=1.0;
+       const size_t sz[3] = {grd_sz,grd_sz,grd_sz};
+        Box<3, double> box({-1.0, -1.0,-1.0}, {1.0,1.0,1.0});
+        size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC};
+        double spacing = 2.0 / (sz[0] - 1);
+        double rCut = 3.9*spacing;
+        double R=1.0;
+        Ghost<3, double> ghost(rCut);
+        //                                  P        V                 v_B           RHS            V_t         P_anal              RHS2            Polar cord
+        vector_dist_ws<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles(0, box, bc, ghost);
+        Particles.setPropNames({"Pressure","Velocity","Velocity_Boundary","Divergence","V_T","P_analytical","TEMP" ,"RHS","PolarCoord","V_analytical"});
+
+        auto &v_cl = create_vcluster();
+
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext()) {
+            auto key = it.get();
+            double x = -1.0+key.get(0) * it.getSpacing(0);
+            double y = -1.0+key.get(1) * it.getSpacing(1);
+            double z = -1.0+key.get(2) * it.getSpacing(2);
+            double r=sqrt(x*x+y*y+z*z);
+            if (r<R-spacing/2.0) {
+                Particles.add();
+                Particles.getLastPos()[0] = x;
+                Particles.getLastPos()[1] = y;
+                Particles.getLastPos()[2] = z;
+                Particles.getLastProp<8>()[0] = r;
+                if (r==0){
+                    Particles.getLastProp<8>()[1] = 0.0;
+                }
+                else{
+                    Particles.getLastProp<8>()[1] = std::atan2(sqrt(x*x+y*y),z);
+                }
+                Particles.getLastProp<8>()[2] = std::atan2(y,x);
+            }
+            ++it;
+        }
+
+        int n_sp=int(grd_sz)*int(grd_sz)*3;
+
+        double Golden_angle=M_PI * (3.0 - sqrt(5.0));
+
+        for(int i=1;i<=n_sp;i++)
+        {
+            double y = 1.0 - (i /double(n_sp - 1.0)) * 2.0;
+            double radius = sqrt(1 - y * y);
+            double Golden_theta = Golden_angle * i;
+            double x = cos(Golden_theta) * radius;
+            double z = sin(Golden_theta) * radius;
+
+            if (acos(z)==0 || acos(z)==M_PI){
+                std::cout<<"Theta 0/Pi "<<std::endl;
+                continue;
+            }
+
+            Particles.add();
+            Particles.getLastPos()[0] = x;
+            Particles.getLastPos()[1] = y;
+            Particles.getLastPos()[2] = z;
+            Particles.getLastProp<8>()[0] = 1.0 ;
+            Particles.getLastProp<8>()[1] = std::atan2(sqrt(x*x+y*y),z);
+            Particles.getLastProp<8>()[2] = std::atan2(y,x);
+        }
+        Particles.map();
+        Particles.ghost_get<0>();
+
+
+        std::unordered_map<const lm,double,key_hash,key_equal> Vr;
+        std::unordered_map<const lm,double,key_hash,key_equal> V1;
+        std::unordered_map<const lm,double,key_hash,key_equal> V2;
+        //Setting max mode l_max
+        constexpr int K = 2;
+        //Setting amplitudes to 0
+        for(int l=0;l<=K;l++){
+            for(int m=-l;m<=l;m++){
+                Vr[std::make_tuple(l,m)]=0.0;
+                V1[std::make_tuple(l,m)]=0.0;
+                V2[std::make_tuple(l,m)]=0.0;
+            }
+
+
+        }
+        //Setting some amplitude for boundary velocity
+        V1[std::make_tuple(2,0)]=1.0;
+
+        auto it2 = Particles.getDomainIterator();
+        while (it2.isNext()) {
+            auto p = it2.get();
+            Point<3, double> xp = Particles.getPos(p);
+            Point<3, double> xP = Particles.getProp<8>(p);
+            Particles.getProp<0>(p) =0;
+            if (xP[0]==1.0) {
+                Particles.getProp<0>(p) =  0;
+                std::vector<double> SVel;
+                SVel=openfpm::math::sumY<K>(xP[0],xP[1],xP[2],Vr,V1,V2);
+                double SP=openfpm::math::sumY_Scalar<K>(xP[0],xP[1],xP[2],Vr);
+                Particles.getProp<2>(p)[0] = SVel[0];
+                Particles.getProp<2>(p)[1] = SVel[1];
+                Particles.getProp<2>(p)[2] = SVel[2];
+                Particles.getProp<9>(p)[0] = SVel[0];
+                Particles.getProp<9>(p)[1] = SVel[1];
+                Particles.getProp<9>(p)[2] = SVel[2];
+                Particles.getProp<5>(p) = SP;
+                Particles.setSubset(p,1);
+
+            }
+            else {
+                Particles.setSubset(p,0);
+                Particles.getProp<0>(p) =  0;
+                Particles.getProp<1>(p)[0] =  0;
+                Particles.getProp<1>(p)[1] =  0;
+                Particles.getProp<1>(p)[2] =  0;
+            }
+            ++it2;
+        }
+
+        vector_dist_subset<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles_bulk(Particles,0);
+        vector_dist_subset<3, double, aggregate<double,VectorS<3, double>,VectorS<3, double>,double,VectorS<3, double>,double,double,VectorS<3, double>,VectorS<3, double>,VectorS<3, double>>> Particles_surface(Particles,1);
+
+        auto & bulk = Particles_bulk.getIds();
+        auto & Surface = Particles_surface.getIds();
+
+        for (int j = 0; j < bulk.size(); j++) {
+            auto p = bulk.get<0>(j);
+            Point<3, double> xp = Particles.getPos(p);
+            Point<3, double> xP = Particles.getProp<8>(p);
+
+            std::unordered_map<const lm,double,key_hash,key_equal> Ur;
+            std::unordered_map<const lm,double,key_hash,key_equal> U2;
+            std::unordered_map<const lm,double,key_hash,key_equal> U1;
+            std::unordered_map<const lm,double,key_hash,key_equal> Plm;
+
+            for (int l = 0; l <= K; l++) {
+                for (int m = -l; m <= l; m++) {
+                    auto Er= Vr.find(std::make_tuple(l,m));
+                    auto E1= V1.find(std::make_tuple(l,m));
+                    auto E2= V2.find(std::make_tuple(l,m));
+                    std::vector<double> Sol=openfpm::math::sph_anasol_u(nu,l,m,Er->second,E1->second,E2->second,xP[0]);
+                    Ur[std::make_tuple(l,m)]=Sol[0];
+                    U1[std::make_tuple(l,m)]=Sol[1];
+                    U2[std::make_tuple(l,m)]=Sol[2];
+                    Plm[std::make_tuple(l,m)]=Sol[3];
+                }
+
+            }
+
+            if(fabs(xP[0])>=1e-5 && xP[1]>1e-5 && (M_PI-xP[1])>=1e-5)
+            {
+                std::vector<double> SVel = openfpm::math::sumY<K>(xP[0], xP[1], xP[2], Ur, U1, U2);
+                Particles.getProp<9>(p)[0] = SVel[0];
+                Particles.getProp<9>(p)[1] = SVel[1];
+                Particles.getProp<9>(p)[2] = SVel[2];
+                Particles.getProp<5>(p) = openfpm::math::sumY_Scalar<K>(xP[0], xP[1], xP[2], Plm);
+            }
+        }
+
+        auto P = getV<0>(Particles);
+        auto V = getV<1>(Particles);
+        auto V_B = getV<2>(Particles);
+        V.setVarId(0);
+        auto DIV = getV<3>(Particles);
+        auto V_t = getV<4>(Particles);
+        auto P_anal = getV<5>(Particles);
+        auto temp=getV<6>(Particles);
+        auto RHS = getV<7>(Particles);
+        auto P_bulk = getV<0>(Particles_bulk);
+        auto RHS_bulk = getV<7>(Particles_bulk);
+        auto V_anal = getV<9>(Particles);
+
+        V_t=V;
+        P=0;
+        P_bulk=0;
+        eq_id vx,vy,vz;
+
+        vx.setId(0);
+        vy.setId(1);
+        vz.setId(2);
+
+        double sampling=3.1;
+        double sampling2=1.9;
+        double rCut2=3.9*spacing;
+
+        Derivative_x Dx(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dx(Particles_bulk, 2, rCut,sampling, support_options::RADIUS);
+        Derivative_y Dy(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dy(Particles_bulk, 2, rCut,sampling, support_options::RADIUS);
+        Derivative_z Dz(Particles, 2, rCut,sampling, support_options::RADIUS),B_Dz(Particles_bulk, 2, rCut,sampling, support_options::RADIUS);
+        Derivative_xx Dxx(Particles, 2, rCut2,sampling2,support_options::RADIUS);
+        Derivative_yy Dyy(Particles, 2, rCut2,sampling2,support_options::RADIUS);
+        Derivative_zz Dzz(Particles, 2, rCut2,sampling2,support_options::RADIUS);
+
+        //std::cout << "DCPSE KERNELS DONE" << std::endl;
+        petsc_solver<double> solverPetsc;
+        solverPetsc.setPreconditioner(PCNONE);
+        timer tt;
+        double sum=0,sum1=0;
+        V_t=V;
+        //double V_err_eps = 1e-5;
+
+        double V_err = 1, V_err_old;
+        int n = 0;
+        int nmax = 30;
+        int ctr = 0, errctr, Vreset = 0;
+        V_err = 1;
+        n = 0;
+        tt.start();
+        while (V_err >= V_err_eps && n <= nmax) {
+            //Particles.write_frame("StokesSphere",n);
+            Particles.ghost_get<0>(SKIP_LABELLING);
+            RHS_bulk[0] = B_Dx(P);
+            RHS_bulk[1] = B_Dy(P);
+            RHS_bulk[2] = B_Dz(P);
+            DCPSE_scheme<equations3d3, decltype(Particles)> Solver(Particles);
+            auto Stokes1 = nu * (Dxx(V[0])+Dyy(V[0])+Dzz(V[0]));
+            auto Stokes2 = nu * (Dxx(V[1])+Dyy(V[1])+Dzz(V[1]));
+            auto Stokes3 = nu * (Dxx(V[2])+Dyy(V[2])+Dzz(V[2]));
+            Solver.impose(Stokes1, bulk, RHS[0], vx);
+            Solver.impose(Stokes2, bulk, RHS[1], vy);
+            Solver.impose(Stokes3, bulk, RHS[2], vz);
+            Solver.impose(V[0], Surface, V_B[0], vx);
+            Solver.impose(V[1], Surface, V_B[1], vy);
+            Solver.impose(V[2], Surface, V_B[2], vz);
+            Solver.solve_with_solver(solverPetsc, V[0], V[1], V[2]);
+            Particles.ghost_get<1>();
+            DIV = -(Dx(V[0])+Dy(V[1])+Dz(V[2]));
+            P_bulk = P + DIV;
+            sum = 0;
+            sum1 = 0;
+            for (int j = 0; j < bulk.size(); j++) {
+                auto p = bulk.get<0>(j);
+                sum += (Particles.getProp<4>(p)[0] - Particles.getProp<1>(p)[0]) *
+                       (Particles.getProp<4>(p)[0] - Particles.getProp<1>(p)[0]) +
+                       (Particles.getProp<4>(p)[1] - Particles.getProp<1>(p)[1]) *
+                       (Particles.getProp<4>(p)[1] - Particles.getProp<1>(p)[1]) +
+                       (Particles.getProp<4>(p)[2] - Particles.getProp<1>(p)[2]) *
+                       (Particles.getProp<4>(p)[2] - Particles.getProp<1>(p)[2]);
+                sum1 += Particles.getProp<1>(p)[0] * Particles.getProp<1>(p)[0] +
+                        Particles.getProp<1>(p)[1] * Particles.getProp<1>(p)[1] +
+                        Particles.getProp<1>(p)[2] * Particles.getProp<1>(p)[2];
+            }
+            sum = sqrt(sum);
+            sum1 = sqrt(sum1);
+            v_cl.sum(sum);
+            v_cl.sum(sum1);
+            v_cl.execute();
+            V_t = V;
+            Particles.ghost_get<1>(SKIP_LABELLING);
+            V_err_old = V_err;
+            V_err = sum / sum1;
+            if (V_err > V_err_old || abs(V_err_old - V_err) < 1e-14) {
+                errctr++;
+            } else {
+                errctr = 0;
+            }
+            if (n > 3) {
+                if (errctr > 1) {
+                    std::cout << "CONVERGENCE LOOP BROKEN DUE TO INCREASE/VERY SLOW DECREASE IN Divergence" << std::endl;
+                    Vreset = 1;
+                    break;
+                } else {
+                    Vreset = 0;
+                }
+            }
+            n++;
+
+        }
+        Particles.write("StokesSphere");
+    }
+    openfpm_finalize();
+
+}
\ No newline at end of file
diff --git a/openfpm_data b/openfpm_data
index d78de3919144686d1e88a2c46a88ad3fa2a79043..c5d7618790eb69bf9fc5ca38f8bd1c0afdcc2476 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit d78de3919144686d1e88a2c46a88ad3fa2a79043
+Subproject commit c5d7618790eb69bf9fc5ca38f8bd1c0afdcc2476
diff --git a/openfpm_io b/openfpm_io
index 1a8e1bb010f3562596263fea2c01e8b3514ef015..b2b9d76267afb6548a82e4b8eea541879d2555f0 160000
--- a/openfpm_io
+++ b/openfpm_io
@@ -1 +1 @@
-Subproject commit 1a8e1bb010f3562596263fea2c01e8b3514ef015
+Subproject commit b2b9d76267afb6548a82e4b8eea541879d2555f0
diff --git a/openfpm_numerics b/openfpm_numerics
index 525fd4682cbafca1a15276cd616de77ca777ba8d..b09549183092eb01315d1e6ca9cf6213ee792b1b 160000
--- a/openfpm_numerics
+++ b/openfpm_numerics
@@ -1 +1 @@
-Subproject commit 525fd4682cbafca1a15276cd616de77ca777ba8d
+Subproject commit b09549183092eb01315d1e6ca9cf6213ee792b1b