From e4f32ebd116c44e89fcfbdf91585d8e88aad497c Mon Sep 17 00:00:00 2001
From: Serhii Yaskovets <yaskovet@mpi-cbg.de>
Date: Mon, 10 Jul 2023 20:34:14 +0200
Subject: [PATCH] DCPSE on GPU: add additional tests

---
 src/CMakeLists.txt                            |   6 +-
 .../DCPSE_op/tests/DCPSE_op_Solver_test.cu    | 121 ---
 .../tests/DCPSE_op_test_base_tests.cu         | 994 ++++++++++++++++++
 .../DCPSE_op/tests/DCPSE_op_test_temporal.cu  | 260 +++++
 src/DCPSE/Dcpse.cuh                           |  97 +-
 5 files changed, 1353 insertions(+), 125 deletions(-)
 create mode 100644 src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cu
 create mode 100644 src/DCPSE/DCPSE_op/tests/DCPSE_op_test_temporal.cu

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8dc1d1a..38e0da9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,6 +18,8 @@ endif()
 if (NOT CUDA_ON_BACKEND STREQUAL "None")
 	set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu
 					 DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+			                 DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cu
+			                 DCPSE/DCPSE_op/tests/DCPSE_op_test_temporal.cu
 			 	 	 OdeIntegrators/tests/Odeintegrators_test_gpu.cu
 					 #DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
 					 Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu)
@@ -39,7 +41,7 @@ if ( CUDA_ON_BACKEND STREQUAL "HIP" AND HIP_FOUND )
 				OdeIntegrators/tests/OdeIntegratores_base_tests.cpp
 				OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp
                 DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cpp
-                DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests
+                DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp
                 FiniteDifference/FD_Solver_test.cpp
                 FiniteDifference/FD_op_Tests.cpp
                 DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp
@@ -80,7 +82,7 @@ else()
 		OdeIntegrators/tests/OdeIntegratores_base_tests.cpp
 		OdeIntegrators/tests/OdeIntegrator_grid_tests.cpp
 		DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cpp
-		DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests
+		DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cpp
 		FiniteDifference/FD_Solver_test.cpp
 		FiniteDifference/FD_op_Tests.cpp
 		DCPSE/DCPSE_op/tests/DCPSE_op_test3d.cpp
diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
index 6da64d1..bc27d87 100644
--- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
@@ -24,127 +24,6 @@
 
 BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests_cu)
 
-BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
-//  int rank;
-//  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        size_t edgeSemiSize = 257;
-        const size_t sz[3] = {edgeSemiSize,  edgeSemiSize,edgeSemiSize};
-        Box<3, double> box({0, 0,0}, {1,1,1});
-        size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC};
-        double spacing = box.getHigh(0) / (sz[0] - 1);
-        double rCut = 3.1 * spacing;
-        Ghost<3, double> ghost(rCut);
-        BOOST_TEST_MESSAGE("Init vector_dist...");
-        double sigma2 = spacing * spacing/ (2 * 4);
-
-        vector_dist_gpu<3, double, aggregate<double, VectorS<3, double>, VectorS<3, double>, VectorS<3, double>, VectorS<3, double>,double,double>> domain(
-        0, box, bc, ghost);
-
-        //Init_DCPSE(domain)
-        BOOST_TEST_MESSAGE("Init domain...");
-
-        auto it = domain.getGridIterator(sz);
-        size_t pointId = 0;
-        size_t counter = 0;
-        double minNormOne = 999;
-        while (it.isNext()) {
-            domain.add();
-            auto key = it.get();
-            mem_id k0 = key.get(0);
-            double x = k0 * spacing;
-            domain.getLastPos()[0] = x;//+ gaussian(rng);
-            mem_id k1 = key.get(1);
-            double y = k1 * spacing;
-            domain.getLastPos()[1] = y;//+gaussian(rng);
-            mem_id k2 = key.get(2);
-            double z = k2 * spacing;
-            domain.getLastPos()[2] = z;//+gaussian(rng);
-            // Here fill the function value
-            domain.template getLastProp<0>()    = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]) + sin(domain.getLastPos()[2]) ;
-            domain.template getLastProp<1>()[0] = cos(domain.getLastPos()[0]);
-            domain.template getLastProp<1>()[1] = cos(domain.getLastPos()[1]) ;
-            domain.template getLastProp<1>()[2] = cos(domain.getLastPos()[2]);
-            // Here fill the validation value for Df/Dx
-            domain.template getLastProp<2>()[0] = 0;//cos(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]);
-            domain.template getLastProp<2>()[1] = 0;//-sin(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]);
-            domain.template getLastProp<3>()[0] = 0;//cos(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]);
-            domain.template getLastProp<3>()[1] = 0;//-sin(domain.getLastPos()[0]);//+cos(domain.getLastPos()[1]);
-            domain.template getLastProp<3>()[2] = 0;
-
-            domain.template getLastProp<4>()[0] = -cos(domain.getLastPos()[0]) * sin(domain.getLastPos()[0]);
-            domain.template getLastProp<4>()[1] = -cos(domain.getLastPos()[1]) * sin(domain.getLastPos()[1]);
-            domain.template getLastProp<4>()[2] = -cos(domain.getLastPos()[2]) * sin(domain.getLastPos()[2]);
-
-
-            /*  domain.template getLastProp<4>()[0] = cos(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) +
-                                                    cos(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
-              domain.template getLastProp<4>()[1] = -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) -
-                                                    sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
-              domain.template getLastProp<4>()[2] = -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) -
-                                                    sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));*/
-            domain.template getLastProp<5>()    = cos(domain.getLastPos()[0]) * cos(domain.getLastPos()[0])+cos(domain.getLastPos()[1]) * cos(domain.getLastPos()[1])+cos(domain.getLastPos()[2]) * cos(domain.getLastPos()[2]) ;
-            ++counter;
-            ++it;
-        }
-        BOOST_TEST_MESSAGE("Sync domain across processors...");
-
-        domain.map();
-        domain.ghost_get<0>();
-
-        Advection_gpu Adv(domain, 2, rCut, 1.9,support_options::RADIUS);
-        auto v = getV<1>(domain);
-        auto P = getV<0>(domain);
-        auto dv = getV<3>(domain);
-        auto dP = getV<6>(domain);
-
-
-//        typedef boost::mpl::int_<std::is_fundamental<point_expression_op<Point<2U, double>, point_expression<double>, Point<2U, double>, 3>>::value>::blabla blabla;
-//        std::is_fundamental<decltype(o1.value(key))>
-
-        domain.ghost_get<1>();
-        dv = Adv(v, v);
-        auto it2 = domain.getDomainIterator();
-
-        double worst1 = 0.0;
-
-        while (it2.isNext()) {
-            auto p = it2.get();
-
-            if (fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]) > worst1) {
-                worst1 = fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]);
-
-            }
-
-            ++it2;
-        }
-        //std::cout << "Maximum Error in component 2: " << worst1 << std::endl;
-        BOOST_REQUIRE(worst1 < 0.03);
-
-        //Adv.checkMomenta(domain);
-        //Adv.DrawKernel<2>(domain,0);
-
-        //domain.deleteGhost();
-
-        dP = Adv(v, P);//+Dy(P);
-        auto it3 = domain.getDomainIterator();
-
-        double worst2 = 0.0;
-
-        while (it3.isNext()) {
-            auto p = it3.get();
-            if (fabs(domain.getProp<6>(p) - domain.getProp<5>(p)) > worst2) {
-                worst2 = fabs(domain.getProp<6>(p) - domain.getProp<5>(p));
-
-            }
-
-            ++it3;
-        }
-        domain.deleteGhost();
-        BOOST_REQUIRE(worst2 < 0.03);
-
-
-}
-
     BOOST_AUTO_TEST_CASE(dcpse_op_solver) {
 //  int rank;
 //  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cu
new file mode 100644
index 0000000..8e86fe9
--- /dev/null
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_base_tests.cu
@@ -0,0 +1,994 @@
+/*
+ * DCPSE_op_test_base_tests.cpp
+ *
+ *  Created on: April 9, 2020
+ *      Author: Abhinav Singh
+ *
+ */
+#include "config.h"
+#ifdef HAVE_EIGEN
+#ifdef HAVE_PETSC
+#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS
+#define BOOST_MPL_LIMIT_VECTOR_SIZE 30
+
+
+
+#define BOOST_TEST_DYN_LINK
+
+#include "util/util_debug.hpp"
+#include <boost/test/unit_test.hpp>
+#include <iostream>
+#include "../DCPSE_op.hpp"
+#include "../DCPSE_Solver.hpp"
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "Vector/vector_dist_subset.hpp"
+#include "../EqnsStruct.hpp"
+#include "DCPSE/DcpseInterpolation.hpp"
+
+BOOST_AUTO_TEST_SUITE(dcpse_op_suite_tests_cu)
+BOOST_AUTO_TEST_CASE(dcpse_op_tests) {
+        size_t edgeSemiSize = 40;
+        const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> domain(0, box,
+                                                                                                                 bc,
+                                                                                                                 ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+        Derivative_x_gpu Dx(domain, 2, rCut);
+        Derivative_y_gpu Dy(domain, 2, rCut);
+        Gradient_gpu Grad(domain, 2, rCut);
+        Laplacian_gpu Lap(domain, 2, rCut);
+        auto v = getV<1>(domain);
+        auto P = getV<0>(domain);
+
+        v = 2*Dx(P) + Dy(P);
+        auto it2 = domain.getDomainIterator();
+
+        double worst = 0.0;
+
+        while (it2.isNext()) {
+            auto p = it2.get();
+
+            if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p));
+            }
+
+            ++it2;
+        }
+
+        domain.deleteGhost();
+        BOOST_REQUIRE(worst < 0.03);
+
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_save_load) {
+        size_t edgeSemiSize = 40;
+        const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> domain(0, box,
+                                                                                                                 bc,
+                                                                                                                 ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+        Derivative_x_gpu Dx(domain, 2, rCut);
+        Derivative_y_gpu Dy(domain, 2, rCut);
+        auto v = getV<1>(domain);
+        auto v2 = getV<3>(domain);
+        auto P = getV<0>(domain);
+        v2 = 2*Dx(P) + Dy(P);
+        Dx.save(domain,"DX_test");
+        Dy.save(domain,"DY_test");
+        Derivative_x_gpu DxLoaded(domain, 2, rCut,1,support_options::LOAD);
+        Derivative_y_gpu DyLoaded(domain, 2, rCut,1,support_options::LOAD);
+        DxLoaded.load(domain,"DX_test");
+        DyLoaded.load(domain,"DY_test");
+        v= 2*DxLoaded(P)+DyLoaded(P);
+        auto it2 = domain.getDomainIterator();
+        double worst = 0.0;
+        while (it2.isNext()) {
+            auto p = it2.get();
+
+            if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p));
+            }
+
+            ++it2;
+        }
+        domain.deleteGhost();
+        //std::cout<<worst;
+        BOOST_REQUIRE(worst < 0.03);
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_save_load2) {
+        size_t edgeSemiSize = 40;
+        const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> domain(0, box,
+                                                                                                                 bc,
+                                                                                                                 ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<2>() = 2*cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+        auto v = getV<1>(domain);
+        auto v2 = getV<3>(domain);
+        auto P = getV<0>(domain);
+        Derivative_x_gpu DxLoaded(domain, 2, rCut,1,support_options::LOAD);
+        Derivative_y_gpu DyLoaded(domain, 2, rCut,1,support_options::LOAD);
+        DxLoaded.load(domain,"DX_test");
+        DyLoaded.load(domain,"DY_test");
+        v= 2*DxLoaded(P)+DyLoaded(P);
+        auto it2 = domain.getDomainIterator();
+        double worst = 0.0;
+        while (it2.isNext()) {
+            auto p = it2.get();
+
+            if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p));
+            }
+
+            ++it2;
+        }
+        domain.deleteGhost();
+        //std::cout<<worst;
+        BOOST_REQUIRE(worst < 0.03);
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_tests_fa) {
+        size_t edgeSemiSize = 40;
+        const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        typedef vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> vector_type;
+
+        vector_type domain(0, box,bc,ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<2>() = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+
+        PPInterpolation<vector_type,vector_type> Fx(domain,domain, 2, rCut);
+        auto v = getV<1>(domain);
+        auto P = getV<0>(domain);
+
+        Fx.p2p<0,1>();
+        auto it2 = domain.getDomainIterator();
+        double worst = 0.0;
+        while (it2.isNext()) {
+            auto p = it2.get();
+            if (fabs(domain.getProp<1>(p) - domain.getProp<0>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<0>(p));
+            }
+            ++it2;
+        }
+        //std::cout<<"Worst:"<<worst<<std::endl;
+        domain.deleteGhost();
+        //domain.write_frame("test",0,0.024,BINARY);
+        BOOST_REQUIRE(worst < 0.03);
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_tests_mfa) {
+        size_t edgeSemiSize = 40;
+        const size_t sz[2] = {2 * edgeSemiSize, 2 * edgeSemiSize};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / ( 4);
+        std::normal_distribution<> gaussian{0, sigma2};
+        std::mt19937 rng{6666666};
+        typedef vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>, VectorS<2, double>>> vector_type;
+
+        vector_type domain(0, box,bc,ghost);
+        vector_type domain2(domain.getDecomposition(),0);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            domain2.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            mem_id k1 = key.get(1);
+            double x = k0 * spacing[0];
+            double y = k1 * spacing[1];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            if(x!=0 && y!=0 && x!=box.getHigh(0) && y!=box.getHigh(1)){
+                domain2.getLastPos()[0] = x+ gaussian(rng);
+                domain2.getLastPos()[1] = y+ gaussian(rng);
+            }
+            else{
+                domain2.getLastPos()[0] = x;
+                domain2.getLastPos()[1] = y;
+            }
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<1>() = 0.0;
+            domain2.template getLastProp<0>() = sin(domain2.getLastPos()[0]) + sin(domain2.getLastPos()[1]);
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain2.map();
+        domain.ghost_get<0>();
+        domain2.ghost_get<0>();
+
+        PPInterpolation<vector_type,vector_type> Fx(domain2,domain, 2, rCut);
+        //auto v = getV<1>(domain);
+        //auto P = getV<0>(domain);
+        Fx.p2p<0,1>();
+        auto it2 = domain.getDomainIterator();
+        double worst = 0.0;
+        while (it2.isNext()) {
+            auto p = it2.get();
+            //domain.template getProp<2>(p) = domain.getProp<1>(p) - domain.getProp<0>(p);
+            if (fabs(domain.getProp<1>(p) - domain.getProp<0>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<0>(p));
+            }
+            ++it2;
+        }
+        //std::cout<<"Worst:"<<worst<<std::endl;
+        domain.deleteGhost();
+        //domain.write("test1");
+        //domain2.write("test2");
+        BOOST_REQUIRE(worst < 0.03);
+    }
+
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_test_lap) {
+        size_t edgeSemiSize = 81;
+        const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1};
+        Box<2, double> box({0, 0}, {2 * M_PI, 2 * M_PI});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2 * M_PI / (sz[0] - 1);
+        spacing[1] = 2 * M_PI / (sz[1] - 1);
+        Ghost<2, double> ghost(spacing[0] * 3.9);
+        double rCut = 3.9 * spacing[0];
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        vector_dist_gpu<2, double, aggregate<double, double, double, double, VectorS<2, double>>> domain(0, box,
+                                                                                                     bc,
+                                                                                                     ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+        std::normal_distribution<> gaussian{0, sigma2};
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<0>() = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            // Here fill the validation value for Lap
+            domain.template getLastProp<1>() = - sin(domain.getLastPos()[0]) - sin(domain.getLastPos()[1]);
+            domain.template getLastProp<4>()[0] = -sin(domain.getLastPos()[0]);
+            domain.template getLastProp<4>()[1] = -sin(domain.getLastPos()[1]);
+
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+
+        Laplacian_gpu Lap(domain, 2, rCut);
+        auto v = getV<1>(domain);
+        auto P = getV<0>(domain);
+        auto vv = getV<2>(domain);
+        auto errv = getV<3>(domain);
+
+        vv = Lap(P);
+        auto it2 = domain.getDomainIterator();
+
+        double worst = 0.0;
+
+        while (it2.isNext()) {
+            auto p = it2.get();
+
+            if (fabs(domain.getProp<1>(p) - domain.getProp<2>(p)) > worst) {
+                worst = fabs(domain.getProp<1>(p) - domain.getProp<2>(p));
+            }
+
+            ++it2;
+        }
+        errv=v-vv;
+        domain.deleteGhost();
+        BOOST_REQUIRE(worst < 0.3);
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_div) {
+//  int rank;
+//  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+        size_t edgeSemiSize = 31;
+        const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1};
+        Box<2, double> box({0, 0}, {10.0, 10.0});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = box.getHigh(0) / (sz[0] - 1);
+        spacing[1] = box.getHigh(1) / (sz[1] - 1);
+        double rCut = 3.1* spacing[0];
+        Ghost<2, double> ghost(rCut);
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+        double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+        vector_dist_gpu<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>, VectorS<2, double>, VectorS<2, double>,double,double>> domain(
+                0, box, bc, ghost);
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+//            std::random_device rd{};
+//            std::mt19937 rng{rd()};
+        std::mt19937 rng{6666666};
+
+        std::normal_distribution<> gaussian{0, sigma2};
+
+        auto it = domain.getGridIterator(sz);
+        size_t pointId = 0;
+        size_t counter = 0;
+        double minNormOne = 999;
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = k1 * spacing[1];
+                domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            domain.template getLastProp<1>()[0] = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+            domain.template getLastProp<1>()[1] = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+
+
+            // Here fill the validation value for Divergence
+            domain.template getLastProp<0>()= cos(domain.getLastPos()[0]) - sin(domain.getLastPos()[1]);
+           /* domain.template getLastProp<4>()[0] =
+                    cos(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) +
+                    cos(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
+            domain.template getLastProp<4>()[1] =
+                    -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) -
+                    sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));*/
+
+
+            ++counter;
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+
+        Divergence_gpu Div(domain, 2, rCut);
+        Derivative_x_gpu Dx(domain, 2, rCut);
+        Derivative_y_gpu Dy(domain, 2, rCut);
+
+        auto v = getV<1>(domain);
+        auto anasol = getV<0>(domain);
+        auto div = getV<5>(domain);
+        auto div2 = getV<6>(domain);
+
+        domain.ghost_get<1>();
+        div = Div(v);
+        // div2=Dx(v[0])+Dy(v[1]);
+        // auto it2 = domain.getDomainIterator();
+
+        // double worst1 = 0.0;
+        // double worst2 = 0.0;
+
+        // while (it2.isNext()) {
+        //     auto p = it2.get();
+        //     if (fabs(domain.getProp<0>(p) - domain.getProp<5>(p)) > worst1) {
+        //         worst1 = fabs(domain.getProp<0>(p) - domain.getProp<5>(p));
+        //     }
+        //     if (fabs(domain.getProp<0>(p) - domain.getProp<6>(p)) > worst2) {
+        //         worst2 = fabs(domain.getProp<0>(p) - domain.getProp<6>(p));
+        //     }
+        //     ++it2;
+        // }
+
+        domain.deleteGhost();
+/*
+        domain.write("DIV");
+
+        std::cout<<worst1<<":"<<worst2;
+*/
+
+        // BOOST_REQUIRE(worst1 < 0.05);
+        // BOOST_REQUIRE(worst2 < 0.05);
+
+    }
+
+     BOOST_AUTO_TEST_CASE(dcpse_op_vec) {
+ //  int rank;
+ //  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+         size_t edgeSemiSize = 160;
+         const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1};
+         Box<2, double> box({0, 0}, {10,10});
+         size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+         double spacing[2];
+         spacing[0] = box.getHigh(0)  / (sz[0] - 1);
+         spacing[1] = box.getHigh(1) / (sz[1] - 1);
+         Ghost<2, double> ghost(spacing[0] * 3.1);
+         double rCut = 3.1 * spacing[0];
+         BOOST_TEST_MESSAGE("Init vector_dist...");
+         double sigma2 = spacing[0] * spacing[1] / (2 * 4);
+
+         vector_dist_gpu<2, double, aggregate<double, VectorS<2, double>, VectorS<2, double>, VectorS<2, double>, VectorS<2, double>,double,double>> domain(
+                 0, box, bc, ghost);
+
+         //Init_DCPSE(domain)
+         BOOST_TEST_MESSAGE("Init domain...");
+         std::random_device rd{};
+ //            std::mt19937 rng{rd()};
+         std::mt19937 rng{6666666};
+
+         std::normal_distribution<> gaussian{0, sigma2};
+
+         auto it = domain.getGridIterator(sz);
+         size_t pointId = 0;
+         size_t counter = 0;
+         double minNormOne = 999;
+         while (it.isNext()) {
+             domain.add();
+             auto key = it.get();
+             mem_id k0 = key.get(0);
+             double x = k0 * spacing[0];
+             domain.getLastPos()[0] = x;//+ gaussian(rng);
+             mem_id k1 = key.get(1);
+             double y = k1 * spacing[1];
+             domain.getLastPos()[1] = y;//+gaussian(rng);
+             // Here fill the function value
+             domain.template getLastProp<0>()    = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+             domain.template getLastProp<1>()[0] = sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]);
+             domain.template getLastProp<1>()[1] = cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]);
+
+             // Here fill the validation value for Df/Dx
+             domain.template getLastProp<2>()[0] = 0;
+             domain.template getLastProp<2>()[1] = 0;
+             domain.template getLastProp<4>()[0] =
+                     cos(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) +
+                     cos(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
+             domain.template getLastProp<4>()[1] =
+                     -sin(domain.getLastPos()[0]) * (sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1])) -
+                     sin(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
+
+             domain.template getLastProp<5>()    = cos(domain.getLastPos()[0])*(sin(domain.getLastPos()[0]) + sin(domain.getLastPos()[1]))+cos(domain.getLastPos()[1]) * (cos(domain.getLastPos()[0]) + cos(domain.getLastPos()[1]));
+
+
+             ++counter;
+             ++it;
+         }
+         BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+         domain.map();
+         domain.ghost_get<0>();
+
+         Advection_gpu Adv(domain, 2, rCut);
+         auto v = getV<1>(domain);
+         auto P = getV<0>(domain);
+         auto dv = getV<3>(domain);
+         auto dP = getV<6>(domain);
+
+
+         domain.ghost_get<1>();
+         dv = Adv(v, v);
+         auto it2 = domain.getDomainIterator();
+
+         double worst1 = 0.0;
+
+         while (it2.isNext()) {
+             auto p = it2.get();
+
+             if (fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]) > worst1) {
+                 worst1 = fabs(domain.getProp<3>(p)[1] - domain.getProp<4>(p)[1]);
+
+             }
+
+             ++it2;
+         }
+
+         //std::cout << "Maximum Error in component 2: " << worst1 << std::endl;
+         //domain.write("v1");
+         BOOST_REQUIRE(worst1 < 0.03);
+
+
+         dP = Adv(v, P);
+         auto it3 = domain.getDomainIterator();
+
+         double worst2 = 0.0;
+
+         while (it3.isNext()) {
+             auto p = it3.get();
+             if (fabs(domain.getProp<6>(p) - domain.getProp<5>(p)) > worst2) {
+                 worst2 = fabs(domain.getProp<6>(p) - domain.getProp<5>(p));
+
+             }
+
+             ++it3;
+         }
+
+
+         domain.deleteGhost();
+         //domain.write("v2");
+         BOOST_REQUIRE(worst2 < 0.03);
+
+     }
+
+
+     BOOST_AUTO_TEST_CASE(dcpse_slice) {
+        const size_t sz[2] = {31,31};
+        Box<2, double> box({0, 0}, {1,1});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing = box.getHigh(0) / (sz[0] - 1);
+        Ghost<2, double> ghost(spacing * 3.9);
+        double rCut = 3.9 * spacing;
+
+        vector_dist_gpu<2, double, aggregate<double,VectorS<2, double>,double,double[3][3]>> Particles(0, box, bc, ghost);
+
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext()) {
+            Particles.add();
+            auto key = it.get();
+            double x = key.get(0) * it.getSpacing(0);
+            Particles.getLastPos()[0] = x;
+            double y = key.get(1) * it.getSpacing(1);
+            Particles.getLastPos()[1] = y;
+
+            Particles.getLastProp<1>()[0] = sin(x+y);
+            Particles.getLastProp<1>()[1] = cos(x+y);
+
+            ++it;
+        }
+
+        Particles.map();
+        Particles.ghost_get<0,1>();
+
+        auto P = getV<0>(Particles);
+        auto V = getV<1>(Particles);
+        auto S = getV<2>(Particles);
+        auto Sig = getV<3>(Particles);
+
+        Derivative_x_gpu Dx(Particles, 2, rCut,2);
+
+        P = Dx(V[0]);
+        S = V[0]*V[0] + V[1]*V[1];
+
+        Sig[0][1] = V[0]*V[0] + V[1]*V[1];
+        Sig[1][0] = P;
+        Sig[2][2] = 5.0;
+
+        auto it2 = Particles.getDomainIterator();
+
+        double err = 0.0;
+
+        while (it2.isNext())
+        {
+            auto p = it2.get();
+
+            if (fabs(Particles.getProp<0>(p) - Particles.getProp<1>(p)[1]) >= err )
+            {
+                err = fabs(Particles.getProp<0>(p) - Particles.getProp<1>(p)[1]);
+            }
+
+            if (fabs(Particles.getProp<2>(p) - 1.0) >= err )
+            {
+                err = fabs(Particles.getProp<2>(p) - 1.0);
+            }
+
+            if (fabs(Particles.getProp<3>(p)[0][1] - 1.0) >= err )
+            {
+                err = fabs(Particles.getProp<3>(p)[0][1] - 1.0);
+            }
+
+            if (fabs(Particles.getProp<3>(p)[1][0] - Particles.getProp<1>(p)[1]) >= err )
+            {
+                err = fabs(Particles.getProp<3>(p)[1][0] - Particles.getProp<1>(p)[1]);
+            }
+
+            if (fabs(Particles.getProp<3>(p)[2][2] - 5.0) >= err )
+            {
+                err = fabs(Particles.getProp<3>(p)[2][2] - 5.0);
+            }
+
+            ++it2;
+        }
+
+        //Particles.write("test_out");
+        //std::cout << "Error: " << err << "   " << create_vcluster().rank() << std::endl;
+        BOOST_REQUIRE(err < 0.03);
+}
+
+    BOOST_AUTO_TEST_CASE(dcpse_slice_3d) {
+        const size_t sz[3] = {17,17,17};
+        Box<3, double> box({0, 0,0}, {1,1,1});
+        size_t bc[3] = {NON_PERIODIC, NON_PERIODIC,NON_PERIODIC};
+        double spacing = box.getHigh(0) / (sz[0] - 1);
+        Ghost<3, double> ghost(spacing * 3.9);
+        double rCut = 3.9 * spacing;
+
+        vector_dist_gpu<3, double, aggregate<double,VectorS<3, double>,double,double[3][3]>> Particles(0, box, bc, ghost);
+
+        auto it = Particles.getGridIterator(sz);
+        while (it.isNext()) {
+            Particles.add();
+            auto key = it.get();
+            double x = key.get(0) * it.getSpacing(0);
+            Particles.getLastPos()[0] = x;
+            double y = key.get(1) * it.getSpacing(1);
+            Particles.getLastPos()[1] = y;
+            double z = key.get(2) * it.getSpacing(2);
+            Particles.getLastPos()[2] = z;
+
+            Particles.getLastProp<1>()[0] = sin(x+y);
+            Particles.getLastProp<1>()[1] = cos(x+y);
+            Particles.getLastProp<1>()[2] = 1.0;
+
+            ++it;
+        }
+
+        Particles.map();
+        Particles.ghost_get<0,1>();
+
+
+        auto P = getV<0>(Particles);
+        auto V = getV<1>(Particles);
+        auto S = getV<2>(Particles);
+        auto Sig = getV<3>(Particles);
+
+
+        Derivative_x_gpu Dx(Particles, 2, rCut,2);
+
+        P = Dx(V[0]);
+        S = V[0]*V[0] + V[1]*V[1]+V[2]*V[2];
+
+        Sig[0][1] = V[0]*V[0] + V[1]*V[1]+V[2]*V[2];
+        Sig[1][0] = P;
+        Sig[2][2] = 5.0;
+
+        auto it2 = Particles.getDomainIterator();
+
+        double err1 = 0.0;
+        double err2 = 0.0;
+        double err3 = 0.0;
+        double err4 = 0.0;
+        double err5 = 0.0;
+
+        while (it2.isNext())
+        {
+            auto p = it2.get();
+
+            // Here we check that P = Dx(V[0]) works   P is Prop 0 = sin(x+y) V[0] = sin(x+y) and V[1] is cos(x+y)
+            if (fabs(Particles.getProp<0>(p) - Particles.getProp<1>(p)[1]) >= err1 )
+            {
+                err1 = fabs(Particles.getProp<0>(p) - Particles.getProp<1>(p)[1]);
+            }
+
+            if (fabs(Particles.getProp<2>(p) - 2.0) >= err2 )
+            {
+                err2 = fabs(Particles.getProp<2>(p) - 2.0);
+            }
+
+            if (fabs(Particles.getProp<3>(p)[0][1] - 2.0) >= err3 )
+            {
+                err3 = fabs(Particles.getProp<3>(p)[0][1] - 2.0);
+            }
+
+            // V[0]*V[0] + V[1]*V[1]+V[2]*V[2] = 2 and
+            if (fabs(Particles.getProp<3>(p)[0][1] - Particles.getProp<2>(p)) >= err4 )
+            {
+                err4 = fabs(Particles.getProp<3>(p)[0][1] - Particles.getProp<2>(p));
+            }
+
+            if (fabs(Particles.getProp<3>(p)[2][2] - 5.0) >= err5 )
+            {
+                err5 = fabs(Particles.getProp<3>(p)[2][2] - 5.0);
+            }
+
+            ++it2;
+        }
+
+        //std::cout << err1 << " " << err2 << " " << err3 << " " << err4 << " " << err5 << std::endl;
+        BOOST_REQUIRE(err1 < 0.08);
+        BOOST_REQUIRE(err2 < 0.03);
+        BOOST_REQUIRE(err3 < 0.03);
+        BOOST_REQUIRE(err4 < 0.03);
+        BOOST_REQUIRE(err5 < 0.03);
+    }
+
+    BOOST_AUTO_TEST_CASE(dcpse_op_convection) {
+        size_t edgeSemiSize = 20;
+        const size_t sz[2] = {2 * edgeSemiSize+1, 2 * edgeSemiSize+1};
+        Box<2, double> box({-1, -1}, {1.0, 1.0});
+        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
+        double spacing[2];
+        spacing[0] = 2.0/ (sz[0] - 1);
+        spacing[1] = 2.0 / (sz[1] - 1);
+        double rCut = 3.1 * spacing[0];
+        Ghost<2, double> ghost(rCut);
+
+        BOOST_TEST_MESSAGE("Init vector_dist...");
+
+        vector_dist_gpu<2, double, aggregate<double, double, double, VectorS<2, double>>> domain(0, box,bc,
+                                                                                                                 ghost);
+        domain.setPropNames({"Concentration","Concentration_temp","Temp","Velocity"});
+
+        //Init_DCPSE(domain)
+        BOOST_TEST_MESSAGE("Init domain...");
+
+        auto it = domain.getGridIterator(sz);
+        while (it.isNext()) {
+            domain.add();
+            auto key = it.get();
+            mem_id k0 = key.get(0);
+            double x = -1.0+k0 * spacing[0];
+            domain.getLastPos()[0] = x;//+ gaussian(rng);
+            mem_id k1 = key.get(1);
+            double y = -1.0+k1 * spacing[1];
+            domain.getLastPos()[1] = y;//+gaussian(rng);
+            // Here fill the function value
+            if (x>-1 && y>-1 && x<1 && y<1)
+            {
+            domain.template getLastProp<3>()[0] = (-y)*exp(-10*((x)*(x)+(y)*(y)));;
+            domain.template getLastProp<3>()[1] = (x)*exp(-10*((x)*(x)+(y)*(y)));;
+            }
+            else{
+                domain.template getLastProp<3>()[0] = 0.0;
+                domain.template getLastProp<3>()[1] = 0.0;
+            }
+            if (x==0.0 && y>-0.5 && y<0.5)
+            {
+                domain.template getLastProp<0>() = 1.0;
+            }
+            else
+            {
+                domain.template getLastProp<0>() = 0.0;
+            }
+            ++it;
+        }
+        BOOST_TEST_MESSAGE("Sync domain across processors...");
+
+        domain.map();
+        domain.ghost_get<0>();
+
+        //Derivative_x_gpu Dx(domain, 2, rCut);
+        Derivative_xx Dxx(domain, 2, rCut);
+        //Derivative_y_gpu Dy(domain, 2, rCut);
+        Derivative_yy Dyy(domain, 2, rCut);
+        auto C = getV<0>(domain);
+        auto V = getV<3>(domain);
+        auto Cnew = getV<1>(domain);
+        auto Pos = getV<PROP_POS>(domain);
+        timer tt;
+        //domain.write_frame("Convection_init",0);
+        int ctr=0;
+        double t=0,tf=1,dt=1e-2;
+        while(t<tf)
+        {
+            domain.write_frame("Convection",ctr);
+            domain.ghost_get<0>();
+            Cnew=C+dt*0.01*(Dxx(C)+Dyy(C));
+            C=Cnew;
+            Pos=Pos+dt*V;
+            domain.map();
+            domain.ghost_get<0>();
+            auto it2 = domain.getDomainIterator();
+            while (it2.isNext()) {
+                auto p = it2.get();
+                Point<2, double> xp = domain.getPos(p);
+                double x=xp[0],y=xp[1];
+                if (x>-1 && y>-1 && x<1 && y<1)
+                {
+                    domain.template getProp<3>(p)[0] = (-y)*exp(-10*((x)*(x)+(y)*(y)));
+                    domain.template getProp<3>(p)[1] = (x)*exp(-10*((x)*(x)+(y)*(y)));;
+                }
+                else{
+                    domain.template getProp<3>(p)[0] = 0.0;
+                    domain.template getProp<3>(p)[1] = 0.0;
+                }
+                ++it2;
+            }
+            tt.start();
+            Dxx.update(domain);
+            Dyy.update(domain);
+            tt.stop();
+            //std::cout<tt.getwct()<<""
+            ctr++;
+            t+=dt;
+        }
+
+        Dxx.deallocate(domain);
+        Dyy.deallocate(domain);
+
+       /*         std::cout<<"Dx"<<std::endl;
+        Dx.checkMomenta(domain);
+        std::cout<<"Dy"<<std::endl;
+        Dy.checkMomenta(domain);
+        std::cout<<"Dxx"<<std::endl;
+        Dxx.checkMomenta(domain);
+        int ctr=0;
+        P=0;
+        v=0;
+        K1=0;
+        auto its2 = domain.getDomainIterator();
+        while (its2.isNext()) {
+            auto p = its2.get();
+            Dx.DrawKernel<0>(domain, p.getKey());
+            Dy.DrawKernel<1>(domain, p.getKey());
+            Dxx.DrawKernel<2>(domain, p.getKey());
+            domain.write_frame("Kernels",ctr);
+            P=0;
+            v=0;
+            K1=0;
+            ++its2;
+            ctr++;
+        }*/
+
+    }
+
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
+
+#endif
+#endif
+
+
+
diff --git a/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_temporal.cu b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_temporal.cu
new file mode 100644
index 0000000..d87b665
--- /dev/null
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_test_temporal.cu
@@ -0,0 +1,260 @@
+/*
+ * DCPSE_op_test_temporal.cpp
+ *
+ *  Created on: Sep 15, 2020
+ *      Author: i-bird
+ */
+#include "config.h"
+#ifdef HAVE_EIGEN
+#ifdef HAVE_PETSC
+
+#ifndef DCPSE_OP_TEST_TEMPORAL_CPP_
+#define DCPSE_OP_TEST_TEMPORAL_CPP_
+
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+#include "Operators/Vector/vector_dist_operators.hpp"
+#include "../DCPSE_op.hpp"
+
+BOOST_AUTO_TEST_SUITE(temporal_test_suite_cu)
+    BOOST_AUTO_TEST_CASE(temporal_test)
+    {
+		size_t grd_sz = 12;
+		double boxsize = 10;
+		const size_t sz[3] = {grd_sz, grd_sz, grd_sz};
+		Box<3, double> box({0, 0, 0}, {boxsize, boxsize, boxsize});
+		size_t bc[3] = {NON_PERIODIC, NON_PERIODIC, NON_PERIODIC};
+		double Lx = box.getHigh(0);
+		double Ly = box.getHigh(1);
+		double Lz = box.getHigh(2);
+		double spacing = box.getHigh(0) / (sz[0] - 1);
+		double rCut = 3.9 * spacing;
+		double rCut2 = 3.9 * spacing;
+		int ord = 2;
+		int ord2 = 2;
+		double sampling_factor = 4.0;
+		double sampling_factor2 = 2.4;
+		Ghost<3, double> ghost(rCut);
+		auto &v_cl = create_vcluster();
+
+		vector_dist_gpu<3, double, aggregate<double,VectorS<3, double>,double[3][3],double,VectorS<3,double>,double[3][3]> > Particles(0, box, bc, ghost);
+
+		auto it = Particles.getGridIterator(sz);
+		while (it.isNext())
+		{
+			Particles.add();
+			auto key = it.get();
+			double x = key.get(0) * it.getSpacing(0);
+			Particles.getLastPos()[0] = x;
+			double y = key.get(1) * it.getSpacing(1);
+			Particles.getLastPos()[1] = y;
+			double z = key.get(2) * it.getSpacing(1);
+			Particles.getLastPos()[2] = z;
+			++it;
+		}
+
+		Particles.map();
+		Particles.ghost_get<0>();
+
+		constexpr int x = 0;
+		constexpr int y = 1;
+		constexpr int z = 2;
+
+
+		constexpr int sScalar = 0;
+		constexpr int sVector = 1;
+		constexpr int sTensor = 2;
+		constexpr int dScalar = 3;
+		constexpr int dVector = 4;
+		constexpr int dTensor = 5;
+		auto Pos = getV<PROP_POS>(Particles);
+		auto sS = getV<sScalar>(Particles);
+		auto sV = getV<sVector>(Particles);
+		auto sT = getV<sTensor>(Particles);
+		auto dS = getV<dScalar>(Particles);
+		auto dV = getV<dVector>(Particles);
+		auto dT = getV<dTensor>(Particles);
+
+		//Particles_subset.write("Pars");
+		Derivative_x_gpu Dx(Particles, ord, rCut, sampling_factor, support_options::RADIUS), Bulk_Dx(Particles, ord,
+																								 rCut, sampling_factor,
+																								 support_options::RADIUS);
+        texp_v<double> TVx,TdxVx;
+		texp_v<VectorS<3, double>> TV;
+        texp_v<double[3][3]> TT;
+
+
+		auto it3 = Particles.getDomainIterator();
+
+		sS = 5;
+		sV[0] = 1;
+		sV[1] = 2;
+		sV[2] = 3;
+		sT[0][0] = 1;
+		sT[0][1] = 2;
+		sT[0][2] = 3;
+		sT[1][0] = 4;
+		sT[1][1] = 5;
+		sT[1][2] = 6;
+		sT[2][0] = 7;
+		sT[2][1] = 8;
+		sT[2][2] = 9;
+        TVx=sS;
+        dS = TVx;
+
+        {
+		auto it3 = Particles.getDomainIterator();
+
+		bool match = true;
+		while (it3.isNext())
+		{
+			auto key = it3.get();
+
+			match &= Particles.template getProp<sScalar>(key) == Particles.template getProp<dScalar>(key);
+
+			++it3;
+		}
+
+		BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+
+        TVx=sS*sS;
+        dS = TVx;
+
+        {
+            auto it3 = Particles.getDomainIterator();
+
+            bool match = true;
+            while (it3.isNext())
+            {
+                auto key = it3.get();
+
+                match &= Particles.template getProp<sScalar>(key)*Particles.template getProp<sScalar>(key) == Particles.template getProp<dScalar>(key);
+
+                ++it3;
+            }
+
+            BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+		TVx=sV[0];
+		dS = TVx;
+
+        {
+		auto it3 = Particles.getDomainIterator();
+
+		bool match = true;
+		while (it3.isNext())
+		{
+			auto key = it3.get();
+
+			match &= Particles.template getProp<sVector>(key)[0] == Particles.template getProp<dScalar>(key);
+
+			++it3;
+		}
+		BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+        TVx=sT[0][0];
+        dS = TVx;
+        {
+		auto it3 = Particles.getDomainIterator();
+
+		bool match = true;
+		while (it3.isNext())
+		{
+			auto key = it3.get();
+
+			match &= Particles.template getProp<sVector>(key)[0] == Particles.template getProp<dScalar>(key);
+
+			++it3;
+		}
+		BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+        TV=sV;
+        dV=TV;
+
+        {
+            auto it3 = Particles.getDomainIterator();
+
+            bool match = true;
+            while (it3.isNext())
+            {
+                auto key = it3.get();
+
+                match &= Particles.template getProp<sVector>(key)[0] == Particles.template getProp<dVector>(key)[0];
+                match &= Particles.template getProp<sVector>(key)[1] == Particles.template getProp<dVector>(key)[1];
+                match &= Particles.template getProp<sVector>(key)[2] == Particles.template getProp<dVector>(key)[2];
+
+                ++it3;
+            }
+            BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+        TV=0.5*sV+sV;
+        dV=TV;
+        //Pol_bulk = dPol + (0.5 * dt) * k1;
+        {
+            auto it3 = Particles.getDomainIterator();
+
+            bool match = true;
+            while (it3.isNext())
+            {
+                auto key = it3.get();
+                double x1=Particles.template getProp<sVector>(key)[0];
+                double y1=Particles.template getProp<dVector>(key)[0];
+                double x2=Particles.template getProp<sVector>(key)[1];
+                double y2=Particles.template getProp<dVector>(key)[1];
+                double x3=Particles.template getProp<sVector>(key)[2];
+                double y3=Particles.template getProp<dVector>(key)[2];
+
+                match &= 1.5*Particles.template getProp<sVector>(key)[0] == Particles.template getProp<dVector>(key)[0];
+                match &= 1.5*Particles.template getProp<sVector>(key)[1] == Particles.template getProp<dVector>(key)[1];
+                match &= 1.5*Particles.template getProp<sVector>(key)[2] == Particles.template getProp<dVector>(key)[2];
+
+                ++it3;
+            }
+            BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+        TV=sV;
+        dV=pmul(TV,TV);
+        //Pol_bulk = dPol + (0.5 * dt) * k1;
+        {
+            auto it3 = Particles.getDomainIterator();
+
+            bool match = true;
+            while (it3.isNext())
+            {
+                auto key = it3.get();
+                double x1=Particles.template getProp<sVector>(key)[0];
+                double y1=Particles.template getProp<dVector>(key)[0];
+                double x2=Particles.template getProp<sVector>(key)[1];
+                double y2=Particles.template getProp<dVector>(key)[1];
+                double x3=Particles.template getProp<sVector>(key)[2];
+                double y3=Particles.template getProp<dVector>(key)[2];
+
+                match &= Particles.template getProp<sVector>(key)[0]*Particles.template getProp<sVector>(key)[0] == Particles.template getProp<dVector>(key)[0];
+                match &= Particles.template getProp<sVector>(key)[1]*Particles.template getProp<sVector>(key)[1] == Particles.template getProp<dVector>(key)[1];
+                match &= Particles.template getProp<sVector>(key)[2]*Particles.template getProp<sVector>(key)[2] == Particles.template getProp<dVector>(key)[2];
+
+                ++it3;
+            }
+            //THERE IS A BUG HERE IT IS SUMMING THE VECTORS.
+            BOOST_REQUIRE_EQUAL(match,true);
+        }
+
+/*
+        TdxVx=Dx(sV[x]);
+        TT[0][0] = Dx(sV[x]);*/
+
+    }
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* DCPSE_OP_TEST_TEMPORAL_CPP_ */
+#endif
+#endif
\ No newline at end of file
diff --git a/src/DCPSE/Dcpse.cuh b/src/DCPSE/Dcpse.cuh
index 7fbae6c..6e41448 100644
--- a/src/DCPSE/Dcpse.cuh
+++ b/src/DCPSE/Dcpse.cuh
@@ -66,7 +66,7 @@ private:
     openfpm::vector_custd<T> localEpsInvPow; // Each MPI rank has just access to the local ones
     openfpm::vector_custd<T> calcKernels;
 
-    openfpm::vector<size_t> subsetKeyPid;
+    openfpm::vector_custd<size_t> subsetKeyPid;
 
     vector_type & particles;
     double rCut;
@@ -432,7 +432,7 @@ public:
 
         size_t localKey = subsetKeyPid.get(key.getKey());
         double eps = localEps.get(localKey);
-        double epsInvPow = localEpsInvPow(localKey);
+        double epsInvPow = localEpsInvPow.get(localKey);
 
         auto &particles = o1.getVector();
 
@@ -482,6 +482,99 @@ public:
         initializeStaticSize(particles, convergenceOrder, rCut, supportSizeFactor);
     }
 
+    /*
+     *
+     Save computed DCPSE coefficients to file
+     *
+     */
+    void save(const std::string &file){
+        auto & v_cl=create_vcluster();
+        size_t req = 0;
+
+        Packer<decltype(supportRefs),CudaMemory>::packRequest(supportRefs,req);
+        Packer<decltype(kerOffsets),CudaMemory>::packRequest(kerOffsets,req);
+        Packer<decltype(supportKeys1D),CudaMemory>::packRequest(supportKeys1D,req);
+        Packer<decltype(localEps),CudaMemory>::packRequest(localEps,req);
+        Packer<decltype(localEpsInvPow),CudaMemory>::packRequest(localEpsInvPow,req);
+        Packer<decltype(calcKernels),CudaMemory>::packRequest(calcKernels,req);
+        Packer<decltype(subsetKeyPid),CudaMemory>::packRequest(subsetKeyPid,req);
+
+        // allocate the memory
+        CudaMemory pmem;
+        ExtPreAlloc<CudaMemory> mem(req,pmem);
+
+        //Packing
+        Pack_stat sts;
+
+        Packer<decltype(supportRefs),CudaMemory>::pack(mem,supportRefs,sts);
+        Packer<decltype(kerOffsets),CudaMemory>::pack(mem,kerOffsets,sts);
+        Packer<decltype(supportKeys1D),CudaMemory>::pack(mem,supportKeys1D,sts);
+        Packer<decltype(localEps),CudaMemory>::pack(mem,localEps,sts);
+        Packer<decltype(localEpsInvPow),CudaMemory>::pack(mem,localEpsInvPow,sts);
+        Packer<decltype(calcKernels),CudaMemory>::pack(mem,calcKernels,sts);
+        Packer<decltype(subsetKeyPid),CudaMemory>::pack(mem,subsetKeyPid,sts);
+
+        // Save into a binary file
+        std::ofstream dump (file+"_"+std::to_string(v_cl.rank()), std::ios::out | std::ios::binary);
+        if (dump.is_open() == false)
+        {   std::cerr << __FILE__ << ":" << __LINE__ <<" Unable to write since dump is open at rank "<<v_cl.rank()<<std::endl;
+            return;
+            }
+        dump.write ((const char *)pmem.getPointer(), pmem.size());
+        return;
+    }
+
+    /*! \brief Load the DCPSE computations
+     *
+     *
+     */
+    void load(const std::string & file)
+    {
+        auto & v_cl=create_vcluster();
+        std::ifstream fs (file+"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary | std::ios::ate );
+        if (fs.is_open() == false)
+        {
+            std::cerr << __FILE__ << ":" << __LINE__ << " error, opening file: " << file << std::endl;
+            return;
+        }
+
+        // take the size of the file
+        size_t sz = fs.tellg();
+
+        fs.close();
+
+        // reopen the file without ios::ate to read
+        std::ifstream input (file+"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary );
+        if (input.is_open() == false)
+        {//some message here maybe
+            return;}
+
+        // Create the CudaMemory memory
+        size_t req = 0;
+        req += sz;
+        CudaMemory pmem;
+        ExtPreAlloc<CudaMemory> mem(req,pmem);
+
+        mem.allocate(pmem.size());
+
+        // read
+        input.read((char *)pmem.getPointer(), sz);
+
+        //close the file
+        input.close();
+
+        //Unpacking
+        Unpack_stat ps;
+
+        Unpacker<decltype(supportRefs),CudaMemory>::unpack(mem,supportRefs,ps);
+        Unpacker<decltype(kerOffsets),CudaMemory>::unpack(mem,kerOffsets,ps);
+        Unpacker<decltype(supportKeys1D),CudaMemory>::unpack(mem,supportKeys1D,ps);
+        Unpacker<decltype(localEps),CudaMemory>::unpack(mem,localEps,ps);
+        Unpacker<decltype(localEpsInvPow),CudaMemory>::unpack(mem,localEpsInvPow,ps);
+        Unpacker<decltype(calcKernels),CudaMemory>::unpack(mem,calcKernels,ps);
+        Unpacker<decltype(subsetKeyPid),CudaMemory>::unpack(mem,subsetKeyPid,ps);
+    }
+
 private:
 
     template <typename U>
-- 
GitLab