From edb2ea20dc9f8c79ccb5ca61c1b3c01b74e74d31 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <i-bird@localhost.localdomain>
Date: Wed, 9 Mar 2016 07:33:19 -0500
Subject: [PATCH] Small changes in the PSE Kernels

---
 configure.ac                      |  31 +++++++-
 src/FiniteDifference/FDScheme.hpp |  10 +--
 src/Makefile.am                   |   8 +-
 src/Matrix/SparseMatrix_Eigen.hpp | 104 +++----------------------
 src/PSE/Kernels.hpp               | 124 +++++++++++++++---------------
 src/Solvers/umfpack_solver.hpp    |   4 +-
 src/main.cpp                      |   3 +
 7 files changed, 117 insertions(+), 167 deletions(-)

diff --git a/configure.ac b/configure.ac
index 798428c5..71bdbe29 100755
--- a/configure.ac
+++ b/configure.ac
@@ -15,6 +15,7 @@ m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])])
 m4_ifdef([AX_OPENMP],,[m4_include([m4/ax_openmp.m4])])
 m4_ifdef([AX_CUDA],,[m4_include([m4/ax_cuda.m4])])
 m4_ifdef([IMMDX_LIB_METIS],,[m4_include([m4/immdx_lib_metis.m4])])
+m4_ifdef([IMMDX_LIB_PARMETIS],,[m4_include([m4/immdx_lib_parmetis.m4])])
 m4_ifdef([AX_BOOST_BASE],,[m4_include([m4/ax_boost_base.m4])])
 m4_ifdef([AX_BOOST_IOSTREAMS],,[m4_include([m4/ax_boost_iostreams.m4])])
 m4_ifdef([AX_BOOST_PROGRAM_OPTIONS],,[m4_include([m4/ax_boost_program_options.m4])])
@@ -23,8 +24,19 @@ m4_ifdef([AX_BLAS],,[m4_include([m4/ax_blas.m4])])
 m4_ifdef([AX_LAPACK],,[m4_include([m4/ax_lapack.m4])])
 m4_ifdef([AX_SUITESPARSE],,[m4_include([m4/ax_suitesparse.m4])])
 m4_ifdef([AX_EIGEN],,[m4_include([m4/ax_eigen.m4])])
+m4_ifdef([AX_LIB_HDF5],,[m4_include([m4/ax_lib_hdf5.m4])]])
+
+case $host_os in
+   *cygwin*)
+        # Do something specific for cygwin
+        CXXFLAGS+=" --std=gnu++11 "
+        ;;
+    *)
+        #Default Case
+        CXXFLAGS+=" --std=c++11 "
+        ;;
+esac
 
-CXXFLAGS+=" --std=c++11 "
 NVCCFLAGS=" "
 INCLUDES_PATH=" "
 
@@ -92,6 +104,23 @@ fi
 IMMDX_LIB_METIS([],[echo "Cannot detect metis, use the --with-metis option if it is not installed in the default location"
                     exit 201])
 
+## Check for parMetis
+
+IMMDX_LIB_PARMETIS([],[echo "Cannot detect parmetis, use the --with-parmetis option if it is not installed in the default location"
+                    exit 203])
+
+#########
+
+## Check for HDF5
+
+AX_LIB_HDF5([parallel])
+
+if test x"$with_hdf5" = x"no"; then
+    echo "Cannot detect hdf5, use the --with-hdf5 option if it is not installed in the default location"
+    exit 207
+fi
+
+
 ####### include OpenFPM_numerics include path"
 
 INCLUDES_PATH+=" -I/usr/local/include -I. -Iconfig -I../../openfpm_devices/src -I../../openfpm_data/src -I../../openfpm_io/src -I../../openfpm_vcluster/src -I../../src"
diff --git a/src/FiniteDifference/FDScheme.hpp b/src/FiniteDifference/FDScheme.hpp
index 5f1b58c8..a6df0e30 100644
--- a/src/FiniteDifference/FDScheme.hpp
+++ b/src/FiniteDifference/FDScheme.hpp
@@ -135,9 +135,6 @@ private:
 
 	typedef typename Sys_eqs::SparseMatrix_type::triplet_type triplet;
 
-	// Sparse Matrix
-	openfpm::vector<triplet> trpl;
-
 	openfpm::vector<typename Sys_eqs::stype> b;
 
 	// Domain Grid informations
@@ -226,6 +223,8 @@ private:
 	 */
 	void consistency()
 	{
+		openfpm::vector<triplet> & trpl = A.getMatrixTriplets();
+
 		// A and B must have the same rows
 		if (row != row_b)
 			std::cerr << "Error " << __FILE__ << ":" << __LINE__ << "the term B and the Matrix A for Ax=B must contain the same number of rows\n";
@@ -351,7 +350,7 @@ public:
 		// Counter
 		size_t cnt = 0;
 
-		// Create the re-mapping-grid
+		// Create the re-mapping grid
 		auto it = g_map.getDomainIterator();
 
 		while (it.isNext())
@@ -422,6 +421,8 @@ public:
 	 */
 	template<typename T> void impose(const T & op , typename Sys_eqs::stype num ,long int id ,grid_dist_iterator_sub<Sys_eqs::dims,typename g_map_type::d_grid> it_d, bool skip_first = false)
 	{
+		openfpm::vector<triplet> & trpl = A.getMatrixTriplets();
+
 		auto it = it_d;
 		grid_sm<Sys_eqs::dims,void> gs = g_map.getGridInfoVoid();
 
@@ -489,7 +490,6 @@ public:
 		consistency();
 #endif
 		A.resize(row,row);
-		A.setFromTriplets(trpl);
 
 		return A;
 
diff --git a/src/Makefile.am b/src/Makefile.am
index d569b00d..0a65adb5 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,12 +1,12 @@
 
-LINKLIBS =  $(SUITESPARSE_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS)  $(METIS_LIB) $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(BOOST_IOSTREAMS_LIB)
+LINKLIBS =  $(SUITESPARSE_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS)  $(METIS_LIB) $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(BOOST_IOSTREAMS_LIB) $(HDF5_LDFLAGS)  $(HDF5_LIBS)
 
 noinst_PROGRAMS = numerics
 numerics_SOURCES = main.cpp ../../openfpm_vcluster/src/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp
-numerics_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(SUITESPARSE_INCLUDE) $(METIS_INCLUDE) $(EIGEN_INCLUDE) -Wno-deprecated-declarations -Wno-unused-local-typedefs
+numerics_CXXFLAGS = $(HDF5_CPPFLAGS) -fext-numeric-literals $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(SUITESPARSE_INCLUDE) $(METIS_INCLUDE) $(EIGEN_INCLUDE) -Wno-deprecated-declarations -Wno-unused-local-typedefs
 numerics_CFLAGS = $(CUDA_CFLAGS)
-numerics_LDADD = $(LINKLIBS) -lmetis
-nobase_include_HEADERS = PSE/Kernels.hpp
+numerics_LDADD = $(LINKLIBS) -lmetis -lquadmath -lparmetis
+nobase_include_HEADERS = PSE/Kernels.hpp PSE/Kernels_test_util.hpp
 
 .cu.o :
 	$(NVCC) $(NVCCFLAGS) -o $@ -c $<
diff --git a/src/Matrix/SparseMatrix_Eigen.hpp b/src/Matrix/SparseMatrix_Eigen.hpp
index 43097c4a..706c818c 100644
--- a/src/Matrix/SparseMatrix_Eigen.hpp
+++ b/src/Matrix/SparseMatrix_Eigen.hpp
@@ -69,35 +69,7 @@ private:
 
 	Eigen::SparseMatrix<T,0,id_t> mat;
 	openfpm::vector<triplet_type> trpl;
-
-	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
-	 *
-	 * \param msg_i size required to receive the message from i
-	 * \param total_msg total size to receive from all the processors
-	 * \param total_p the total number of processor that want to communicate with you
-	 * \param i processor id
-	 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
-	 *           every time message_alloc is called)
-	 * \param ptr a pointer to the vector_dist structure
-	 *
-	 * \return the pointer where to store the message for the processor i
-	 *
-	 */
-	template<typename triplet> static void * msg_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
-	{
-		openfpm::vector<openfpm::vector<triplet> *> * trpl = (openfpm::vector<openfpm::vector<triplet> *> *)ptr;
-
-		if (trpl == NULL)
-			std::cerr << __FILE__ << ":" << __LINE__ << " Internal error this processor is not suppose to receive\n";
-
-		// We need one more slot
-		trpl->add();
-
-		trpl->last()->resize(msg_i/sizeof(triplet));
-
-		// return the pointer
-		return trpl->last()->getPointer();
-	}
+	openfpm::vector<triplet_type> trpl_recv;
 
 	/*! \brief Assemble the matrix
 	 *
@@ -105,7 +77,7 @@ private:
 	 * \param mat Matrix to assemble
 	 *
 	 */
-	template<typename triplet, typename mat_impl> void assembleMatrix(openfpm::vector<openfpm::vector<triplet> *> & trpl, SparseMatrix<double,int,mat_impl> & mat)
+	void assemble()
 	{
 		Vcluster & vcl = *global_v_cluster;
 
@@ -113,38 +85,13 @@ private:
 		// we assemble the Matrix from the collected data
 		if (vcl.getProcessingUnits() != 1)
 		{
-			// count the total triplet we have
-			size_t tot_trpl = 0;
-
-			for (size_t i = 0 ; i < trpl.size() ; i++)
-				tot_trpl += trpl.get(i).size();
-
-			openfpm::vector<triplet> mat_t;
-			mat_t.resize(tot_trpl);
-
-			// id zero
-			size_t id = 0;
-
-			// Add all the received triplet in one array
-			for (size_t i = 0 ; i < trpl.size() ; i++)
-			{
-				for (size_t j = 0 ; j < trpl.get(i).size() ; j++)
-				{
-					mat_t.get(id) = trpl.get(i).get(j);
-					id++;
-				}
-			}
-
-			mat.setFromTriplets(mat_t);
+			collect();
+			// only master assemble the Matrix
+			if (vcl.getProcessUnitID() == 0)
+				mat.setFromTriplets(trpl_recv.begin(),trpl_recv.end());
 		}
 		else
-		{
-			//
-			if (trpl.size() != 1)
-				std::cerr << "Internal error: " << __FILE__ << ":" << __LINE__ << " in case of single processor we must have a single triplet set\n";
-
-			mat.setFromTriplets(*trpl.get(0));
-		}
+			mat.setFromTriplets(trpl.begin(),trpl.end());
 	}
 
 	/*! \brief Here we collect the full matrix on master
@@ -152,39 +99,12 @@ private:
 	 */
 	void collect()
 	{
-		Vcluster & vcl = global_v_cluster;
-
-		// If we are on master collect the information
-		if (vcl.getProcessUnitID() == 0)
-		{
-			// send buffer (master does not send anything) so send req and send_buf
-			// remain buffer with size 0
-			openfpm::vector<size_t> send_req;
-			openfpm::vector<openfpm::vector<triplet_type>> send_buf;
-
-			// for each processor we are going to receive a set of triplet
-			openfpm::vector<openfpm::vector<triplet_type>> trpl;
+		Vcluster & vcl = *global_v_cluster;
 
-			// Send and recv multiple messages
-			vcl.sendrecvMultipleMessagesNBX(send_req, send_buf,msg_alloc<triplet>,&trpl);
+		trpl_recv.clear();
 
-			assembleMatrix<triplet,Eigen::SparseMatrix<T,0,id_t>>(trpl);
-		}
-		else
-		{
-			// send buffer (master does not send anything) so send req and send_buf
-			// remain buffer with size 0
-			openfpm::vector<size_t> send_req;
-			send_req.add(0);
-			openfpm::vector<openfpm::vector<triplet_type> *> send_buf;
-			send_buf.add(&A);
-
-			// for each processor we are going to receive a set of triplet
-			openfpm::vector<openfpm::vector<triplet_type>> trpl;
-
-			// Send and recv multiple messages
-			vcl.sendrecvMultipleMessagesNBX(send_req, send_buf,msg_alloc<triplet_type>,NULL);
-		}
+		// here we collect all the triplet in one array on the root node
+		vcl.SGather(trpl,trpl_recv,0);
 	}
 
 public:
@@ -240,7 +160,6 @@ public:
 	const Eigen::SparseMatrix<T,0,id_t> & getMat() const
 	{
 		// Here we collect the information on master
-		collect();
 		assemble();
 
 		return mat;
@@ -253,7 +172,6 @@ public:
 	 */
 	Eigen::SparseMatrix<T,0,id_t> & getMat()
 	{
-		collect();
 		assemble();
 
 		return mat;
diff --git a/src/PSE/Kernels.hpp b/src/PSE/Kernels.hpp
index a3084375..7388e0ad 100644
--- a/src/PSE/Kernels.hpp
+++ b/src/PSE/Kernels.hpp
@@ -22,11 +22,11 @@
  *
  */
 template<unsigned int dim, typename T, unsigned int ord=2, unsigned int impl=KER_GAUSSIAN>
-struct Lap
+struct Lap_PSE
 {
 	T epsilon;
 
-	Lap(T epsilon)
+	Lap_PSE(T epsilon)
 	:epsilon(epsilon)
 	{}
 
@@ -36,7 +36,7 @@ struct Lap
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[dim], T (&y)[dim])
+	inline T value(T (&x)[dim], T (&y)[dim])
 	{
 		std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " The laplacian for order:" << ord << " in dimension " << dim << " has not been implemented";
 		return 0.0;
@@ -48,7 +48,7 @@ struct Lap
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[dim], const Point<dim,T> & y)
+	inline T value(T (&x)[dim], const Point<dim,T> & y)
 	{
 		std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " The laplacian for order:" << ord << " in dimension " << dim << " has not been implemented";
 		return 0.0;
@@ -60,7 +60,7 @@ struct Lap
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	double value(const Point<dim,T> & x, T (&y)[dim])
+	inline T value(const Point<dim,T> & x, T (&y)[dim])
 	{
 		std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " The laplacian for order:" << ord << " in dimension " << dim << " has not been implemented";
 		return 0.0;
@@ -72,7 +72,7 @@ struct Lap
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	double value(const Point<dim,T> & x, const Point<dim,T> & y)
+	inline T value(const Point<dim,T> & x, const Point<dim,T> & y)
 	{
 		std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " The laplacian for order:" << ord << " in dimension " << dim << " has not been implemented";
 		return 0.0;
@@ -80,11 +80,11 @@ struct Lap
 };
 
 template<typename T>
-struct Lap<1,T,2,KER_GAUSSIAN>
+struct Lap_PSE<1,T,2,KER_GAUSSIAN>
 {
 	T epsilon;
 
-	inline Lap(T epsilon)
+	inline Lap_PSE(T epsilon)
 	:epsilon(epsilon)
 	{}
 
@@ -94,14 +94,14 @@ struct Lap<1,T,2,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], T (&y)[1])
+	inline T value(T (&x)[1], T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y[i]) * (x[i] - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return 4.0 / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
+		return T(4.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -110,14 +110,14 @@ struct Lap<1,T,2,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], const Point<1,T> & y)
+	inline T value(T (&x)[1], const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y.get(i)) * (x[i] - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return 4.0 / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
+		return T(4.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -126,14 +126,14 @@ struct Lap<1,T,2,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, T (&y)[1])
+	inline T value(const Point<1,T> & x, T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y[i]) * (x.get(i) - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return 4.0 / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
+		return T(4.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -142,23 +142,23 @@ struct Lap<1,T,2,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, const Point<1,T> & y)
+	inline T value(const Point<1,T> & x, const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y.get(i)) * (x.get(i) - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return 4.0 / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
+		return T(4.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d);
 	}
 };
 
 template<typename T>
-struct Lap<1,T,4,KER_GAUSSIAN>
+struct Lap_PSE<1,T,4,KER_GAUSSIAN>
 {
 	T epsilon;
 
-	inline Lap(T epsilon)
+	inline Lap_PSE(T epsilon)
 	:epsilon(epsilon)
 	{}
 
@@ -168,14 +168,14 @@ struct Lap<1,T,4,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], T (&y)[1])
+	inline T value(T (&x)[1], T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y[i]) * (x[i] - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (10.0-4.0*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-4.0*d*d+10.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -184,14 +184,14 @@ struct Lap<1,T,4,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], const Point<1,T> & y)
+	inline T value(T (&x)[1], const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y.get(i)) * (x[i] - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (10.0-4.0*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-4.0*d*d+10.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -200,14 +200,14 @@ struct Lap<1,T,4,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, T (&y)[1])
+	inline T value(const Point<1,T> & x, T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y[i]) * (x.get(i) - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon/ boost::math::constants::root_pi<T>() * exp(-d*d) * (10.0-4.0*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-4.0*d*d+10.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -216,23 +216,23 @@ struct Lap<1,T,4,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, const Point<1,T> & y)
+	inline T value(const Point<1,T> & x, const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y.get(i)) * (x.get(i) - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (10.0-4.0*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-4.0*d*d+10.0);
 	}
 };
 
 template<typename T>
-struct Lap<1,T,6,KER_GAUSSIAN>
+struct Lap_PSE<1,T,6,KER_GAUSSIAN>
 {
 	T epsilon;
 
-	inline Lap(T epsilon)
+	inline Lap_PSE(T epsilon)
 	:epsilon(epsilon)
 	{}
 
@@ -242,14 +242,14 @@ struct Lap<1,T,6,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], T (&y)[1])
+	inline T value(T (&x)[1], T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y[i]) * (x[i] - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (35.0/2.0-14.0*d*d+2.0*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (2.0*d*d*d*d-14.0*d*d+35.0/2.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -258,14 +258,14 @@ struct Lap<1,T,6,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], const Point<1,T> & y)
+	inline T value(T (&x)[1], const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y.get(i)) * (x[i] - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (35.0/2.0-14.0*d*d+2.0*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (2.0*d*d*d*d-14.0*d*d+35.0/2.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -274,14 +274,14 @@ struct Lap<1,T,6,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, T (&y)[1])
+	inline T value(const Point<1,T> & x, T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y[i]) * (x.get(i) - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (35.0/2.0-14.0*d*d+2.0*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (2.0*d*d*d*d-14.0*d*d+35.0/2.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -290,23 +290,23 @@ struct Lap<1,T,6,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, const Point<1,T> & y)
+	inline T value(const Point<1,T> & x, const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y.get(i)) * (x.get(i) - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (35.0/2.0-14.0*d*d+2.0*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (2.0*d*d*d*d-14.0*d*d+35.0/2.0);
 	}
 };
 
 template<typename T>
-struct Lap<1,T,8,KER_GAUSSIAN>
+struct Lap_PSE<1,T,8,KER_GAUSSIAN>
 {
 	T epsilon;
 
-	inline Lap(T epsilon)
+	inline Lap_PSE(T epsilon)
 	:epsilon(epsilon)
 	{}
 
@@ -316,14 +316,14 @@ struct Lap<1,T,8,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], T (&y)[1])
+	inline T value(T (&x)[1], T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y[i]) * (x[i] - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (105.0/4.0-63.0/2.0*d*d+9.0*d*d*d*d-2.0/3.0*d*d*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-T(2.0)/T(3.0)*d*d*d*d*d*d+9.0*d*d*d*d-63.0/2.0*d*d+105.0/4.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -332,14 +332,14 @@ struct Lap<1,T,8,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(T (&x)[1], const Point<1,T> & y)
+	inline T value(T (&x)[1], const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x[i] - y.get(i)) * (x[i] - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (105.0/4.0-63.0/2.0*d*d+9.0*d*d*d*d-2.0/3.0*d*d*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-T(2.0)/T(3.0)*d*d*d*d*d*d+9.0*d*d*d*d-63.0/2.0*d*d+105.0/4.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -348,14 +348,14 @@ struct Lap<1,T,8,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, T (&y)[1])
+	inline T value(const Point<1,T> & x, T (&y)[1])
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y[i]) * (x.get(i) - y[i]);
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (105.0/4.0-63.0/2.0*d*d+9.0*d*d*d*d-2.0/3.0*d*d*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-T(2.0)/T(3.0)*d*d*d*d*d*d+9.0*d*d*d*d-63.0/2.0*d*d+105.0/4.0);
 	}
 
 	/*! \brief From a kernel centered in x, it give the value of the kernel in y
@@ -364,14 +364,14 @@ struct Lap<1,T,8,KER_GAUSSIAN>
 	 * \param y where we calculate the kernel
 	 *
 	 */
-	inline double value(const Point<1,T> & x, const Point<1,T> & y)
+	inline T value(const Point<1,T> & x, const Point<1,T> & y)
 	{
-		double d = 0.0;
+		T d = 0.0;
 		for (size_t i = 0 ; i < 1 ; i++)
 			d += (x.get(i) - y.get(i)) * (x.get(i) - y.get(i));
 		d = sqrt(d) / epsilon;
 
-		return d / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (105.0/4.0-63.0/2.0*d*d+9.0*d*d*d*d-2.0/3.0*d*d*d*d*d*d);
+		return T(1.0) / epsilon / boost::math::constants::root_pi<T>() * exp(-d*d) * (-T(2.0)/T(3.0)*d*d*d*d*d*d+9.0*d*d*d*d-63.0/2.0*d*d+105.0/4.0);
 	}
 };
 
diff --git a/src/Solvers/umfpack_solver.hpp b/src/Solvers/umfpack_solver.hpp
index 24b58df6..826343cd 100644
--- a/src/Solvers/umfpack_solver.hpp
+++ b/src/Solvers/umfpack_solver.hpp
@@ -44,7 +44,7 @@ public:
 	 *	\tparam impl Implementation of the SparseMatrix
 	 *
 	 */
-	template<typename impl> static Vector<double> solve(const SparseMatrix<double,int,impl> & A, const Vector<double> & b, size_t opt = UMFPACK_NONE)
+	template<typename impl> static Vector<double> solve(SparseMatrix<double,int,impl> & A, const Vector<double> & b, size_t opt = UMFPACK_NONE)
 	{
 		Vcluster & v_cl = *global_v_cluster;
 
@@ -80,7 +80,7 @@ public:
 			}
 
 			// Vector is only on master, scatter back the information
-			x.sync();
+			x.scatter();
 		}
 		return x;
 	}
diff --git a/src/main.cpp b/src/main.cpp
index edcbe04c..57fb816d 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -2,9 +2,12 @@
 #define BOOST_TEST_MODULE "C++ test module for OpenFPM_numerics project"
 #include <boost/test/included/unit_test.hpp>
 
+#include "unit_test_init_cleanup.hpp"
+
 #include "Equations/eq_unit_tests.hpp"
 #include "FiniteDifference/FDScheme_unit_tests.hpp"
 #include "FiniteDifference/util/common_test.hpp"
 #include "FiniteDifference/eq_unit_test.hpp"
 #include "FiniteDifference/eq_unit_test_3d.hpp"
 #include "util/util_num_unit_tests.hpp"
+#include "PSE/Kernels_unit_tests.hpp"
-- 
GitLab