diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f50fedc563f51a776cb48d242fcdfc187bb0c5ef
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,74 @@
+# Compiled source #
+###################
+*.com
+*.class
+*.dll
+*.exe
+*.o
+*.so
+
+# Packages #
+############
+# it's better to unpack these files and commit the raw source
+# git has its own built in compression methods
+*.7z
+*.dmg
+*.gz
+*.iso
+*.jar
+*.rar
+*.tar
+*.zip
+
+# Logs and databases #
+######################
+*.log
+*.sql
+*.sqlite
+
+# OS generated files #
+######################
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+###### Other
+
+*.vtk
+AUTHORS
+COPYING
+INSTALL
+NEWS
+README
+**/vtk/Makefile
+**/src/Makefile
+./Makefile
+Makefile.in
+config.status
+configure
+numerics
+**/.deps
+**/src/config
+aclocal.m4
+**/autom4te.cache
+example.mk
+src/pdata
+vtk/cart_dec
+vtk/dom_box
+vtk/metis_dec
+.autotools
+.cproject
+.project
+.settings
+ar-lib
+compile
+config.guess
+config.sub
+depcomp
+install-sh
+missing
+install_dir
diff --git a/.gitmodules b/.gitmodules
index 34adb2b886fec12365f1658f98c77d737a5a3d1a..6e3fd50b51812a1f685daf8fec649be368904e06 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,15 +1,15 @@
 [submodule "openfpm_vcluster"]
 	path = openfpm_vcluster
-	url = ssh://git@ppmcore.mpi-cbg.de:/incardon/openfpm_vcluster.git
+	url = ssh://git@ppmcore.mpi-cbg.de/incardon/openfpm_vcluster.git
 [submodule "openfpm_devices"]
 	path = openfpm_devices
-	url = ssh://git@ppmcore.mpi-cbg.de:/incardon/openfpm_devices.git
+	url = ssh://git@ppmcore.mpi-cbg.de/incardon/openfpm_devices.git
 [submodule "openfpm_io"]
 	path = openfpm_io
-	url = ssh://git@ppmcore.mpi-cbg.de:/incardon/openfpm_io.git
+	url = ssh://git@ppmcore.mpi-cbg.de/incardon/openfpm_io.git
 [submodule "openfpm_data"]
 	path = openfpm_data
-	url = ssh://git@ppmcore.mpi-cbg.de:/incardon/openfpm_data.git
+	url = ssh://git@ppmcore.mpi-cbg.de/incardon/openfpm_data.git
 [submodule "openfpm_numerics"]
 	path = openfpm_numerics
-	url = ssh://git@ppmcore.mpi-cbg.de:/incardon/openfpm_numerics.git
+	url = ssh://git@ppmcore.mpi-cbg.de/incardon/openfpm_numerics.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000000000000000000000000000000000000..54f0e06a995be1881ecfb4a5faa00bb3d713ed79
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,33 @@
+# Change Log
+All notable changes to this project will be documented in this file.
+
+## [0.2.0] - 2015-02-17
+### Added
+- PSE 1D example with multiple precision
+- Plot example for GoogleChart plotting
+- Distributed data structure now support 128bit floating point precision (on Beta)
+- OpenFPM support for Microsoft Windows (Cygwin) compilation
+
+### Fixed
+- Detection 32 bit system and report as an error
+
+### Changed
+- Nothing to report
+
+## [0.1.0] - 2015-02-05
+### Added
+- PSE 1D example
+- Cell list example
+- Verlet list example
+- Kickstart for OpenFPM_numeric
+- Automated dependency installation for SUITESPARSE EIGEN OPENBLAS(LAPACK)
+
+
+### Fixed
+- CRITICAL BUG in periodic boundary condition
+- BOOST auto updated to 1.60
+- Compilation with multiple .cpp files
+
+### Changed
+- Nothing to report
+
diff --git a/Makefile.am b/Makefile.am
index 2a675a02ee48361d4af1c61b67be361fa39e71af..a6fbc5374b3fd1f026c8d0ad28d5b5b393c69a7d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = src vtk openfpm_data openfpm_io openfpm_devices openfpm_vcluster
+SUBDIRS = src vtk openfpm_data openfpm_io openfpm_devices openfpm_vcluster openfpm_numerics
 
 bin_PROGRAMS = 
diff --git a/build_pdata.sh b/build_pdata.sh
index 8db1260f876aa66e07785e70556fcae067a9dd88..71304560947aa150b6b4d525338e8678c1c73c97 100644
--- a/build_pdata.sh
+++ b/build_pdata.sh
@@ -1,6 +1,6 @@
 #! /bin/bash
 
-# Make a directory in /tmp/OpenFPM_data
+# Make a directory in /tmp/OpenFPM_pdata
 
 echo "Directory: $1"
 echo "Machine: $2"
@@ -19,6 +19,8 @@ if [ $? -ne 0 ]; then
   exit 1
 fi
 
+mkdir openfpm_numerics/src/config
+
 # pull from all the projects
 cd openfpm_data
 git checkout develop
@@ -57,12 +59,10 @@ if [ $? -ne 0 ]; then
 fi
 cd ..
 
-
 if [ "$2" == "gin" ]
 then
  echo "Compiling on gin\n"
  source ~/.bashrc
- sh ./autogen.sh
  module load gcc/4.9.2
  ./install -s -c "--prefix=/home/jenkins/openfpm_install"
  make
@@ -218,38 +218,38 @@ then
  module load boost/1.56.0-gnu4.9.1
  module unload gcc/4.9.1
  module load gcc/4.9.3
- module load openmpi/1.8.7
+ module load openmpi/1.8.8-gnu
  module unload bullxmpi
  module load metis/5.1.0
 
- ./install -s -c"--with-metis=$METIS_ROOT --with-boost=$BOOST_ROOT CXX=mpic++"
+ ./install -s -c"--with-metis=/sw/global/libraries/metis/5.1.0/x86_64/ --with-boost=/sw/taurus/libraries/boost/1.56.0-gnu4.9.1 CXX=mpic++"
  make
  if [ $? -ne 0 ]; then
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
  fi
 
- salloc --nodes=1 --ntasks-per-node=16 --time=00:10:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 16 src/pdata"
+ salloc --nodes=1 --ntasks-per-node=24 --exclude=taurusi[6300-6400],taurusi[5400-5500] --time=00:5:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 24 src/pdata --report_level=no"
  if [ $? -ne 0 ]; then
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
  fi
- salloc --nodes=2 --ntasks-per-node=16 --time=00:10:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 32 src/pdata"
+ salloc --nodes=2 --ntasks-per-node=24 --exclude=taurusi[6300-6400],taurusi[5400-5500] --time=00:5:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 48 src/pdata --report_level=no"
  if [ $? -ne 0 ]; then
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
  fi
- salloc --nodes=4 --ntasks-per-node=16 --time=00:10:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 64 src/pdata"
+ salloc --nodes=4 --ntasks-per-node=24 --exclude=taurusi[6300-6400],taurusi[5400-5500] --time=00:5:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 96 src/pdata --report_level=no"
  if [ $? -ne 0 ]; then 
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
  fi
- salloc --nodes=8 --ntasks-per-node=16 --time=00:10:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 128 src/pdata"
+ salloc --nodes=8 --ntasks-per-node=24 --exclude=taurusi[6300-6400],taurusi[5400-5500] --time=00:5:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 192 src/pdata --report_level=no"
  if [ $? -ne 0 ]; then
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
  fi
- salloc --nodes=16 --ntasks-per-node=16 --time=00:10:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 256 src/pdata"
+ salloc --nodes=10 --ntasks-per-node=24 --exclude=taurusi[6300-6400],taurusi[5400-5500] --time=00:5:00 --mem-per-cpu=1900 --partition=haswell bash -c "ulimit -s unlimited && mpirun -np 240 src/pdata --report_level=no"
  if [ $? -ne 0 ]; then
    curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\"  , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to complete the openfpm_pdata test \" }] }" https://hooks.slack.com/services/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
    exit 1 ; 
@@ -258,7 +258,6 @@ then
 else
  echo "Compiling general"
  source ~/.bashrc
- sh ./autogen.sh
  ./install -s
 
  mpirun -np 2 ./src/pdata
diff --git a/configure.ac b/configure.ac
index 4da0b7c59d1367a2b29eabd789dcbfc16fbe24c0..1b9c8738b693a0b9c61548410b724567c46d5186 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,7 +7,7 @@ AC_PREREQ(2.59)
 AC_INIT(FULL-PACKAGE-NAME, VERSION, BUG-REPORT-ADDRESS)
 AC_CANONICAL_SYSTEM
 AC_CONFIG_SRCDIR([src/main.cpp])
-AC_CONFIG_SUBDIRS([openfpm_data openfpm_devices openfpm_vcluster openfpm_io])
+AC_CONFIG_SUBDIRS([openfpm_data openfpm_devices openfpm_vcluster openfpm_io openfpm_numerics])
 
 #### Adding --with-pdata option and openfpm_pdata to prefix folder
 
@@ -25,20 +25,42 @@ ac_configure_args="$ac_configure_args --with-pdata=../../src"
 AM_INIT_AUTOMAKE
 AC_CONFIG_HEADER([src/config/config.h])
 m4_ifdef([ACX_PTHREAD],,[m4_include([m4/acx_pthread.m4])])
-m4_ifdef([AX_BOOST],,[m4_include([m4/ax_boost.m4])])
 m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])])
 m4_ifdef([AX_OPENMP],,[m4_include([m4/ax_openmp.m4])])
 m4_ifdef([AX_CUDA],,[m4_include([m4/ax_cuda.m4])])
 m4_ifdef([IMMDX_LIB_METIS],,[m4_include([m4/immdx_lib_metis.m4])])
 m4_ifdef([IMMDX_LIB_PARMETIS],,[m4_include([m4/immdx_lib_parmetis.m4])])
+m4_ifdef([AX_BOOST_BASE],,[m4_include([m4/ax_boost_base.m4])])
+m4_ifdef([AX_BOOST_IOSTREAMS],,[m4_include([m4/ax_boost_iostreams.m4])])
+m4_ifdef([AX_BOOST_PROGRAM_OPTIONS],,[m4_include([m4/ax_boost_program_options.m4])])
+m4_ifdef([AX_BOOST_UNIT_TEST_FRAMEWORK],,[m4_include([m4/ax_boost_unit_test_framework.m4])])
+m4_ifdef([AX_BLAS],,[m4_include([m4/ax_blas.m4])])
+m4_ifdef([AX_LAPACK],,[m4_include([m4/ax_lapack.m4])])
+m4_ifdef([AX_SUITESPARSE],,[m4_include([m4/ax_suitesparse.m4])])
+m4_ifdef([AX_EIGEN],,[m4_include([m4/ax_eigen.m4])])
+
+
+case $host_os in
+   *cygwin*)
+        # Do something specific for cygwin
+        CXXFLAGS+=" --std=gnu++11 "
+        ;;
+    *)
+        #Default Case
+        CXXFLAGS+=" --std=c++11 "
+        ;;
+esac
 
-CXXFLAGS+=" --std=c++11 "
 NVCCFLAGS=" "
 INCLUDES_PATH=" "
 
 # Create a file with the install base folder
 echo "$base" > install_dir
 
+# Needed for build library
+AC_PROG_RANLIB
+AM_PROG_AR
+
 # Checks for programs.
 AC_PROG_CXX
 
@@ -82,29 +104,32 @@ else
 	NVCCFLAGS+="$NVCCFLAGS -O3 "
 fi
 
-## Check for memcheck
+#########
+
+## Check for Metis
+
+IMMDX_LIB_METIS([],[echo "Cannot detect metis, use the --with-metis option if it is not installed in the default location"
+                    exit 201])
+
+########
 
-AC_MSG_CHECKING(whether to build with memcheck capabilities)
-AC_ARG_ENABLE(memcheck,
+## Enable scan coverty
+
+AC_MSG_CHECKING(whether to build for scan coverty compilation)
+AC_ARG_ENABLE(scan-coverty,
         AC_HELP_STRING(
-            [--enable-memcheck],
-            [enable memory check (def=no)]
+            [--enable-scan-coverty],
+            [enable scan-coverty compilation (def=no)]
         ),
-        memcheck="$enableval"
+        scancoverty="$enableval"
 )
 
-AC_MSG_RESULT($memcheck)
+AC_MSG_RESULT($scancoverty)
 
-if test x"$memcheck" = x"yes"; then
-        AC_DEFINE([MEMLEAK_CHECK],[],[Memory check, corruption and leak])
+if test x"$scancoverty" = x"yes"; then
+        AC_DEFINE([COVERTY_SCAN],[],[Compile for coverty scan])
 fi
 
-#########
-
-## Check for Metis
-
-IMMDX_LIB_METIS([],[echo "Cannot detect metis, use the --with-metis option if it is not installed in the default location"
-                    exit 201])
 
 ## Check for parMetis
 
@@ -113,7 +138,7 @@ IMMDX_LIB_PARMETIS([],[echo "Cannot detect parmetis, use the --with-parmetis opt
 
 ####### include OpenFPM_devices include path
 
-INCLUDES_PATH+="-I. -Isrc/config/ -I../openfpm_io/src -I../openfpm_data/src -I../openfpm_devices/src -I../openfpm_vcluster/src/"
+INCLUDES_PATH+="-I. -Iconfig/ -I../openfpm_io/src -I../openfpm_data/src -I../openfpm_devices/src -I../openfpm_vcluster/src/"
 
 ###### Check for se-class1
 
@@ -190,8 +215,60 @@ fi
 
 ##### CHECK FOR BOOST ##############
 
-AX_BOOST([1.52],[],[echo "boost not found"
+AX_BOOST_BASE([1.52],[],[echo "boost not found"
                     exit 202])
+AX_BOOST_UNIT_TEST_FRAMEWORK
+AX_BOOST_PROGRAM_OPTIONS
+AX_BOOST_IOSTREAMS
+
+if test x"$ax_cv_boost_unit_test_framework" = x"no"; then
+  echo "Notify boost not usable"
+  exit 202
+fi
+
+if test x"$ax_cv_boost_iostreams" = x"no"; then
+  echo "Notify boost not usable"
+  exit 202
+fi
+
+if test x"$ax_cv_boost_programs_options" = x"no"; then
+  echo "Notify boost not usable"
+  exit 202
+fi
+
+### Unfortunately a lot of linux distros install a pretty old MPI in the system wide folder,
+### override such MPI with the installed one is extremely difficult and tricky, because we want
+### to include "some" system library but exclude mpi. One possibility is to give specifically
+### the wanted libmpi.so directly to the linker. But this is not possible because this lib is
+### given by mpic++ in the form -L/path/to/mpi -lmpi, the other is completely eliminate every
+### -L with a system default library
+###
+
+# eliminate any /usr/lib and and /usr/include from $BOOST_LIB and $BOOST_INCLUDE
+
+BOOST_LDFLAGS=$(echo "$BOOST_LDFLAGS" | sed -e 's/ -L\/usr\/lib64[ \b]//g' | sed -e 's/ -L\/usr\/lib[ \b]//g')
+BOOST_CPPFLAGS=$(echo "$BOOST_CPPFLAGS" | sed -e 's/-I\/usr\/include[ \b]//g')
+
+AC_SUBST(BOOST_LDFLAGS)
+AC_SUBST(BOOST_CPPFLAGS)
+
+###### Checking for OpenBLAS
+
+AX_BLAS([],[echo "blas not found"
+                    exit 204])
+
+AX_LAPACK([],[echo "lapack not found"
+                    exit 204])
+
+###### Checking for SUITESPARSE
+
+AX_SUITESPARSE([],[echo "suitesparse not found"
+                    exit 205])
+
+###### Checking for EIGEN
+
+AX_EIGEN([],[echo "eigen not found"
+                    exit 206])
 
 ####### Checking for GPU support
 
diff --git a/example/Grid/0_simple/main.cpp b/example/Grid/0_simple/main.cpp
index b725c8bf2d54aa72741d9166d66aace939f605d0..deeb3f39667c588834bc3c870d35f503a31a98d0 100644
--- a/example/Grid/0_simple/main.cpp
+++ b/example/Grid/0_simple/main.cpp
@@ -1,5 +1,5 @@
 #include "Grid/grid_dist_id.hpp"
-#include "data_type/scalar.hpp"
+#include "data_type/aggregate.hpp"
 #include "Decomposition/CartDecomposition.hpp"
 
 /*
@@ -26,9 +26,9 @@ int main(int argc, char* argv[])
 	//
 	// ### WIKI 3 ###
 	//
-	// Create several object needed later, in particular
+	// Create
 	// * A 3D box that define the domain
-	// * an array of 3 unsigned integer that define the size of the grid on each dimension
+	// * an array of 3 unsigned integer that will define the size of the grid on each dimension
 	// * A Ghost object that will define the extension of the ghost part for each sub-domain in physical units
 	
 	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
@@ -43,8 +43,9 @@ int main(int argc, char* argv[])
 	//
 	// ### WIKI 4 ###
 	//
-	// Create a distributed grid in 3D (1° template parameter) defined in R^3 with float precision (2° template parameter)
-	// using a CartesianDecomposition strategy (3° parameter) (the parameter 1° and 2° inside CartDecomposition must match 1° and 2°
+	// Create a distributed grid in 3D (1° template parameter) space with float precision (2° template parameter)
+	// each grid point contain a vector of dimension 3 (float[3]),
+	// using a CartesianDecomposition strategy (4° parameter) (the parameter 1° and 2° inside CartDecomposition must match 1° and 2°
 	// of grid_dist_id)
 	//
 	// Constructor parameters:
@@ -53,11 +54,11 @@ int main(int argc, char* argv[])
 	// * domain: where the grid is defined
 	// * g: ghost extension
 	//
-	grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>> g_dist(sz,domain,g);
+	grid_dist_id<3, float, aggregate<float[3]>, CartDecomposition<3,float>> g_dist(sz,domain,g);
 	
 	// ### WIKI 5 ###
 	//
-	// Get an iterator that go throught the point of the domain (No ghost)
+	// Get an iterator that go through the points of the grid (No ghost)
 	//
 	auto dom = g_dist.getDomainIterator();
 
@@ -71,8 +72,8 @@ int main(int argc, char* argv[])
 		//
 		// ### WIKI 6 ###
 		//
-		// Get the local grid key, the local grid key store internaly the sub-domain id (each sub-domain contain a grid)
-		// and the local grid point id identified by 2 integers in 2D 3 integer in 3D and so on. These two dinstinc element are
+		// Get the local grid key, the local grid key store internally the sub-domain id (each sub-domain contain a grid)
+		// and the local grid point id identified by 2 integers in 2D 3 integer in 3D and so on. These two distinct elements are
 		// available with key.getSub() and key.getKey()
 		//
 		auto key = dom.get();
@@ -120,7 +121,7 @@ int main(int argc, char* argv[])
 	//
 	// count contain the number of points the local processor contain, if we are interested to count the total number across the processor
 	// we can use the function add, to sum across processors. First we have to get an instance of Vcluster, queue an operation of add with
-	// the variable count and finaly execute. All the operation are asynchronous, execute work like a barrier and ensure that all the 
+	// the variable count and finally execute. All the operation are asynchronous, execute work like a barrier and ensure that all the
 	// queued operations are executed
 	//
 	Vcluster & vcl = g_dist.getVC();
@@ -142,9 +143,9 @@ int main(int argc, char* argv[])
 	//
 	// ### WIKI 13 ###
 	//
-	// For debugging porpouse and demostration we output the decomposition
+	// For debugging purpose and demonstration we output the decomposition
 	//
-	g_dist.getDecomposition().write("dec/out");
+	g_dist.getDecomposition().write("out_dec");
 	
 	//
 	// ### WIKI 14 ###
diff --git a/example/Grid/1_stencil/main.cpp b/example/Grid/1_stencil/main.cpp
index 3d4470c6cbc3606b3448cc56f781ee0477b410c6..14a08adc1b6567ced665bc9ce2bdda7f0d1db8c7 100644
--- a/example/Grid/1_stencil/main.cpp
+++ b/example/Grid/1_stencil/main.cpp
@@ -1,5 +1,5 @@
 #include "Grid/grid_dist_id.hpp"
-#include "data_type/scalar.hpp"
+#include "data_type/aggregate.hpp"
 #include "Decomposition/CartDecomposition.hpp"
 
 /*
@@ -7,9 +7,8 @@
  *
  * ## Simple example
  *
- * This example show how to move grid_key in order to create a laplacian stencil,
- * be carefull, the function move are convenient, we suggest to not use in case speed
- * of a speed critical part of the code
+ * This example show how to move grid_key in order to create a Laplacian stencil,
+ * be careful, the function move are convenient, but not the fastest implementation
  *
  * ### WIKI END ###
  *
@@ -19,13 +18,18 @@
  *
  * ### WIKI 2 ###
  *
- * Define some convenient constant
+ * Define some convenient constants and types
  *
  */
 constexpr size_t x = 0;
 constexpr size_t y = 1;
 constexpr size_t z = 2;
 
+constexpr size_t A = 0;
+constexpr size_t B = 0;
+
+typedef aggregate<float[3],float[3]> grid_point;
+
 int main(int argc, char* argv[])
 {
 	//
@@ -55,8 +59,9 @@ int main(int argc, char* argv[])
 	//
 	// ### WIKI 4 ###
 	//
-	// Create a distributed grid in 3D (1° template parameter) defined in R^3 with float precision (2° template parameter)
-	// using a CartesianDecomposition strategy (3° parameter) (the parameter 1° and 2° inside CartDecomposition must match 1° and 2°
+	// Create a distributed grid in 3D (1° template parameter) space with float precision (2° template parameter)
+	// each grid point contain a vector of dimension 3 (float[3]),
+	// using a CartesianDecomposition strategy (4° parameter) (the parameter 1° and 2° inside CartDecomposition must match 1° and 2°
 	// of grid_dist_id)
 	//
 	// Constructor parameters:
@@ -65,7 +70,7 @@ int main(int argc, char* argv[])
 	// * domain: where the grid is defined
 	// * g: ghost extension
 	//
-	grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>> g_dist(sz,domain,g);
+	grid_dist_id<3, float, grid_point, CartDecomposition<3,float>> g_dist(sz,domain,g);
 
 	// ### WIKI 5 ###
 	//
@@ -80,8 +85,8 @@ int main(int argc, char* argv[])
 		//
 		// ### WIKI 6 ###
 		//
-		// Get the local grid key, the local grid key store internaly the sub-domain id (each sub-domain contain a grid)
-		// and the local grid point id identified by 2 integers in 2D 3 integer in 3D and so on. These two dinstinc element are
+		// Get the local grid key, the local grid key store internally the sub-domain id (each sub-domain contain a grid)
+		// and the local grid point id identified by 2 integers in 2D 3 integer in 3D and so on. These two distinct elements are
 		// available with key.getSub() and key.getKey()
 		//
 		auto key = dom.get();
@@ -126,20 +131,20 @@ int main(int argc, char* argv[])
 	// Get again another iterator, iterate across all the domain points, calculating a Laplace stencil
 	//
 	//
-	dom = g_dist.getDomainIterator();
+	auto dom2 = g_dist.getDomainIterator();
 	
-	while (dom.isNext())
+	while (dom2.isNext())
 	{
-		auto key = dom.get();
+		auto key = dom2.get();
 
 		// Laplace stencil
-		g_dist.template get<0>(key)[1] = g_dist.template get<0>(key.move(x,1))[0] + g_dist.template get<0>(key.move(x,-1))[0] +
-		                                 g_dist.template get<0>(key.move(y,1))[0] + g_dist.template get<0>(key.move(y,-1))[0] +
-										 g_dist.template get<0>(key.move(z,1))[0] + g_dist.template get<0>(key.move(z,-1))[0] -
-										 6*g_dist.template get<0>(key)[0];
+		g_dist.template get<B>(key)[1] = g_dist.template get<A>(key.move(x,1))[0] + g_dist.template get<A>(key.move(x,-1))[0] +
+		                                 g_dist.template get<A>(key.move(y,1))[0] + g_dist.template get<A>(key.move(y,-1))[0] +
+										 g_dist.template get<A>(key.move(z,1))[0] + g_dist.template get<A>(key.move(z,-1))[0] -
+										 6*g_dist.template get<A>(key)[0];
 		                    
 
-		++dom;
+		++dom2;
 	}
 
 	//
diff --git a/example/Numerics/Makefile b/example/Numerics/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..63062d5be263b695c74fa60c6542aeed2fbeda88
--- /dev/null
+++ b/example/Numerics/Makefile
@@ -0,0 +1,11 @@
+SUBDIRS := $(wildcard */.)
+
+all clean:
+	for dir in $(SUBDIRS); do \
+          $(MAKE) -C $$dir $@; \
+        done
+
+clean: $(SUBDIRS)
+
+.PHONY: all clean $(SUBDIRS)
+
diff --git a/example/Numerics/PSE/0_Derivative_approx_1D/Makefile b/example/Numerics/PSE/0_Derivative_approx_1D/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..78de2f17f6641795c738b0bbdf3bb3f242aeecc7
--- /dev/null
+++ b/example/Numerics/PSE/0_Derivative_approx_1D/Makefile
@@ -0,0 +1,21 @@
+include ../../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+pse_1d: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: pse_1d
+
+.PHONY: clean all
+
+clean:
+	rm -f *.o *~ core pse_1d
+
diff --git a/example/Numerics/PSE/0_Derivative_approx_1D/main.cpp b/example/Numerics/PSE/0_Derivative_approx_1D/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8db45fa2a813aa69f0f69d4c7ff0c84f67e14506
--- /dev/null
+++ b/example/Numerics/PSE/0_Derivative_approx_1D/main.cpp
@@ -0,0 +1,309 @@
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Simple example
+ *
+ * In this example we show 1D PSE derivative function approximation
+ *
+ * ### WIKI END ###
+ *
+ */
+
+#include "Vector/vector_dist.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "PSE/Kernels.hpp"
+#include "data_type/aggregate.hpp"
+#include <cmath>
+
+
+/*
+ * ### WIKI 2 ###
+ *
+ * Here we define the function x*e^(-x*x) and its
+ * second derivative in analytic form
+ *
+ * 2x*(2*x-3)*e^(-x^2)
+ *
+ */
+
+double f_xex2(double x)
+{
+	return x*exp(-x*x);
+}
+
+double f_xex2(Point<1,double> & x)
+{
+	return x.get(0)*exp(-x.get(0)*x.get(0));
+}
+
+double Lapf_xex2(Point<1,double> & x)
+{
+	return 2.0*x.get(0)*(2.0*x.get(0)*x.get(0) - 3.0)*exp(-x.get(0)*x.get(0));
+}
+
+/*
+ *
+ * ### WIKI END ###
+ *
+ */
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 3 ###
+	//
+	// Some useful parameters. Like
+	//
+	// * Number of particles
+	// * Minimum number of padding particles
+	// * The computational domain
+	// * The spacing
+	// * The mollification length
+	// * Second order Laplacian kernel in 1D
+	//
+
+	// Number of particles
+	const size_t Npart = 125;
+
+	// Number of padding particles (At least)
+	const size_t Npad = 40;
+
+	// The domain
+	Box<1,double> box({0.0},{4.0});
+
+	// Calculated spacing
+	double spacing = box.getHigh(0) / Npart;
+
+	// Epsilon of the particle kernel
+	const double eps = 2*spacing;
+
+	// Laplacian PSE kernel 1 dimension, on double, second order
+	Lap<1,double,2> lker(eps);
+
+	//
+	// ### WIKI 2 ###
+	//
+	// Here we Initialize the library and we define Ghost size
+	// and non-periodic boundary conditions
+	//
+	init_global_v_cluster(&argc,&argv);
+	Vcluster & v_cl = *global_v_cluster;
+
+    size_t bc[1]={NON_PERIODIC};
+	Ghost<1,double> g(12*eps);
+
+	//
+	// ### WIKI 3 ###
+	//
+	// Here we are creating a distributed vector defined by the following parameters
+	//
+	// we create a set of N+1 particles to have a fully covered domain of particles between 0.0 and 4.0
+	// Suppose we have a spacing given by 1.0 you need 4 +1 particles to cover your domain
+	//
+	vector_dist<1,double, aggregate<double>, CartDecomposition<1,double> > vd(Npart+1,box,bc,g);
+
+	//
+	// ### WIKI 4 ###
+	//
+	// We assign the position to the particles, the scalar property is set to
+	// the function x*e^(-x*x) value.
+	// Each processor has parts of the particles and fill part of the space, the
+	// position is assigned independently from the decomposition.
+	//
+	// In this case, if there are 1001 particles and 3 processors the in the
+	// domain from 0.0 to 4.0
+	//
+	// * processor 0 place particles from 0.0 to 1.332 (334 particles)
+	// * processor 1 place particles from 1.336 to 2.668 (334 particles)
+	// * processor 2 place particles from 2.672 to 4.0 (333 particles)
+	//
+
+	// It return how many particles the processors with id < rank has in total
+	size_t base = vd.init_size_accum(Npart+1);
+	auto it2 = vd.getIterator();
+
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		// set the position of the particles
+		vd.template getPos<0>(key)[0] = (key.getKey() + base) * spacing;
+		//set the property of the particles
+		vd.template getProp<0>(key) = f_xex2((key.getKey() + base) * spacing);
+
+		++it2;
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// Once defined the position, we distribute them across the processors
+	// following the decomposition, finally we get the ghost part
+	//
+	vd.map();
+	vd.ghost_get<0>();
+
+	//
+	// ### WIKI 6 ###
+	//
+	// near the boundary we have two options, or we use one sided kernels,
+	// or we add additional particles, it is required that such particles
+	// produces a 2 time differentiable function. In order to obtain such
+	// result we extend for x < 0.0 and x > 4.0 with the test function xe^(-x*x).
+	//
+	// Note that for x < 0.0 such extension is equivalent to mirror the
+	// particles changing the sign of the strength
+	//
+	// \verbatim
+	//
+	// 0.6  -0.5      0.5  0.6   Strength
+	//  +----+----*----*----*-
+	//          0.0              Position
+	//
+	//  with * = real particle
+	//       + = mirror particle
+	//
+	// \endverbatim
+	//
+	//
+	Box<1,double> m_pad({0.0},{0.1});
+	Box<1,double> m_pad2({3.9},{4.0});
+	double enlarge = 0.1;
+
+	// Create a box
+	if (Npad * spacing > 0.1)
+	{
+		m_pad.setHigh(0,Npad * spacing);
+		m_pad2.setLow(0,4.0 - Npad*spacing);
+		enlarge = Npad * spacing;
+	}
+
+	auto it = vd.getDomainIterator();
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+
+		// set the position of the particles
+		if (m_pad.isInsideNB(vd.template getPos<0>(key)) == true)
+		{
+			vd.add();
+			vd.template getLastPos<0>()[0] = - vd.template getPos<0>(key)[0];
+			vd.template getLastProp<0>() = - vd.template getProp<0>(key);
+		}
+
+		// set the position of the particles
+		if (m_pad2.isInsideNB(vd.template getPos<0>(key)) == true)
+		{
+			vd.add();
+			vd.template getLastPos<0>()[0] = 2.0 * box.getHigh(0) - vd.template getPos<0>(key)[0];
+			vd.template getLastProp<0>() = f_xex2(vd.template getLastPos<0>()[0]);
+		}
+
+		++it;
+	}
+
+	//
+	// ### WIKI 6 ###
+	//
+	// We create a CellList with cell spacing 12 sigma
+	//
+
+    // get and construct the Cell list
+
+	Ghost<1,double> gp(enlarge);
+    auto cl = vd.getCellList(12*eps,gp);
+
+    // Maximum infinity norm
+    double linf = 0.0;
+
+	//
+	// ### WIKI 6 ###
+	//
+    // For each particle get the neighborhood of each particle
+    //
+    // This cycle is literally the formula from PSE operator approximation
+	//
+    //
+    //
+    //
+
+    auto it_p = vd.getDomainIterator();
+    while (it_p.isNext())
+    {
+    	// double PSE integration accumulator
+    	double pse = 0;
+
+    	// key
+    	vect_dist_key_dx key = it_p.get();
+
+    	// Get the position of the particles
+    	Point<1,double> p = vd.template getPos<0>(key);
+
+    	// We are not interested in calculating out the domain
+    	// note added padding particle are considered domain particles
+    	if (p.get(0) < 0.0 || p.get(0) >= 4.0)
+    	{
+    		++it_p;
+    		continue;
+    	}
+
+    	// Get f(x) at the position of the particle
+    	double prp_x = vd.template getProp<0>(key);
+
+    	// Get the neighborhood of the particles
+    	auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p));
+    	while(NN.isNext())
+    	{
+    		auto nnp = NN.get();
+
+    		// Calculate contribution given by the kernel value at position p,
+    		// given by the Near particle
+    		if (nnp != key.getKey())
+    		{
+    			// W(x-y)
+    			double ker = lker.value(p,vd.template getPos<0>(nnp));
+
+    			// f(y)
+    			double prp_y = vd.template getProp<0>(nnp);
+
+    			// 1.0/(eps)^2 [f(y)-f(x)]*W(x,y)*V_q
+    			double prp = 1.0/eps/eps * (prp_y - prp_x) * spacing;
+    			pse += prp * ker;
+    		}
+
+    		// Next particle
+    		++NN;
+    	}
+
+    	// Here we calculate the L_infinity norm or the maximum difference
+    	// of the analytic solution from the PSE calculated
+
+    	double sol = Lapf_xex2(p);
+
+    	if (fabs(pse - sol) > linf)
+    		linf = fabs(pse - sol);
+
+    	++it_p;
+    }
+
+	//
+	// ### WIKI 7 ###
+	//
+    // Calculate the maximum infinity norm across processors and
+    // print it
+    //
+
+    v_cl.max(linf);
+    v_cl.execute();
+
+    if (v_cl.getProcessUnitID() == 0)
+    	std::cout << "Norm infinity: " << linf << "\n";
+
+	//
+	// ### WIKI 8 ###
+	//
+	// Deinitialize the library
+	//
+	delete_global_v_cluster();
+}
diff --git a/example/Numerics/PSE/Makefile b/example/Numerics/PSE/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..63062d5be263b695c74fa60c6542aeed2fbeda88
--- /dev/null
+++ b/example/Numerics/PSE/Makefile
@@ -0,0 +1,11 @@
+SUBDIRS := $(wildcard */.)
+
+all clean:
+	for dir in $(SUBDIRS); do \
+          $(MAKE) -C $$dir $@; \
+        done
+
+clean: $(SUBDIRS)
+
+.PHONY: all clean $(SUBDIRS)
+
diff --git a/example/SE/0_classes/Makefile b/example/SE/0_classes/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..3cd88b493e5e1adc65ccc44010a4aee0c9036fab
--- /dev/null
+++ b/example/SE/0_classes/Makefile
@@ -0,0 +1,21 @@
+include ../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O3 -g3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+se_classes: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: se_classes
+
+.PHONY: clean all
+
+clean:
+	rm -f *.o *~ core se_classes
+
diff --git a/example/SE/0_classes/config.cfg b/example/SE/0_classes/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880
--- /dev/null
+++ b/example/SE/0_classes/config.cfg
@@ -0,0 +1,2 @@
+[pack]
+files = main.cpp Makefile
diff --git a/example/SE/0_classes/main.cpp b/example/SE/0_classes/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..640c5572977977188f9064a53867a8508ae98539
--- /dev/null
+++ b/example/SE/0_classes/main.cpp
@@ -0,0 +1,211 @@
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Security enhancement example
+ *
+ * This example shows several basic functionalities of the Security Enhancements
+ *
+ */
+
+#define SE_CLASS1
+#define SE_CLASS2
+#define SE_CLASS3
+#define THROW_ON_ERROR
+#include "Memleak_check.hpp"
+#include "Vector/vector_dist.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "Point_test.hpp"
+
+/*
+ * ### WIKI END ###
+ */
+
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 2 ###
+	//
+	// With print_alloc we can check how much memory has been allocated and which structures
+	// have been allocated; initially there is none
+	//
+
+	std::cout << "Allocated memory before initializing \n";
+    print_alloc();
+    std::cout << "\n";
+    std::cout << "\n";
+    std::cout << "\n";
+
+	//
+	// ### WIKI 3 ###
+	//
+	// Here we initialize the library, then we create a uniform random generator between 0 and 1 to generate particles
+	// randomly in the domain, we create a Box that define our domain, boundary conditions and ghost
+	//
+	init_global_v_cluster(&argc,&argv);
+	Vcluster & v_cl = *global_v_cluster;
+	
+	typedef Point<2,float> s;
+
+	Box<2,float> box({0.0,0.0},{1.0,1.0});
+        size_t bc[2]={NON_PERIODIC,NON_PERIODIC};
+	Ghost<2,float> g(0.01);
+
+	//
+	// ### WIKI 4 ###
+	//
+	// Here we ask again for the used memory, as we can see Vcluster and several other structures encapsulated inside
+	// Vcluster register themselves
+	//
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Allocated memory after initialization \n";
+		print_alloc();
+		std::cout << "\n";
+		std::cout << "\n";
+		std::cout << "\n";
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// Here we are creating a distributed vector defined by the following parameters
+	// 
+	// * Dimensionality of the space where the objects live 2D (1° template parameters)
+	// * Type of the space, float (2° template parameters)
+	// * Information stored by each object (3* template parameters), in this case a Point_test store 4 scalars
+	//   1 vector and an asymmetric tensor of rank 2
+	// * Strategy used to decompose the space
+	// 
+	// The Constructor instead require:
+	//
+	// * Number of particles 4096 in this case
+	// * Domain where is defined this structure
+	//
+	// The following construct a vector where each processor has 4096 / N_proc (N_proc = number of processor)
+	// objects with an undefined position in space. This non-space decomposition is also called data-driven
+	// decomposition
+	//
+	{
+		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(4096,box,bc,g);
+
+		//
+		// ### WIKI 6 ###
+		//
+		// we create a key (5048) that for sure overflows the local data structure,
+		// try and catch are optional in case you want to recover from a buffer overflow
+		//
+		try
+        {
+			vect_dist_key_dx vt(5048);
+			auto it = vd.getPos<0>(vt);
+        }
+		catch (size_t e)
+		{
+			std::cerr << "Error notification of overflow \n";
+		}
+	}
+	//
+	// ### WIKI 7 ###
+	//
+	// At this point the vector went out of scope and is destroyed;
+	// we create, now two of them using new
+	//
+
+	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > * vd1 = new vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> >(4096,box,bc,g);
+	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > * vd2 = new vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> >(4096,box,bc,g);
+
+	//
+	// ### WIKI 8 ###
+	//
+	// we can check that these two structure produce an explicit allocation checking
+	// for registered pointers and structures with print_alloc, in the list we see 2 additional
+	// entry for distributed vector in yellow, pdata to work use the data structures that register
+	// itself in magenta, the same things happen for the real memory allocation from devices in
+	// fully green
+	//
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Allocated memory with 2 vectors \n";
+		print_alloc();
+		std::cout << "\n";
+		std::cout << "\n";
+		std::cout << "\n";
+	}
+
+	//
+	// ### WIKI 9 ###
+	//
+	// we can also ask the structures to identify themselves in the list
+	//
+
+    std::cout << "Vector id: " << vd1->who() << "\n";
+    std::cout << "Vector id: " << vd2->who() << "\n";
+
+	//
+	// ### WIKI 10 ###
+	//
+	// delete vd1 and print allocated memory, one distributed vector disappear
+	//
+
+	delete vd1;
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Allocated memory with 1 vector \n";
+		print_alloc();
+    	std::cout << "\n";
+    	std::cout << "\n";
+    	std::cout << "\n";
+	}
+
+	//
+	// ### WIKI 11 ###
+	//
+	// delete vd2 and print allocated memory, all distributed vector de-register
+	//
+
+	delete vd2;
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Allocated memory with 0 vectors \n";
+		print_alloc();
+		std::cout << "\n";
+		std::cout << "\n";
+		std::cout << "\n";
+	}
+
+	//
+	// ### WIKI 12 ###
+	//
+	// Try to use a deleted object
+	//
+	try
+    {
+		vect_dist_key_dx vt(0);
+		auto it = vd1->getPos<0>(vt);
+    }
+	catch (size_t e)
+	{
+		std::cerr << "Error notification of invalid usage of deleted object \n";
+	}
+
+	//
+	// ### WIKI 13 ###
+	//
+	// Deinitialize the library
+	//
+	delete_global_v_cluster();
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Allocated memory at the end \n";
+		print_alloc();
+		std::cout << "\n";
+		std::cout << "\n";
+		std::cout << "\n";
+	}
+}
+
diff --git a/example/SE/1_classes/Makefile b/example/SE/1_classes/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..172ffbd665222f8f35a17e1b68d2d628a8f07208
--- /dev/null
+++ b/example/SE/1_classes/Makefile
@@ -0,0 +1,21 @@
+include ../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O0 -g3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+se_classes: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: se_classes
+
+.PHONY: clean all
+
+clean:
+	rm -f *.o *~ core se_classes
+
diff --git a/example/SE/1_classes/config.cfg b/example/SE/1_classes/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880
--- /dev/null
+++ b/example/SE/1_classes/config.cfg
@@ -0,0 +1,2 @@
+[pack]
+files = main.cpp Makefile
diff --git a/example/SE/1_classes/main.cpp b/example/SE/1_classes/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..13721c7f9aa089213da0b01a647cfd67f79dc36b
--- /dev/null
+++ b/example/SE/1_classes/main.cpp
@@ -0,0 +1,109 @@
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Security enhancement example
+ *
+ * This example shows how to see where an allocation or corruption happens, offline and online.
+ * Every time an error occur, the library output where the detection happen filename and line,
+ *  in order to debug, there is an online option and an offline option
+ *
+ *  * online: put a breakpoint on the indicated line with your preferred debugger
+ *  * offline: set ulimit -c unlimited to activate the core dump file and open the core dump with your debugger
+ *
+ */
+
+#define SE_CLASS1
+#define SE_CLASS2
+#define SE_CLASS3
+#define THROW_ON_ERROR
+#include "Memleak_check.hpp"
+#include "data_type/scalar.hpp"
+#include "Grid/grid_dist_id.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "Point_test.hpp"
+
+/*
+ * ### WIKI END ###
+ */
+
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 2 ###
+	//
+	// Here we Initialize the library,
+	// * message_on_allocation set a message to print when one allocation is reached, the filename and line number can be used to set a breakpoint and analyze the stacktrace.
+	// * throw_on_allocation throw when one allocation is reached, producing the termination of the program and a core dump (if no try catch is set-up)
+	//
+	init_global_v_cluster(&argc,&argv);
+	Vcluster & v_cl = *global_v_cluster;
+
+	throw_on_alloc(10);
+	// message_on_alloc(10);
+
+	//
+	// ### WIKI 3 ###
+	//
+	// Create several object needed later, in particular
+	// * A 3D box that define the domain
+	// * an array of 3 unsigned integer that define the size of the grid on each dimension
+	// * A Ghost object that will define the extension of the ghost part for each sub-domain in physical units
+	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
+	size_t sz[3];
+	sz[0] = 100;
+	sz[1] = 100;
+	sz[2] = 100;
+	
+	// Ghost
+	Ghost<3,float> g(0.01);
+	
+	//
+	// ### WIKI 4 ###
+	//
+	// Create a distributed grid in 3D (1° template parameter) defined in R^3 with float precision (2° template parameter)
+	// using a CartesianDecomposition strategy (3° parameter) (the parameter 1° and 2° inside CartDecomposition must match 1° and 2°
+	// of grid_dist_id)
+	//
+	// Constructor parameters:
+	//
+	// * sz: size of the grid on each dimension
+	// * domain: where the grid is defined
+	// * g: ghost extension
+	//
+	//
+	grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>> * g_dist = new grid_dist_id<3, float, scalar<float[3]>, CartDecomposition<3,float>>(sz,domain,g);
+
+	//
+	// ### WIKI 6 ###
+	//
+	// print allocated structures
+	//
+
+	if (v_cl.getProcessUnitID() == 0)
+		print_alloc();
+
+	//
+	// ### WIKI 5 ###
+	//
+	// delete g_dist
+	//
+
+	delete g_dist;
+
+	//
+	// ### WIKI 6 ###
+	//
+	// On purpose we try to access a deleted object
+	//
+
+	g_dist->getGridInfo();
+
+	//
+	// ### WIKI 13 ###
+	//
+	// Deinitialize the library
+	//
+	delete_global_v_cluster();
+}
+
diff --git a/example/SE/Makefile b/example/SE/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..63062d5be263b695c74fa60c6542aeed2fbeda88
--- /dev/null
+++ b/example/SE/Makefile
@@ -0,0 +1,11 @@
+SUBDIRS := $(wildcard */.)
+
+all clean:
+	for dir in $(SUBDIRS); do \
+          $(MAKE) -C $$dir $@; \
+        done
+
+clean: $(SUBDIRS)
+
+.PHONY: all clean $(SUBDIRS)
+
diff --git a/example/VCluster/0_simple/Makefile b/example/VCluster/0_simple/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..95471100101e8ab43a96f7f5eccdff1fd37849bd
--- /dev/null
+++ b/example/VCluster/0_simple/Makefile
@@ -0,0 +1,21 @@
+include ../../example.mk
+
+CC=mpic++
+
+LDIR =
+
+OBJ = main.o
+
+%.o: %.cpp
+	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
+
+vcluster: $(OBJ)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
+
+all: vcluster
+
+.PHONY: clean all
+
+clean:
+	rm -f *.o *~ core vcluster
+
diff --git a/example/VCluster/0_simple/config.cfg b/example/VCluster/0_simple/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880
--- /dev/null
+++ b/example/VCluster/0_simple/config.cfg
@@ -0,0 +1,2 @@
+[pack]
+files = main.cpp Makefile
diff --git a/example/VCluster/0_simple/main.cpp b/example/VCluster/0_simple/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fe3892b1d28f018ffa327cc6c63245db28bd21fd
--- /dev/null
+++ b/example/VCluster/0_simple/main.cpp
@@ -0,0 +1,177 @@
+#include "Grid/grid_dist_id.hpp"
+#include "data_type/aggregate.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "VCluster.hpp"
+
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Simple example
+ * 
+ * This example shows several basic functionalities of VCluster
+ * 
+ * ### WIKI END ###
+ * 
+ */
+
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 2 ###
+	//
+	// Initialize the library and several objects 
+	//
+	init_global_v_cluster(&argc,&argv);
+	
+	//
+	// ### WIKI 3 ###
+	//
+	// Get the vcluster object and the number of processor
+	//
+
+	Vcluster & v_cl = *global_v_cluster;
+	size_t N_prc = v_cl.getProcessingUnits();
+
+	//
+	// ### WIKI 3 ###
+	//
+	// We find the maximum of the processors rank, that should be the Number of
+	// processors minus one; only processor 0 prints on the terminal
+	//
+
+	size_t id = v_cl.getProcessUnitID();
+
+	v_cl.max(id);
+	v_cl.execute();
+	if (v_cl.getProcessUnitID() == 0)
+		std::cout << "Maximum processor rank: " << id << "\n";
+
+	//
+	// ### WIKI 4 ###
+	//
+	// We sum all the processor ranks; the result should be
+	// $\frac{(n-1)n}{2}$, only processor 0 prints on the terminal
+	//
+
+	size_t id2 = v_cl.getProcessUnitID();
+
+	v_cl.sum(id2);
+	v_cl.execute();
+	if (v_cl.getProcessUnitID() == 0)
+		std::cout << "Sum of all processors rank: " << id2 << "\n";
+
+	//
+	// ### WIKI 5 ###
+	//
+	// we can collect information from all processors using the function gather
+	//
+
+	size_t id3 = v_cl.getProcessUnitID();
+	openfpm::vector<size_t> v;
+	
+	v_cl.allGather(id3,v);
+	v_cl.execute();
+	
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Collected ids: ";
+		for(size_t i = 0 ; i < v.size() ; i++)
+			std::cout << " " << v.get(i) << " ";
+
+		std::cout << "\n";
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// we can also send messages to specific processors, with the condition that the receiving
+	// processors know we want to communicate with them, if you are searching for a more
+	// flexible way to communicate, where the receiving processors do not know which processors
+	// want to communicate with them, see the example 1_dsde
+	//
+
+	std::stringstream ss_message_1;
+	std::stringstream ss_message_2;
+	ss_message_1 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
+	ss_message_2 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
+	std::string message_1 = ss_message_1.str();
+	std::string message_2 = ss_message_2.str();
+	size_t msg_size = message_1.size();
+	
+	// Processor 0 send to processors 1,2 , 1 to 2,1, 2 to 0,1
+
+	v_cl.send(((id3+1)%N_prc + N_prc)%N_prc,0,message_1.c_str(),msg_size);
+	v_cl.send(((id3+2)%N_prc + N_prc)%N_prc,0,message_2.c_str(),msg_size);
+
+	openfpm::vector<char> v_one;
+	v_one.resize(msg_size);
+	openfpm::vector<char> v_two(msg_size);
+	v_two.resize(msg_size);
+
+	v_cl.recv(((id3-1)%N_prc + N_prc)%N_prc,0,(void *)v_one.getPointer(),msg_size);
+	v_cl.recv(((id3-2)%N_prc + N_prc)%N_prc,0,(void *)v_two.getPointer(),msg_size);
+	v_cl.execute();
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		for (size_t i = 0 ; i < msg_size ; i++)
+			std::cout << v_one.get(i);
+
+		for (size_t i = 0 ; i < msg_size ; i++)
+			std::cout << v_two.get(i);
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// we can also do what we did before in one shot
+	//
+
+	id = v_cl.getProcessUnitID();
+	id2 = v_cl.getProcessUnitID();
+	id3 = v_cl.getProcessUnitID();
+	v.clear();
+
+	// convert the string into a vector
+
+	openfpm::vector<char> message_1_v(msg_size);
+	openfpm::vector<char> message_2_v(msg_size);
+
+	for (size_t i = 0 ; i < msg_size ; i++)
+		message_1_v.get(i) = message_1[i];
+
+	for (size_t i = 0 ; i < msg_size ; i++)
+		message_2_v.get(i) = message_2[i];
+
+	v_cl.max(id);
+	v_cl.sum(id2);
+	v_cl.allGather(id3,v);
+
+	// in the case of vector we have special functions that avoid to specify the size
+	v_cl.send(((id+1)%N_prc + N_prc)%N_prc,0,message_1_v);
+	v_cl.send(((id+2)%N_prc + N_prc)%N_prc,0,message_2_v);
+	v_cl.recv(((id-1)%N_prc + N_prc)%N_prc,0,v_one);
+	v_cl.recv(((id-2)%N_prc + N_prc)%N_prc,0,v_two);
+	v_cl.execute();
+
+	if (v_cl.getProcessUnitID() == 0)
+	{
+		std::cout << "Maximum processor rank: " << id << "\n";
+		std::cout << "Sum of all processors rank: " << id2 << "\n";
+
+		std::cout << "Collected ids: ";
+		for(size_t i = 0 ; i < v.size() ; i++)
+			std::cout << " " << v.get(i) << " ";
+
+		std::cout << "\n";
+
+		for (size_t i = 0 ; i < msg_size ; i++)
+			std::cout << v_one.get(i);
+
+		for (size_t i = 0 ; i < msg_size ; i++)
+			std::cout << v_two.get(i);
+	}
+
+	delete_global_v_cluster();
+}
diff --git a/example/VCluster/Makefile b/example/VCluster/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..63062d5be263b695c74fa60c6542aeed2fbeda88
--- /dev/null
+++ b/example/VCluster/Makefile
@@ -0,0 +1,11 @@
+SUBDIRS := $(wildcard */.)
+
+all clean:
+	for dir in $(SUBDIRS); do \
+          $(MAKE) -C $$dir $@; \
+        done
+
+clean: $(SUBDIRS)
+
+.PHONY: all clean $(SUBDIRS)
+
diff --git a/example/Vector/0_simple/Makefile b/example/Vector/0_simple/Makefile
index 4ee13178b1637208b40e15f5b1aa49822b310fc6..7fc11a405e05a10a05ca04ff48b82fdb9637fbc8 100644
--- a/example/Vector/0_simple/Makefile
+++ b/example/Vector/0_simple/Makefile
@@ -9,13 +9,13 @@ OBJ = main.o
 %.o: %.cpp
 	$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
-vector: $(OBJ)
+vect: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
 
-all: vector
+all: vect
 
 .PHONY: clean all
 
 clean:
-	rm -f *.o *~ core vector
+	rm -f *.o *~ core vect
 
diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp
index 8fb81a1c9f96ab275babd98bc622ea227372537a..288e104208ea1f9fc063ddc195bfd9f58a4dbade 100644
--- a/example/Vector/0_simple/main.cpp
+++ b/example/Vector/0_simple/main.cpp
@@ -1,6 +1,5 @@
 #include "Vector/vector_dist.hpp"
 #include "Decomposition/CartDecomposition.hpp"
-#include "Point_test.hpp"
 
 /*
  * ### WIKI 1 ###
@@ -13,6 +12,29 @@
  * 
  */
 
+/*
+ * ### WIKI 2 ###
+ *
+ * We define a particle structure it contain 4 scalars one vector with 3 components
+ * and a tensor of rank 2 3x3
+ *
+ * ### WIKI END ###
+ *
+ */
+
+template<typename T> class Particle
+{
+public:
+
+	typedef boost::fusion::vector<T,T[3],T[3][3]> type;
+
+	type data;
+
+	static const unsigned int s = 0;
+	static const unsigned int v = 1;
+	static const unsigned int t = 2;
+	static const unsigned int max_prop = 3;
+};
 
 int main(int argc, char* argv[])
 {
@@ -20,7 +42,7 @@ int main(int argc, char* argv[])
 	// ### WIKI 2 ###
 	//
 	// Here we Initialize the library, than we create a uniform random generator between 0 and 1 to to generate particles
-	// randomly in the domain, we create a Box that define our domain
+	// randomly in the domain, we create a Box that define our domain, boundary conditions, and ghost
 	//
 	init_global_v_cluster(&argc,&argv);
 	Vcluster & v_cl = *global_v_cluster;
@@ -33,7 +55,9 @@ int main(int argc, char* argv[])
 	std::default_random_engine eg;
 	std::uniform_real_distribution<float> ud(0.0f, 1.0f);
 
-	Box<2,float> box({0.0,0.0},{1.0,1.0});
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+    size_t bc[2]={PERIODIC,PERIODIC};
+	Ghost<2,float> g(0.01);
 	
 	//
 	// ### WIKI 3 ###
@@ -55,12 +79,12 @@ int main(int argc, char* argv[])
 	// objects with an undefined position in space. This non-space decomposition is also called data-driven
 	// decomposition
 	//
-	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(4096,box);
+	vector_dist<2,float, Particle<float>, CartDecomposition<2,float> > vd(4096,domain,bc,g);
 
 	//
 	// ### WIKI 5 ###
 	//
-	// Get an iterator that go throught the objects, in an undefined position state and define its position
+	// Get an iterator that go through the particles, in an undefined position state and define its position
 	//
 	auto it = vd.getIterator();
 
@@ -97,10 +121,29 @@ int main(int argc, char* argv[])
 	{
 		auto key = it.get();
 
-		
-		if (ct.isLocal(vd.template getPos<s::x>(key)) == false)
+		// The template parameter is not useful here and will probably disappear
+		if (ct.isLocal(vd.template getPos<0>(key)) == false)
 			std::cerr << "Error particle is not local" << "\n";
 
+		// set the all the properties to 0.0
+
+		// scalar
+		vd.template getProp<0>(key) = 0.0;
+
+		vd.template getProp<1>(key)[0] = 0.0;
+		vd.template getProp<1>(key)[1] = 0.0;
+		vd.template getProp<1>(key)[2] = 0.0;
+
+		vd.template getProp<2>(key)[0][0] = 0.0;
+		vd.template getProp<2>(key)[0][1] = 0.0;
+		vd.template getProp<2>(key)[0][2] = 0.0;
+		vd.template getProp<2>(key)[1][0] = 0.0;
+		vd.template getProp<2>(key)[1][1] = 0.0;
+		vd.template getProp<2>(key)[1][2] = 0.0;
+		vd.template getProp<2>(key)[2][0] = 0.0;
+		vd.template getProp<2>(key)[2][1] = 0.0;
+		vd.template getProp<2>(key)[2][2] = 0.0;
+
 		cnt++;
 
 		++it;
diff --git a/example/Vector/1_celllist/config.cfg b/example/Vector/1_celllist/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880
--- /dev/null
+++ b/example/Vector/1_celllist/config.cfg
@@ -0,0 +1,2 @@
+[pack]
+files = main.cpp Makefile
diff --git a/example/Vector/1_celllist/main.cpp b/example/Vector/1_celllist/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f49e9aabd6879588807462398d3f9a5d3c902a67
--- /dev/null
+++ b/example/Vector/1_celllist/main.cpp
@@ -0,0 +1,181 @@
+
+#include "Vector/vector_dist.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "data_type/aggregate.hpp"
+
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Simple example
+ *
+ * This example shows cell lists for the distributed vector
+ *
+ * ### WIKI END ###
+ *
+ */
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 2 ###
+	//
+	// Here we Initialize the library, we create a Box that define our domain, boundary conditions, ghost
+	// and the grid size
+	//
+	init_global_v_cluster(&argc,&argv);
+	Vcluster & v_cl = *global_v_cluster;
+
+	// we create a 128x128x128 Grid iterator
+	size_t sz[3] = {128,128,128};
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+	// ghost, big enough to contain the interaction radius
+	Ghost<3,float> ghost(1.0/(128-2));
+
+	//
+	// ### WIKI 3 ###
+	//
+	// Here we define a distributed vector in 3D, containing 3 properties, a
+	// scalar double, a vector double[3], and a tensor of rank 2 double[3][3].
+	// In this case the vector contain 0 particles in total
+	//
+	vector_dist<3,float, aggregate<double,double[3],double[3][3]>, CartDecomposition<3,float> > vd(0,box,bc,ghost);
+
+	//
+	// ### WIKI 4 ###
+	//
+	// We define a grid iterator, to create particles on a grid like way.
+	// An important note is that the grid iterator iterates only on the
+	// local nodes for each processor for example suppose to have a domain like
+	// the one in figure
+	//
+	//   +---------+
+	//   |* * *|* *|
+	//   |  2  |   |
+	//   |* * *|* *|
+	//   |   ---   |
+	//   |* *|* * *|
+	//   |   |     |
+	//   |* *|* * *|
+	//   |   |  1  |
+	//   |* *|* * *|
+	//   +---------+
+	//
+	// divided in 2 processors, the processor 1 will iterate only on the points
+	// inside the portion of space marked with one. Note that the grid iterator follows the
+	// boundary condition specified in the vector. For a periodic 2D 5x5 grid we have
+	//
+	//   +---------+
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   *-*-*-*-*-+
+	//
+	// Because the right border is equivalent to the left border, while for a non periodic we have the
+	// following distribution of points
+	//
+	//   *-*-*-*-*
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   *-*-*-*-*
+	//
+	// So in this loop each processor will place particles on a grid
+	//
+	auto it = vd.getGridIterator(sz);
+
+	while (it.isNext())
+	{
+		vd.add();
+
+		auto key = it.get();
+
+		vd.template getLastPos<0>()[0] = key.get(0) * it.getSpacing(0);
+		vd.template getLastPos<0>()[1] = key.get(1) * it.getSpacing(1);
+		vd.template getLastPos<0>()[2] = key.get(2) * it.getSpacing(2);
+
+		++it;
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// we synchronize the ghost, the scalar property, the vector, and the rank 2 tensor
+	// (just for fun)
+	vd.ghost_get<0,1,2>();
+
+	//
+	// ### WIKI 6 ###
+	//
+	// Here we create a cell list, with a cell size equal to the interaction
+	// radius; it is used below to iterate efficiently over the neighborhood
+	// of each particle
+	//
+	float r_cut = 1.0/(128-2);
+	auto NN = vd.getCellList(r_cut);
+
+	auto it2 = vd.getDomainIterator();
+
+	while (it2.isNext())
+	{
+		auto p = it2.get();
+
+		Point<3,float> xp = vd.getPos<0>(p);
+
+		auto Np = NN.getIterator(NN.getCell(vd.getPos<0>(p)));
+
+		while (Np.isNext())
+		{
+			auto q = Np.get();
+
+			// repulsive
+
+			Point<3,float> xq = vd.getPos<0>(q);
+			Point<3,float> f = (xp - xq);
+
+			// we sum the distance of all the particles
+			vd.template getProp<0>(p) += f.norm();;
+
+			// we sum the distance of all the particles
+			vd.template getProp<1>(p)[0] += f.get(0);
+			vd.template getProp<1>(p)[1] += f.get(0);
+			vd.template getProp<1>(p.getKey())[2] += f.get(0);
+
+			vd.template getProp<2>(p)[0][0] += xp.get(0) - xq.get(0);
+			vd.template getProp<2>(p)[0][1] += xp.get(0) - xq.get(1);
+			vd.template getProp<2>(p)[0][2] += xp.get(0) - xq.get(2);
+			vd.template getProp<2>(p)[1][0] += xp.get(1) - xq.get(0);
+			vd.template getProp<2>(p)[1][1] += xp.get(1) - xq.get(1);
+			vd.template getProp<2>(p)[1][2] += xp.get(1) - xq.get(2);
+			vd.template getProp<2>(p)[2][0] += xp.get(2) - xq.get(0);
+			vd.template getProp<2>(p)[2][1] += xp.get(2) - xq.get(1);
+			vd.template getProp<2>(p)[2][2] += xp.get(2) - xq.get(2);
+		}
+
+		++it2;
+	}
+
+	//
+	// ### WIKI 10 ###
+	//
+	// Deinitialize the library
+	//
+	delete_global_v_cluster();
+}
+
+
+
+
diff --git a/example/Vector/1_verlet/config.cfg b/example/Vector/1_verlet/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1eecbac3577c765edca7f90cf5f61cfb6b9f4880
--- /dev/null
+++ b/example/Vector/1_verlet/config.cfg
@@ -0,0 +1,2 @@
+[pack]
+files = main.cpp Makefile
diff --git a/example/Vector/1_verlet/main.cpp b/example/Vector/1_verlet/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d65993e360b4008207b8648b3a62ad7f78f4c754
--- /dev/null
+++ b/example/Vector/1_verlet/main.cpp
@@ -0,0 +1,179 @@
+#include "Vector/vector_dist.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+#include "data_type/aggregate.hpp"
+
+/*
+ * ### WIKI 1 ###
+ *
+ * ## Simple example
+ *
+ * This example shows cell and verlet lists of the distributed vector
+ *
+ * ### WIKI END ###
+ *
+ */
+
+int main(int argc, char* argv[])
+{
+	//
+	// ### WIKI 2 ###
+	//
+	// Here we Initialize the library, we create a Box that define our domain, boundary conditions, ghost
+	// and the grid size
+	//
+	init_global_v_cluster(&argc,&argv);
+	Vcluster & v_cl = *global_v_cluster;
+
+	// we create a 128x128x128 Grid iterator
+	size_t sz[3] = {128,128,128};
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+	// ghost, big enough to contain the interaction radius
+	Ghost<3,float> ghost(1.0/(128-2));
+
+	//
+	// ### WIKI 3 ###
+	//
+	// Here we define a distributed vector in 3D, containing 3 properties, a
+	// scalar double, a vector double[3], and a tensor of rank 2 double[3][3].
+	// In this case the vector contain 0 particles in total
+	//
+	vector_dist<3,float, aggregate<double,double[3],double[3][3]>, CartDecomposition<3,float> > vd(0,box,bc,ghost);
+
+	//
+	// ### WIKI 4 ###
+	//
+	// We define a grid iterator, to create particles on a grid like way.
+	// An important note is that the grid iterator iterates only on the
+	// local nodes for each processor for example suppose to have a domain like
+	// the one in figure
+	//
+	//   +---------+
+	//   |* * *|* *|
+	//   |  2  |   |
+	//   |* * *|* *|
+	//   |   ---   |
+	//   |* *|* * *|
+	//   |   |     |
+	//   |* *|* * *|
+	//   |   |  1  |
+	//   |* *|* * *|
+	//   +---------+
+	//
+	// divided in 2 processors, the processor 1 will iterate only on the points
+	// inside the portion of space marked with one. Note that the grid iterator follows the
+	// boundary condition specified in the vector. For a periodic 2D 5x5 grid we have
+	//
+	//   +---------+
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   * * * * * |
+	//   |         |
+	//   *-*-*-*-*-+
+	//
+	// Because the right border is equivalent to the left border, while for a non periodic we have the
+	// following distribution of points
+	//
+	//   *-*-*-*-*
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   * * * * *
+	//   |       |
+	//   *-*-*-*-*
+	//
+	// So in this loop each processor will place particles on a grid
+	//
+	auto it = vd.getGridIterator(sz);
+
+	while (it.isNext())
+	{
+		vd.add();
+
+		auto key = it.get();
+
+		vd.template getLastPos<0>()[0] = key.get(0) * it.getSpacing(0);
+		vd.template getLastPos<0>()[1] = key.get(1) * it.getSpacing(1);
+		vd.template getLastPos<0>()[2] = key.get(2) * it.getSpacing(2);
+
+		++it;
+	}
+
+	//
+	// ### WIKI 5 ###
+	//
+	// we synchronize the ghost, the scalar property, the vector, and the rank 2 tensor
+	// (just for fun)
+	vd.ghost_get<0,1,2>();
+
+	//
+	// ### WIKI 6 ###
+	//
+	// If the particle does not move, or does not move that much we can create a verlet list
+	// for each particle, it internally use CellList to find the neighborhood but it is still
+	// an expensive operation
+	//
+	openfpm::vector<openfpm::vector<size_t>> verlet;
+
+	// cutting radius
+	float r_cut = 1.0/(128-2);
+	vd.getVerlet(verlet,r_cut);
+
+	//
+	// ### WIKI 7 ###
+	//
+	// for each particle we iterate across the neighborhoods particles and we
+	// do some demo calculation
+	//
+	for (size_t i = 0 ; i < verlet.size() ; i++)
+	{
+
+		Point<3,float> p = vd.getPos<0>(i);
+
+		// for each neighborhood particle
+		for (size_t j = 0 ; j < verlet.get(i).size() ; j++)
+		{
+			auto & NN = verlet.get(i);
+
+			Point<3,float> q = vd.getPos<0>(NN.get(j));
+
+			// some non-sense calculation as usage demo
+
+			// we sum the distance of all the particles
+			vd.template getProp<0>(i) += p.distance(q);
+
+			// we sum the distance of all the particles
+			vd.template getProp<1>(i)[0] += p.get(0) - q.get(0);
+			vd.template getProp<1>(i)[1] += p.get(0) - q.get(0);
+			vd.template getProp<1>(i)[2] += p.get(0) - q.get(0);
+
+			vd.template getProp<2>(i)[0][0] += p.get(0) - q.get(0);
+			vd.template getProp<2>(i)[0][1] += p.get(0) - q.get(1);
+			vd.template getProp<2>(i)[0][2] += p.get(0) - q.get(2);
+			vd.template getProp<2>(i)[1][0] += p.get(1) - q.get(0);
+			vd.template getProp<2>(i)[1][1] += p.get(1) - q.get(1);
+			vd.template getProp<2>(i)[1][2] += p.get(1) - q.get(2);
+			vd.template getProp<2>(i)[2][0] += p.get(2) - q.get(0);
+			vd.template getProp<2>(i)[2][1] += p.get(2) - q.get(1);
+			vd.template getProp<2>(i)[2][2] += p.get(2) - q.get(2);
+		}
+	}
+
+	//
+	// ### WIKI 8 ###
+	//
+	// Deinitialize the library
+	//
+	delete_global_v_cluster();
+}
+
diff --git a/install b/install
index fdd8c599897303bab53ce407d63d4c3b23fd3deb..6bcb17830b7ea7db22b01423104e6dd10b1d2fab 100755
--- a/install
+++ b/install
@@ -4,6 +4,7 @@ source script/help
 source script/discover_os
 source script/show_solutions
 source script/pre_req
+source script/remove_old
 
 ##
 
@@ -39,6 +40,14 @@ done
 
 discover_os
 
+## Remove old dependencies and installations
+
+if [ x"$i_dir" == x" " ]; then
+ i_dir="$HOME"
+fi
+
+remove_old $i_dir
+
 ## Check and try to install the prerequisites
 
 pre_req
@@ -63,7 +72,6 @@ cd openfpm_data
 if [ ! -d src/config  ]; then
   mkdir src/config
 fi
-git checkout develop
 cd ..
 
 cd openfpm_devices
@@ -84,6 +92,12 @@ if [ ! -d src/config  ]; then
 fi
 cd ..
 
+cd openfpm_numerics
+if [ ! -d src/config  ]; then
+  mkdir src/config
+fi
+cd ..
+
 # Create config directory
 
 if [ ! -d src/config ]; then
@@ -100,14 +114,11 @@ fi
 
 ## check for options
 
-if [ x"$i_dir" == x" " ]; then
- i_dir="$HOME"
-fi
-
 echo -e "\033[1;34;5mDEPENCENCIES INSTALLATION DIR      \033[0m"
 echo -e "Every required dependencies if needed will be installed into: \033[1;34;5m$i_dir\033[0m"
 echo -e "if you want to install somewhere else do ./install -i /somewhere/else"
 if [ $sq -eq 0 ]; then
+  unset commands
   commands[0]="Continue"
   possible_solutions "${commands[@]}"
 fi
@@ -123,38 +134,18 @@ echo -e "Installing requirements into: $i_dir "
 MPI_installed=0
 METIS_installed=0
 BOOST_installed=0
+LAPACK_installed=0
+SUITESPARSE_installed=0
+EIGEN_installed=0
+blas_options=""
 conf_err=1
 
-## MPI
-
-command -v mpic++ >/dev/null 2>&1
-if [ $? -eq 0 ]; then
-  options="CXX=mpic++"
-fi
-
-## if a new compiler has been installed reinstall all the dependencies
-
-if [ x"$compiler_opt" != x"" ]; then
-  ./script/install_MPI.sh $i_dir $compiler_opt
-  export PATH="$PATH:$i_dir/MPI/bin"
-  configure_options="$configure_options CXX=mpic++ "
-  MPI_installed=1
-  ./script/install_BOOST.sh $i_dir $compiler_opt
-  configure_options=" $configure_options --with-boost=$i_dir/BOOST "
-  BOOST_installed=1
-  ./script/install_METIS.sh $i_dir $compiler_gcc $compiler_gpp
-  configure_options=" $configure_options --with-metis=$i_dir/METIS "
-  METIS_installed=1
-fi
-
-echo "./configure $options $configure_options" 
-
 if [ $install_req -eq 0 ]; then
-    ./configure $options $configure_options
+    ./configure $options $configure_options "$blas_options"
 else
     while [ $conf_err -ne 0 ]
     do
-        ./configure $options $configure_options
+        ./configure $options $configure_options "$blas_options"
         conf_err=$?
 
 	echo "Configure script terminated with $conf_err"
@@ -180,7 +171,7 @@ else
             METIS_installed=1
             configure_options=" $configure_options --with-metis=$i_dir/METIS "
         elif [ $conf_err -eq 202 ]; then
-            echo "Boost not found try to install"
+            echo "Boost not found try to install in $i_dir with $compiler_opt"
             if [ $BOOST_installed -eq 1  ]; then
                 echo "Error the installation of Boost failed"
                 exit 1
@@ -188,6 +179,40 @@ else
             ./script/install_BOOST.sh $i_dir $compiler_opt
             BOOST_installed=1
             configure_options=" $configure_options --with-boost=$i_dir/BOOST "
+        elif [ $conf_err -eq 204 ]; then
+            echo "Lapack not found try to install"
+            if [ $LAPACK_installed -eq 1  ]; then
+                echo "Error the installation of LAPACK failed"
+                exit 1
+            fi
+            ./script/install_OPENBLAS.sh $i_dir $compiler_opt
+            LAPACK_installed=1
+            blas_options="--with-blas=-L$i_dir/OPENBLAS/lib/ -lopenblas"
+        elif [ $conf_err -eq 205 ]; then
+            ## First we install openblas, we ignore default lapack
+            ## this SuiteSparse is programmed to use libopenblas
+            ./script/install_OPENBLAS.sh $i_dir $compiler_opt
+            LAPACK_installed=1
+            blas_options="--with-blas=-L$i_dir/OPENBLAS/lib/ -lopenblas"
+        
+            ## Then suite sparse
+            echo "SuiteSparse not found try to install"
+            if [ $SUITESPARSE_installed -eq 1  ]; then
+                echo "Error the installation of SuiteSparse failed"
+                exit 1
+            fi
+            ./script/install_SUITESPARSE.sh $i_dir $compiler_opt
+            configure_options=" $configure_options --with-suitesparse=$i_dir/SUITESPARSE "
+            SUITESPARSE_installed=1
+        elif [ $conf_err -eq 206 ]; then
+            echo "Eigen not found try to install"
+            if [ $EIGEN_installed -eq 1  ]; then
+                echo "Error the installation of Eigen failed"
+                exit 1
+            fi
+            ./script/install_EIGEN.sh $i_dir $compiler_opt
+            configure_options=" $configure_options --with-eigen=$i_dir/EIGEN "
+            EIGEN_installed=1
         elif [ $conf_err -ne 0 ]; then
             echo "I do not know how to recover from this error"
             exit 1
@@ -197,9 +222,10 @@ fi
 
 ### Create example.mk
 install_base=$(cat install_dir)
-echo "INCLUDE_PATH=-I. -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/BOOST/include" > example.mk
-echo "LIBS_PATH=-L$install_base/openfpm_devices/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/BOOST/lib " >> example.mk
-echo "LIBS=-lvcluster -lofpmmemory -lmetis -lboost_iostreams" >> example.mk
+echo "INCLUDE_PATH=-I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/BOOST/include" > example.mk
+echo "LIBS_PATH= -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib  -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/BOOST/lib " >> example.mk
+echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lmetis -lboost_iostreams" >> example.mk
+echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lmetis -lboost_iostreams" >> example.mk
 cp example.mk src/example.mk
 cp example.mk example/example.mk
 
@@ -220,7 +246,7 @@ fi
 
 echo "Command used to configure"
 echo ""
-echo -e "\033[1m ./configure $options $configure_options \033[0m "
+echo -e "\033[1m ./configure $options $configure_options "$blas_options" \033[0m "
 echo ""
 if [ $MPI_installed -eq 1 ]; then
   echo -e "\033[1;34;5m ---------------------------------------  \033[0m"
@@ -228,7 +254,7 @@ if [ $MPI_installed -eq 1 ]; then
   echo -e "  MPI has been installed into: \033[1m $i_dir/MPI \033[0m"
   echo ""
   if [ x"$platform" = x"linux" ]; then
-    echo -e "\033[1m  export PATH=\"\$PATH:$i_dir/MPI/bin \" \033[0m "
+    echo -e "\033[1m  export PATH=\"\$PATH:$i_dir/MPI/bin\" \033[0m "
     echo -e "\033[1m  export LD_LIBRARY_PATH=\"\$LD_LIBRARY_PATH:$i_dir/MPI/lib\" \033[0m "
   else
     echo -e "\033[1m  export PATH=\"\$PATH:$i_dir/MPI/bin \" \033[0m "
@@ -261,6 +287,30 @@ if [ $BOOST_installed -eq 1 ]; then
     echo -e "\033[1m  export DYLD_LIBRARY_PATH=\"\$DYLD_LIBRARY_PATH:$i_dir/BOOST/lib\" \033[0m"
   fi
 fi
+if [ $LAPACK_installed -eq 1 ]; then
+  echo ""
+  echo -e "\033[1;34;5m ---------------------------------------  \033[0m"
+  echo -e "\033[1;34;5m --------------- OPENBLAS -------------- \033[0m"
+  echo -e "  OPENBLAS has been installed into: \033[1m $i_dir/OPENBLAS \033[0m"
+  echo ""
+  if [ x"$platform" = x"linux" ]; then
+    echo -e "\033[1m  export LD_LIBRARY_PATH=\"\$LD_LIBRARY_PATH:$i_dir/OPENBLAS/lib\" \033[0m "
+  else
+    echo -e "\033[1m  export DYLD_LIBRARY_PATH=\"\$DYLD_LIBRARY_PATH:$i_dir/OPENBLAS/lib\" \033[0m"
+  fi
+fi
+if [ $SUITESPARSE_installed -eq 1 ]; then
+  echo ""
+  echo -e "\033[1;34;5m ---------------------------------------  \033[0m"
+  echo -e "\033[1;34;5m ------------- SUITESPARSE ------------- \033[0m"
+  echo -e "  SUITESPARSE has been installed into: \033[1m $i_dir/SUITESPARSE \033[0m"
+  echo ""
+  if [ x"$platform" = x"linux" ]; then
+    echo -e "\033[1m  export LD_LIBRARY_PATH=\"\$LD_LIBRARY_PATH:$i_dir/SUITESPARSE/lib\" \033[0m "
+  else
+    echo -e "\033[1m  export DYLD_LIBRARY_PATH=\"\$DYLD_LIBRARY_PATH:$i_dir/SUITESPARSE/lib\" \033[0m"
+  fi
+fi
 echo ""
 echo ""
 if [ $conf_err -ne 0 ]; then
diff --git a/m4/ax_blas.m4 b/m4/ax_blas.m4
new file mode 100644
index 0000000000000000000000000000000000000000..87e7ae039365ba6b667f845c38a3f8af1e24539c
--- /dev/null
+++ b/m4/ax_blas.m4
@@ -0,0 +1,239 @@
+# ===========================================================================
+#          http://www.gnu.org/software/autoconf-archive/ax_blas.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_BLAS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro looks for a library that implements the BLAS linear-algebra
+#   interface (see http://www.netlib.org/blas/). On success, it sets the
+#   BLAS_LIBS output variable to hold the requisite library linkages.
+#
+#   To link with BLAS, you should link with:
+#
+#     $BLAS_LIBS $LIBS $FLIBS
+#
+#   in that order. FLIBS is the output variable of the
+#   AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is
+#   sometimes necessary in order to link with F77 libraries. Users will also
+#   need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same
+#   reason.
+#
+#   Many libraries are searched for, from ATLAS to CXML to ESSL. The user
+#   may also use --with-blas=<lib> in order to use some specific BLAS
+#   library <lib>. In order to link successfully, however, be aware that you
+#   will probably need to use the same Fortran compiler (which can be set
+#   via the F77 env. var.) as was used to compile the BLAS library.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a BLAS library is
+#   found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is
+#   not found. If ACTION-IF-FOUND is not specified, the default action will
+#   define HAVE_BLAS.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 14
+
+AU_ALIAS([ACX_BLAS], [AX_BLAS])
+AC_DEFUN([AX_BLAS], [
+AC_PREREQ(2.50)
+AC_REQUIRE([AC_F77_LIBRARY_LDFLAGS])
+AC_REQUIRE([AC_CANONICAL_HOST])
+ax_blas_ok=no
+
+AC_ARG_WITH(blas,
+	[AS_HELP_STRING([--with-blas=<lib>], [use BLAS library <lib>])])
+case $with_blas in
+	yes | "") ;;
+	no) ax_blas_ok=disable ;;
+	-* | */* | *.a | *.so | *.so.* | *.o) BLAS_LIBS="$with_blas" ;;
+	*) BLAS_LIBS="-l$with_blas" ;;
+esac
+
+# Get fortran linker names of BLAS functions to check for.
+AC_F77_FUNC(sgemm)
+AC_F77_FUNC(dgemm)
+
+ax_blas_save_LIBS="$LIBS"
+LIBS="$LIBS $FLIBS"
+
+# First, check BLAS_LIBS environment variable
+if test $ax_blas_ok = no; then
+if test "x$BLAS_LIBS" != x; then
+	save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS"
+	AC_MSG_CHECKING([for $sgemm in $BLAS_LIBS])
+	AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes], [BLAS_LIBS=""])
+	AC_MSG_RESULT($ax_blas_ok)
+	LIBS="$save_LIBS"
+fi
+fi
+
+# BLAS linked to by default?  (happens on some supercomputers)
+if test $ax_blas_ok = no; then
+	save_LIBS="$LIBS"; LIBS="$LIBS"
+	AC_MSG_CHECKING([if $sgemm is being linked in already])
+	AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes])
+	AC_MSG_RESULT($ax_blas_ok)
+	LIBS="$save_LIBS"
+fi
+
+# BLAS in OpenBLAS library? (http://xianyi.github.com/OpenBLAS/)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(openblas, $sgemm, [ax_blas_ok=yes
+			                BLAS_LIBS="-lopenblas"])
+fi
+
+# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(atlas, ATL_xerbla,
+		[AC_CHECK_LIB(f77blas, $sgemm,
+		[AC_CHECK_LIB(cblas, cblas_dgemm,
+			[ax_blas_ok=yes
+			 BLAS_LIBS="-lcblas -lf77blas -latlas"],
+			[], [-lf77blas -latlas])],
+			[], [-latlas])])
+fi
+
+# BLAS in PhiPACK libraries? (requires generic BLAS lib, too)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(blas, $sgemm,
+		[AC_CHECK_LIB(dgemm, $dgemm,
+		[AC_CHECK_LIB(sgemm, $sgemm,
+			[ax_blas_ok=yes; BLAS_LIBS="-lsgemm -ldgemm -lblas"],
+			[], [-lblas])],
+			[], [-lblas])])
+fi
+
+# BLAS in Intel MKL library?
+if test $ax_blas_ok = no; then
+	# MKL for gfortran
+	if test x"$ac_cv_fc_compiler_gnu" = xyes; then
+		# 64 bit
+		if test $host_cpu = x86_64; then
+			AC_CHECK_LIB(mkl_gf_lp64, $sgemm,
+			[ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread"],,
+			[-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread])
+		# 32 bit
+		elif test $host_cpu = i686; then
+			AC_CHECK_LIB(mkl_gf, $sgemm,
+				[ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf -lmkl_sequential -lmkl_core -lpthread"],,
+				[-lmkl_gf -lmkl_sequential -lmkl_core -lpthread])
+		fi
+	# MKL for other compilers (Intel, PGI, ...?)
+	else
+		# 64-bit
+		if test $host_cpu = x86_64; then
+			AC_CHECK_LIB(mkl_intel_lp64, $sgemm,
+				[ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread"],,
+				[-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread])
+		# 32-bit
+		elif test $host_cpu = i686; then
+			AC_CHECK_LIB(mkl_intel, $sgemm,
+				[ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel -lmkl_sequential -lmkl_core -lpthread"],,
+				[-lmkl_intel -lmkl_sequential -lmkl_core -lpthread])
+		fi
+	fi
+fi
+# Old versions of MKL
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(mkl, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl -lguide -lpthread"],,[-lguide -lpthread])
+fi
+
+# BLAS in Apple vecLib library?
+if test $ax_blas_ok = no; then
+	save_LIBS="$LIBS"; LIBS="-framework vecLib $LIBS"
+	AC_MSG_CHECKING([for $sgemm in -framework vecLib])
+	AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes;BLAS_LIBS="-framework vecLib"])
+	AC_MSG_RESULT($ax_blas_ok)
+	LIBS="$save_LIBS"
+fi
+
+# BLAS in Alpha CXML library?
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(cxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lcxml"])
+fi
+
+# BLAS in Alpha DXML library? (now called CXML, see above)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(dxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-ldxml"])
+fi
+
+# BLAS in Sun Performance library?
+if test $ax_blas_ok = no; then
+	if test "x$GCC" != xyes; then # only works with Sun CC
+		AC_CHECK_LIB(sunmath, acosp,
+			[AC_CHECK_LIB(sunperf, $sgemm,
+				[BLAS_LIBS="-xlic_lib=sunperf -lsunmath"
+                                 ax_blas_ok=yes],[],[-lsunmath])])
+	fi
+fi
+
+# BLAS in SCSL library?  (SGI/Cray Scientific Library)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(scs, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lscs"])
+fi
+
+# BLAS in SGIMATH library?
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(complib.sgimath, $sgemm,
+		     [ax_blas_ok=yes; BLAS_LIBS="-lcomplib.sgimath"])
+fi
+
+# BLAS in IBM ESSL library? (requires generic BLAS lib, too)
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(blas, $sgemm,
+		[AC_CHECK_LIB(essl, $sgemm,
+			[ax_blas_ok=yes; BLAS_LIBS="-lessl -lblas"],
+			[], [-lblas $FLIBS])])
+fi
+
+# Generic BLAS library?
+if test $ax_blas_ok = no; then
+	AC_CHECK_LIB(blas, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lblas"])
+fi
+
+AC_SUBST(BLAS_LIBS)
+
+LIBS="$ax_blas_save_LIBS"
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$ax_blas_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])
+        :
+else
+        ax_blas_ok=no
+        $2
+fi
+])dnl AX_BLAS
+
diff --git a/m4/ax_boost.m4 b/m4/ax_boost.m4
deleted file mode 100644
index 7ce27b63a64331de19612d79b1fdec5ac6ff90e0..0000000000000000000000000000000000000000
--- a/m4/ax_boost.m4
+++ /dev/null
@@ -1,523 +0,0 @@
-# ===========================================================================
-#                http://autoconf-archive.cryp.to/ax_boost.html
-# ===========================================================================
-#
-# OBSOLETE MACRO
-#
-#   Use AX_BOOST_BASE in combination with library-specific macros.
-#
-# SYNOPSIS
-#
-#   AX_BOOST([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-#
-# DESCRIPTION
-#
-#   Test for the Boost C++ libraries of a particular version (or newer)
-#
-#   If no path to the installed boost library is given the macro searchs
-#   under /usr, /usr/local, and /opt, and evaluates the $BOOST_ROOT
-#   environment variable. Further documentation is available at
-#   <http://randspringer.de/boost/index.html>.
-#
-#   This macro calls:
-#
-#     AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
-#     AC_SUBST(BOOST_FILESYSTEM_LIB)
-#     AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB)
-#     AC_SUBST(BOOST_THREAD_LIB)
-#     AC_SUBST(BOOST_IOSTREAMS_LIB)
-#     AC_SUBST(BOOST_SERIALIZATION_LIB)
-#     AC_SUBST(BOOST_WSERIALIZATION_LIB)
-#     AC_SUBST(BOOST_SIGNALS_LIB)
-#     AC_SUBST(BOOST_DATE_TIME_LIB)
-#     AC_SUBST(BOOST_REGEX_LIB)
-#     AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
-#
-#   And sets:
-#
-#     HAVE_BOOST
-#     HAVE_BOOST_FILESYSTEM
-#     HAVE_BOOST_PROGRAM_OPTIONS
-#     HAVE_BOOST_THREAD
-#     HAVE_BOOST_IOSTREAMS
-#     HAVE_BOOST_SERIALIZATION
-#     HAVE_BOOST_SIGNALS
-#     HAVE_BOOST_DATE_TIME
-#     HAVE_BOOST_REGEX
-#     HAVE_BOOST_UNIT_TEST_FRAMEWORK
-#
-# LAST MODIFICATION
-#
-#   2008-04-12
-#
-# COPYLEFT
-#
-#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved.
-
-AC_DEFUN([AX_BOOST],
-[
-    AC_ARG_WITH([boost],
-                AS_HELP_STRING([--with-boost=DIR],
-                [use boost (default is yes) specify the root directory for boost library (optional)]),
-                [
-                if test "$withval" = "no"; then
-		            want_boost="no"
-                elif test "$withval" = "yes"; then
-                    want_boost="yes"
-                    ac_boost_path=""
-                else
-			        want_boost="yes"
-            		ac_boost_path="$withval"
-		        fi
-            	],
-                [want_boost="yes"])
-
-    AC_CANONICAL_BUILD
-	if test "x$want_boost" = "xyes"; then
-        AC_REQUIRE([AC_PROG_CC])
-		boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
-		boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
-		boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
-		boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
-		boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
-		if test "x$boost_lib_version_req_sub_minor" = "x" ; then
-			boost_lib_version_req_sub_minor="0"
-    	fi
-		WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+  $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
-		AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
-		succeeded=no
-
-		dnl first we check the system location for boost libraries
-		dnl this location ist chosen if boost libraries are installed with the --layout=system option
-		dnl or if you install boost with RPM
-		if test "$ac_boost_path" != ""; then
-			BOOST_LDFLAGS="-L$ac_boost_path/lib"
-			BOOST_CPPFLAGS="-I$ac_boost_path/include"
-		else
-			for ac_boost_path_tmp in /usr /usr/local /opt ; do
-				if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
-					BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib"
-					BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
-					break;
-				fi
-			done
-		fi
-
-		CPPFLAGS_SAVED="$CPPFLAGS"
-		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-		export CPPFLAGS
-
-		LDFLAGS_SAVED="$LDFLAGS"
-		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-		export LDFLAGS
-
-	AC_LANG_PUSH(C++)
-     	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-@%:@include <boost/version.hpp>
-]],
-       [[
-#if BOOST_VERSION >= $WANT_BOOST_VERSION
-// Everything is okay
-#else
-#  error Boost version is too old
-#endif
-
-		]])],
-    	[
-         AC_MSG_RESULT(yes)
-		 succeeded=yes
-		 found_system=yes
-         ifelse([$2], , :, [$2])
-       ],
-       [
-       ])
-       AC_LANG_POP([C++])
-		dnl if we found no boost with system layout we search for boost libraries
-		dnl built and installed without the --layout=system option or for a staged(not installed) version
-		if test "x$succeeded" != "xyes"; then
-			_version=0
-			if test "$ac_boost_path" != ""; then
-                BOOST_LDFLAGS="-L$ac_boost_path/lib"
-				if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
-					for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
-						_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
-						V_CHECK=`expr $_version_tmp \> $_version`
-						if test "$V_CHECK" = "1" ; then
-							_version=$_version_tmp
-						fi
-						VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
-						BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
-					done
-				fi
-			else
-				for ac_boost_path in /usr /usr/local /opt ; do
-					if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
-						for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
-							_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
-							V_CHECK=`expr $_version_tmp \> $_version`
-							if test "$V_CHECK" = "1" ; then
-								_version=$_version_tmp
-								best_path=$ac_boost_path
-							fi
-						done
-					fi
-				done
-
-				VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
-				BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
-				BOOST_LDFLAGS="-L$best_path/lib"
-
-	    		if test "x$BOOST_ROOT" != "x"; then
-                    if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then
-						version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
-						stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
-						stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
-						V_CHECK=`expr $stage_version_shorten \>\= $_version`
-						if test "$V_CHECK" = "1" ; then
-							AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
-							BOOST_CPPFLAGS="-I$BOOST_ROOT"
-							BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib"
-						fi
-					fi
-	    		fi
-			fi
-
-			CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-			export CPPFLAGS
-			LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-			export LDFLAGS
-
-            AC_LANG_PUSH(C++)
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-@%:@include <boost/version.hpp>
-]],
-       [[
-#if BOOST_VERSION >= $WANT_BOOST_VERSION
-// Everything is okay
-#else
-#  error Boost version is too old
-#endif
-
-		]])],
-    	[
-         AC_MSG_RESULT(yes ($_version))
-		 succeeded=yes
-         ifelse([$2], , :, [$2])
-       ],
-       [
-         AC_MSG_RESULT(no ($_version))
-         ifelse([$3], , :, [$3])
-       ])
-    	AC_LANG_POP([C++])
-		fi
-
-		if test "$succeeded" != "yes" ; then
-			if test "$_version" = "0" ; then
-				AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option.  If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
-			else
-				AC_MSG_ERROR('Your boost libraries seems to old (version $_version).  We need at least $boost_lib_version_shorten')
-			fi
-		else
-			AC_SUBST(BOOST_CPPFLAGS)
-			AC_SUBST(BOOST_LDFLAGS)
-			AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
-
-			AC_CACHE_CHECK([whether the Boost::Filesystem library is available],
-						   ax_cv_boost_filesystem,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/filesystem/path.hpp>]],
-                                   [[using namespace boost::filesystem;
-                                   path my_path( "foo/bar/data.txt" );
-                                   return 0;]]),
-            				       ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no)
-                                   AC_LANG_POP([C++])
-			])
-			if test "$ax_cv_boost_filesystem" = "yes"; then
-				AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::FILESYSTEM library is available])
-				BN=boost_filesystem
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
-                                 [link_filesystem="no"])
-  				done
-				if test "x$link_filesystem" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK([whether the Boost::Program_Options library is available],
-						   ax_cv_boost_program_options,
-						   [AC_LANG_PUSH([C++])
-			               AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/program_options.hpp>]],
-                                   [[boost::program_options::options_description generic("Generic options");
-                                   return 0;]]),
-                           ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no)
-                           AC_LANG_POP([C++])
-			])
-			if test "$ax_cv_boost_program_options" = yes; then
-				AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available])
-				BN=boost_program_options
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
-                                 [link_program_options="no"])
-  				done
-				if test "x$link_program_options="no"" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::Thread library is available,
-						   ax_cv_boost_thread,
-						[AC_LANG_PUSH([C++])
-			 CXXFLAGS_SAVE=$CXXFLAGS
-
-			 if test "x$build_os" = "xsolaris" ; then
-  				 CXXFLAGS="-pthreads $CXXFLAGS"
-			 elif test "x$build_os" = "xming32" ; then
-				 CXXFLAGS="-mthreads $CXXFLAGS"
-			 else
-				CXXFLAGS="-pthread $CXXFLAGS"
-			 fi
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/thread/thread.hpp>]],
-                                   [[boost::thread_group thrds;
-                                   return 0;]]),
-                   ax_cv_boost_thread=yes, ax_cv_boost_thread=no)
-			 CXXFLAGS=$CXXFLAGS_SAVE
-             AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_thread" = "xyes"; then
-               if test "x$build_os" = "xsolaris" ; then
- 				  BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS"
-			   elif test "x$build_os" = "xming32" ; then
- 				  BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS"
-			   else
-				  BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS"
-			   fi
-
-				AC_SUBST(BOOST_CPPFLAGS)
-				AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::THREAD library is available])
-				BN=boost_thread
-     			LDFLAGS_SAVE=$LDFLAGS
-                        case "x$build_os" in
-                          *bsd* )
-                               LDFLAGS="-pthread $LDFLAGS"
-                          break;
-                          ;;
-                        esac
-
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit, [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
-                                 [link_thread="no"])
-  				done
-				if test "x$link_thread" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-                else
-                    case "x$build_os" in
-                       *bsd* )
-                       BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS"
-                       break;
-                       ;;
-                    esac
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::IOStreams library is available,
-						   ax_cv_boost_iostreams,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/iostreams/filtering_stream.hpp>
-												 @%:@include <boost/range/iterator_range.hpp>
-												]],
-                                   [[std::string  input = "Hello World!";
-									 namespace io = boost::iostreams;
-									 io::filtering_istream  in(boost::make_iterator_range(input));
-									 return 0;
-                                   ]]),
-                   ax_cv_boost_iostreams=yes, ax_cv_boost_iostreams=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_iostreams" = "xyes"; then
-				AC_DEFINE(HAVE_BOOST_IOSTREAMS,,[define if the Boost::IOStreams library is available])
-				BN=boost_iostreams
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit, [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_thread="yes"; break],
-                                 [link_thread="no"])
-  				done
-				if test "x$link_thread" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::Serialization library is available,
-						   ax_cv_boost_serialization,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <fstream>
-												 @%:@include <boost/archive/text_oarchive.hpp>
-                                                 @%:@include <boost/archive/text_iarchive.hpp>
-												]],
-                                   [[std::ofstream ofs("filename");
-									boost::archive::text_oarchive oa(ofs);
-									 return 0;
-                                   ]]),
-                   ax_cv_boost_serialization=yes, ax_cv_boost_serialization=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_serialization" = "xyes"; then
-				AC_DEFINE(HAVE_BOOST_SERIALIZATION,,[define if the Boost::Serialization library is available])
-				BN=boost_serialization
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_SERIALIZATION_LIB="-l$ax_lib"; AC_SUBST(BOOST_SERIALIZATION_LIB) link_serialization="yes"; break],
-                                 [link_serialization="no"])
-  				done
-				if test "x$link_serialization" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-
-				BN=boost_wserialization
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_WSERIALIZATION_LIB="-l$ax_lib"; AC_SUBST(BOOST_WSERIALIZATION_LIB) link_wserialization="yes"; break],
-                                 [link_wserialization="no"])
-  				done
-				if test "x$link_wserialization" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::Signals library is available,
-						   ax_cv_boost_signals,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/signal.hpp>
-												]],
-                                   [[boost::signal<void ()> sig;
-                                     return 0;
-                                   ]]),
-                   ax_cv_boost_signals=yes, ax_cv_boost_signals=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_signals" = "xyes"; then
-				AC_DEFINE(HAVE_BOOST_SIGNALS,,[define if the Boost::Signals library is available])
-				BN=boost_signals
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit, [BOOST_SIGNALS_LIB="-l$ax_lib"; AC_SUBST(BOOST_SIGNALS_LIB) link_signals="yes"; break],
-                                 [link_signals="no"])
-  				done
-				if test "x$link_signals" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::Date_Time library is available,
-						   ax_cv_boost_date_time,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/date_time/gregorian/gregorian_types.hpp>
-												]],
-                                   [[using namespace boost::gregorian; date d(2002,Jan,10);
-                                     return 0;
-                                   ]]),
-                   ax_cv_boost_date_time=yes, ax_cv_boost_date_time=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_date_time" = "xyes"; then
-				AC_DEFINE(HAVE_BOOST_DATE_TIME,,[define if the Boost::Date_Time library is available])
-				BN=boost_date_time
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit, [BOOST_DATE_TIME_LIB="-l$ax_lib"; AC_SUBST(BOOST_DATE_TIME_LIB) link_thread="yes"; break],
-                                 [link_thread="no"])
-  				done
-				if test "x$link_thread"="no" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::Regex library is available,
-						   ax_cv_boost_regex,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/regex.hpp>
-												]],
-                                   [[boost::regex r(); return 0;]]),
-                   ax_cv_boost_regex=yes, ax_cv_boost_regex=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_regex" = "xyes"; then
-				AC_DEFINE(HAVE_BOOST_REGEX,,[define if the Boost::Regex library is available])
-				BN=boost_regex
-				for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                              lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                              $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-				    AC_CHECK_LIB($ax_lib, exit, [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break],
-                                 [link_regex="no"])
-  				done
-				if test "x$link_regex" = "xno"; then
-					AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-
-			AC_CACHE_CHECK(whether the Boost::UnitTestFramework library is available,
-						   ax_cv_boost_unit_test_framework,
-						[AC_LANG_PUSH([C++])
-			 AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/test/unit_test.hpp>]],
-                                    [[using boost::unit_test::test_suite;
-					                 test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" ); return 0;]]),
-                   ax_cv_boost_unit_test_framework=yes, ax_cv_boost_unit_test_framework=no)
-			 AC_LANG_POP([C++])
-			])
-			if test "x$ax_cv_boost_unit_test_framework" = "xyes"; then
-    		AC_DEFINE(HAVE_BOOST_UNIT_TEST_FRAMEWORK,,[define if the Boost::Unit_test_framework library is available])
-			BN=boost_unit_test_framework
-    		saved_ldflags="${LDFLAGS}"
-			for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \
-                          lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \
-                          $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do
-                LDFLAGS="${LDFLAGS} -l$ax_lib"
-    			AC_CACHE_CHECK(the name of the Boost::UnitTestFramework library,
-	      					   ax_cv_boost_unit_test_framework_link,
-						[AC_LANG_PUSH([C++])
-                   AC_LINK_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/test/unit_test.hpp>
-                                                     using boost::unit_test::test_suite;
-                                                     test_suite* init_unit_test_suite( int argc, char * argv[] ) {
-                                                     test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" );
-                                                     return test;
-                                                     }
-                                                   ]],
-                                 [[ return 0;]])],
-                                 link_unit_test_framework="yes",link_unit_test_framework="no")
-			 AC_LANG_POP([C++])
-               ])
-                LDFLAGS="${saved_ldflags}"
-			    if test "x$link_unit_test_framework" = "xyes"; then
-                    BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib"
-                    AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
-					break
-				fi
-              done
-			    if test "x$link_unit_test_framework" = "xno"; then
-				   AC_MSG_NOTICE(Could not link against $ax_lib !)
-				fi
-			fi
-		fi
-        CPPFLAGS="$CPPFLAGS_SAVED"
-        LDFLAGS="$LDFLAGS_SAVED"
-	fi
-])
diff --git a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4
new file mode 100644
index 0000000000000000000000000000000000000000..b8ffb03f616cb96c2762bcc24afe9133d690fc4b
--- /dev/null
+++ b/m4/ax_boost_base.m4
@@ -0,0 +1,286 @@
+# ===========================================================================
+#       http://www.gnu.org/software/autoconf-archive/ax_boost_base.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# DESCRIPTION
+#
+#   Test for the Boost C++ libraries of a particular version (or newer)
+#
+#   If no path to the installed boost library is given the macro searchs
+#   under /usr, /usr/local, /opt and /opt/local and evaluates the
+#   $BOOST_ROOT environment variable. Further documentation is available at
+#   <http://randspringer.de/boost/index.html>.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
+#
+#   And sets:
+#
+#     HAVE_BOOST
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
+#   Copyright (c) 2009 Peter Adolphs
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 26
+
+AC_DEFUN([AX_BOOST_BASE],
+[
+AC_ARG_WITH([boost],
+  [AS_HELP_STRING([--with-boost@<:@=ARG@:>@],
+    [use Boost library from a standard location (ARG=yes),
+     from the specified location (ARG=<path>),
+     or disable it (ARG=no)
+     @<:@ARG=yes@:>@ ])],
+    [
+    if test "$withval" = "no"; then
+        want_boost="no"
+    elif test "$withval" = "yes"; then
+        want_boost="yes"
+        ac_boost_path=""
+    else
+        want_boost="yes"
+        ac_boost_path="$withval"
+    fi
+    ],
+    [want_boost="yes"])
+
+
+AC_ARG_WITH([boost-libdir],
+        AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
+        [Force given directory for boost libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]),
+        [
+        if test -d "$withval"
+        then
+                ac_boost_lib_path="$withval"
+        else
+                AC_MSG_ERROR(--with-boost-libdir expected directory name)
+        fi
+        ],
+        [ac_boost_lib_path=""]
+)
+
+if test "x$want_boost" = "xyes"; then
+    boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
+    boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
+    boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
+    boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
+    boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
+    if test "x$boost_lib_version_req_sub_minor" = "x" ; then
+        boost_lib_version_req_sub_minor="0"
+        fi
+    WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+  $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
+    AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
+    succeeded=no
+
+    dnl On 64-bit systems check for system libraries in both lib64 and lib.
+    dnl The former is specified by FHS, but e.g. Debian does not adhere to
+    dnl this (as it rises problems for generic multi-arch support).
+    dnl The last entry in the list is chosen by default when no libraries
+    dnl are found, e.g. when only header-only libraries are installed!
+    libsubdirs="lib"
+    ax_arch=`uname -m`
+    case $ax_arch in
+      x86_64)
+        libsubdirs="lib64 libx32 lib lib64"
+        ;;
+      ppc64|s390x|sparc64|aarch64|ppc64le)
+        libsubdirs="lib64 lib lib64 ppc64le"
+        ;;
+    esac
+
+    dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give
+    dnl them priority over the other paths since, if libs are found there, they
+    dnl are almost assuredly the ones desired.
+    AC_REQUIRE([AC_CANONICAL_HOST])
+    libsubdirs="lib/${host_cpu}-${host_os} $libsubdirs"
+
+    case ${host_cpu} in
+      i?86)
+        libsubdirs="lib/i386-${host_os} $libsubdirs"
+        ;;
+    esac
+
+    dnl first we check the system location for boost libraries
+    dnl this location ist chosen if boost libraries are installed with the --layout=system option
+    dnl or if you install boost with RPM
+    if test "$ac_boost_path" != ""; then
+        BOOST_CPPFLAGS="-I$ac_boost_path/include"
+        for ac_boost_path_tmp in $libsubdirs; do
+                if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then
+                        BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp"
+                        break
+                fi
+        done
+    elif test "$cross_compiling" != yes; then
+        for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
+            if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
+                for libsubdir in $libsubdirs ; do
+                    if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
+                done
+                BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir"
+                BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
+                break;
+            fi
+        done
+    fi
+
+    dnl overwrite ld flags if we have required special directory with
+    dnl --with-boost-libdir parameter
+    if test "$ac_boost_lib_path" != ""; then
+       BOOST_LDFLAGS="-L$ac_boost_lib_path"
+    fi
+
+    CPPFLAGS_SAVED="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+    export CPPFLAGS
+
+    LDFLAGS_SAVED="$LDFLAGS"
+    LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+    export LDFLAGS
+
+    AC_REQUIRE([AC_PROG_CXX])
+    AC_LANG_PUSH(C++)
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+    @%:@include <boost/version.hpp>
+    ]], [[
+    #if BOOST_VERSION >= $WANT_BOOST_VERSION
+    // Everything is okay
+    #else
+    #  error Boost version is too old
+    #endif
+    ]])],[
+        AC_MSG_RESULT(yes)
+    succeeded=yes
+    found_system=yes
+        ],[
+        ])
+    AC_LANG_POP([C++])
+
+
+
+    dnl if we found no boost with system layout we search for boost libraries
+    dnl built and installed without the --layout=system option or for a staged(not installed) version
+    if test "x$succeeded" != "xyes"; then
+        CPPFLAGS="$CPPFLAGS_SAVED"
+        LDFLAGS="$LDFLAGS_SAVED"
+        BOOST_CPPFLAGS=
+        BOOST_LDFLAGS=
+        _version=0
+        if test "$ac_boost_path" != ""; then
+            if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
+                for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
+                    _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
+                    V_CHECK=`expr $_version_tmp \> $_version`
+                    if test "$V_CHECK" = "1" ; then
+                        _version=$_version_tmp
+                    fi
+                    VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
+                    BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
+                done
+                dnl if nothing found search for layout used in Windows distributions
+                if test -z "$BOOST_CPPFLAGS"; then
+                    if test -d "$ac_boost_path/boost" && test -r "$ac_boost_path/boost"; then
+                        BOOST_CPPFLAGS="-I$ac_boost_path"
+                    fi
+                fi
+            fi
+        else
+            if test "$cross_compiling" != yes; then
+                for ac_boost_path in /usr /usr/local /opt /opt/local ; do
+                    if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
+                        for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
+                            _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
+                            V_CHECK=`expr $_version_tmp \> $_version`
+                            if test "$V_CHECK" = "1" ; then
+                                _version=$_version_tmp
+                                best_path=$ac_boost_path
+                            fi
+                        done
+                    fi
+                done
+
+                VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
+                BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
+                if test "$ac_boost_lib_path" = ""; then
+                    for libsubdir in $libsubdirs ; do
+                        if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
+                    done
+                    BOOST_LDFLAGS="-L$best_path/$libsubdir"
+                fi
+            fi
+
+            if test "x$BOOST_ROOT" != "x"; then
+                for libsubdir in $libsubdirs ; do
+                    if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
+                done
+                if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then
+                    version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
+                    stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
+                        stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
+                    V_CHECK=`expr $stage_version_shorten \>\= $_version`
+                    if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
+                        AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
+                        BOOST_CPPFLAGS="-I$BOOST_ROOT"
+                        BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
+                    fi
+                fi
+            fi
+        fi
+
+        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+        export CPPFLAGS
+        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+        export LDFLAGS
+
+        AC_LANG_PUSH(C++)
+            AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+        @%:@include <boost/version.hpp>
+        ]], [[
+        #if BOOST_VERSION >= $WANT_BOOST_VERSION
+        // Everything is okay
+        #else
+        #  error Boost version is too old
+        #endif
+        ]])],[
+            AC_MSG_RESULT(yes)
+        succeeded=yes
+        found_system=yes
+            ],[
+            ])
+        AC_LANG_POP([C++])
+    fi
+
+    if test "$succeeded" != "yes" ; then
+        if test "$_version" = "0" ; then
+            AC_MSG_NOTICE([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option.  If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
+        else
+            AC_MSG_NOTICE([Your boost libraries seem to be too old (version $_version).])
+        fi
+        # execute ACTION-IF-NOT-FOUND (if present):
+        ifelse([$3], , :, [$3])
+    else
+        AC_SUBST(BOOST_CPPFLAGS)
+        AC_SUBST(BOOST_LDFLAGS)
+        AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
+        # execute ACTION-IF-FOUND (if present):
+        ifelse([$2], , :, [$2])
+    fi
+
+    CPPFLAGS="$CPPFLAGS_SAVED"
+    LDFLAGS="$LDFLAGS_SAVED"
+fi
+
+])
+
diff --git a/m4/ax_boost_iostreams.m4 b/m4/ax_boost_iostreams.m4
new file mode 100644
index 0000000000000000000000000000000000000000..b4e970be04bbba9e5c889ae21dcc6865ede0547b
--- /dev/null
+++ b/m4/ax_boost_iostreams.m4
@@ -0,0 +1,119 @@
+# ===========================================================================
+#    http://www.gnu.org/software/autoconf-archive/ax_boost_iostreams.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_BOOST_IOSTREAMS
+#
+# DESCRIPTION
+#
+#   Test for IOStreams library from the Boost C++ libraries. The macro
+#   requires a preceding call to AX_BOOST_BASE. Further documentation is
+#   available at <http://randspringer.de/boost/index.html>.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_IOSTREAMS_LIB)
+#
+#   And sets:
+#
+#     HAVE_BOOST_IOSTREAMS
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 20
+
+AC_DEFUN([AX_BOOST_IOSTREAMS],
+[
+	AC_ARG_WITH([boost-iostreams],
+	AS_HELP_STRING([--with-boost-iostreams@<:@=special-lib@:>@],
+                   [use the IOStreams library from boost - it is possible to specify a certain library for the linker
+                        e.g. --with-boost-iostreams=boost_iostreams-gcc-mt-d-1_33_1 ]),
+        [
+        if test "$withval" = "no"; then
+			want_boost="no"
+        elif test "$withval" = "yes"; then
+            want_boost="yes"
+            ax_boost_user_iostreams_lib=""
+        else
+		    want_boost="yes"
+		ax_boost_user_iostreams_lib="$withval"
+		fi
+        ],
+        [want_boost="yes"]
+	)
+
+	if test "x$want_boost" = "xyes"; then
+        AC_REQUIRE([AC_PROG_CC])
+		CPPFLAGS_SAVED="$CPPFLAGS"
+		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+		export CPPFLAGS
+
+		LDFLAGS_SAVED="$LDFLAGS"
+		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+		export LDFLAGS
+
+        AC_CACHE_CHECK(whether the Boost::IOStreams library is available,
+					   ax_cv_boost_iostreams,
+        [AC_LANG_PUSH([C++])
+		 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/iostreams/filtering_stream.hpp>
+											 @%:@include <boost/range/iterator_range.hpp>
+											]],
+                                  [[std::string  input = "Hello World!";
+								 namespace io = boost::iostreams;
+									 io::filtering_istream  in(boost::make_iterator_range(input));
+									 return 0;
+                                   ]])],
+                             ax_cv_boost_iostreams=yes, ax_cv_boost_iostreams=no)
+         AC_LANG_POP([C++])
+		])
+		if test "x$ax_cv_boost_iostreams" = "xyes"; then
+			AC_DEFINE(HAVE_BOOST_IOSTREAMS,,[define if the Boost::IOStreams library is available])
+            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
+            if test "x$ax_boost_user_iostreams_lib" = "x"; then
+                for libextension in `ls $BOOSTLIBDIR/libboost_iostreams*.so* $BOOSTLIBDIR/libboost_iostream*.dylib* $BOOSTLIBDIR/libboost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_iostreams.*\)\.so.*$;\1;' -e 's;^lib\(boost_iostream.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_iostreams.*\)\.a.*$;\1;'` ; do
+                     ax_lib=${libextension}
+				    AC_CHECK_LIB($ax_lib, exit,
+                                 [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
+                                 [link_iostreams="no"])
+				done
+                if test "x$link_iostreams" != "xyes"; then
+                for libextension in `ls $BOOSTLIBDIR/boost_iostreams*.dll* $BOOSTLIBDIR/boost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_iostreams.*\)\.dll.*$;\1;' -e 's;^\(boost_iostreams.*\)\.a.*$;\1;'` ; do
+                     ax_lib=${libextension}
+				    AC_CHECK_LIB($ax_lib, exit,
+                                 [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
+                                 [link_iostreams="no"])
+				done
+                fi
+
+            else
+               for ax_lib in $ax_boost_user_iostreams_lib boost_iostreams-$ax_boost_user_iostreams_lib; do
+				      AC_CHECK_LIB($ax_lib, main,
+                                   [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
+                                   [link_iostreams="no"])
+                  done
+
+            fi
+            if test "x$ax_lib" = "x"; then
+                echo "Could not find a version of the library!"
+                exit 202
+            fi
+			if test "x$link_iostreams" != "xyes"; then
+				echo "Could not link against $ax_lib !"
+                                exit 202
+			fi
+		fi
+
+		CPPFLAGS="$CPPFLAGS_SAVED"
+	LDFLAGS="$LDFLAGS_SAVED"
+	fi
+])
+
diff --git a/m4/ax_boost_program_options.m4 b/m4/ax_boost_program_options.m4
new file mode 100644
index 0000000000000000000000000000000000000000..e508ea3783eb4b4eb3795e57a57cb45326c9f6dd
--- /dev/null
+++ b/m4/ax_boost_program_options.m4
@@ -0,0 +1,110 @@
+# ============================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_boost_program_options.html
+# ============================================================================
+#
+# SYNOPSIS
+#
+#   AX_BOOST_PROGRAM_OPTIONS
+#
+# DESCRIPTION
+#
+#   Test for program options library from the Boost C++ libraries. The macro
+#   requires a preceding call to AX_BOOST_BASE. Further documentation is
+#   available at <http://randspringer.de/boost/index.html>.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB)
+#
+#   And sets:
+#
+#     HAVE_BOOST_PROGRAM_OPTIONS
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 24
+
+AC_DEFUN([AX_BOOST_PROGRAM_OPTIONS],
+[
+	AC_ARG_WITH([boost-program-options],
+		AS_HELP_STRING([--with-boost-program-options@<:@=special-lib@:>@],
+                       [use the program options library from boost - it is possible to specify a certain library for the linker
+                        e.g. --with-boost-program-options=boost_program_options-gcc-mt-1_33_1 ]),
+        [
+        if test "$withval" = "no"; then
+			want_boost="no"
+        elif test "$withval" = "yes"; then
+            want_boost="yes"
+            ax_boost_user_program_options_lib=""
+        else
+		    want_boost="yes"
+		ax_boost_user_program_options_lib="$withval"
+		fi
+        ],
+        [want_boost="yes"]
+	)
+
+	if test "x$want_boost" = "xyes"; then
+        AC_REQUIRE([AC_PROG_CC])
+	    export want_boost
+		CPPFLAGS_SAVED="$CPPFLAGS"
+		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+		export CPPFLAGS
+		LDFLAGS_SAVED="$LDFLAGS"
+		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+		export LDFLAGS
+		AC_CACHE_CHECK([whether the Boost::Program_Options library is available],
+					   ax_cv_boost_program_options,
+					   [AC_LANG_PUSH(C++)
+				AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/program_options/errors.hpp>
+                                                          ]],
+                                  [[boost::program_options::error err("Error message");
+                                   return 0;]])],
+                           ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no)
+					AC_LANG_POP([C++])
+		])
+		if test "$ax_cv_boost_program_options" = yes; then
+				AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available])
+                  BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
+                if test "x$ax_boost_user_program_options_lib" = "x"; then
+                for libextension in `ls $BOOSTLIBDIR/libboost_program_options*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.dylib* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.dylib.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.a.*$;\1;'` ; do
+                     ax_lib=${libextension}
+				    AC_CHECK_LIB($ax_lib, exit,
+                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
+                                 [link_program_options="no"])
+				done
+                if test "x$link_program_options" != "xyes"; then
+                for libextension in `ls $BOOSTLIBDIR/boost_program_options*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.a.*$;\1;'` ; do
+                     ax_lib=${libextension}
+				    AC_CHECK_LIB($ax_lib, exit,
+                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
+                                 [link_program_options="no"])
+				done
+                fi
+                else
+                  for ax_lib in $ax_boost_user_program_options_lib boost_program_options-$ax_boost_user_program_options_lib; do
+				      AC_CHECK_LIB($ax_lib, main,
+                                   [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
+                                   [link_program_options="no"])
+                  done
+                fi
+            if test "x$ax_lib" = "x"; then
+                echo "Could not find a version of the library!"
+                exit 202
+            fi
+				if test "x$link_program_options" != "xyes"; then
+					echo "Could not link against [$ax_lib] !"
+                                        exit 202
+				fi
+		fi
+		CPPFLAGS="$CPPFLAGS_SAVED"
+	LDFLAGS="$LDFLAGS_SAVED"
+	fi
+])
diff --git a/m4/ax_boost_unit_test_framework.m4 b/m4/ax_boost_unit_test_framework.m4
new file mode 100644
index 0000000000000000000000000000000000000000..ff3e8b0f9ac309ab6c156e7bc5d48bb51a4ff886
--- /dev/null
+++ b/m4/ax_boost_unit_test_framework.m4
@@ -0,0 +1,139 @@
+# ================================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_boost_unit_test_framework.html
+# ================================================================================
+#
+# SYNOPSIS
+#
+#   AX_BOOST_UNIT_TEST_FRAMEWORK
+#
+# DESCRIPTION
+#
+#   Test for Unit_Test_Framework library from the Boost C++ libraries. The
+#   macro requires a preceding call to AX_BOOST_BASE. Further documentation
+#   is available at <http://randspringer.de/boost/index.html>.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
+#
+#   And sets:
+#
+#     HAVE_BOOST_UNIT_TEST_FRAMEWORK
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 19
+
+AC_DEFUN([AX_BOOST_UNIT_TEST_FRAMEWORK],
+[
+	AC_ARG_WITH([boost-unit-test-framework],
+	AS_HELP_STRING([--with-boost-unit-test-framework@<:@=special-lib@:>@],
+                   [use the Unit_Test_Framework library from boost - it is possible to specify a certain library for the linker
+                        e.g. --with-boost-unit-test-framework=boost_unit_test_framework-gcc ]),
+        [
+        if test "$withval" = "no"; then
+			want_boost="no"
+        elif test "$withval" = "yes"; then
+            want_boost="yes"
+            ax_boost_user_unit_test_framework_lib=""
+        else
+		    want_boost="yes"
+		ax_boost_user_unit_test_framework_lib="$withval"
+		fi
+        ],
+        [want_boost="yes"]
+	)
+
+	if test "x$want_boost" = "xyes"; then
+        AC_REQUIRE([AC_PROG_CC])
+		CPPFLAGS_SAVED="$CPPFLAGS"
+		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+		export CPPFLAGS
+
+		LDFLAGS_SAVED="$LDFLAGS"
+		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+		export LDFLAGS
+
+        AC_CACHE_CHECK(whether the Boost::Unit_Test_Framework library is available,
+					   ax_cv_boost_unit_test_framework,
+        [AC_LANG_PUSH([C++])
+			 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/test/unit_test.hpp>]],
+                                    [[using boost::unit_test::test_suite;
+							 test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" ); return 0;]])],
+                   ax_cv_boost_unit_test_framework=yes, ax_cv_boost_unit_test_framework=no)
+         AC_LANG_POP([C++])
+		])
+		if test "x$ax_cv_boost_unit_test_framework" = "xyes"; then
+			AC_DEFINE(HAVE_BOOST_UNIT_TEST_FRAMEWORK,,[define if the Boost::Unit_Test_Framework library is available])
+            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
+
+            if test "x$ax_boost_user_unit_test_framework_lib" = "x"; then
+			saved_ldflags="${LDFLAGS}"
+                for monitor_library in `ls $BOOSTLIBDIR/libboost_unit_test_framework*.so* $BOOSTLIBDIR/libboost_unit_test_framework*.dylib* $BOOSTLIBDIR/libboost_unit_test_framework*.a* 2>/dev/null` ; do
+                    if test -r $monitor_library ; then
+                       libextension=`echo $monitor_library | sed 's,.*/,,' | sed -e 's;^lib\(boost_unit_test_framework.*\)\.so.*$;\1;' -e 's;^lib\(boost_unit_test_framework.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_unit_test_framework.*\)\.a.*$;\1;'`
+                       ax_lib=${libextension}
+                       link_unit_test_framework="yes"
+                    else
+                       link_unit_test_framework="no"
+                    fi
+
+			    if test "x$link_unit_test_framework" = "xyes"; then
+                      BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib"
+                      AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
+					  break
+				    fi
+                done
+                if test "x$link_unit_test_framework" != "xyes"; then
+                for libextension in `ls $BOOSTLIBDIR/boost_unit_test_framework*.dll* $BOOSTLIBDIR/boost_unit_test_framework*.a* 2>/dev/null  | sed 's,.*/,,' | sed -e 's;^\(boost_unit_test_framework.*\)\.dll.*$;\1;' -e 's;^\(boost_unit_test_framework.*\)\.a.*$;\1;'` ; do
+                     ax_lib=${libextension}
+				    AC_CHECK_LIB($ax_lib, exit,
+                                 [BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib"; AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) link_unit_test_framework="yes"; break],
+                                 [link_unit_test_framework="no"])
+				done
+                fi
+            else
+                link_unit_test_framework="no"
+			saved_ldflags="${LDFLAGS}"
+                for ax_lib in boost_unit_test_framework-$ax_boost_user_unit_test_framework_lib $ax_boost_user_unit_test_framework_lib ; do
+                   if test "x$link_unit_test_framework" = "xyes"; then
+                      break;
+                   fi
+                   for unittest_library in `ls $BOOSTLIBDIR/lib${ax_lib}.so* $BOOSTLIBDIR/lib${ax_lib}.a* 2>/dev/null` ; do
+                   if test -r $unittest_library ; then
+                       libextension=`echo $unittest_library | sed 's,.*/,,' | sed -e 's;^lib\(boost_unit_test_framework.*\)\.so.*$;\1;' -e 's;^lib\(boost_unit_test_framework.*\)\.a*$;\1;'`
+                       ax_lib=${libextension}
+                       link_unit_test_framework="yes"
+                    else
+                       link_unit_test_framework="no"
+                    fi
+
+				if test "x$link_unit_test_framework" = "xyes"; then
+                        BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib"
+                        AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
+					    break
+				    fi
+                  done
+               done
+            fi
+            if test "x$ax_lib" = "x"; then
+                echo "Could not find a version of the library!"
+                exit 202
+            fi
+			if test "x$link_unit_test_framework" != "xyes"; then
+				echo "Could not link against $ax_lib !"
+                                exit 202
+			fi
+		fi
+
+		CPPFLAGS="$CPPFLAGS_SAVED"
+	LDFLAGS="$LDFLAGS_SAVED"
+	fi
+])
diff --git a/m4/ax_eigen.m4 b/m4/ax_eigen.m4
new file mode 100644
index 0000000000000000000000000000000000000000..c59846ad3c76c243757ef3f81f780d70862bd07b
--- /dev/null
+++ b/m4/ax_eigen.m4
@@ -0,0 +1,87 @@
+#
+#   AX_EIGEN([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro looks for EIGEN library (see http://eigen.tuxfamily.org/)
+#   On success, it sets the EIGEN_INCLUDE output variable to hold the 
+#   requisite includes.
+#
+#   The user may also use --with-eigen=<include> in order to use some specific
+#   Eigen library.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a Eigen library
+#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
+#   is not found. If ACTION-IF-FOUND is not specified, the default action
+#   will define HAVE_EIGEN.
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Steven G. Johnson <stevenj@alum.mit.edu>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 1
+
+AU_ALIAS([ACX_EIGEN], [AX_EIGEN])
+AC_DEFUN([AX_EIGEN], [
+ax_eigen_ok=no
+
+AC_ARG_WITH(eigen,
+        [AS_HELP_STRING([--with-eigen=directory], [use Eigen directory])],[
+                        EIGEN_INCLUDE="-I$with_eigen/include"
+                        ])
+
+# First, check EIGEN_INCLUDE environment variable
+if test "x$EIGEN_INCLUDE" != x; then
+	old_CXXFLAGS="$CXXFLAGS"
+        AC_LANG_PUSH([C++])
+	CXXFLAGS+=" $EIGEN_INCLUDE -DEIGEN"
+
+        # Check for the EIGEN header files
+        AC_CHECK_HEADERS([Eigen/Dense Eigen/LU],[ax_eigen_ok=yes],[ax_eigen_ok=no])
+        AC_LANG_POP()
+        CXXFLAGS="$old_CXXFLAGS"
+else
+        old_CXXFLAGS="$CXXFLAGS"
+        CXXFLAGS+=' -I/usr/include/eigen3 -DEIGEN'
+        AC_CHECK_HEADERS([Eigen/Dense Eigen/LU],[ax_eigen_ok=yes],[ax_eigen_ok=no])
+        CXXFLAGS="$old_CXXFLAGS"
+fi
+
+AC_SUBST(EIGEN_INCLUDE)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$ax_eigen_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_EIGEN,1,[Define if you have EIGEN library.]),[$1])
+        :
+else
+        ax_eigen_ok=no
+        $2
+fi
+])dnl AX_EIGEN
+
diff --git a/m4/ax_lapack.m4 b/m4/ax_lapack.m4
new file mode 100644
index 0000000000000000000000000000000000000000..74bd51255be997daaa7f3d7288ec165c2afd9423
--- /dev/null
+++ b/m4/ax_lapack.m4
@@ -0,0 +1,132 @@
+# ===========================================================================
+#         http://www.gnu.org/software/autoconf-archive/ax_lapack.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_LAPACK([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro looks for a library that implements the LAPACK linear-algebra
+#   interface (see http://www.netlib.org/lapack/). On success, it sets the
+#   LAPACK_LIBS output variable to hold the requisite library linkages.
+#
+#   To link with LAPACK, you should link with:
+#
+#     $LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS
+#
+#   in that order. BLAS_LIBS is the output variable of the AX_BLAS macro,
+#   called automatically. FLIBS is the output variable of the
+#   AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is
+#   sometimes necessary in order to link with F77 libraries. Users will also
+#   need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same
+#   reason.
+#
+#   The user may also use --with-lapack=<lib> in order to use some specific
+#   LAPACK library <lib>. In order to link successfully, however, be aware
+#   that you will probably need to use the same Fortran compiler (which can
+#   be set via the F77 env. var.) as was used to compile the LAPACK and BLAS
+#   libraries.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a LAPACK library
+#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
+#   is not found. If ACTION-IF-FOUND is not specified, the default action
+#   will define HAVE_LAPACK.
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Steven G. Johnson <stevenj@alum.mit.edu>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 7
+
+AU_ALIAS([ACX_LAPACK], [AX_LAPACK])
+AC_DEFUN([AX_LAPACK], [
+AC_REQUIRE([AX_BLAS])
+ax_lapack_ok=no
+
+AC_ARG_WITH(lapack,
+        [AS_HELP_STRING([--with-lapack=<lib>], [use LAPACK library <lib>])])
+case $with_lapack in
+        yes | "") ;;
+        no) ax_lapack_ok=disable ;;
+        -* | */* | *.a | *.so | *.so.* | *.o) LAPACK_LIBS="$with_lapack" ;;
+        *) LAPACK_LIBS="-l$with_lapack" ;;
+esac
+
+# Get fortran linker name of LAPACK function to check for.
+AC_F77_FUNC(cheev)
+
+# We cannot use LAPACK if BLAS is not found
+if test "x$ax_blas_ok" != xyes; then
+        ax_lapack_ok=noblas
+        LAPACK_LIBS=""
+fi
+
+# First, check LAPACK_LIBS environment variable
+if test "x$LAPACK_LIBS" != x; then
+        save_LIBS="$LIBS"; LIBS="$LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS"
+        AC_MSG_CHECKING([for $cheev in $LAPACK_LIBS])
+        AC_TRY_LINK_FUNC($cheev, [ax_lapack_ok=yes], [LAPACK_LIBS=""])
+        AC_MSG_RESULT($ax_lapack_ok)
+        LIBS="$save_LIBS"
+        if test $ax_lapack_ok = no; then
+                LAPACK_LIBS=""
+        fi
+fi
+
+# LAPACK linked to by default?  (is sometimes included in BLAS lib)
+if test $ax_lapack_ok = no; then
+        save_LIBS="$LIBS"; LIBS="$LIBS $BLAS_LIBS $FLIBS"
+        AC_CHECK_FUNC($cheev, [ax_lapack_ok=yes])
+        LIBS="$save_LIBS"
+fi
+
+# Generic LAPACK library?
+for lapack in lapack lapack_rs6k; do
+        if test $ax_lapack_ok = no; then
+                save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS"
+                AC_CHECK_LIB($lapack, $cheev,
+                    [ax_lapack_ok=yes; LAPACK_LIBS="-l$lapack"], [], [$FLIBS])
+                LIBS="$save_LIBS"
+        fi
+done
+
+AC_SUBST(LAPACK_LIBS)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$ax_lapack_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_LAPACK,1,[Define if you have LAPACK library.]),[$1])
+        :
+else
+        ax_lapack_ok=no
+        $2
+fi
+])dnl AX_LAPACK
+
diff --git a/m4/ax_suitesparse.m4 b/m4/ax_suitesparse.m4
new file mode 100644
index 0000000000000000000000000000000000000000..ea32ce0ed11348b27b1d17511ceed7c7f564e849
--- /dev/null
+++ b/m4/ax_suitesparse.m4
@@ -0,0 +1,131 @@
+# ===========================================================================
+#         http://www.gnu.org/software/autoconf-archive/ax_lapack.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_SUITESPARSE([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro looks for SuiteSparse library (see http://faculty.cse.tamu.edu/davis/suitesparse.html)
+#   On success, it sets the SUITESPARSE_LIBS output variable to hold the 
+#   requisite library linkages.
+#
+#   To link with SUITESPARSE, you should link with:
+#
+#     $SUITESPARSE_LIBS
+#
+#   The user may also use --with-suitesparse=<lib> in order to use some specific
+#   SuiteSparse library <lib>.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a SUITESPARSE library
+#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
+#   is not found. If ACTION-IF-FOUND is not specified, the default action
+#   will define HAVE_SUITESPARSE.
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Steven G. Johnson <stevenj@alum.mit.edu>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 1
+
+AU_ALIAS([ACX_SUITESPARSE], [AX_SUITESPARSE])
+AC_DEFUN([AX_SUITESPARSE], [
+ax_suitesparse_ok=no
+
+AC_ARG_WITH(suitesparse,
+        [AS_HELP_STRING([--with-suitesparse=directory], [use SuiteSparse directory])],[
+                        SUITESPARSE_LIBS="-L$with_suitesparse/lib"
+                        SUITESPARSE_INCLUDE="-I$with_suitesparse/include"
+                        ])
+
+#
+# Platform specific setup
+#
+#############################
+AC_CANONICAL_HOST
+# Check for which host we are on and setup a few things
+# specifically based on the host
+case $host_os in
+  darwin* )
+        RT_LIB=""
+        ;;
+  linux*)
+        RT_LIB="-lrt"
+        ;;
+    *)
+        RT_LIB="-lrt"
+        ;;
+esac
+
+# First, check SUITESPARSE_LIBS environment variable
+if test "x$SUITESPARSE_LIBS" != x; then
+        save_LIBS="$LIBS"; LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig -lm  $RT_LIB"
+        AC_MSG_CHECKING([for umf_l_malloc])
+        AC_TRY_LINK_FUNC(umf_l_malloc, [ax_suitesparse_ok=yes
+                                        SUITESPARSE_LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig"
+                                       ], [SUITESPARSE_LIBS=""])
+        AC_MSG_RESULT($ax_suitesparse_ok)
+        LIBS="$save_LIBS"
+        if test $ax_suitesparse_ok = no; then
+                SUITESPARSE_LIBS=""
+        fi
+	old_CFLAGS="$CFLAGS"
+        CFLAGS=$SUITESPARSE_INCLUDE
+	AC_CHECK_HEADER(umfpack.h,[],[SUITESPARSE_INCLUDE=""
+                                     ax_suitesparse_ok=no])
+                                     
+        CFLAGS="$old_CFLAGS"
+else
+        AC_CHECK_LIB(umfpack,umf_l_malloc,[SUITESPARSE_LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig"],[
+                                                                  SUITESPARSE_LIBS=""
+                                                                  ax_suitesparse_ok=no
+                                                                  ])
+        old_CFLAGS="$CFLAGS"
+        AC_CHECK_HEADER(umfpack.h,[],[SUITESPARSE_INCLUDE=""
+                                      ax_suitesparse_ok=no])
+        
+                                      
+        CFLAGS="$old_CFLAGS"
+fi
+
+AC_SUBST(SUITESPARSE_LIBS)
+AC_SUBST(SUITESPARSE_INCLUDE)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$ax_suitesparse_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_SUITESPARSE,1,[Define if you have SUITESPARSE library.]),[$1])
+        :
+else
+        ax_suitesparse_ok=no
+        $2
+fi
+])dnl AX_SUITESPARSE
+
diff --git a/openfpm_data b/openfpm_data
index 524a87a925b8230b019264ca957293f686974f88..a0c140b7772c3b39a817f03d3da76b7a60a9c074 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit 524a87a925b8230b019264ca957293f686974f88
+Subproject commit a0c140b7772c3b39a817f03d3da76b7a60a9c074
diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc
index 9775ccd311b45b0bc5d8ebeae15c8ac2fb974ae0..b2284a9e8702ee47966d78b0128bd05cc7764411 100644
--- a/openfpm_pdata.doc
+++ b/openfpm_pdata.doc
@@ -743,7 +743,7 @@ WARN_LOGFILE           =
 # spaces.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = src
+INPUT                  = src openfpm_data/src
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -811,7 +811,7 @@ EXCLUDE_SYMBOLS        =
 # that contain example code fragments that are included (see the \include
 # command).
 
-EXAMPLE_PATH           = src
+EXAMPLE_PATH           = src openfpm_data/src
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@@ -1407,7 +1407,7 @@ FORMULA_TRANSPARENT    = YES
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-USE_MATHJAX            = NO
+USE_MATHJAX            = YES
 
 # When MathJax is enabled you can set the default output format to be used for
 # the MathJax output. See the MathJax site (see:
diff --git a/script/detect_fortran b/script/detect_fortran
new file mode 100755
index 0000000000000000000000000000000000000000..9508e239911934413cb26a0cd4662395f3e7f152
--- /dev/null
+++ b/script/detect_fortran
@@ -0,0 +1,16 @@
+#! /bin/bash
+
+function detect_fortran()
+{
+    command -v $1 >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+      dgc_ret=0
+      return
+    fi
+
+    echo -e "$1 \033[92;1m SUCCESS \033[0m"
+
+    dgc_ret=1
+}
+
+
diff --git a/script/detect_gcc b/script/detect_gcc
index 87b5e510a77b34ccd7128b26861b1591a92e4ed9..be1e0ba55f9d141ea6f184b11c7463cfd206572d 100644
--- a/script/detect_gcc
+++ b/script/detect_gcc
@@ -14,29 +14,31 @@ function detect_gcc_or_clang()
     if [ $? == 0 ]; then
         echo "Apple LLVM based g++"
 
+        echo "Apple LLVM based g++"
+
         # we try the detect the version
-        major=$(g++ --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(.*/\1/g')
+        major=$(g++ --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(clang.*/\1/g')
 
         # we check if the detection is garbage
         echo "$major" | egrep -q '^[0-9]+$'
         if [ $? == 0 ]; then
-           dgc_major=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(.*/\1/g')
-           dgc_middle=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(.*/\2/g')
-           dgc_minor=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(.*/\3/g')
+           dgc_major=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(clang.*/\1/g')
+           dgc_middle=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(clang.*/\2/g')
+           dgc_minor=$($1 --version | grep LLVM | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*(clang.*/\3/g')
 
            if [ $dgc_major -gt 6 ]; then
              echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[92;1m SUCCESS \033[0m"
              dgc_compiler=clang++
              dgc_ret=1
            elif [ $dgc_major -lt 6 ]; then
-             echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[91;5;1m FAILED \033[0m your Apple clang $major.middle;$minor is too old, 6.1.0 or higher is required"
+             echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[91;5;1m FAILED \033[0m your Apple clang $dgc_major.$dgc_middle;$dgc_minor is too old, 6.1.0 or higher is required"
              dgc_ret=0
            elif [ $dgc_middle -gt 1  ]; then
              echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[92;1m SUCCESS \033[0m"
              dgc_compiler=clang++
              dgc_ret=1
-           elif [ $dgc_minor -lt 1 ]; then
-             echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[91;5;1m FAILED \033[0m your Apple clang $major.middle;$minor is too old, 6.1.0 or higher is required"
+           elif [ $dgc_middle -lt 1 ]; then
+             echo -e "clang++ $dgc_major.$dgc_middle.$dgc_minor \033[91;5;1m FAILED \033[0m your Apple clang $dgc_major.$dgc_middle;$dgc_minor is too old, 6.1.0 or higher is required"
              dgc_ret=0
            else
              echo -e "Apple clang++ version $dgc_major.$dgc_middle.$dgc_minor \033[92;1m SUCCESS \033[0m"
@@ -45,12 +47,13 @@ function detect_gcc_or_clang()
            fi
            return
         fi
+
         dgc_ret=2
         return
     fi
 
     $1 --version | grep $1
-    if [ $? == 0 -a x"$1" == x"gcc" ]; then
+    if [ $? == 0 -a x"$1" == x"g++" ]; then
         dgc_major=$($1 --version | grep $1 | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1/g')
         dgc_middle=$($1 --version | grep $1 | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\2/g')
         dgc_minor=$($1 --version | grep $1 | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\3/g')
diff --git a/script/discover_os b/script/discover_os
index 722665c7a1443e232862d748320d40962cfda035..4abdd2de3e9737ab0f71a87527572c74bfe07539 100644
--- a/script/discover_os
+++ b/script/discover_os
@@ -15,8 +15,6 @@ platform=unknown
         platform=osx
   elif [[ "$OSTYPE" == "cygwin" ]]; then
         echo -e "We are on\033[1;34m CYGWIN \033[0m"
-        echo "This platform is not supported"
-        exit 1
   elif [[ "$OSTYPE" == "msys" ]]; then
         echo -e "We are on\033[1;34m Microsoft Window \033[0m"
         echo "This platform is not supported"
diff --git a/script/discover_package_manager b/script/discover_package_manager
index 65a96dea2ea25853d7dda4746a26e7feafcf3a4a..8ea9d7279771da732af9f9dda1eb7df52c17e903 100644
--- a/script/discover_package_manager
+++ b/script/discover_package_manager
@@ -27,7 +27,7 @@ function discover_package_manager() {
       return
     fi
     command -v port >/dev/null
-    if [ command -v port >/dev/null 2>&1 ]; then
+    if [ $? -eq 0 ]; then
       discover_package_manager_ret="sudo port"
       return
     else
@@ -56,5 +56,4 @@ function discover_package_manager() {
   fi
 }
 
-echo "$discover_package_manager_ret"
-
+echo "$discover_package_manager_ret" 
diff --git a/script/install_BOOST.sh b/script/install_BOOST.sh
index 6d0daab43ee20f4de5110b683249e6c25c179589..1317d6f3bfae2b9e0b2da488ebc2382b489aa7bb 100644
--- a/script/install_BOOST.sh
+++ b/script/install_BOOST.sh
@@ -7,10 +7,11 @@ if [ -d "$1/BOOST" ]; then
   exit 0
 fi
 
-wget http://ppmcore.mpi-cbg.de/upload/boost_1_58_0.tar.bz2
-tar -xvf boost_1_58_0.tar.bz2
-cd boost_1_58_0
+wget http://ppmcore.mpi-cbg.de/upload/boost_1_60_0.tar.bz2
+tar -xvf boost_1_60_0.tar.bz2
+cd boost_1_60_0
 ./bootstrap.sh
 mkdir $1/BOOST
 ./b2 -j 4 install --prefix=$1/BOOST
+rm -rf boost_1_60_0
 
diff --git a/script/install_EIGEN.sh b/script/install_EIGEN.sh
new file mode 100755
index 0000000000000000000000000000000000000000..8ff1d220c1ec716daf08ea470247a5ecc44d4d24
--- /dev/null
+++ b/script/install_EIGEN.sh
@@ -0,0 +1,20 @@
+#! /bin/bash
+
+# check if the directory $1/EIGEN exist
+
+if [ -d "$1/EIGEN" ]; then
+  echo "EIGEN already installed"
+  exit 0
+fi
+
+wget http://ppmcore.mpi-cbg.de/upload/eigen-3.2.7.tar.bz2
+rm -rf eigen-eigen-b30b87236a1b
+tar -xf eigen-3.2.7.tar.bz2
+
+cd eigen-eigen-b30b87236a1b
+mkdir $1/EIGEN/
+mv Eigen $1/EIGEN/Eigen
+
+cd ..
+rm -rf eigen-eigen-b30b87236a1b
+
diff --git a/script/install_MPI.sh b/script/install_MPI.sh
index 9eebc151e0362b5e5056caa5c5149337bbb4ed8f..80a509f09c455889c281a044ae700be5cd12dd44 100644
--- a/script/install_MPI.sh
+++ b/script/install_MPI.sh
@@ -10,6 +10,20 @@ fi
 wget http://www.open-mpi.de/software/ompi/v1.8/downloads/openmpi-1.8.7.tar.bz2
 tar -xvf openmpi-1.8.7.tar.bz2
 cd openmpi-1.8.7
+
+#
+#                  --disable-mca-dso \
+#                 --disable-sysv-shmem \
+#                 --enable-cxx-exceptions \
+#                 --with-threads=posix \
+#                 --without-cs-fs \
+#                 --with-mpi-param_check=always \
+#                 --enable-contrib-no-build=vt,libompitrace \
+#
+#--enable-mca-no-build=paffinity,installdirs-windows,timer-windows,shmem-sysv
+#
+#
+
 sh ./configure --prefix=$1/MPI --enable-opal-multi-threads --enable-mpi-f90 $2 $3
 make -j 4
 mkdir $1/MPI
diff --git a/script/install_OPENBLAS.sh b/script/install_OPENBLAS.sh
new file mode 100755
index 0000000000000000000000000000000000000000..04b863c916991c7f54b6bae8689a1ed3025d1e07
--- /dev/null
+++ b/script/install_OPENBLAS.sh
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# check if the directory $1/OPENBLAS exist
+
+if [ -d "$1/OPENBLAS" ]; then
+  echo "OPENBLAS already installed"
+  exit 0
+fi
+
+wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.2.15.tar.gz
+rm -rf OpenBLAS-0.2.15
+tar -xf OpenBLAS-0.2.15.tar.gz
+cd OpenBLAS-0.2.15
+
+# configuration
+
+make
+mkdir $1/OPENBLAS
+make install PREFIX=$1/OPENBLAS
+rm -rf OpenBLAS-0.2.15
+
diff --git a/script/install_SUITESPARSE.sh b/script/install_SUITESPARSE.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b96df00adeb85212a022fd8f602fc704b91a6283
--- /dev/null
+++ b/script/install_SUITESPARSE.sh
@@ -0,0 +1,60 @@
+#! /bin/bash
+
+source script/discover_os
+
+discover_os
+
+# check if the directory $1/SUITESPARSE exist
+
+if [ -d "$1/SUITESPARSE" ]; then
+  echo "SUITESPARSE already installed"
+  exit 0
+fi
+
+wget http://ppmcore.mpi-cbg.de/upload/SuiteSparse-4.4.5.tar.gz
+rm -rf SuiteSparse
+tar -xf SuiteSparse-4.4.5.tar.gz
+if [ $? != 0 ]; then
+  echo "Fail to download SuiteSparse"
+  exit 1
+fi
+cd SuiteSparse
+
+# configuration
+
+if [ x"$platform" = x"osx"  ]; then
+    # installation for OSX
+
+    sed -i "" -e "s|INSTALL_LIB = \/usr\/local\/lib|INSTALL_LIB = "$1"\/SUITESPARSE\/lib|" SuiteSparse_config/SuiteSparse_config_Mac.mk
+    sed -i "" -e "s|INSTALL_INCLUDE = \/usr\/local\/include|INSTALL_INCLUDE = "$1"\/SUITESPARSE\/include|" SuiteSparse_config/SuiteSparse_config_Mac.mk
+    sed -i "" -e "s| LAPACK = -llapack|LAPACK = |" SuiteSparse_config/SuiteSparse_config_Mac.mk
+    sed -i "" -e "s| BLAS = -lopenblas|BLAS = -L"$1"/OPENBLAS/lib -lopenblas|" SuiteSparse_config/SuiteSparse_config_Mac.mk
+
+    ### Overwrite SuiteSparse_config.mk
+
+    rm SuiteSparse_config/SuiteSparse_config.mk
+    mv SuiteSparse_config/SuiteSparse_config_Mac.mk SuiteSparse_config/SuiteSparse_config.mk
+
+else
+    # Installation for linux
+
+    sed -i "/INSTALL_LIB\s=\s\/usr\/local\/lib/c\INSTALL_LIB = $1\/SUITESPARSE\/lib" SuiteSparse_config/SuiteSparse_config.mk
+    sed -i "/INSTALL_INCLUDE\s=\s\/usr\/local\/include/c\INSTALL_INCLUDE = $1\/SUITESPARSE\/include" SuiteSparse_config/SuiteSparse_config.mk
+    sed -i "/\sLAPACK\s=\s-llapack/c\LAPACK = " SuiteSparse_config/SuiteSparse_config.mk
+    sed -i "/\sBLAS\s=\s\-lopenblas/c\BLAS = -L$1/OPENBLAS/lib -lopenblas" SuiteSparse_config/SuiteSparse_config.mk
+
+fi
+
+export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$1/OPENBLAS/lib"
+
+make
+if [ $? != 0 ]; then
+  echo "Fail to compile SuiteSparse"
+  exit 1
+fi
+mkdir $1/SUITESPARSE
+mkdir $1/SUITESPARSE/lib
+mkdir $1/SUITESPARSE/include
+make install
+rm -rf SuiteSparse
+rm SuiteSparse-4.4.5.tar.gz
diff --git a/script/pre_req b/script/pre_req
index 41d6f0cbcfce3aacb023e83f3da57c51c2352f47..c7d808607ea323739f43166bb279617b48b61ea1 100644
--- a/script/pre_req
+++ b/script/pre_req
@@ -6,6 +6,7 @@ source script/discover_package_manager
 source script/discover_os
 source script/solve_autoconf
 source script/solve_gpp
+source script/solve_gfortran
 source script/solve_cmake
 source script/solve_git
 source script/solve_wget
@@ -14,6 +15,8 @@ source script/solve_brew
 source script/solve_libtool
 source script/detect_gcc
 source script/detect_osx
+source script/show_solutions
+source script/detect_fortran
 
 discover_os
 discover_package_manager $platform
@@ -30,6 +33,45 @@ if [ x"$platform" = x"osx" -a x"$pcman" = x""  ]; then
   fi
 fi
 
+### Additional package required for apt-get based distros
+if [ x"$platform" = x"linux" -a x"$pcman" = x"sudo apt-get"  ]; then
+  dpkg-query -l libbz2-dev
+  ret1=$?
+  dpkg-query -l python-dev
+  ret2=$?
+  dpkg-query -l libxml2-dev
+  ret3=$?
+  dpkg-query -l libxslt-dev
+  ret4=$?
+  if [ $ret1 -ne 0 -o $ret2 -ne 0 -o $ret3 -ne 0 -o $ret4 -ne 0 ]; then
+    echo "OpenFPM require additional packages in order to install correctly"
+    commands[0]="$pcman install libbz2-dev python-dev libxml2-dev libxslt-dev"
+    possible_solutions "${commands[@]}"
+    echo "Executing: ${command[$possible_solutions_ret]}"
+    eval ${commands[$possible_solutions_ret]}
+  fi
+fi
+
+### Additional package required for yum based distros
+if [ x"$platform" = x"linux" -a x"$pcman" = x"yum"  ]; then
+  rpm -V bzip2-devel
+  ret1=$?
+  rpm -V python-devel
+  ret2=$?
+  rpm -V libxml2-devel
+  ret3=$?
+  rpm -V libxslt-devel
+  ret4=$?
+  if [ $ret1 -ne 0 -o $ret2 -ne 0 -o $ret3 -ne 0 -o $ret4 -ne 0 ]; then
+    echo "OpenFPM require additional packages in order to install correctly"
+    commands[0]="su -c \"$pcman install bzip2-devel python-devel libxml2-devel libxslt-devel\""
+    commands[1]="sudo $pcman install bzip2-devel python-devel libxml2-devel libxslt-devel"
+    possible_solutions "${commands[@]}"
+    echo "Executing: ${command[$possible_solutions_ret]}"
+    eval ${commands[$possible_solutions_ret]}
+  fi
+fi
+
 command -v cmake >/dev/null 2>&1
 if [ $? -ne 0 ]; then
   echo >&2
@@ -141,10 +183,11 @@ fi
 
 #### Detecting g++
 
-detect_gcc_or_clang gcc
+detect_gcc_or_clang g++
 if [ $dgc_ret -eq 0 ]; then
+  echo -e "g++ \033[91;5;1m FAILED \033[0m"
   solve_gpp $platform
-  detect_gcc_or_clang gcc-4.9
+  detect_gcc_or_clang $compiler_gpp
   if [ $dgc_ret -eq 0 ]; then
     exit 1
   fi
@@ -157,25 +200,43 @@ elif [ $dgc_ret -eq 2 ]; then
   fi
 fi
 
+#### Detection gfortran
+
+detect_fortran gfortran
+if [ $dgc_ret -eq 0 ]; then
+  echo -e "gfortran\033[91;5;1m FAILED \033[0m"
+  solve_gfortran $platform
+  detect_fortran $compiler_fortran
+  if [ $dgc_ret -eq 0 ]; then
+    exit 1
+  fi
+else
+  echo -e "gfortran \033[92;1m SUCCESS \033[0m"
+fi
+
+
 command -v mpirun >/dev/null 2>&1
 if [ $? -ne 0 ]; then
   echo -e "No MPI"
 else
-  major=$(mpirun --version | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1/g')
-  middle=$(mpirun --version | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\2/g')
-  minor=$(mpirun --version | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\3/g')
+  major=$(mpirun --version 2>&1 | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1/g')
+  middle=$(mpirun --version 2>&1 | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\2/g')
+  minor=$(mpirun --version 2>&1 | grep mpirun | sed 's/.*\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\3/g')
   if [ $major -gt 1 ]; then
     echo -e "mpirun $major.$middle.$minor \033[92;1m SUCCESS \033[0m"
   elif [ $major -lt 1 ]; then
     echo -e "mpirun $major.$middle.$minor \033[91;5;1m FAILED \033[0m is too old, 1.8.1 at least required"
+    MPI_installation_required=yes
   elif [ $middle -gt 8  ]; then
     echo -e "mpirun $major.$middle.$minor \033[92;1m SUCCESS \033[0m"
   elif [ $middle -lt 8 ]; then
     echo -e "mpirun $major.$middle.$minor \033[91;5;1m FAILED \033[0m is too old, 1.8.1 at least required"
+    MPI_installation_required=yes
   elif [ $minor -gt 0  ]; then
     echo -e "mpirun $major.$middle.$minor \033[92;1m SUCCESS \033[0m"
   else
-    echo -e "mpirun $major.$middle.$minor \033[92;1m FAILED \033[0m is too old, 1.8.1 at least required"
+    echo -e "mpirun $major.$middle.$minor \033[91;5;1m FAILED \033[0m is too old, 1.8.1 at least required"
+    MPI_installation_required=yes
   fi 
 fi
 }
diff --git a/script/remove_old b/script/remove_old
new file mode 100755
index 0000000000000000000000000000000000000000..56d1b51c2161f351704d5292d8568fb61540286e
--- /dev/null
+++ b/script/remove_old
@@ -0,0 +1,89 @@
+#! /bin/bash
+
+function remove_old()
+{
+    ## Get the previos openFPM installation
+    previous_inst=$(cat $PWD/install_dir)
+    
+    if [ x"$previous_inst" != x"" ]; then
+        echo "Found previous installation"
+        if [ -d "$previous_inst/openfpm_pdata" ]; then
+            echo "Removing the folder: $previous_inst/openfpm_pdata"
+            rm -rf "$previous_inst/openfpm_pdata"
+            if [ -d "$previous_inst/openfpm_pdata" ]; then
+                echo -e "\033[91;5;1m Remove failed, manually remove the folder $previous_inst/openfpm_pdata, I am waiting ... \033[0m"
+            fi
+            while [ -d "$previous_inst/openfpm_pdata" ]
+            do
+                sleep 1
+            done
+            echo "Thanks"
+        fi
+
+        if [ -d "$previous_inst/openfpm_data" ]; then
+            echo "Removing the folder: $previous_inst/openfpm_data"
+            rm -rf "$previous_inst/openfpm_data"
+            if [ -d "$previous_inst/openfpm_data" ]; then
+                echo -e "\033[91;5;1m Remove failed, manually remove the folder $previous_inst/openfpm_data, I am waiting ... \033[0m"
+            fi
+            while [ -d "$previous_inst/openfpm_data" ]
+            do
+                sleep 1
+            done
+            echo "Thanks"
+        fi
+    
+        if [ -d "$previous_inst/openfpm_devices" ]; then
+            echo "Removing the folder: $previous_inst/openfpm_devices"
+            rm -rf "$previous_inst/openfpm_devices"
+            if [ -d "$previous_inst/openfpm_devices" ]; then
+                echo -e "\033[91;5;1m Remove failed, manually remove the folder $previous_inst/openfpm_devices, I am waiting ... \033[0m"
+            fi
+            while [ -d "$previous_inst/openfpm_devices" ]
+            do
+                sleep 1
+            done
+            echo "Thanks"
+        fi
+    
+        if [ -d "$previous_inst/openfpm_io" ]; then
+            echo "Removing the folder: $previous_inst/openfpm_io"
+            rm -rf "$previous_inst/openfpm_io"
+            if [ -d "$previous_inst/openfpm_io" ]; then
+                echo -e "\033[91;5;1m Remove failed, manually remove the folder $previous_inst/openfpm_io, I am waiting ... \033[0m"
+            fi
+            while [ -d "$previous_inst/openfpm_io" ]
+            do
+                sleep 1
+            done
+            echo "Thanks"
+        fi
+    
+        if [ -d "$previous_inst/openfpm_vcluster" ]; then
+            echo "Removing the folder: $previous_inst/openfpm_vcluster"
+            rm -rf "$previous_inst/openfpm_vcluster"
+            if [ -d "$previous_inst/openfpm_vcluster" ]; then
+                echo -e "\033[91;5;1m Remove failed, manually remove the folder $previous_inst/openfpm_vcluster, I am waiting ... \033[0m"
+            fi
+            while [ -d "$previous_inst/openfpm_vcluster" ]
+            do
+                sleep 1
+            done
+            echo "Thanks"
+        fi
+    fi
+    
+    ## Check the installed version of the dependencies
+    
+    is_update=$(cat $1/BOOST/include/boost/version.hpp | grep "#define BOOST_VERSION 106000")
+    if [ x"$is_update" == x"" ]; then
+    	echo "New boost version, removing the old one"
+        rm -rf $1/BOOST/include
+        rm -rf $1/BOOST/lib
+	rm -rf $1/BOOST
+    fi
+
+    
+}
+
+
diff --git a/script/solve_gfortran b/script/solve_gfortran
new file mode 100755
index 0000000000000000000000000000000000000000..af6d465c489115ae347a13c09544aec4f2b785c3
--- /dev/null
+++ b/script/solve_gfortran
@@ -0,0 +1,26 @@
+#! /bin/bash 
+
+function solve_gfortran() {        
+source script/show_solutions
+source script/discover_package_manager
+discover_package_manager $1
+pcman=$discover_package_manager_ret
+
+if [ x"$pcman" = x"" ]; then
+        exit 1
+fi
+
+if [ x"$1" = x"osx" ]; then 
+        commands[0]="$pcman install gcc"
+        possible_solutions "${commands[@]}"
+        echo "Executing: ${command[$possible_solutions_ret]}"
+        eval ${commands[$possible_solutions_ret]}
+elif [ x"$1" = x"linux"  ]; then
+        commands[0]="su -c \"$pcman install gfortran\""
+        commands[1]="sudo $pcman install gfortran"
+        possible_solutions "${commands[@]}"
+        echo "Executing: ${command[$possible_solutions_ret]}"
+        eval ${commands[$possible_solutions_ret]}
+fi 
+}
+
diff --git a/script/solve_gpp b/script/solve_gpp
index 548fb6ca5735baaf196a4074d21dd27aaaa29956..f94d74ddeb7845bb0cae6e60c2f8288a51ab54f0 100644
--- a/script/solve_gpp
+++ b/script/solve_gpp
@@ -19,6 +19,14 @@ if [ x"$1" = x"osx" ]; then
         compiler_opt=" CXX=g++-4.9 CC=gcc-4.9  "
         compiler_gcc="gcc-4.9"
         compiler_gpp="g++-4.9"
+elif [ x"$1" = x"linux"  ]; then
+        commands[0]="su -c \"$pcman install gcc-c++\""
+        commands[1]="sudo $pcman install gcc-c++"
+        possible_solutions "${commands[@]}"
+        echo "Executing: ${command[$possible_solutions_ret]}"
+        eval ${commands[$possible_solutions_ret]}
+        compiler_gcc="gcc"
+        compiler_gpp="g++"
 fi
 }
 
diff --git a/src/Decomposition/DLB.hpp b/src/DLB/DLB.hpp
similarity index 100%
rename from src/Decomposition/DLB.hpp
rename to src/DLB/DLB.hpp
diff --git a/src/Decomposition/DLB_unit_test.hpp b/src/DLB/DLB_unit_test.hpp
similarity index 100%
rename from src/Decomposition/DLB_unit_test.hpp
rename to src/DLB/DLB_unit_test.hpp
diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp
index 28245e258c1591044376e9a5c954594c9ba48c6c..0ca9ec24bbb0390e16246cd23f3d350ddc946e6b 100755
--- a/src/Decomposition/CartDecomposition.hpp
+++ b/src/Decomposition/CartDecomposition.hpp
@@ -2,7 +2,7 @@
  * CartDecomposition.hpp
  *
  *  Created on: Oct 07, 2015
- *      Author: Antonio Leo
+ *      Author: Pietro Incardona, Antonio Leo
  */
 
 #ifndef CARTDECOMPOSITION_HPP
@@ -29,37 +29,29 @@
 #include "ie_loc_ghost.hpp"
 #include "ie_ghost.hpp"
 #include "nn_processor.hpp"
-#include "GraphMLWriter.hpp"
-#include "ParMetisDistribution.hpp"
-#include "DistParMetisDistribution.hpp"
-#include "MetisDistribution.hpp"
-#include "DLB.hpp"
+#include "GraphMLWriter/GraphMLWriter.hpp"
+#include "Distribution/ParMetisDistribution.hpp"
+#include "Distribution/DistParMetisDistribution.hpp"
+#include "Distribution/MetisDistribution.hpp"
+#include "DLB/DLB.hpp"
+#include "util/se_util.hpp"
+#include "util/mathutil.hpp"
 
 #define CARTDEC_ERROR 2000lu
 
-// Macro that decide what to do in case of error
-#ifdef STOP_ON_ERROR
-#define ACTION_ON_ERROR() exit(1);
-#elif defined(THROW_ON_ERROR)
-#define ACTION_ON_ERROR() throw CARTDEC_ERROR;
-#else
-#define ACTION_ON_ERROR()
-#endif
-
 /**
  * \brief This class decompose a space into subspaces
  *
  * \tparam dim is the dimensionality of the physical domain we are going to decompose.
  * \tparam T type of the space we decompose, Real, Integer, Complex ...
  * \tparam Memory Memory factory used to allocate memory
- * \tparam Domain Structure that contain the information of your physical domain
  * \tparam Distribution type of distribution, can be ParMetisDistribution or MetisDistribution
  *
  * Given an N-dimensional space, this class decompose the space into a Cartesian grid of small
- * sub-sub-domain. At each sub-sub-domain is assigned  an id that identify which processor is
- * going to take care of that part of space (in general the space assigned to a processor is
- * simply connected), a second step merge several sub-sub-domain with same id into bigger region
- *  sub-domain with the id. Each sub-domain has an extended space called ghost part
+ * sub-sub-domain. To each sub-sub-domain is assigned an id that identify at which processor is
+ * assigned (in general the union of all the sub-sub-domain assigned to a processor is
+ * simply connected space), a second step merge several sub-sub-domain with same id into bigger region
+ *  sub-domain. Each sub-domain has an extended space called ghost part
  *
  * Assuming that VCluster.getProcessUnitID(), equivalent to the MPI processor rank, return the processor local
  * processor id, we define
@@ -87,7 +79,7 @@
  *
  */
 
-template<unsigned int dim, typename T, typename Memory = HeapMemory, template<unsigned int, typename > class Domain = Box, typename Distribution = ParMetisDistribution<dim, T>>
+template<unsigned int dim, typename T, typename Memory = HeapMemory, typename Distribution = ParMetisDistribution<dim, T>>
 class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>
 {
 
@@ -123,7 +115,7 @@ private:
 	CellDecomposer_sm<dim, T> cd;
 
 	//! rectangular domain to decompose
-	Domain<dim, T> domain;
+	::Box<dim,T> domain;
 
 	//! Box Spacing
 	T spacing[dim];
@@ -131,11 +123,28 @@ private:
 	//! Runtime virtual cluster machine
 	Vcluster & v_cl;
 
-	//! Create ditribution
+	//! Create distribution
 	Distribution dist;
 
-	//! Cell-list that store the geometrical information of the local internal ghost boxes
-	CellList<dim, T, FAST> lgeo_cell;
+	// Smallest subdivision on each direction
+	::Box<dim,T> ss_box;
+
+	::Box<dim,T> bbox;
+
+	// Heap memory receiver
+	HeapMemory hp_recv;
+
+	// vector v_proc
+	openfpm::vector<size_t> v_proc;
+
+	// reference counter of the object in case is shared between object
+	long int ref_cnt;
+
+	// ghost info
+	Ghost<dim,T> ghost;
+
+	// Boundary condition info
+	size_t bc[dim];
 
 	// Heap memory receiver
 	HeapMemory hp_recv;
@@ -154,7 +163,7 @@ private:
 	 * \param v_cl Virtual cluster, used internally for communications
 	 *
 	 */
-	void createSubdomains(Vcluster & v_cl)
+	void createSubdomains(Vcluster & v_cl, const size_t (& bc)[dim])
 	{
 #ifdef SE_CLASS1
 		if (&v_cl == NULL)
@@ -188,7 +197,7 @@ private:
 		openfpm::vector<::Box<dim, size_t>> loc_box;
 
 		// optimize the decomposition
-		d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor);
+		d_o.template optimize<nm_v::sub_id, nm_v::proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,bc);
 
 		// Initialize ss_box and bbox
 		if (loc_box.size() >= 0)
@@ -197,18 +206,22 @@ private:
 			SpaceBox<dim, T> sub_d(sub_dc);
 			sub_d.mul(spacing);
 			sub_d.expand(spacing);
+			sub_d += domain.getP1();
+
+			// we add the origin of the domain (P1) to shift the sub-domain into domain coordinates
 
 			// Fixing sub-domains to cover all the domain
 
 			// Fixing sub_d
-			// if (loc_box) is a the boundary we have to ensure that the box span the full
+			// if (loc_box) is at the boundary we have to ensure that the box span the full
 			// domain (avoiding rounding off error)
 			for (size_t i = 0; i < dim; i++)
 			{
 				if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
-				{
 					sub_d.setHigh(i, domain.getHigh(i));
-				}
+
+				if (sub_dc.getLow(i) == 0)
+					sub_d.setLow(i,domain.getLow(i));
 			}
 
 			// add the sub-domain
@@ -219,6 +232,10 @@ private:
 			bbox = sub_d;
 		}
 
+/*		if (loc_box.size())
+		bbox.zero();
+		ss_box = domain;*/
+
 		// convert into sub-domain
 		for (size_t s = 1; s < loc_box.size(); s++)
 		{
@@ -228,6 +245,7 @@ private:
 			// re-scale and add spacing (the end is the starting point of the next domain + spacing)
 			sub_d.mul(spacing);
 			sub_d.expand(spacing);
+			sub_d += domain.getP1();
 
 			// Fixing sub-domains to cover all the domain
 
@@ -237,9 +255,10 @@ private:
 			for (size_t i = 0; i < dim; i++)
 			{
 				if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
-				{
 					sub_d.setHigh(i, domain.getHigh(i));
-				}
+
+				if (sub_dc.getLow(i) == 0)
+					sub_d.setLow(i,domain.getLow(i));
 			}
 
 			// add the sub-domain
@@ -252,7 +271,9 @@ private:
 			ss_box.contained(sub_d);
 		}
 
-		nn_prcs<dim, T>::create(box_nn_processor, sub_domains);
+		nn_prcs<dim,T>::create(box_nn_processor, sub_domains);
+		nn_prcs<dim,T>::refine_ss_box(ss_box);
+		nn_prcs<dim,T>::applyBC(domain,ghost,bc);
 
 		// fill fine_s structure
 		// fine_s structure contain the processor id for each sub-sub-domain
@@ -270,10 +291,22 @@ private:
 			++it;
 		}
 
+		Initialize_geo_cell_lists();
+	}
+
+	/*! \brief Initialize geo_cell lists
+	 *
+	 *
+	 *
+	 */
+	void Initialize_geo_cell_lists()
+	{
 		// Get the smallest sub-division on each direction
 		::Box<dim, T> unit = getSmallestSubdivision();
 		// Get the processor bounding Box
-		::Box<dim, T> bound = getProcessorBounds();
+		::Box<dim,T> bound = getProcessorBounds();
+		// Not strictly necessary, but enlarging the bound by the ghost is safer
+		bound.enlarge(ghost);
 
 		// calculate the sub-divisions
 		size_t div[dim];
@@ -288,9 +321,10 @@ private:
 			orig.get(i) = bound.getLow(i);
 
 		// Initialize the geo_cell structure
-		ie_ghost<dim, T>::Initialize_geo_cell(domain, div, orig);
-		lgeo_cell.Initialize(domain, div, orig);
+		ie_ghost<dim,T>::Initialize_geo_cell(bound,div,orig);
 
+		// Initialize shift vectors
+		ie_ghost<dim,T>::generateShiftVectors(domain);
 	}
 
 	/*! \brief Calculate communication and migration costs
@@ -309,7 +343,7 @@ private:
 		float gh_v = (gh_s * b_s);
 
 		// multiply for sub-sub-domain side for each domain
-		for (int i = 2; i < dim; i++)
+		for (size_t i = 2; i < dim; i++)
 			gh_v *= b_s;
 
 		size_t norm = (size_t) (1.0 / gh_v);
@@ -330,9 +364,6 @@ private:
 		}
 	}
 
-	// Save the ghost boundaries
-	Ghost<dim, T> ghost;
-
 	/*! \brief Create the subspaces that decompose your domain
 	 *
 	 */
@@ -368,21 +399,177 @@ private:
 		}
 	}
 
+
+	/*! \brief It copy the sub-domains into another CartesianDecomposition object extending them
+	 *
+	 * \see duplicate (in case of extended domain)
+	 *
+	 * \param cart Cartesian decomposition object
+	 * \param box Extended domain
+	 *
+	 */
+	void extend_subdomains(CartDecomposition<dim,T> & cart, const ::Box<dim,T> & ext_dom) const
+	{
+		// Box
+		typedef ::Box<dim,T> b;
+
+		cart.bbox = ext_dom;
+		cart.ss_box = ext_dom;
+
+		for (size_t i = 0 ; i < sub_domains.size() ; i++)
+		{
+			::Box<dim,T> box;
+
+			// Calculate the extended box
+			for (size_t j = 0 ; j < dim ; j++)
+			{
+				if (sub_domains.template get<b::p1>(i)[j] == domain.getLow(j))
+					box.setLow(j,ext_dom.getLow(j));
+				else
+					box.setLow(j,sub_domains.template get<b::p1>(i)[j]);
+
+				if (sub_domains.template get<b::p2>(i)[j] == domain.getHigh(j))
+					box.setHigh(j,ext_dom.getHigh(j));
+				else
+					box.setHigh(j,sub_domains.template get<b::p2>(i)[j]);
+			}
+
+			// add the subdomain
+			cart.sub_domains.add(box);
+
+			// Calculate the bound box
+			cart.bbox.enclose(box);
+
+			// Create the smallest box contained in all sub-domain
+			cart.ss_box.contained(box);
+		}
+	}
+
+	/*! \brief Extend the fines for the new Cartesian decomposition
+	 *
+	 * \param new_fines extended fine_s
+	 * \param old_fines old fine_s
+	 *
+	 */
+	void extend_fines(CartDecomposition<dim,T> & cart) const
+	{
+		// Extension, first we calculate the extensions of the new domain compared
+		// to the old one in cell units (each cell unit is a sub-sub-domain)
+		::Box<dim,size_t> ext;
+		// Extension of the new fines structure
+		::Box<dim,size_t> n_fines_ext;
+		// Extension of the old fines structure
+		::Box<dim,size_t> o_fines_ext;
+
+		size_t sz_new[dim];
+		size_t sz_old[dim];
+
+		for (size_t i = 0; i < dim ; i++)
+		{
+			size_t p1 = (domain.getLow(i) - this->domain.getLow(i)) / cd.getCellBox().getHigh(i) + 1;
+			size_t p2 = (domain.getLow(i) - this->domain.getLow(i)) / cd.getCellBox().getHigh(i) + 1;
+
+			ext.setLow(i,p1);
+			ext.setHigh(i,p2);
+			sz_new[i] = p1+p2+cd.getGrid().size(i);
+			sz_old[i] = cd.getGrid().size(i);
+		}
+
+		grid_sm<dim,void> info_new(sz_new);
+		grid_sm<dim,void> info_old(sz_old);
+
+		// resize the new fines
+		cart.fine_s.resize(info_new.size());
+
+		// we create an iterator that iterate across the full new fines
+		grid_key_dx_iterator<dim> fines_t(info_new);
+
+		while (fines_t.isNext())
+		{
+			auto key = fines_t.get();
+
+			// new_fines is bigger than old_fines structure
+			// out of bound key must be adjusted
+			// The adjustment produce a natural extension
+			// a representation can be seen in the figure of
+			// CartDecomposition duplicate function with extended domains
+
+			grid_key_dx<dim> key_old;
+			for (size_t i = 0 ; i < dim ; i++)
+			{
+				key_old.set_d(i,(long int)key.get(i) - ext.getLow(i));
+				if (key_old.get(i) < 0)
+					key_old.set_d(i,0);
+				else if(key_old.get(i) >= (long int)info_old.size(i) )
+					key_old.set_d(i,info_old.size(i)-1);
+			}
+
+			cart.fine_s.get(info_new.LinId(key)) = fine_s.get(info_old.LinId(key_old));
+
+			++fines_t;
+		}
+
+		cart.gr.setDimensions(sz_new);
+
+		// the new extended CellDecomposer must be consistent with the old cellDecomposer.
+		cart.cd.setDimensions(cd,ext);
+	}
+
 public:
 
-	/*! \brief Cart decomposition constructor
+	static constexpr int dims = dim;
+
+	typedef T stype;
+
+	//! Increment the reference counter
+	void incRef()
+	{ref_cnt++;}
+
+	//! Decrement the reference counter
+	void decRef()
+	{ref_cnt--;}
+
+	//! Return the reference counter
+	long int ref()
+	{
+		return ref_cnt;
+	}
+
+	/*! \brief Cartesian decomposition constructor
 	 *
 	 * \param v_cl Virtual cluster, used internally to handle or pipeline communication
 	 *
 	 */
 	CartDecomposition(Vcluster & v_cl) :
-			nn_prcs<dim, T>(v_cl), v_cl(v_cl), dist(v_cl)
+			nn_prcs<dim, T>(v_cl), v_cl(v_cl), dist(v_cl),ref_cnt(0)
 	{
 		// Reset the box to zero
 		bbox.zero();
 	}
 
-	//! Cart decomposition destructor
+	/*! \brief Cartesian decomposition copy constructor
+	 *
+     * \param cart object to copy
+	 *
+	 */
+	CartDecomposition(const CartDecomposition<dim,T,Memory> & cart)
+	:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
+	{
+		this->operator=(cart);
+	}
+
+	/*! \brief Cartesian decomposition copy constructor
+	 *
+     * \param cart object to copy
+	 *
+	 */
+	CartDecomposition(CartDecomposition<dim,T,Memory> && cart)
+	:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
+	{
+		this->operator=(cart);
+	}
+
+	//! Cartesian decomposition destructor
 	~CartDecomposition()
 	{
 	}
@@ -447,6 +634,68 @@ public:
 		}
 	};
 
+	/*! \brief class to select the returned id by ghost_processorID
+	 *
+	 */
+	class shift_id
+	{
+	public:
+		/*! \brief Return the shift id
+		 *
+		 * \param p structure containing the id informations
+		 * \param b_id box_id
+		 *
+		 * \return shift_id id
+		 *
+		 */
+		inline static size_t id(p_box<dim,T> & p, size_t b_id)
+		{
+			return p.shift_id;
+		}
+	};
+
+	/*! \brief Apply boundary condition to the point
+	 *
+	 * \param p Point to apply the boundary condition
+	 *
+	 */
+	void applyPointBC(float (& pt)[dim]) const
+	{
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == PERIODIC)
+				pt[i] = openfpm::math::periodic_l(pt[i],domain.getHigh(i),domain.getLow(i));
+		}
+	}
+
+	/*! \brief Apply boundary condition to the point
+	 *
+	 * \param p Point to apply the boundary condition
+	 *
+	 */
+	void applyPointBC(Point<dim,T> & pt) const
+	{
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == PERIODIC)
+				pt.get(i) = openfpm::math::periodic_l(pt.get(i),domain.getHigh(i),domain.getLow(i));
+		}
+	}
+
+	/*! \brief Apply boundary condition to the point
+	 *
+	 * \param encapsulated object
+	 *
+	 */
+	template<typename Mem> void applyPointBC(encapc<1,Point<dim,T>,Mem> && pt) const
+	{
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == PERIODIC)
+				pt.template get<0>()[i] = openfpm::math::periodic_l(pt.template get<0>()[i],domain.getHigh(i),domain.getLow(i));
+		}
+	}
+
 	/*! It calculate the internal ghost boxes
 	 *
 	 * Example: Processor 10 calculate
@@ -456,63 +705,61 @@ public:
 	 *
 	 \verbatim
 
-	 +----------------------------------------------------+
-	 |                                                    |
-	 |                 Processor 8                        |
-	 |                 Sub-domain 0                       +-----------------------------------+
-	 |                                                    |                                   |
-	 |                                                    |                                   |
-	 ++--------------+---+---------------------------+----+        Processor 9                |
-	 |              |   |     B8_0                  |    |        Subdomain 0                |
-	 |              +------------------------------------+                                   |
-	 |              |   |                           |    |                                   |
-	 |              |   |  XXXXXXXXXXXXX XX         |B9_0|                                   |
-	 |              | B |  X Processor 10 X         |    |                                   |
-	 | Processor 5  | 5 |  X Sub-domain 0 X         |    |                                   |
-	 | Subdomain 0  | _ |  X              X         +----------------------------------------+
-	 |              | 0 |  XXXXXXXXXXXXXXXX         |    |                                   |
-	 |              |   |                           |    |                                   |
-	 |              |   |                           |    |        Processor 9                |
-	 |              |   |                           |B9_1|        Subdomain 1                |
-	 |              |   |                           |    |                                   |
-	 |              |   |                           |    |                                   |
-	 |              |   |                           |    |                                   |
-	 +--------------+---+---------------------------+----+                                   |
-														 |                                   |
-														 +-----------------------------------+
-
-	 \endverbatim
-
-	 and also
-	 G8_0 G9_0 G9_1 G5_0 (External ghost boxes)
-
-	 \verbatim
-
-	 +----------------------------------------------------+
-	 |                                                    |
-	 |                 Processor 8                        |
-	 |                 Sub-domain 0                       +-----------------------------------+
-	 |           +---------------------------------------------+                              |
-	 |           |         G8_0                           |    |                              |
-	 ++--------------+------------------------------------+    |   Processor 9                |
-	 |          |   |                                    |    |   Subdomain 0                |
-	 |          |   |                                    |G9_0|                              |
-	 |          |   |                                    |    |                              |
-	 |          |   |      XXXXXXXXXXXXX XX              |    |                              |
-	 |          |   |      X Processor 10 X              |    |                              |
-	 | Processor|5  |      X Sub-domain 0 X              |    |                              |
-	 | Subdomain|0  |      X              X              +-----------------------------------+
-	 |          |   |      XXXXXXXXXXXXXXXX              |    |                              |
-	 |          | G |                                    |    |                              |
-	 |          | 5 |                                    |    |   Processor 9                |
-	 |          | | |                                    |    |   Subdomain 1                |
-	 |          | 0 |                                    |G9_1|                              |
-	 |          |   |                                    |    |                              |
-	 |          |   |                                    |    |                              |
-	 +--------------+------------------------------------+    |                              |
-				 |                                        |    |                              |
-				 +----------------------------------------+----+------------------------------+
-
++----------------------------------------------------+
+|                                                    |
+|                 Processor 8                        |
+|                 Sub+domain 0                       +-----------------------------------+
+|                                                    |                                   |
+|                                                    |                                   |
+++--------------+---+---------------------------+----+        Processor 9                |
+ |              |   |     B8_0                  |    |        Subdomain 0                |
+ |              +------------------------------------+                                   |
+ |              |   |                           |    |                                   |
+ |              |   |                           |B9_0|                                   |
+ |              | B |    Local processor        |    |                                   |
+ | Processor 5  | 5 |    Subdomain 0            |    |                                   |
+ | Subdomain 0  | _ |                           +----------------------------------------+
+ |              | 0 |                           |    |                                   |
+ |              |   |                           |    |                                   |
+ |              |   |                           |    |        Processor 9                |
+ |              |   |                           |B9_1|        Subdomain 1                |
+ |              |   |                           |    |                                   |
+ |              |   |                           |    |                                   |
+ |              |   |                           |    |                                   |
+ +--------------+---+---------------------------+----+                                   |
+                                                     |                                   |
+                                                     +-----------------------------------+
+
+
+ \endverbatim
+
+       and also
+       G8_0 G9_0 G9_1 G5_0 (External ghost boxes)
+
+      +----------------------------------------------------+
+      |                 Processor 8                        |
+      |                 Subdomain 0                        +-----------------------------------+
+      |                                                    |                                   |
+      |           +---------------------------------------------+                              |
+      |           |         G8_0                           |    |                              |
++-----+---------------+------------------------------------+    |   Processor 9                |
+|                 |   |                                    |    |   Subdomain 0                |
+|                 |   |                                    |G9_0|                              |
+|                 |   |                                    |    |                              |
+|                 |   |                                    |    |                              |
+|                 |   |        Local processor             |    |                              |
+|  Processor 5    |   |        Sub+domain 0                |    |                              |
+|  Subdomain 0    |   |                                    +-----------------------------------+
+|                 |   |                                    |    |                              |
+|                 | G |                                    |    |                              |
+|                 | 5 |                                    |    |   Processor 9                |
+|                 | | |                                    |    |   Subdomain 1                |
+|                 | 0 |                                    |G9_1|                              |
+|                 |   |                                    |    |                              |
+|                 |   |                                    |    |                              |
++---------------------+------------------------------------+    |                              |
+                  |                                        |    |                              |
+                  +----------------------------------------+----+------------------------------+
 
 	 \endverbatim
 
@@ -523,18 +770,19 @@ public:
 	 *
 	 *
 	 \verbatim
-					  ^ p2[1]
-					  |
-					  |
-				 +----+----+
-				 |         |
-				 |         |
-	  p1[0]<-----+         +----> p2[0]
-				 |         |
-				 |         |
-				 +----+----+
-					  |
-					  v  p1[1]
+
+	 	 	 	 	 ^ p2[1]
+	 	 	 	 	 |
+	 	 	 	 	 |
+	 	 	 	+----+----+
+	 	 	 	|         |
+	 	 	 	|         |
+	 p1[0]<-----+         +----> p2[0]
+	 	 	 	|         |
+	 	 	 	|         |
+	 	 	 	+----+----+
+	 	 	 	 	 |
+	 	 	 	 	 v  p1[1]
 
 	 \endverbatim
 
@@ -549,9 +797,9 @@ public:
 
 		for (size_t i = 0; i < dim; i++)
 		{
-			if (ghost.template getLow(i) >= domain.template getHigh(i) / gr.size(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i))
+			if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i))
 			{
-				std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one domain" << "\n";
+				std::cerr << "Error " << __FILE__ << ":" << __LINE__  << " : Ghost are bigger than one sub-domain" << "\n";
 			}
 		}
 #endif
@@ -562,21 +810,238 @@ public:
 		ie_ghost<dim, T>::create_box_nn_processor_ext(v_cl, ghost, sub_domains, box_nn_processor, *this);
 		ie_ghost<dim, T>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this);
 
-		// ebox must come after ibox (in this case)
-
-		ie_loc_ghost<dim, T>::create_loc_ghost_ibox(ghost, sub_domains);
-		ie_loc_ghost<dim, T>::create_loc_ghost_ebox(ghost, sub_domains);
+		ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc);
 
 		// get the smallest sub-domain dimension on each direction
 		for (size_t i = 0; i < dim; i++)
 		{
-			if (ghost.template getLow(i) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= domain.template getHigh(i) / gr.size(i))
+			if (fabs(ghost.template getLow(i)) >= ss_box.getHigh(i) || ghost.template getHigh(i) >= ss_box.getHigh(i))
 			{
-				std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " : Ghost are bigger than one domain" << "\n";
+				std::cerr << "Error " << __FILE__ << ":" << __LINE__  << " : Ghost are bigger than one sub-domain" << "\n";
 			}
 		}
 	}
 
+	/*! \brief It create another object that contain the same decomposition information but with different ghost boxes
+	 *
+	 * \param g ghost
+	 *
+	 * \return a duplicated decomposition with different ghost boxes
+	 *
+	 */
+	CartDecomposition<dim,T,Memory> duplicate(const Ghost<dim,T> & g) const
+	{
+		CartDecomposition<dim,T,Memory> cart(v_cl);
+
+		cart.box_nn_processor = box_nn_processor;
+		cart.sub_domains = sub_domains;
+		cart.fine_s = fine_s;
+
+		cart.gr = gr;
+		cart.cd = cd;
+		cart.domain = domain;
+		std::copy(spacing,spacing+3,cart.spacing);
+
+		//! Runtime virtual cluster
+		cart.v_cl = v_cl;
+
+		cart.bbox = bbox;
+		cart.ss_box = ss_box;
+		cart.ghost = g;
+
+		cart.dist = dist;
+
+		for (size_t i = 0 ; i < dim ; i++)
+			cart.bc[i] = bc[i];
+
+		(static_cast<nn_prcs<dim,T> &>(cart)).create(box_nn_processor, sub_domains);
+		(static_cast<nn_prcs<dim,T> &>(cart)).applyBC(domain,ghost,bc);
+
+		cart.Initialize_geo_cell_lists();
+		cart.calculateGhostBoxes();
+
+		return cart;
+	}
+
+	/*! \brief It create another object that contain the same decomposition information but with different ghost boxes and an extended domain
+	 *
+	 * The domain extension is produced extending the boxes at the border like in figure
+	 *
+	 * \verbatim
+	 *
++--------------^--------^----------^----------+
+|              |        |          |          |
+|        A     |    E   |     F    |    N     |
+|    +-----------------------------------+---->
+|    |         |        |          |     |    |
+|  A |   A     |        |     F    |     |    |
+|    |         |        |          |     |    |
+|    |         |    E   +----------+  N  |  N |
+<--------------+        |          |     |    |
+|    |         |        |          |     |    |
+|    |         |        |     G    |     |    |
+|    |         |        |          +---------->
+|  B |   B     |        +----------+     |    |
+|    |         +--------+          |  M  |  M |
+|    |         |        |     H    |     |    |
+|    |         |        +-----+----+---------->
+<--------------+    D   |     |          |    |
+|    |         |        |  I  |     L    |  L |
+|  C |   C     |        |     |          |    |
+|    |         |        |     |          |    |
+|    +-----------------------------------+    |
+|              |        |     |               |
+|        C     |    D   |  I  |     L         |
++--------------v--------v-----v---------------+
+
+	 *
+	 * \endverbatim
+	 *
+	 * \param g ghost
+	 * \param domain extended domain (MUST be extended)
+	 *
+	 * \return a duplicated decomposition with different ghost boxes and an extended domain
+	 *
+	 */
+	CartDecomposition<dim,T,Memory> duplicate(const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain) const
+	{
+		CartDecomposition<dim,T,Memory> cart(v_cl);
+
+		cart.box_nn_processor = box_nn_processor;
+
+		// Calculate new sub-domains for extended domain
+		extend_subdomains(cart,ext_domain);
+
+		// Calculate fine_s structure for the extended domain
+		// update the cell decomposer and gr
+		extend_fines(cart);
+
+		// Get the old sub-sub-domain grid extension
+
+		cart.domain = ext_domain;
+
+		// spacing does not change
+		std::copy(spacing,spacing+3,cart.spacing);
+
+		//! Runtime virtual cluster
+		cart.v_cl = v_cl;
+
+		cart.ghost = g;
+		cart.dist = dist;
+
+		for (size_t i = 0 ; i < dim ; i++)
+			cart.bc[i] = bc[i];
+
+		(static_cast<nn_prcs<dim,T> &>(cart)).create(cart.box_nn_processor, cart.sub_domains);
+		(static_cast<nn_prcs<dim,T> &>(cart)).applyBC(ext_domain,ghost,bc);
+
+		cart.Initialize_geo_cell_lists();
+		cart.calculateGhostBoxes();
+
+		return cart;
+	}
+
+	/*! \brief It create another object that contain the same information and act in the same way
+	 *
+	 * \return a duplicated decomposition
+	 *
+	 */
+	CartDecomposition<dim,T,Memory> duplicate() const
+	{
+		CartDecomposition<dim,T,Memory> cart(v_cl);
+
+		(static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this));
+		(static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this));
+		(static_cast<ie_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_ghost<dim,T>>(*this));
+
+		cart.sub_domains = sub_domains;
+		cart.box_nn_processor = box_nn_processor;
+		cart.fine_s = fine_s;
+		cart.gr = gr;
+		cart.cd = cd;
+		cart.domain = domain;
+		std::copy(spacing,spacing+3,cart.spacing);
+
+		//! Runtime virtual cluster
+		cart.v_cl = v_cl;
+
+		cart.ghost = ghost;
+
+		cart.bbox = bbox;
+		cart.ss_box = ss_box;
+
+		for (size_t i = 0 ; i < dim ; i++)
+			cart.bc[i] = this->bc[i];
+
+		return cart;
+	}
+
+	/*! \brief Copy the element
+	 *
+	 * \param cart element to copy
+	 *
+	 */
+	CartDecomposition<dim,T,Memory> & operator=(const CartDecomposition & cart)
+	{
+		static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart));
+		static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart));
+		static_cast<ie_ghost<dim,T>*>(this)->operator=(static_cast<ie_ghost<dim,T>>(cart));
+
+		sub_domains = cart.sub_domains;
+		box_nn_processor = cart.box_nn_processor;
+		fine_s = cart.fine_s;
+		gr = cart.gr;
+		cd = cart.cd;
+		domain = cart.domain;
+		std::copy(cart.spacing,cart.spacing+3,spacing);
+
+		//! Runtime virtual cluster
+		v_cl = cart.v_cl;
+
+		ghost = cart.ghost;
+
+		bbox = cart.bbox;
+		ss_box = cart.ss_box;
+
+		for (size_t i = 0 ; i < dim ; i++)
+			bc[i] = cart.bc[i];
+
+		return *this;
+	}
+
+	/*! \brief Copy the element, move semantic
+	 *
+	 * \param cart element to copy
+	 *
+	 */
+	CartDecomposition<dim,T,Memory> & operator=(CartDecomposition && cart)
+	{
+		static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>*>(cart));
+		static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>*>(cart));
+		static_cast<ie_ghost<dim,T>*>(this)->operator=(static_cast<ie_ghost<dim,T>*>(cart));
+
+		sub_domains.swap(cart.sub_domains);
+		box_nn_processor.swap(cart.box_nn_processor);
+		fine_s.swap(cart.fine_s);
+		gr = cart.gr;
+		cd = cart.cd;
+		domain = cart.domain;
+		std::copy(cart.spacing,cart.spacing+3,spacing);
+
+		//! Runtime virtual cluster
+		v_cl = cart.v_cl;
+
+		ghost = cart.ghost;
+
+		cart.bbox = bbox;
+		cart.ss_box = ss_box;
+
+		for (size_t i = 0 ; i < dim ; i++)
+			cart.bc[i] = bc[i];
+
+		return *this;
+	}
+
 	/*! \brief The default grid size
 	 *
 	 *  The default grid is always an isotropic grid that adapt with the number of processors,
@@ -596,43 +1061,110 @@ public:
 	 * \return processorID
 	 *
 	 */
-	template<typename Mem> size_t inline processorID(encapc<1, Point<dim, T>, Mem> p)
+	template<typename Mem, typename ofb> size_t inline processorID(encapc<1, Point<dim,T>, Mem> p)
+	{
+		return fine_s.get(cd.template getCell<ofb>(p));
+	}
+
+	/*! \brief Given a point return in which processor the particle should go
+	 *
+	 * \return processorID
+	 *
+	 */
+	size_t inline processorID(const Point<dim,T> &p) const
 	{
 		return fine_s.get(cd.getCell(p));
 	}
 
-	// Smallest subdivision on each direction
-	::Box<dim, T> ss_box;
+	/*! \brief Given a point return in which processor the particle should go
+	 *
+	 * \return processorID
+	 *
+	 */
+	size_t inline processorID(const T (&p)[dim]) const
+	{
+		return fine_s.get(cd.getCell(p));
+	}
 
-	/*! \brief Get the smallest subdivision of the domain on each direction
+	/*! \brief Given a point return in which processor the particle should go
 	 *
-	 * \return a box p1 is set to zero
+	 * Boundary conditions are considered
+	 *
+	 * \return processorID
 	 *
 	 */
-	const ::Box<dim, T> & getSmallestSubdivision()
+	template<typename Mem> size_t inline processorIDBC(encapc<1, Point<dim,T>, Mem> p)
 	{
-		return ss_box;
+		Point<dim,T> pt = p;
+		applyPointBC(pt);
+
+		return fine_s.get(cd.getCell(pt));
 	}
 
 	/*! \brief Given a point return in which processor the particle should go
+	 *
+	 * Boundary conditions are considered
 	 *
 	 * \return processorID
 	 *
 	 */
+	template<typename ofb> size_t inline processorIDBC(const Point<dim,T> &p) const
+	{
+		Point<dim,T> pt = p;
+		applyPointBC(pt);
 
-	size_t inline processorID(const T (&p)[dim]) const
+		return fine_s.get(cd.getCell(p));
+	}
+
+	/*! \brief Given a point return in which processor the particle should go
+	 *
+	 * Boundary conditions are considered
+	 *
+	 * \return processorID
+	 *
+	 */
+	template<typename ofb> size_t inline processorIDBC(const T (&p)[dim]) const
 	{
+		Point<dim,T> pt = p;
+		applyPointBC(pt);
+
 		return fine_s.get(cd.getCell(p));
 	}
 
+	/*! \brief Get the smallest subdivision of the domain on each direction
+	 *
+	 * \return a box p1 is set to zero
+	 *
+	 */
+	const ::Box<dim,T> & getSmallestSubdivision()
+	{
+		return ss_box;
+	}
+
+	/*! \brief Get the periodicity on i dimension
+	 *
+	 * \param i dimension
+	 *
+	 * \return the periodicity in direction i
+	 *
+	 */
+	size_t isPeriodic(size_t i)
+	{
+		return bc[i];
+	}
+
 	/*! \brief Set the parameter of the decomposition
 	 *
 	 * \param div_ storing into how many domain to decompose on each dimension
 	 * \param domain_ domain to decompose
 	 *
 	 */
-	void setParameters(const size_t (&div_)[dim], Domain<dim, T> domain_, Ghost<dim, T> ghost = Ghost<dim, T>())
+	void setParameters(const size_t (& div_)[dim], ::Box<dim,T> domain_, const size_t (& bc)[dim] ,const Ghost<dim,T> & ghost)
 	{
+		// set the boundary conditions
+		for (size_t i = 0 ; i < dim ; i++)
+			this->bc[i] = bc[i];
+
 		// set the ghost
 		this->ghost = ghost;
 
@@ -642,7 +1174,7 @@ public:
 		cd.setDimensions(domain, div_, 0);
 
 		// init distribution
-		dist.init(gr, domain);
+		dist.createCartGraph(gr, domain);
 
 	}
 
@@ -655,7 +1187,7 @@ public:
 
 		dist.decompose();
 
-		//createSubdomains(v_cl);
+		createSubdomains(v_cl,bc);
 	}
 
 	/*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call
@@ -720,7 +1252,7 @@ public:
 	 */
 	inline void getSubSubDomainPosition(size_t id, T (&pos)[dim])
 	{
-		dist.getVertexPosition(id, pos);
+		dist.getSubSubDomainPosition(id, pos);
 	}
 
 	/*! \brief Get the number of sub-sub-domains in this sub-graph
@@ -739,7 +1271,7 @@ public:
 	 */
 	inline void setSubSubDomainComputationCost(size_t id, size_t weight)
 	{
-		dist.setVertexWeight(id, weight);
+		dist.setComputationCost(id, weight);
 	}
 
 	/*! \brief function that set the weight of the vertex
@@ -749,7 +1281,7 @@ public:
 	 */
 	inline size_t getSubSubDomainComputationCost(size_t id)
 	{
-		return dist.getVertexWeight(id);
+		return dist.getComputationCost(id);
 	}
 
 	/*! \brief Operator to access the size of the sub-graph
@@ -811,17 +1343,19 @@ public:
 		return sp;
 	}
 
-	/*! \brief Return the structure that store the physical domain
+	/*! \brief Return the box of the physical domain
 	 *
-	 * \return The physical domain
+	 * \return The physical domain box
 	 *
 	 */
-	Domain<dim, T> & getDomain()
+	const ::Box<dim,T> & getDomain()
 	{
 		return domain;
 	}
 
 	/*! \brief Check if the particle is local
+	 *
+	 * \warning if the particle is outside the domain the result is unreliable
 	 *
 	 * \param p object position
 	 *
@@ -834,6 +1368,8 @@ public:
 	}
 
 	/*! \brief Check if the particle is local
+	 *
+	 * \warning if the particle is outside the domain the result is unreliable
 	 *
 	 * \param p object position
 	 *
@@ -845,7 +1381,50 @@ public:
 		return processorID(pos) == v_cl.getProcessUnitID();
 	}
 
-	::Box<dim, T> bbox;
+	/*! \brief Check if the particle is local considering boundary conditions
+	 *
+	 * \warning if the particle is outside the domain and non-periodic the result
+	 *          is unreliable
+	 *
+	 *
+	 * \param p object position
+	 *
+	 * \return true if it is local
+	 *
+	 */
+	template<typename Mem> bool isLocalBC(const encapc<1, Point<dim,T>, Mem> p, const size_t (& bc)[dim]) const
+	{
+		Point<dim,T> pt = p;
+
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == PERIODIC)
+				pt.get(i) = openfpm::math::periodic_l(p.template get<0>()[i],domain.getHigh(i),domain.getLow(i));
+		}
+
+		return processorID<Mem>(pt) == v_cl.getProcessUnitID();
+	}
+
+	/*! \brief Check if the particle is local considering boundary conditions
+	 *
+	 * \param p object position
+	 *
+	 * \return true if it is local
+	 *
+	 */
+	bool isLocalBC(const T (&p)[dim], const size_t (& bc)[dim]) const
+	{
+		Point<dim,T> pt = p;
+
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == PERIODIC)
+				pt.get(i) = openfpm::math::periodic_l(p[i],domain.getHigh(i),domain.getLow(i));
+		}
+
+		return processorID(pt) == v_cl.getProcessUnitID();
+	}
+
 
 	/*! \brief Return the bounding box containing union of all the sub-domains for the local processor
 	 *
@@ -857,6 +1436,16 @@ public:
 		return bbox;
 	}
 
+
+	/*! \brief Return the ghost
+	 *
+	 *
+	 */
+	const Ghost<dim,T> & getGhost() const
+	{
+		return ghost;
+	}
+
 	////////////// Functions to get decomposition information ///////////////
 
 	/*! \brief Write the decomposition as VTK file
@@ -889,6 +1478,19 @@ public:
 		return true;
 	}
 
+	/*! \brief Get the Virtual Cluster machine
+	 *
+	 * \return the Virtual cluster machine
+	 *
+	 */
+	Vcluster & getVC() const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return v_cl;
+	}
+
 	/*! \brief function to check the consistency of the information of the decomposition
 	 *
 	 * \return false if is inconsistent
@@ -934,33 +1536,100 @@ public:
 		}
 	}
 
-	/*! \brief Print current graph and save it to file with name test_graph_[id]
+	/*! \brief Check if the CartDecomposition contain the same information
 	 *
-	 * \param id to attach to the filename
+	 * \param cart Element to check
 	 *
 	 */
-	void printCurrentDecomposition(int id)
+	bool is_equal(CartDecomposition<dim,T,Memory> & cart)
 	{
-		dist.printCurrentDecomposition(id);
-	}
+		if (static_cast<ie_loc_ghost<dim,T>*>(this)->is_equal(static_cast<ie_loc_ghost<dim,T>&>(cart)) == false)
+			return false;
 
-	//! Increment the reference counter
-	void incRef()
-	{
-		ref_cnt++;
+		if (static_cast<nn_prcs<dim,T>*>(this)->is_equal(static_cast<nn_prcs<dim,T>&>(cart)) == false)
+			return false;
+
+		if (static_cast<ie_ghost<dim,T>*>(this)->is_equal(static_cast<ie_ghost<dim,T>&>(cart)) == false)
+			return false;
+
+		if (sub_domains != cart.sub_domains)
+			return false;
+
+		if (box_nn_processor != cart.box_nn_processor)
+			return false;
+
+		if (fine_s != cart.fine_s)
+			return false;
+
+		if (gr != cart.gr)
+			return false;
+
+		if (cd != cart.cd)
+			return false;
+
+		if (domain != cart.domain)
+			return false;
+
+		if (meta_compare<T[dim]>::meta_compare_f(cart.spacing,spacing) == false)
+			return false;
+
+		if (ghost != cart.ghost)
+			return false;
+
+		return true;
 	}
 
-	//! Decrement the reference counter
-	void decRef()
+	/*! \brief Check if the CartDecomposition contain the same information with the exception of the ghost part
+	 * It is anyway required that the ghosts come from the same sub-domains decomposition
+	 *
+	 * \param cart Element to check
+	 *
+	 */
+	bool is_equal_ng(CartDecomposition<dim,T,Memory> & cart)
 	{
-		ref_cnt--;
+		if (static_cast<ie_loc_ghost<dim,T>*>(this)->is_equal_ng(static_cast<ie_loc_ghost<dim,T>&>(cart)) == false)
+			return false;
+
+		if (static_cast<nn_prcs<dim,T>*>(this)->is_equal(static_cast<nn_prcs<dim,T>&>(cart)) == false)
+			return false;
+
+		if (static_cast<ie_ghost<dim,T>*>(this)->is_equal_ng(static_cast<ie_ghost<dim,T>&>(cart)) == false)
+			return false;
+
+		if (sub_domains != cart.sub_domains)
+			return false;
+
+		if (box_nn_processor != cart.box_nn_processor)
+			return false;
+
+		if (fine_s != cart.fine_s)
+			return false;
+
+		if (gr != cart.gr)
+			return false;
+
+		if (cd != cart.cd)
+			return false;
+
+		if (domain != cart.domain)
+			return false;
+
+		if (meta_compare<T[dim]>::meta_compare_f(cart.spacing,spacing) == false)
+			return false;
+
+		return true;
 	}
 
-	//! Return the reference counter
-	long int ref()
+	/*! \brief Return the distribution object
+	 *
+	 * \return the distribution object
+	 *
+	 */
+	Distribution & getDistribution()
 	{
-		return ref_cnt;
+		return dist;
 	}
 };
 
+
 #endif
diff --git a/src/Decomposition/CartDecomposition_unit_test.hpp b/src/Decomposition/CartDecomposition_unit_test.hpp
index 115a094e6cd1a190c064faeb63b46df3ea92bef8..41ddb240950b41594a0ced151b8c6baaf8164552 100755
--- a/src/Decomposition/CartDecomposition_unit_test.hpp
+++ b/src/Decomposition/CartDecomposition_unit_test.hpp
@@ -3,61 +3,50 @@
 
 #include "CartDecomposition.hpp"
 #include "util/mathutil.hpp"
-#include "DLB.hpp"
-#include <boost/algorithm/string.hpp>
 
-BOOST_AUTO_TEST_SUITE (CartDecomposition_test)
+BOOST_AUTO_TEST_SUITE( CartDecomposition_test )
 
 #define SUB_UNIT_FACTOR 64
-#define DIM 2
 
-void setComputationCosts(CartDecomposition<DIM, float> &dec, size_t n_v, Point<DIM, float> center, float radius, size_t weight_h, size_t weight_l)
+void setComputationCosts(CartDecomposition<2, float> &dec, size_t n_v, Point<2, float> center, float radius, size_t weight_h, size_t weight_l)
 {
 	float radius2 = pow(radius, 2);
 	float eq;
 
 	// Position structure for the single vertex
-	float pos[DIM];
+	float pos[2];
 
-	for (int i = 0; i < n_v; i++)
+	for (size_t i = 0; i < n_v; i++)
 	{
 		dec.getSubSubDomainPosition(i, pos);
 
 		eq = pow((pos[0] - center.get(0)), 2) + pow((pos[1] - center.get(1)), 2);
 
 		if (eq <= radius2)
-		{
 			dec.setSubSubDomainComputationCost(i, weight_h);
-		}
 		else
-		{
 			dec.setSubSubDomainComputationCost(i, weight_l);
-		}
 	}
 }
 
 void setComputationCosts3D(CartDecomposition<3, float> &dec, size_t n_v, Point<3, float> center, float radius, size_t weight_h, size_t weight_l)
 {
-	float radius2 = pow(radius, 2);
+	float radius2 = radius * radius;
 	float eq;
 
 	// Position structure for the single vertex
 	float pos[3];
 
-	for (int i = 0; i < n_v; i++)
+	for (size_t i = 0; i < n_v; i++)
 	{
 		dec.getSubSubDomainPosition(i, pos);
 
 		eq = pow((pos[0] - center.get(0)), 2) + pow((pos[1] - center.get(1)), 2) + pow((pos[2] - center.get(2)), 2);
 
 		if (eq <= radius2)
-		{
 			dec.setSubSubDomainComputationCost(i, weight_h);
-		}
 		else
-		{
 			dec.setSubSubDomainComputationCost(i, weight_l);
-		}
 	}
 }
 
@@ -68,18 +57,21 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 	// Vcluster
 	Vcluster & vcl = *global_v_cluster;
 
+	// non-periodic boundary condition
+	size_t bc[2] = {NON_PERIODIC,NON_PERIODIC};
+
 	// Initialize the global VCluster
 	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
 
 	//! [Create CartDecomposition]
-	CartDecomposition<DIM, float> dec(vcl);
+	CartDecomposition<2, float> dec(vcl);
 
 	// Init DLB tool
 	DLB dlb(vcl);
 
 	// Physical domain
-	Box<DIM, float> box( { 0.0, 0.0 }, { 10.0, 10.0 });
-	size_t div[DIM];
+	Box<2, float> box( { 0.0, 0.0 }, { 10.0, 10.0 });
+	size_t div[2];
 
 	// Get the number of processor and calculate the number of sub-domain
 	// for each processor (SUB_UNIT_FACTOR=64)
@@ -87,16 +79,16 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 	size_t n_sub = n_proc * SUB_UNIT_FACTOR;
 
 	// Set the number of sub-domains on each dimension (in a scalable way)
-	for (int i = 0; i < DIM; i++)
+	for (int i = 0; i < 2; i++)
 	{
-		div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/DIM));
+		div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/2));
 	}
 
 	// Define ghost
-	Ghost<DIM, float> g(0.01);
+	Ghost<2, float> g(0.01);
 
 	// Decompose
-	dec.setParameters(div, box, g);
+	dec.setParameters(div, box, bc, g);
 
 	// Set unbalance threshold
 	dlb.setHeurisitc(DLB::Heuristic::UNBALANCE_THRLD);
@@ -105,7 +97,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 	// Add weights to points
 
 	// First create the center of the weights distribution, check it is coherent to the size of the domain
-	Point<DIM, float> center( { 2.0, 2.0 });
+	Point<2, float> center( { 2.0, 2.0 });
 
 	// Radius of the weights distribution
 	float radius = 2.0;
@@ -115,11 +107,11 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 
 	setComputationCosts(dec, dec.getNSubSubDomains(), center, radius, weight_h, weight_l);
 
-	dec.printCurrentDecomposition(0);
+	dec.getDistribution().write("DLB_test_graph_0.vtk");
 
 	dec.decompose();
 
-	dec.printCurrentDecomposition(1);
+	dec.getDistribution().write("DLB_test_graph_1.vtk");
 
 	float stime = 0.0, etime = 10.0, tstep = 0.1;
 
@@ -143,7 +135,10 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 
 		dec.rebalance(dlb);
 
-		dec.printCurrentDecomposition(i+1);
+		std::stringstream str;
+		str << "DLB_test_graph_" << i + 1 << ".vtk";
+		dec.getDistribution().write(str.str());
+
 
 	}
 	// create a ghost border
@@ -152,14 +147,14 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 	// For each calculated ghost box
 	for (size_t i = 0; i < dec.getNIGhostBox(); i++)
 	{
-		SpaceBox<DIM,float> b = dec.getIGhostBox(i);
+		SpaceBox<2,float> b = dec.getIGhostBox(i);
 		size_t proc = dec.getIGhostBoxProcessor(i);
 
 		// sample one point inside the box
-		Point<DIM,float> p = b.rnd();
+		Point<2,float> p = b.rnd();
 
 		// Check that ghost_processorsID return that processor number
-		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<DIM,float>::processor_id>(p);
+		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<2,float>::processor_id>(p);
 
 		bool found = false;
 
@@ -171,7 +166,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D )
 
 		if (found == false)
 		{
-			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<DIM,float>::processor_id>(p);
+			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<2,float>::processor_id>(p);
 		}
 
 		BOOST_REQUIRE_EQUAL(found,true);
@@ -188,18 +183,21 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 	// Vcluster
 	Vcluster & vcl = *global_v_cluster;
 
+	// non-periodic boundary condition
+	size_t bc[2] = {NON_PERIODIC,NON_PERIODIC};
+
 	// Initialize the global VCluster
 	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
 
 	//! [Create CartDecomposition]
-	CartDecomposition<DIM, float> dec(vcl);
+	CartDecomposition<2, float> dec(vcl);
 
 	// Init DLB tool
 	DLB dlb(vcl);
 
 	// Physical domain
-	Box<DIM, float> box( { 0.0, 0.0 }, { 10.0, 10.0 });
-	size_t div[DIM];
+	Box<2, float> box( { 0.0, 0.0 }, { 10.0, 10.0 });
+	size_t div[2];
 
 	// Get the number of processor and calculate the number of sub-domain
 	// for each processor (SUB_UNIT_FACTOR=64)
@@ -207,16 +205,16 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 	size_t n_sub = n_proc * SUB_UNIT_FACTOR;
 
 	// Set the number of sub-domains on each dimension (in a scalable way)
-	for (int i = 0; i < DIM; i++)
+	for (int i = 0; i < 2; i++)
 	{
-		div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/DIM));
+		div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/2));
 	}
 
 	// Define ghost
-	Ghost<DIM, float> g(0.01);
+	Ghost<2, float> g(0.01);
 
 	// Decompose
-	dec.setParameters(div, box, g);
+	dec.setParameters(div, box, bc, g);
 
 	// Set type of heuristic
 	dlb.setHeurisitc(DLB::Heuristic::SAR_HEURISTIC);
@@ -224,7 +222,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 	// Add weights to points
 
 	// First create the center of the weights distribution, check it is coherent to the size of the domain
-	Point<DIM, float> center( { 2.0, 2.0 });
+	Point<2, float> center( { 2.0, 2.0 });
 
 	// Radius of the weights distribution
 	float radius = 2.0;
@@ -232,13 +230,13 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 	// Weight if the distribution (high)
 	size_t weight_h = 5, weight_l = 1;
 
-	size_t n_v = pow(div[0], DIM);
+	size_t n_v = pow(div[0], 2);
 
 	setComputationCosts(dec, n_v, center, radius, weight_h, weight_l);
 
 	dec.decompose();
 
-	dec.printCurrentDecomposition(0);
+	dec.getDistribution().write("DLB_test_graph_0.vtk");
 
 	float stime = 0.0, etime = 10.0, tstep = 0.1;
 
@@ -268,7 +266,9 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 
 		dec.rebalance(dlb);
 
-		dec.printCurrentDecomposition(i);
+		std::stringstream str;
+		str << "DLB_test_graph_" << i << ".vtk";
+		dec.getDistribution().write(str.str());
 	}
 
 	// create a ghost border
@@ -277,14 +277,14 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 	// For each calculated ghost box
 	for (size_t i = 0; i < dec.getNIGhostBox(); i++)
 	{
-		SpaceBox<DIM,float> b = dec.getIGhostBox(i);
+		SpaceBox<2,float> b = dec.getIGhostBox(i);
 		size_t proc = dec.getIGhostBoxProcessor(i);
 
 		// sample one point inside the box
-		Point<DIM,float> p = b.rnd();
+		Point<2,float> p = b.rnd();
 
 		// Check that ghost_processorsID return that processor number
-		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<DIM,float>::processor_id>(p);
+		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<2,float>::processor_id>(p);
 
 		bool found = false;
 
@@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_2D_sar)
 
 		if (found == false)
 		{
-			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<DIM,float>::processor_id>(p);
+			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<2,float>::processor_id>(p);
 		}
 
 		BOOST_REQUIRE_EQUAL(found,true);
@@ -313,6 +313,9 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D)
 	// Vcluster
 	Vcluster & vcl = *global_v_cluster;
 
+	// non-periodic boundary condition
+	size_t bc[3] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
 	// Initialize the global VCluster
 	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
 
@@ -341,7 +344,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D)
 	Ghost<3, float> g(0.01);
 
 	// Decompose
-	dec.setParameters(div, box, g);
+	dec.setParameters(div, box, bc, g);
 
 	// Set unbalance threshold
 	dlb.setHeurisitc(DLB::Heuristic::UNBALANCE_THRLD);
@@ -364,7 +367,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D)
 
 	dec.decompose();
 
-	dec.printCurrentDecomposition(0);
+	dec.getDistribution().write("DLB_test_graph_0.vtk");
 
 	float stime = 0.0, etime = 10.0, tstep = 0.1;
 
@@ -390,7 +393,9 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D)
 
 		dec.rebalance(dlb);
 
-		dec.printCurrentDecomposition(i);
+		std::stringstream str;
+		str << "DLB_test_graph_" << i << ".vtk";
+		dec.getDistribution().write(str.str());
 	}
 
 	// create a ghost border
@@ -430,6 +435,204 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_test_3D)
 	BOOST_REQUIRE_EQUAL(val,true);
 }
 
+BOOST_AUTO_TEST_CASE( CartDecomposition_non_periodic_test)
+{
+	// Vcluster
+	Vcluster & vcl = *global_v_cluster;
+
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	//! [Create CartDecomposition]
+	CartDecomposition<3,float> dec(vcl);
+
+	// Physical domain
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+	size_t div[3];
+
+	// Get the number of processor and calculate the number of sub-domain
+	// for each processor (SUB_UNIT_FACTOR=64)
+	size_t n_proc = vcl.getProcessingUnits();
+	size_t n_sub = n_proc * SUB_UNIT_FACTOR;
+
+	// Set the number of sub-domains on each dimension (in a scalable way)
+	for (int i = 0 ; i < 3 ; i++)
+	{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
+
+	// Define ghost
+	Ghost<3,float> g(0.01);
+
+	// Boundary conditions
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
+	// Decompose
+	dec.setParameters(div,box,bc,g);
+	dec.decompose();
+
+	// create a ghost border
+	dec.calculateGhostBoxes();
+
+	//! [Create CartDecomposition]
+
+	// For each calculated ghost box
+	for (size_t i = 0 ; i < dec.getNIGhostBox() ; i++)
+	{
+		SpaceBox<3,float> b = dec.getIGhostBox(i);
+		size_t proc = dec.getIGhostBoxProcessor(i);
+
+		// sample one point inside the box
+		Point<3,float> p = b.rnd();
+
+		// Check that ghost_processorsID return that processor number
+		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<3,float>::processor_id>(p);
+
+		bool found = false;
+
+		for (size_t j = 0; j < pr.size() ; j++)
+		{
+			if (pr.get(j) == proc)
+			{found = true; break;}
+		}
+
+		if (found == false)
+		{
+			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<3,float>::processor_id>(p);
+		}
+
+		BOOST_REQUIRE_EQUAL(found,true);
+	}
+
+	// Check the consistency
+
+	bool val = dec.check_consistency();
+	BOOST_REQUIRE_EQUAL(val,true);
+
+	// We duplicate the decomposition
+	CartDecomposition<3,float> dec2 = dec.duplicate();
+	dec2.check_consistency();
+
+	// check that dec and dec2 contain the same information
+	bool ret = dec.is_equal(dec2);
+
+	// We check if the two decomposition are equal
+	BOOST_REQUIRE_EQUAL(ret,true);
+
+	// We duplicate the decomposition redefining the ghost
+
+	// Define ghost
+	Ghost<3,float> g3(0.005);
+
+	// We duplicate the decomposition redefining the ghost
+	CartDecomposition<3,float> dec3 = dec.duplicate(g3);
+
+	ret = dec3.check_consistency();
+	BOOST_REQUIRE_EQUAL(ret,true);
+
+	// Check that dec3 is equal to dec2 with the exception of the ghost part
+	ret = dec3.is_equal_ng(dec2);
+	BOOST_REQUIRE_EQUAL(ret,true);
+}
+
+
+BOOST_AUTO_TEST_CASE( CartDecomposition_periodic_test)
+{
+	// Vcluster
+	Vcluster & vcl = *global_v_cluster;
+
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	//! [Create CartDecomposition]
+	CartDecomposition<3,float> dec(vcl);
+
+	// Physical domain
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+	size_t div[3];
+
+	// Get the number of processor and calculate the number of sub-domain
+	// for each processor (SUB_UNIT_FACTOR=64)
+	size_t n_proc = vcl.getProcessingUnits();
+	size_t n_sub = n_proc * SUB_UNIT_FACTOR;
+
+	// Set the number of sub-domains on each dimension (in a scalable way)
+	for (int i = 0 ; i < 3 ; i++)
+	{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
+
+	// Define ghost
+	Ghost<3,float> g(0.01);
+
+	// Boundary conditions
+	size_t bc[] = {PERIODIC,PERIODIC,PERIODIC};
+
+	// Decompose
+	dec.setParameters(div,box,bc,g);
+	dec.decompose();
+
+	// create a ghost border
+	dec.calculateGhostBoxes();
+
+	//! [Create CartDecomposition]
+
+	// For each calculated ghost box
+	for (size_t i = 0 ; i < dec.getNIGhostBox() ; i++)
+	{
+		SpaceBox<3,float> b = dec.getIGhostBox(i);
+		size_t proc = dec.getIGhostBoxProcessor(i);
+
+		// sample one point inside the box
+		Point<3,float> p = b.rnd();
+
+		// Check that ghost_processorsID return that processor number
+		const openfpm::vector<size_t> & pr = dec.template ghost_processorID<CartDecomposition<3,float>::processor_id>(p);
+
+		bool found = false;
+
+		for (size_t j = 0; j < pr.size() ; j++)
+		{
+			if (pr.get(j) == proc)
+			{found = true; break;}
+		}
+
+		if (found == false)
+		{
+			const openfpm::vector<size_t> pr2 = dec.template ghost_processorID<CartDecomposition<3,float>::processor_id>(p);
+		}
+
+		BOOST_REQUIRE_EQUAL(found,true);
+	}
+
+	// Check the consistency
+	bool val = dec.check_consistency();
+	BOOST_REQUIRE_EQUAL(val,true);
+
+	// We duplicate the decomposition
+	CartDecomposition<3,float> dec2 = dec.duplicate();
+	dec2.check_consistency();
+
+	bool ret = dec.is_equal(dec2);
+
+	// We check if the two decomposition are equal
+	BOOST_REQUIRE_EQUAL(ret,true);
+
+	// check that dec and dec2 contain the same information
+
+	// We duplicate the decomposition redefining the ghost
+
+	// Define ghost
+	Ghost<3,float> g3(0.005);
+
+	// We duplicate the decomposition redefining the ghost
+	CartDecomposition<3,float> dec3 = dec.duplicate(g3);
+
+	ret = dec3.check_consistency();
+	BOOST_REQUIRE_EQUAL(ret,true);
+
+	// Check that dec3 is equal to dec2 with the exception of the ghost part
+	ret = dec3.is_equal_ng(dec2);
+	BOOST_REQUIRE_EQUAL(ret,true);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 
+
 #endif
diff --git a/src/Decomposition/DistParMetisDistribution.hpp b/src/Decomposition/Distribution/DistParMetisDistribution.hpp
similarity index 91%
rename from src/Decomposition/DistParMetisDistribution.hpp
rename to src/Decomposition/Distribution/DistParMetisDistribution.hpp
index 49e136fcee59997bb6e244532bcbf9d6ed624717..cb926fa3407fa09ea479d9afa7d5dea30afe5984 100644
--- a/src/Decomposition/DistParMetisDistribution.hpp
+++ b/src/Decomposition/Distribution/DistParMetisDistribution.hpp
@@ -12,7 +12,7 @@
 #ifndef SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_
 #define SRC_DECOMPOSITION_DISTPARMETISDISTRIBUTION_HPP_
 
-template<unsigned int dim, typename T, template<unsigned int, typename > class Domain = Box>
+template<unsigned int dim, typename T>
 class DistParMetisDistribution
 {
 	//! Vcluster
@@ -22,7 +22,7 @@ class DistParMetisDistribution
 	grid_sm<dim, void> gr;
 
 	//! rectangular domain to decompose
-	Domain<dim, T> domain;
+	Box<dim, T> domain;
 
 	//! Processor sub-sub-domain graph
 	DistGraph_CSR<nm_v, nm_e> sub_g;
@@ -83,7 +83,7 @@ public:
 	 * /param grid Grid
 	 * /param dom Domain
 	 */
-	void init(grid_sm<dim, void> & grid, Domain<dim, T> dom)
+	void init(grid_sm<dim, void> & grid, Box<dim, T> dom)
 	{
 		//! Set grid and domain
 		gr = grid;
@@ -330,6 +330,34 @@ public:
 		VTKWriter<DistGraph_CSR<nm_v, nm_e>, DIST_GRAPH> gv2(sub_g);
 		gv2.write("test_dist_graph_" + std::to_string(id) + ".vtk");
 	}
+
+	const DistParMetisDistribution<dim,T> & operator=(const DistParMetisDistribution<dim,T> & dist)
+	{
+		v_cl = dist.v_cl;
+		gr = dist.gr;
+		domain = dist.domain;
+		sub_g = dist.sub_g;
+		vtxdist = dist.vtxdist;
+		partitions = dist.partitions;
+		v_per_proc = dist.v_per_proc;
+		verticesGotWeights = dist.verticesGotWeights;
+
+		return *this;
+	}
+
+	const DistParMetisDistribution<dim,T> & operator=(const DistParMetisDistribution<dim,T> && dist)
+	{
+		v_cl = dist.v_cl;
+		gr = dist.gr;
+		domain = dist.domain;
+		sub_g.swap(dist.sub_g);
+		vtxdist.swap(dist.vtxdist);
+		partitions.swap(dist.partitions);
+		v_per_proc.swap(dist.v_per_proc);
+		verticesGotWeights = dist.verticesGotWeights;
+
+		return *this;
+	}
 };
 
 #endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
diff --git a/src/Decomposition/Distribution/Distribution_unit_tests.hpp b/src/Decomposition/Distribution/Distribution_unit_tests.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c16693964bbc0727944c6065191a5b8f112cfa1
--- /dev/null
+++ b/src/Decomposition/Distribution/Distribution_unit_tests.hpp
@@ -0,0 +1,246 @@
+/*
+ * Distribution_unit_tests.hpp
+ *
+ *  Created on: Feb 27, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_DECOMPOSITION_DISTRIBUTION_DISTRIBUTION_UNIT_TESTS_HPP_
+#define SRC_DECOMPOSITION_DISTRIBUTION_DISTRIBUTION_UNIT_TESTS_HPP_
+
+/*! \brief Set a sphere as high computation cost
+ *
+ * \param dist Distribution structure
+ * \param gr grid info
+ * \param center center of the sphere
+ * \param radius radius of the sphere
+ * \param max_l maximum load of the processor
+ * \param min_l minimum load of the processor
+ *
+ */
+template<unsigned int dim, typename Distribution> void setSphereComputationCosts(Distribution & dist, grid_sm<dim,void> & gr, Point<3, float> center, float radius, size_t max_l, size_t min_l)
+{
+	float radius2 = radius * radius;
+	float eq;
+
+	// Position structure for the single vertex
+	float pos[3];
+
+	for (size_t i = 0; i < gr.size() ; i++)
+	{
+		dist.getSubSubDomainPosition(i, pos);
+
+		eq = 0;
+		for (size_t j = 0 ; j < dim ; j++)
+			eq += (pos[j] - center.get(j)) * (pos[j] - center.get(j));
+
+		if (eq <= radius2)
+		{
+			dist.setComputationCost(i, max_l);
+			dist.setMigrationCost(i, max_l*2);
+		}
+		else
+		{
+			dist.setComputationCost(i, min_l);
+			dist.setMigrationCost(i, min_l*2);
+		}
+
+		// set Migration cost and communication cost
+		for (size_t j = 0 ; j < dist.getNSubSubDomainNeighbors(i) ; j++)
+			dist.setCommunicationCost(i,j,1);
+	}
+}
+
+BOOST_AUTO_TEST_SUITE( Distribution_test )
+
+BOOST_AUTO_TEST_CASE( Metis_distribution_test)
+{
+	Vcluster & v_cl = *global_v_cluster;
+
+	if (v_cl.getProcessingUnits() != 3)
+		return;
+
+	if (v_cl.getProcessUnitID() != 0)
+		return;
+
+	//! [Initialize a Metis Cartesian graph and decompose]
+
+	MetisDistribution<3,float> met_dist(v_cl);
+
+	// Cartesian grid
+	size_t sz[3] = {GS_SIZE,GS_SIZE,GS_SIZE};
+
+	// Box
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Grid info
+	grid_sm<3,void> info(sz);
+
+	// Set metis in test mode; it fixes the seed (not required if we are not testing)
+	met_dist.onTest();
+
+	// Initialize Cart graph and decompose
+
+	met_dist.createCartGraph(info,box);
+	met_dist.decompose();
+
+	//! [Initialize a Metis Cartesian graph and decompose]
+
+	BOOST_REQUIRE(met_dist.getUnbalance() < 0.03);
+
+	met_dist.write("vtk_metis_distribution.vtk");
+
+	size_t b = GS_SIZE*GS_SIZE*GS_SIZE/5;
+
+	//! [Decomposition Metis with weights]
+
+	// Initialize the weights to 1.0
+	// not required if we set ALL Computation, Migration and Communication costs
+	met_dist.initWeights();
+
+	// Change set some weight on the graph and re-decompose
+
+	for (size_t i = 0 ; i < met_dist.getNSubSubDomains() ; i++)
+	{
+		if (i == 0 || i == b || i == 2*b || i == 3*b || i == 4*b)
+			met_dist.setComputationCost(i,10);
+		else
+			met_dist.setComputationCost(i,1);
+
+		// We also show how to set some Communication and Migration cost
+
+		met_dist.setMigrationCost(i,1);
+
+		for (size_t j = 0 ; j < met_dist.getNSubSubDomainNeighbors(i) ; j++)
+			met_dist.setCommunicationCost(i,j,1);
+	}
+
+	met_dist.decompose();
+
+	//! [Decomposition Metis with weights]
+
+	BOOST_REQUIRE(met_dist.getUnbalance() < 0.03);
+
+	met_dist.write("vtk_metis_distribution_red.vtk");
+
+	// check that match
+
+	bool test = compare("vtk_metis_distribution.vtk","src/Decomposition/Distribution/test_data/vtk_metis_distribution_test.vtk");
+	BOOST_REQUIRE_EQUAL(true,test);
+
+	test = compare("vtk_metis_distribution_red.vtk","src/Decomposition/Distribution/test_data/vtk_metis_distribution_red_test.vtk");
+	BOOST_REQUIRE_EQUAL(true,test);
+
+	// Copy the Metis distribution
+
+	MetisDistribution<3,float> met_dist2(v_cl);
+
+	met_dist2 = met_dist;
+
+	test = (met_dist2 == met_dist);
+
+	BOOST_REQUIRE_EQUAL(test,true);
+
+	// We fix the size of MetisDistribution; if you are going to change this number
+	// please check the following
+	// duplicate functions
+	// swap functions
+	// Copy constructors
+	// operator= functions
+	// operator== functions
+
+	BOOST_REQUIRE_EQUAL(sizeof(MetisDistribution<3,float>),568ul);
+}
+
+BOOST_AUTO_TEST_CASE( Parmetis_distribution_test)
+{
+	Vcluster & v_cl = *global_v_cluster;
+
+	if (v_cl.getProcessingUnits() != 3)
+		return;
+
+	//! [Initialize a ParMetis Cartesian graph and decompose]
+
+	ParMetisDistribution<3,float> pmet_dist(v_cl);
+
+	// Physical domain
+	Box<3,float> box({0.0,0.0,0.0},{10.0,10.0,10.0});
+
+	// Grid info
+	grid_sm<3,void> info({GS_SIZE,GS_SIZE,GS_SIZE});
+
+	// Initialize Cart graph and decompose
+	pmet_dist.createCartGraph(info,box);
+
+	// First create the center of the weights distribution, check that it is consistent with the size of the domain
+	Point<3,float> center({2.0,2.0,2.0});
+
+	// It produces a sphere of radius 2.0
+	// with high computation cost (5) inside the sphere and (1) outside
+	setSphereComputationCosts(pmet_dist, info, center, 2.0f, 5ul, 1ul);
+
+	// first decomposition
+	pmet_dist.decompose();
+
+	//! [Initialize a ParMetis Cartesian graph and decompose]
+
+	if (v_cl.getProcessingUnits() == 0)
+	{
+		// write the first decomposition
+		pmet_dist.write("vtk_parmetis_distribution_0.vtk");
+
+		bool test = compare("vtk_parmetis_distribution_0.vtk","src/Decomposition/Distribution/test_data/vtk_parmetis_distribution_0_test.vtk");
+		BOOST_REQUIRE_EQUAL(true,test);
+	}
+
+	//! [refine with parmetis the decomposition]
+
+	float stime = 0.0, etime = 10.0, tstep = 0.1;
+
+	// Shift of the sphere at each iteration
+	Point<3,float> shift({tstep,tstep,tstep});
+
+	size_t iter = 1;
+
+	for(float t = stime; t < etime; t = t + tstep, iter++)
+	{
+		if(t < etime/2)
+			center += shift;
+		else
+			center -= shift;
+
+		setSphereComputationCosts(pmet_dist, info, center, 2.0f, 5, 1);
+
+		// With some regularity refine and write the parmetis distribution
+		if ((size_t)iter % 10 == 0)
+		{
+			pmet_dist.refine();
+
+			if (v_cl.getProcessUnitID() == 0)
+			{
+				std::stringstream str;
+				str << "vtk_parmetis_distribution_" << iter;
+				pmet_dist.write(str.str() + ".vtk");
+
+				// Check
+
+				bool test = compare(str.str() + ".vtk",std::string("src/Decomposition/Distribution/test_data/") + str.str() + "_test.vtk");
+				BOOST_REQUIRE_EQUAL(true,test);
+			}
+		}
+	}
+
+	//! [refine with parmetis the decomposition]
+
+	BOOST_REQUIRE_EQUAL(sizeof(MetisDistribution<3,float>),568ul);
+}
+
+BOOST_AUTO_TEST_CASE( DistPametis_distribution_test)
+{
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+
+#endif /* SRC_DECOMPOSITION_DISTRIBUTION_DISTRIBUTION_UNIT_TESTS_HPP_ */
diff --git a/src/Decomposition/Distribution/MetisDistribution.hpp b/src/Decomposition/Distribution/MetisDistribution.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c470d55b16d0482697c828add7f109f2567fa39
--- /dev/null
+++ b/src/Decomposition/Distribution/MetisDistribution.hpp
@@ -0,0 +1,499 @@
+/*
+ * MetisDistribution.hpp
+ *
+ *  Created on: Nov 19, 2015
+ *      Author: Antonio Leo
+ */
+
+#ifndef SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
+#define SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
+
+#include "SubdomainGraphNodes.hpp"
+#include "metis_util.hpp"
+
+#define METIS_DISTRIBUTION_ERROR 100001
+
+/*! \brief Class that distributes sub-sub-domains across processors using the Metis library
+ *
+ * Given a graph and setting Computational cost, Communication cost (on the edge) and
+ * Migration cost or total Communication costs, it produces the optimal distribution
+ *
+ * ### Initialize a Cartesian graph and decompose
+ * \snippet Distribution_unit_tests.hpp Initialize a Metis Cartesian graph and decompose
+ *
+ * ### Set Computation Communication and Migration cost
+ * \snippet Distribution_unit_tests.hpp Decomposition Metis with weights
+ *
+ */
+
+template<unsigned int dim, typename T>
+class MetisDistribution
+{
+	//! Vcluster
+	Vcluster & v_cl;
+
+	//! Structure that store the cartesian grid information
+	grid_sm<dim, void> gr;
+
+	//! rectangular domain to decompose
+	Box<dim, T> domain;
+
+	//! Global sub-sub-domain graph
+	Graph_CSR<nm_v, nm_e> gp;
+
+	//! Flag to check if weights are used on vertices
+	bool useWeights = false;
+
+	//! Flag that indicate if we are doing a test (In general it fix the seed)
+	bool testing = false;
+
+	/*! \brief Check that the sub-sub-domain id exists
+	 *
+	 * \param id sub-sub-domain id
+	 *
+	 */
+	inline void check_overflow(size_t id)
+	{
+#ifdef SE_CLASS1
+		if (id >= gp.getNVertex())
+		{
+			std::cerr << "Error " << __FILE__ ":" << __LINE__ << " such sub-sub-domain doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
+			ACTION_ON_ERROR(METIS_DISTRIBUTION_ERROR)
+		}
+#endif
+	}
+
+	/*! \brief Check that the neighborhood id of a sub-sub-domain exists
+	 *
+	 * \param id sub-sub-domain id
+	 *
+	 */
+	inline void check_overflowe(size_t id, size_t e)
+	{
+#ifdef SE_CLASS1
+		if (e >= gp.getNChilds(id))
+		{
+			std::cerr << "Error " << __FILE__ ":" << __LINE__ << " for the sub-sub-domain " << id << " such neighborhood doesn't exist (e = " << e << ", " << "total size = " << gp.getNChilds(id) << ")\n";
+			ACTION_ON_ERROR(METIS_DISTRIBUTION_ERROR)
+		}
+#endif
+	}
+
+public:
+
+	static constexpr unsigned int computation = nm_v::computation;
+
+	//! constructor
+	MetisDistribution(Vcluster & v_cl) :
+			v_cl(v_cl)
+	{
+#ifdef SE_CLASS2
+			check_new(this,8,VECTOR_EVENT,1);
+#endif
+	}
+
+	/*! \brief Copy constructor
+	 *
+	 *
+	 */
+	MetisDistribution(const MetisDistribution & mt)
+	:v_cl(mt.v_cl)
+	{
+#ifdef SE_CLASS2
+			check_valid(mt);
+			check_new(this,8,VECTOR_EVENT,1);
+#endif
+		this->operator=(mt);
+	}
+
+	/*! \brief Copy constructor
+	 *
+	 *
+	 */
+	MetisDistribution(MetisDistribution && mt)
+	{
+#ifdef SE_CLASS2
+			check_valid(mt);
+			check_new(this,8,VECTOR_EVENT,1);
+#endif
+		this->operator=(mt);
+	}
+
+	/*! \brief Destructor
+	 *
+	 *
+	 */
+	~MetisDistribution()
+	{
+#ifdef SE_CLASS2
+		check_delete(this);
+#endif
+	}
+
+
+	/*! \brief create a Cartesian distribution graph
+	 *
+	 * \param grid grid info (sub-sub domains on each dimension)
+	 * \param dom domain (domain where the sub-sub-domains are defined)
+	 *
+	 */
+	void createCartGraph(grid_sm<dim, void> & grid, Box<dim, T> dom)
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		// NON periodic boundary conditions
+		size_t bc[dim];
+
+		for (size_t i = 0 ; i < dim ; i++)
+			bc[i] = NON_PERIODIC;
+
+		// Set grid and domain
+		gr = grid;
+		domain = dom;
+
+		// Create a cartesian grid graph
+		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
+		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain, bc);
+
+		// Init to 0.0 axis z (to fix in graphFactory)
+		if (dim < 3)
+		{
+			for (size_t i = 0; i < gp.getNVertex(); i++)
+			{
+				gp.vertex(i).template get<nm_v::x>()[2] = 0.0;
+			}
+		}
+
+		for (size_t i = 0; i < gp.getNVertex(); i++)
+			gp.vertex(i).template get<nm_v::global_id>() = i;
+	}
+
+	/*! \brief Get the current graph (main)
+	 *
+	 * \return the current sub-sub domain Graph
+	 *
+	 */
+	Graph_CSR<nm_v, nm_e> & getGraph()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		return gp;
+	}
+
+	/*! \brief Distribute the sub-sub-domains
+	 *
+	 */
+	void decompose()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		Metis<Graph_CSR<nm_v, nm_e>> met(gp, v_cl.getProcessingUnits(), useWeights);
+		met.onTest(testing);
+
+		// decompose
+		met.decompose<nm_v::proc_id>();
+	}
+
+	/*! \brief Refine current decomposition (NOT AVAILABLE on Metis)
+	 *
+	 * Disabled for MetisDistribution
+	 *
+	 */
+	void refine()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " MetisDistribution does not have refine functionality";
+		ACTION_ON_ERROR(METIS_DISTRIBUTION_ERROR);
+	}
+
+	/*! \brief Function that return the position (point P1) of the sub-sub domain box in the space
+	 *
+	 * \param id vertex id
+	 * \param pos vector that contain x, y, z
+	 *
+	 */
+	void getSSDomainPos(size_t id, T (&pos)[dim])
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		check_overflow(id);
+
+		// Copy the geometrical informations inside the pos vector
+		pos[0] = gp.vertex(id).template get<nm_v::x>()[0];
+		pos[1] = gp.vertex(id).template get<nm_v::x>()[1];
+		if (dim == 3)
+			pos[2] = gp.vertex(id).template get<nm_v::x>()[2];
+	}
+
+	/*! \brief Checks if Computational/Migration/Communication Cost are used
+	 *
+	 * \return true if such weights are used
+	 *
+	 */
+	bool weightsAreUsed()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		return useWeights;
+	}
+
+	/*! \brief Function that returns the computational cost of the sub-sub-domain
+	 *
+	 * \param id sub-sub-domain
+	 *
+	 * \return the computational cost
+	 *
+	 */
+	size_t getComputationalCost(size_t id)
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		check_overflow(id);
+		return gp.vertex(id).template get<nm_v::computation>();
+	}
+
+	/*! \brief Initialize all the weight
+	 *
+	 * Initialize Computation/Communication/Migration costs to 1
+	 *
+	 */
+	void initWeights()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		for (size_t i = 0 ; i < getNSubSubDomains() ; i++)
+		{
+			setComputationCost(i,1);
+			setMigrationCost(i,1);
+			for (size_t j = 0 ; j < getNSubSubDomainNeighbors(i) ; j++)
+				setCommunicationCost(i,j,1);
+		}
+	}
+
+	/*! \brief Set computation cost on a sub-sub domain
+	 *
+	 * \param id sub-sub domain id
+	 * \param cost
+	 *
+	 */
+	void setComputationCost(size_t id, size_t cost)
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		check_overflow(id);
+
+		useWeights = true;
+
+		gp.vertex(id).template get<nm_v::computation>() = cost;
+	}
+
+	/*! \brief Set migration cost on a sub-sub domain
+	 *
+	 * \param id of the sub-sub domain
+	 * \param cost
+	 */
+	void setMigrationCost(size_t id, size_t cost)
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		check_overflow(id);
+
+		gp.vertex(id).template get<nm_v::migration>() = cost;
+	}
+
+	/*! \brief Set communication cost between neighborhood sub-sub-domains (weight on the edge)
+	 *
+	 * \param id sub-sub domain
+	 * \param e id in the neighborhood list (id in the adjacency list)
+	 * \param cost
+	 */
+	void setCommunicationCost(size_t id, size_t e, size_t cost)
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		check_overflow(id);
+		check_overflowe(id,e);
+
+		gp.getChildEdge(id, e).template get<nm_e::communication>() = cost;
+	}
+
+	/*! \brief Returns total number of sub-sub-domains
+	 *
+	 * \return sub-sub domain numbers
+	 *
+	 */
+	size_t getNSubSubDomains()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		return gp.getNVertex();
+	}
+
+	/*! \brief Returns total number of neighbors of one sub-sub-domain
+	 *
+	 * \param id of the sub-sub-domain
+	 */
+	size_t getNSubSubDomainNeighbors(size_t id)
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		check_overflow(id);
+
+		return gp.getNChilds(id);
+	}
+
+	/*! \brief Compute the unbalance of the processor compared to the optimal balance
+	 *
+	 * \return the unbalance from the optimal one 0.01 mean 1%
+	 */
+	float getUnbalance()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		long int min, max, sum;
+		openfpm::vector<long int> loads(v_cl.getProcessingUnits());
+
+		for (size_t i = 0; i < loads.size(); i++)
+			loads.get(i) = 0;
+
+		if (useWeights == false)
+		{
+			for (size_t i = 0; i < gp.getNVertex(); i++)
+				loads.get(gp.vertex(i).template get<nm_v::proc_id>())++;
+		}
+		else
+		{
+			for (size_t i = 0; i < gp.getNVertex(); i++)
+				loads.get(gp.vertex(i).template get<nm_v::proc_id>()) += (gp.vertex(i).template get<nm_v::computation>() == 0)?1:gp.vertex(i).template get<nm_v::computation>();
+		}
+
+		max = *std::max_element(loads.begin(), loads.end());
+		min = *std::min_element(loads.begin(), loads.end());
+		sum = std::accumulate(loads.begin(),loads.end(),0);
+
+		float unbalance = ((float) (max - min)) / ((float) sum / v_cl.getProcessingUnits());
+
+		return unbalance;
+	}
+
+	/*! \brief It sets the class in test mode
+	 *
+	 * At the moment it fixes the seed to have reproducible results
+	 *
+	 */
+	void onTest()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		testing = true;
+	}
+
+	/*! \brief Write the distribution graph into file
+	 *
+	 * \param out output filename
+	 *
+	 */
+	void write(std::string out)
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		VTKWriter<Graph_CSR<nm_v, nm_e>, VTK_GRAPH> gv2(gp);
+		gv2.write(out);
+
+	}
+
+	/*! \brief Compute the total computational cost of the processor
+	 *
+	 * \return the total computation cost
+	 */
+	size_t getProcessorLoad()
+	{
+#ifdef SE_CLASS2
+			check_valid(this,8);
+#endif
+		size_t load = 0;
+
+		for (size_t i = 0; i < gp.getNVertex(); i++)
+		{
+			if (gp.vertex(i).template get<nm_v::proc_id>() == v_cl.getProcessUnitID())
+				load += gp.vertex(i).template get<nm_v::computation>();
+		}
+
+		return load;
+	}
+
+	/*! \brief operator=
+	 *
+	 *
+	 */
+	MetisDistribution & operator=(const MetisDistribution & mt)
+	{
+#ifdef SE_CLASS2
+			check_valid(mt);
+			check_valid(this,8);
+#endif
+		this->v_cl = mt.v_cl;
+		this->gr = mt.gr;
+		this->domain = mt.domain;
+		this->gp = mt.gp;
+		this->useWeights = mt.useWeights;
+		return *this;
+	}
+
+	/*! \brief operator=
+	 *
+	 *
+	 */
+	MetisDistribution & operator=(MetisDistribution && mt)
+	{
+#ifdef SE_CLASS2
+			check_valid(mt);
+			check_valid(this,8);
+#endif
+		this->v_cl = mt.v_cl;
+		this->gr = mt.gr;
+		this->domain = mt.domain;
+		this->gp.swap(mt.gp);
+		this->useWeights = mt.useWeights;
+		return *this;
+	}
+
+	/*! \brief operator==
+	 *
+	 * \return true if the distribution match
+	 *
+	 */
+	inline bool operator==(const MetisDistribution & mt)
+	{
+#ifdef SE_CLASS2
+			check_valid(mt);
+			check_valid(this,8);
+#endif
+		bool ret = true;
+
+		ret &= (this->gr == mt.gr);
+		ret &= (this->domain == mt.domain);
+		ret &= (this->gp == mt.gp);
+		ret &= (this->useWeights == mt.useWeights);
+		return ret;
+	}
+};
+
+#endif /* SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_ */
diff --git a/src/Decomposition/ParMetisDistribution.hpp b/src/Decomposition/Distribution/ParMetisDistribution.hpp
similarity index 53%
rename from src/Decomposition/ParMetisDistribution.hpp
rename to src/Decomposition/Distribution/ParMetisDistribution.hpp
index cb0df5c08af3ae91507a3b6a67b4ae13443c505c..5893bbebd13f1103d75ac2f105b0dd6807fa98a8 100644
--- a/src/Decomposition/ParMetisDistribution.hpp
+++ b/src/Decomposition/Distribution/ParMetisDistribution.hpp
@@ -5,13 +5,34 @@
  *      Author: Antonio Leo
  */
 
+
+#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
+#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
+
+
 #include "SubdomainGraphNodes.hpp"
 #include "parmetis_util.hpp"
 #include "Graph/dist_map_graph.hpp"
-#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
-#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
+#include "Graph/ids.hpp"
 
-template<unsigned int dim, typename T, template<unsigned int, typename > class Domain = Box>
+#define PARMETIS_DISTRIBUTION_ERROR 100002
+
+/*! \brief Class that distributes sub-sub-domains across processors using the ParMetis library
+ *
+ * Given a graph and setting Computational cost, Communication cost (on the edge) and
+ * Migration cost or total Communication costs, it produces the optimal balanced distribution
+ *
+ * In addition to Metis it provide the functionality to refine the previously computed
+ * decomposition
+ *
+ * ### Initialize a Cartesian graph and decompose
+ * \snippet Distribution_unit_tests.hpp Initialize a ParMetis Cartesian graph and decompose
+ *
+ * ### Refine the decomposition
+ * \snippet Distribution_unit_tests.hpp refine with parmetis the decomposition
+ *
+ */
+template<unsigned int dim, typename T>
 class ParMetisDistribution
 {
 	//! Vcluster
@@ -21,182 +42,159 @@ class ParMetisDistribution
 	grid_sm<dim, void> gr;
 
 	//! rectangular domain to decompose
-	Domain<dim, T> domain;
+	Box<dim, T> domain;
 
 	//! Global sub-sub-domain graph
 	Graph_CSR<nm_v, nm_e> gp;
 
-	//! Processor sub-sub-domain graph
-	Graph_CSR<nm_v, nm_e> sub_g;
-
 	//! Convert the graph to parmetis format
 	Parmetis<Graph_CSR<nm_v, nm_e>> parmetis_graph;
 
 	//! Init vtxdist needed for Parmetis
-	openfpm::vector<idx_t> vtxdist;
+	//
+	// vtxdist is a common array across processor, it indicate how
+	// vertex are distributed across processors
+	//
+	// Example we have 3 processors
+	//
+	// processor 0 has 3 vertices
+	// processor 1 has 5 vertices
+	// processor 2 has 4 vertices
+	//
+	// vtxdist contain, 0,3,8,12
+	//
+	// vtx dist is the unique global-id of the vertices
+	//
+	openfpm::vector<rid> vtxdist;
 
 	//! partitions
 	openfpm::vector<openfpm::vector<idx_t>> partitions;
 
 	//! Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-	openfpm::vector<openfpm::vector<size_t>> v_per_proc;
-
-	//! Number of moved vertices in all iterations
-	size_t g_moved = 0;
+	openfpm::vector<openfpm::vector<gid>> v_per_proc;
 
-	//! Max number of moved vertices in all iterations
-	size_t m_moved = 0;
+	//! Hashmap to access the global position given the re-mapped one (needed to access the map)
+	std::unordered_map<rid, gid> m2g;
 
 	//! Flag to check if weights are used on vertices
 	bool verticesGotWeights = false;
 
-	/*! \brief Fill the graph of the processor with the first decomposition (linear)
-	 * Put vertices into processor graph (different for each processor)
-	 */
-	void fillSubGraph()
-	{
-
-		int Np = v_cl.getProcessingUnits();
-		int p_id = v_cl.getProcessUnitID();
-
-		for (size_t j = vtxdist.get(p_id), local_j = 0; j < vtxdist.get(p_id + 1); j++, local_j++)
-		{
-			// Add vertex
-
-			nm_v pv = gp.vertexByMapId(j);
-			sub_g.addVertex(pv, gp.vertexByMapId(j).template get<nm_v::global_id>());
-
-			// Add edges of vertex
-			for (size_t s = 0; s < gp.getNChilds(j); s++)
-			{
-				if (gp.vertex(gp.getChild(j, s)).template get<nm_v::proc_id>() != v_cl.getProcessUnitID())
-					gp.vertex(gp.getChild(j, s)).template get<nm_v::fake_v>() = 1;
-				else
-					gp.vertex(gp.getChild(j, s)).template get<nm_v::fake_v>() = 0;
-
-				// Add Edge
-				nm_e pe = gp.edge(j + s);
-				sub_g.template addEdge<NoCheck>(local_j, gp.getChild(j, s), pe);
-			}
-		}
-
-		// Just for output purpose
-		if (p_id == 0)
-		{
-			for (int i = 0; i < Np; i++)
-			{
-				for (size_t j = vtxdist.get(i); j < vtxdist.get(i + 1); j++)
-				{
-					gp.vertexByMapId(j).template get<nm_v::proc_id>() = i;
-				}
-			}
-		}
-	}
-
-	/*! \brief Update main graph ad subgraph with the partition in partitions param and renumber graphs
+	/*! \brief Update main graph and subgraph with the received data of the partitions from the other processors
 	 *
 	 */
 	void updateGraphs()
 	{
-
-		int Np = v_cl.getProcessingUnits();
-		int p_id = v_cl.getProcessUnitID();
-
-		//stats info
-		size_t moved = 0;
-
-		// reset sub graph and local subgroph index
-		int local_j = 0;
-		sub_g.clear();
+		size_t Np = v_cl.getProcessingUnits();
 
 		// Init n_vtxdist to gather informations about the new decomposition
-		openfpm::vector<idx_t> n_vtxdist(Np + 1);
-		for (int i = 0; i <= Np; i++)
-			n_vtxdist.get(i) = 0;
+		openfpm::vector<rid> n_vtxdist(Np + 1);
+		for (size_t i = 0; i <= Np; i++)
+			n_vtxdist.get(i).id = 0;
 
-		// Update main graph with other partitions made by Parmetis in other processors and the local partition
-		for (int i = 0; i < Np; i++)
+		// Update the main graph with received data from processor i
+		for (size_t i = 0; i < Np; i++)
 		{
-			int ndata = partitions.get(i).size();
+			size_t ndata = partitions.get(i).size();
+			size_t k = 0;
 
-			// Update the main graph with received informations
-			for (int k = 0, l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, l++)
+			// Update the main graph with the received informations
+			for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
 			{
+				// Create new n_vtxdist (just count processors vertices)
+				++n_vtxdist.get(partitions.get(i).get(k) + 1);
 
-				// Create new n_vtxdist (1) (just count processors vertices)
-				n_vtxdist.get(partitions.get(i).get(k) + 1)++;
-
-				if
-(				gp.vertexByMapId(l).template get<nm_v::proc_id>()
-				!= partitions.get(i).get(k))
-				{
-					moved++;
-				}
-
-				// Update proc id in the vertex
-				gp.vertexByMapId(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);
+				// Update proc id in the vertex (using the old map)
+				vertexByMapId(l).template get<nm_v::proc_id>() = partitions.get(i).get(k);
 
 				// Add vertex to temporary structure of distribution (needed to update main graph)
-				v_per_proc.get(partitions.get(i).get(k)).add(gp.getVertexGlobalId(l));
-
-				// Add vertices belonging to this processor in sub graph
-				if (partitions.get(i).get(k) == p_id)
-				{
-
-					nm_v pv = gp.vertexByMapId(l);
-					sub_g.addVertex(pv, pv.template get<nm_v::global_id>());
-
-					// Add edges of vertex
-					for (size_t s = 0; s < gp.getNChildsByMapId(l); s++)
-					{
-						if (gp.vertex(gp.getChildByVertexId(l, s)).template get<nm_v::proc_id>() != v_cl.getProcessUnitID())
-							gp.vertex(gp.getChildByVertexId(l, s)).template get<nm_v::fake_v>() = 1;
-						else
-							gp.vertex(gp.getChildByVertexId(l, s)).template get<nm_v::fake_v>() = 0;
-
-						nm_e pe = gp.edge(l + s);
-						sub_g.template addEdge<NoCheck>(local_j, gp.getChildByVertexId(l, s), pe);
-					}
-
-					local_j++;
-				}
-
+				v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
 			}
 		}
 
-		// Create new n_vtxdist (2) (write boundaries)
-		for (int i = 2; i <= Np; i++)
-		{
+		// Create new n_vtxdist (accumulate the counters)
+		for (size_t i = 2; i <= Np; i++)
 			n_vtxdist.get(i) += n_vtxdist.get(i - 1);
-		}
 
 		// Copy the new decomposition in the main vtxdist
-		for (int i = 0; i <= Np; i++)
-		{
+		for (size_t i = 0; i <= Np; i++)
 			vtxdist.get(i) = n_vtxdist.get(i);
-		}
 
-		// Renumbering subgraph
-		sub_g.resetLocalToGlobalMap();
-		for (size_t j = vtxdist.get(p_id), i = 0; j < vtxdist.get(p_id + 1); j++, i++)
+		// Renumber the main graph and re-create the map
+		for (size_t p = 0; p < (size_t)Np; p++)
 		{
-			sub_g.setMapId<nm_v::id>(j, sub_g.vertex(i).template get<nm_v::global_id>(), i);
-		}
-
-		// Renumbering main graph
-		for (size_t p = 0; p < Np; p++)
-		{
-			for (size_t j = vtxdist.get(p), i = 0; j < vtxdist.get(p + 1); j++, i++)
+			size_t i = 0;
+			for (rid j = vtxdist.get(p); j < vtxdist.get(p + 1); ++j, i++)
 			{
-				gp.setMapId<nm_v::id>(j, v_per_proc.get(p).get(i), v_per_proc.get(p).get(i));
+				setMapId(j, v_per_proc.get(p).get(i));
+				gp.vertex(v_per_proc.get(p).get(i).id).template get<nm_v::id>() = j.id;
 			}
 		}
+	}
 
-		g_moved += moved;
+	void createMapsFromGlobalGraph(openfpm::vector<size_t> & vtxdist)
+	{
+/*		openfpm::vector<size_t> cnt_np;
+
+		for (size_t i = 0 ; i < gp.getNVertex() ; i++)
+		{
+			cnt_np(gp.template vertex<nm_v::proc_id>)++;
+
+			gp.setMapId()
+		}*/
+	}
 
-		if (moved > m_moved)
-			m_moved = moved;
+	/*! \brief operator to access the vertex by mapped position
+	 *
+	 * operator to access the vertex
+	 *
+	 * \param id re-mapped id of the vertex to access
+	 *
+	 */
+	inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
+	{
+		return gp.vertex(m2g.find(id)->second.id);
+	}
 
+	/*! \brief operator to remap vertex to a new position
+	 *
+	 * \param n re-mapped position
+	 * \param g global position
+	 *
+	 */
+	inline void setMapId(rid n, gid g)
+	{
+		m2g[n] = g;
+	}
+
+	/*! \brief Get the global id of the vertex given the re-mapped one
+	 *
+	 * \param n remapped id
+	 * \return global id
+	 *
+	 */
+	gid getVertexGlobalId(rid n)
+	{
+		return m2g.find(n)->second;
+	}
+
+	/*! \brief operator to init ids vector
+	 *
+	 * operator to init ids vector
+	 *
+	 */
+	void initLocalToGlobalMap()
+	{
+		gid g;
+		rid i;
+		i.id = 0;
+
+		m2g.clear();
+		for ( ; (size_t)i.id < gp.getNVertex(); ++i)
+		{
+			g.id = i.id;
+
+			m2g.insert( { i, g });
+		}
 	}
 
 	/*! \brief Callback of the sendrecv to set the size of the array received
@@ -221,33 +219,71 @@ public:
 
 	/*! Constructor for the ParMetis class
 	 *
-	 * @param v_cl Vcluster to use as communication object in this class
+	 * \param v_cl Vcluster to use as communication object in this class
+	 */
+	ParMetisDistribution(Vcluster & v_cl)
+	:v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
+	{
+	}
+
+	/*! Copy constructor
+	 *
+	 * \param pm Distribution to copy
+	 *
 	 */
-	ParMetisDistribution(Vcluster & v_cl) :
-			v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
+	ParMetisDistribution(const ParMetisDistribution<dim,T> & pm)
+	:v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
+	{
+		this->operator=(pm);
+	}
 
+	/*! Copy constructor
+	 *
+	 * \param pm Distribution to copy
+	 *
+	 */
+	ParMetisDistribution(ParMetisDistribution<dim,T> && pm)
 	{
+		this->operator=(pm);
 	}
 
-	/*! \brief Initialize the distribution graph
+	/*! \brief Create the Cartesian graph
 	 *
-	 * @param grid
-	 * @param dom
+	 * \param grid info
+	 * \param dom domain
 	 */
-	void init(grid_sm<dim, void> & grid, Domain<dim, T> dom)
+	void createCartGraph(grid_sm<dim, void> & grid, Box<dim, T> dom)
 	{
+		size_t bc[dim];
+
+		for (size_t i = 0 ; i < dim ; i++)
+			bc[i] = NON_PERIODIC;
+
 		// Set grid and domain
 		gr = grid;
 		domain = dom;
 
 		// Create a cartesian grid graph
 		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
-		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
-		gp.initLocalToGlobalMap();
+		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain, bc);
+		initLocalToGlobalMap();
+
+		//! Get the number of processing units
+		size_t Np = v_cl.getProcessingUnits();
+
+		//! Division of vertices in Np graphs
+		//! Put (div+1) vertices in mod graphs
+		//! Put div vertices in the rest of the graphs
+		size_t mod_v = gr.size() % Np;
+		size_t div_v = gr.size() / Np;
 
-		// Create sub graph
-		DistCartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> dist_g_factory;
-		sub_g = dist_g_factory.template construct<NO_EDGE, nm_v::id, nm_v::global_id, nm_e::srcgid, nm_e::dstgid, T, dim - 1, 0, 1, 2>(gr.getSize(), domain, vtxdist);
+		for (size_t i = 0; i <= Np; i++)
+		{
+			if (i < mod_v)
+				vtxdist.get(i).id = (div_v + 1) * i;
+			else
+				vtxdist.get(i).id = (div_v) * i + mod_v;
+		}
 
 		// Init to 0.0 axis z (to fix in graphFactory)
 		if (dim < 3)
@@ -272,7 +308,7 @@ public:
 		return gp;
 	}
 
-	/*! \brief Create first decomposition, it divides the graph in slices and give each slice to a processor
+	/*! \brief Create the decomposition
 	 *
 	 */
 	void decompose()
@@ -284,18 +320,23 @@ public:
 		//! Get the number of processing units
 		size_t Np = v_cl.getProcessingUnits();
 
-		parmetis_graph.initSubGraph(sub_g, verticesGotWeights);
+		// Number of local vertex
+		size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
+
+		parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
 
 		//! Decompose
-		parmetis_graph.decompose<nm_v::proc_id>(vtxdist, sub_g);
+		parmetis_graph.decompose<nm_v::proc_id>(vtxdist);
 
 		//! Get result partition for this processors
 		idx_t *partition = parmetis_graph.getPartition();
 
 		//! Prepare vector of arrays to contain all partitions
-		partitions.get(p_id).resize(sub_g.getNVertex());
-		std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));
+		partitions.get(p_id).resize(nl_vertex);
+		std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));
 
+		// Communicate the local distribution to the other processors
+		// to reconstruct individually the global graph
 		openfpm::vector<size_t> prc;
 		openfpm::vector<size_t> sz;
 		openfpm::vector<void *> ptr;
@@ -305,7 +346,7 @@ public:
 			if (i != v_cl.getProcessUnitID())
 			{
 				prc.add(i);
-				sz.add(sub_g.getNVertex() * sizeof(idx_t));
+				sz.add(nl_vertex * sizeof(idx_t));
 				ptr.add(partitions.get(p_id).getPointer());
 			}
 		}
@@ -313,13 +354,8 @@ public:
 		v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,
 		NONE);
 
-		// Update graphs with the new distributions
+		// Update graphs with the received data
 		updateGraphs();
-
-		// reset statistical variables, we only need it in refinement
-		g_moved = 0;
-		m_moved = 0;
-
 	}
 
 	/*! \brief Refine current decomposition
@@ -333,20 +369,23 @@ public:
 		size_t Np = v_cl.getProcessingUnits();
 		size_t p_id = v_cl.getProcessUnitID();
 
+		// Number of local vertex
+		rid nl_vertex = vtxdist.get(p_id+1) - vtxdist.get(p_id);
+
 		// Reset parmetis graph and reconstruct it
-		parmetis_graph.reset(gp, sub_g);
+		parmetis_graph.reset(gp, vtxdist, m2g);
 
 		// Refine
-		parmetis_graph.refine<nm_v::proc_id>(vtxdist, sub_g);
+		parmetis_graph.refine<nm_v::proc_id>(vtxdist);
 
 		// Get result partition for this processor
 		idx_t * partition = parmetis_graph.getPartition();
 
-		partitions.get(p_id).resize(sub_g.getNVertex());
-		std::copy(partition, partition + sub_g.getNVertex(), &partitions.get(p_id).get(0));
+		partitions.get(p_id).resize(nl_vertex.id);
+		std::copy(partition, partition + nl_vertex.id, &partitions.get(p_id).get(0));
 
 		// Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
-		for (int i = 0; i < Np; ++i)
+		for (size_t i = 0; i < Np; ++i)
 		{
 			v_per_proc.get(i).clear();
 		}
@@ -361,7 +400,7 @@ public:
 			{
 				partitions.get(i).clear();
 				prc.add(i);
-				sz.add(sub_g.getNVertex() * sizeof(idx_t));
+				sz.add(nl_vertex.id * sizeof(idx_t));
 				ptr.add(partitions.get(p_id).getPointer());
 			}
 		}
@@ -374,9 +413,9 @@ public:
 		updateGraphs();
 	}
 
-	/*! \brief Compute the unbalance value
+	/*! \brief Compute the unbalance of the processor compared to the optimal balance
 	 *
-	 * \return the unbalance value
+	 * \return the unbalance from the optimal one 0.01 mean 1%
 	 */
 	float getUnbalance()
 	{
@@ -396,7 +435,7 @@ public:
 		v_cl.sum(sum);
 		v_cl.execute();
 
-		unbalance = ((float) (max - min)) / (float) sum;
+		unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());
 
 		std::cout << std::endl;
 		return unbalance * 100;
@@ -408,7 +447,7 @@ public:
 	 * \param pos vector that will contain x, y, z
 	 *
 	 */
-	void getVertexPosition(size_t id, T (&pos)[dim])
+	void getSubSubDomainPosition(size_t id, T (&pos)[dim])
 	{
 		if (id >= gp.getNVertex())
 			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
@@ -436,7 +475,7 @@ public:
 	 * \param weight to give to the vertex
 	 *
 	 */
-	inline void setVertexWeight(size_t id, size_t weight)
+	inline void setComputationCost(size_t id, size_t weight)
 	{
 		if (!verticesGotWeights)
 			verticesGotWeights = true;
@@ -444,12 +483,6 @@ public:
 		if (id >= gp.getNVertex())
 			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
 
-		// If the vertex is inside this processor update the value
-		if (sub_g.vertexIsInThisGraph(id))
-		{
-			sub_g.getLocalVertexByGlobalId(id).template get<nm_v::computation>() = weight;
-		}
-
 		// Update vertex in main graph
 		gp.vertex(id).template get<nm_v::computation>() = weight;
 	}
@@ -484,38 +517,18 @@ public:
 	{
 		size_t load = 0;
 
-		for (size_t i = 0; i < sub_g.getNVertex(); i++)
+		// Processor id
+		size_t p_id = v_cl.getProcessUnitID();
+
+
+		for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
 		{
-			load += sub_g.vertex(i).template get<nm_v::computation>();
+			load += gp.vertex(m2g.find(i)->second.id).template get<nm_v::computation>();
 		}
 		//std::cout << v_cl.getProcessUnitID() << " weight " << load << " size " << sub_g.getNVertex() << "\n";
 		return load;
 	}
 
-	/*! \brief return number of moved vertices in all iterations so far
-	 *
-	 * \param id vertex id
-	 *
-	 * \return vector with x, y, z
-	 *
-	 */
-	size_t getTotalMovedV()
-	{
-		return g_moved;
-	}
-
-	/*! \brief return number of moved vertices in all iterations so far
-	 *
-	 * \param id vertex id
-	 *
-	 * \return vector with x, y, z
-	 *
-	 */
-	size_t getMaxMovedV()
-	{
-		return m_moved;
-	}
-
 	/*! \brief Set migration cost of the vertex id
 	 *
 	 * \param id of the vertex to update
@@ -526,12 +539,6 @@ public:
 		if (id >= gp.getNVertex())
 			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
 
-		// If the vertex is inside this processor update the value
-		if (sub_g.vertexIsInThisGraph(id))
-		{
-			sub_g.getLocalVertexByGlobalId(id).template get<nm_v::migration>() = migration;
-		}
-
 		gp.vertex(id).template get<nm_v::migration>() = migration;
 	}
 
@@ -548,14 +555,6 @@ public:
 		if (e_id >= gp.getNEdge())
 			std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
 
-		// If the vertex is inside this processor update the value
-		if (sub_g.vertexIsInThisGraph(v_id))
-		{
-			// Get the local id of the vertex
-			size_t local_id = sub_g.getLocalIdFromGlobalId(v_id);
-			sub_g.getChildEdge(local_id, e).template get<nm_e::communication>() = communication;
-		}
-
 		gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
 	}
 
@@ -579,20 +578,48 @@ public:
 		return gp.getNChilds(id);
 	}
 
-	/*! \brief Print current graph and save it to file with name test_graph_[id]
+	/*! \brief Print the current distribution and save it to VTK file
 	 *
-	 * \param id to attach to the filename
+	 * \param file filename
 	 *
 	 */
-	void printCurrentDecomposition(int id)
+	void write(const std::string & file)
 	{
 		if (v_cl.getProcessUnitID() == 0)
 		{
-			VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
-			gv2.write("test_graph_" + std::to_string(id) + ".vtk");
+			VTKWriter<Graph_CSR<nm_v, nm_e>, VTK_GRAPH> gv2(gp);
+			gv2.write(file);
 		}
 
 	}
+
+	const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
+	{
+		v_cl = dist.v_cl;
+		gr = dist.gr;
+		domain = dist.domain;
+		gp = dist.gp;
+		vtxdist = dist.vtxdist;
+		partitions = dist.partitions;
+		v_per_proc = dist.v_per_proc;
+		verticesGotWeights = dist.verticesGotWeights;
+
+		return *this;
+	}
+
+	const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
+	{
+		v_cl = dist.v_cl;
+		gr = dist.gr;
+		domain = dist.domain;
+		gp.swap(dist.gp);
+		vtxdist.swap(dist.vtxdist);
+		partitions.swap(dist.partitions);
+		v_per_proc.swap(dist.v_per_proc);
+		verticesGotWeights = dist.verticesGotWeights;
+
+		return *this;
+	}
 };
 
 #endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
diff --git a/src/metis_util.hpp b/src/Decomposition/Distribution/metis_util.hpp
similarity index 90%
rename from src/metis_util.hpp
rename to src/Decomposition/Distribution/metis_util.hpp
index e7024e4d163605f5513443114f3edc0fe5100380..50a47e11c3cd4a5dd243f359c7182a60ee52a7cc 100644
--- a/src/metis_util.hpp
+++ b/src/Decomposition/Distribution/metis_util.hpp
@@ -10,8 +10,8 @@
 
 #include <iostream>
 #include "metis.h"
-#include "VTKWriter.hpp"
 #include "SubdomainGraphNodes.hpp"
+#include "VTKWriter/VTKWriter.hpp"
 
 /*! \brief Metis graph structure
  *
@@ -129,9 +129,6 @@ class Metis
 	 */
 	void constructAdjListWithWeights(Graph & g)
 	{
-		Mg.vwgt = new idx_t[1];
-		Mg.vwgt[0] = 2;
-
 		// create xadj, adjlist, vwgt, adjwgt and vsize
 		Mg.xadj = new idx_t[g.getNVertex() + 1];
 		Mg.adjncy = new idx_t[g.getNEdge()];
@@ -150,7 +147,9 @@ class Metis
 		{
 			// Add weight to vertex and migration cost
 			Mg.vwgt[i] = g.vertex(i).template get<nm_v::computation>();
+			Mg.vwgt[i] = (Mg.vwgt[i] == 0)?1:Mg.vwgt[i];
 			Mg.vsize[i] = g.vertex(i).template get<nm_v::migration>();
+			Mg.vsize[i] = (Mg.vsize[i] == 0)?1:Mg.vsize[i];
 
 			// Calculate the starting point in the adjacency list
 			Mg.xadj[id] = prev;
@@ -160,7 +159,9 @@ class Metis
 			{
 				Mg.adjncy[prev + s] = g.getChild(i, s);
 
+				// zero values on Metis are dangerous
 				Mg.adjwgt[prev + s] = g.getChildEdge(i, s).template get<nm_e::communication>();
+				Mg.adjwgt[prev + s] = (Mg.adjwgt[prev + s] == 0)?1:Mg.adjwgt[prev + s];
 			}
 
 			// update the position for the next vertex
@@ -244,7 +245,7 @@ public:
 
 		Mg.tpwgts = NULL;
 
-		//! Set to null the partition load imbalance tollerace
+		//! Set to null the partition load imbalance tolerance
 
 		Mg.ubvec = NULL;
 
@@ -258,7 +259,7 @@ public:
 		//! Is an output vector containing the partition for each vertex
 		Mg.part = new idx_t[g.getNVertex()];
 
-		for (int i = 0; i < g.getNVertex(); i++)
+		for (size_t i = 0; i < g.getNVertex(); i++)
 			Mg.part[i] = 0;
 	}
 
@@ -396,6 +397,31 @@ public:
 			++it;
 		}
 	}
+
+	/*! \brief It sets Metis in test mode
+	 *
+	 * \param testing set to true to enable test mode
+	 *
+	 * At the moment disable the seed randomness to keep the result
+	 * reproducible
+	 *
+	 */
+	void onTest(bool testing)
+	{
+		if (testing == false)
+			return;
+
+		if (Mg.options == NULL)
+		{
+			// allocate
+			Mg.options = new idx_t[METIS_NOPTIONS];
+
+			// set default options
+			METIS_SetDefaultOptions(Mg.options);
+		}
+
+		Mg.options[METIS_OPTION_SEED] = 0;
+	}
 };
 
 #endif
diff --git a/src/metis_util_unit_test.hpp b/src/Decomposition/Distribution/metis_util_unit_test.hpp
similarity index 52%
rename from src/metis_util_unit_test.hpp
rename to src/Decomposition/Distribution/metis_util_unit_test.hpp
index f63d2097fd29c693660d53eda56c2dd287839de1..d25821deb72b02150afae530a814481c20e34a35 100644
--- a/src/metis_util_unit_test.hpp
+++ b/src/Decomposition/Distribution/metis_util_unit_test.hpp
@@ -25,6 +25,14 @@ BOOST_AUTO_TEST_SUITE( Metis_test )
 
 BOOST_AUTO_TEST_CASE( Metis_test_use)
 {
+	Vcluster & v_cl = *global_v_cluster;
+
+	if (v_cl.getProcessingUnits() != 3)
+		return;
+
+	if (v_cl.getProcessUnitID() != 0)
+		return;
+
 	CartesianGraphFactory<3,Graph_CSR<nm_v,nm_e>> g_factory;
 	CartesianGraphFactory<3,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
 
@@ -34,13 +42,14 @@ BOOST_AUTO_TEST_CASE( Metis_test_use)
 	// Box
 	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
 
-	// Graph to decompose
+	// Boundary conditions, non periodic
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
 
-	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,float,2,0,1,2>(sz,box);
+	// Graph to decompose
+	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,NO_VERTEX_ID,float,2,0,1,2>(sz,box,bc);
 
 	// Processor graph
-
-	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,float,2>(sz,box);
+	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,NO_VERTEX_ID,float,2>(sz,box,bc);
 
 	// Convert the graph to metis
 
@@ -49,7 +58,22 @@ BOOST_AUTO_TEST_CASE( Metis_test_use)
 	// decompose
 
 	met.decompose<nm_part_v::id>(gp);
-	met.decompose<nm_v::id>();
+	met.decompose<nm_v::proc_id>();
+
+	// Write the VTK file
+
+	VTKWriter<Graph_CSR<nm_part_v,nm_part_e>,VTK_GRAPH> vtk(gp);
+	vtk.write("vtk_metis_util_gp.vtk");
+
+	VTKWriter<Graph_CSR<nm_v,nm_e>,VTK_GRAPH> vtk2(g);
+	vtk2.write("vtk_metis_util_g.vtk");
+
+	// check that match
+
+	bool test = compare("vtk_metis_util_gp.vtk","src/Decomposition/Distribution/test_data/vtk_metis_util_gp_test.vtk");
+	bool test2 = compare("vtk_metis_util_g.vtk","src/Decomposition/Distribution/test_data/vtk_metis_util_g_test.vtk");
+	BOOST_REQUIRE_EQUAL(true,test);
+	BOOST_REQUIRE_EQUAL(true,test2);
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/parmetis_dist_util.hpp b/src/Decomposition/Distribution/parmetis_dist_util.hpp
similarity index 99%
rename from src/parmetis_dist_util.hpp
rename to src/Decomposition/Distribution/parmetis_dist_util.hpp
index a9713995ed429b437f8088f45094a1e2f5178b88..e7405130503b4857b1eac5fb172123be0196bf99 100755
--- a/src/parmetis_dist_util.hpp
+++ b/src/Decomposition/Distribution/parmetis_dist_util.hpp
@@ -10,7 +10,7 @@
 
 #include <iostream>
 #include "parmetis.h"
-#include "VTKWriter.hpp"
+#include "VTKWriter/VTKWriter.hpp"
 #include "VCluster.hpp"
 
 /*! \brief Metis graph structure
diff --git a/src/parmetis_util.hpp b/src/Decomposition/Distribution/parmetis_util.hpp
similarity index 73%
rename from src/parmetis_util.hpp
rename to src/Decomposition/Distribution/parmetis_util.hpp
index 89b34e7528fe2f5bcba5b09e9d1f7c1e8e7da21a..de3b9125eb2d0023a1babb8c8baad49b2f952df7 100755
--- a/src/parmetis_util.hpp
+++ b/src/Decomposition/Distribution/parmetis_util.hpp
@@ -10,8 +10,9 @@
 
 #include <iostream>
 #include "parmetis.h"
-#include "VTKWriter.hpp"
+#include "VTKWriter/VTKWriter.hpp"
 #include "VCluster.hpp"
+#include "Graph/ids.hpp"
 
 /*! \brief Metis graph structure
  *
@@ -114,27 +115,42 @@ class Parmetis
 	// nc Number of partition
 	size_t nc = 0;
 
+	// first re-mapped id
+	rid first;
+
+	// last re-mapped id
+	rid last;
+
+	// number of vertices that the processor has
+	size_t nvertex;
+
 	/*! \brief Construct Adjacency list
 	 *
-	 * \param g Reference graph to get informations
+	 * \param g Global graph
 	 *
 	 */
-	void constructAdjList(Graph &refGraph, Graph & sub_g)
+	void constructAdjList(Graph &g, const std::unordered_map<rid,gid> & m2g)
 	{
 		// init basic graph informations and part vector
 		// Put the total communication size to NULL
 
-		Mg.nvtxs[0] = sub_g.getNVertex();
-		Mg.part = new idx_t[sub_g.getNVertex()];
-		for (int i = 0; i < sub_g.getNVertex(); i++)
+		Mg.nvtxs[0] = nvertex;
+		Mg.part = new idx_t[nvertex];
+
+		size_t nedge = 0;
+		size_t i = 0;
+		for (rid j = first; i < nvertex ; i++, ++j)
+		{
 			Mg.part[i] = p_id;
+			nedge += g.getNChilds(m2g.find(j)->second.id);
+		}
 
 		// create xadj, adjlist, vwgt, adjwgt and vsize
-		Mg.xadj = new idx_t[sub_g.getNVertex() + 1];
-		Mg.adjncy = new idx_t[sub_g.getNEdge()];
-		Mg.vwgt = new idx_t[sub_g.getNVertex()];
-		Mg.adjwgt = new idx_t[sub_g.getNEdge()];
-		Mg.vsize = new idx_t[sub_g.getNVertex()];
+		Mg.xadj = new idx_t[nvertex + 1];
+		Mg.adjncy = new idx_t[nedge];
+		Mg.vwgt = new idx_t[nvertex];
+		Mg.adjwgt = new idx_t[nedge];
+		Mg.vsize = new idx_t[nvertex];
 
 		//! starting point in the adjacency list
 		size_t prev = 0;
@@ -142,49 +158,32 @@ class Parmetis
 		// actual position
 		size_t id = 0;
 
-		// property id
-		size_t real_id;
-
-		// boolan to check if ref is the main graph
-		bool main = refGraph.getNVertex() != sub_g.getNVertex();
+		size_t j = 0;
 
 		// for each vertex calculate the position of the starting point in the adjacency list
-		for (size_t i = 0; i < sub_g.getNVertex(); i++)
+		for (rid i = first ; i <= last; ++i, j++)
 		{
+			gid idx = m2g.find(i)->second;
 
 			// Add weight to vertex and migration cost
-			Mg.vwgt[i] = sub_g.vertex(i).template get<nm_v::computation>();
-			Mg.vsize[i] = sub_g.vertex(i).template get<nm_v::migration>();;
+			Mg.vwgt[j] = g.vertex(idx.id).template get<nm_v::computation>();
+			Mg.vsize[j] = g.vertex(idx.id).template get<nm_v::migration>();
 
 			// Calculate the starting point in the adjacency list
 			Mg.xadj[id] = prev;
 
-			if (main)
-				real_id = sub_g.getGlobalIdFromMap(sub_g.vertex(i).template get<nm_v::id>());
-			else
-				real_id = i;
-
 			// Create the adjacency list and the weights for edges
-			for (size_t s = 0; s < refGraph.getNChilds(real_id); s++)
+			for (size_t s = 0; s < g.getNChilds(idx.id); s++)
 			{
 
-				size_t child = refGraph.getChild(real_id, s);
-
-				if (main)
-				{
-					Mg.adjncy[prev + s] = refGraph.vertex(child).template get<nm_v::id>();
-				}
-				else
-				{
-					Mg.adjncy[prev + s] = child;
-				}
-
-				Mg.adjwgt[prev + s] = refGraph.edge(prev+s).template get<nm_e::communication>();
+				size_t child = g.getChild(idx.id, s);
 
+				Mg.adjncy[prev + s] = g.vertex(child).template get<nm_v::id>();
+				Mg.adjwgt[prev + s] = g.getChildEdge(idx.id,s).template get<nm_e::communication>();
 			}
 
 			// update the position for the next vertex
-			prev += refGraph.getNChilds(real_id);
+			prev += g.getNChilds(idx.id);
 
 			id++;
 		}
@@ -203,11 +202,30 @@ public:
 	 * \param nc number of partitions
 	 *
 	 */
-	Parmetis(Vcluster & v_cl, size_t nc) :
-			v_cl(v_cl), nc(nc)
+	Parmetis(Vcluster & v_cl, size_t nc)
+	:v_cl(v_cl), nc(nc)
 	{
 		// TODO Move into VCluster
 		MPI_Comm_dup(MPI_COMM_WORLD, &comm);
+
+		// Nullify Mg
+		Mg.nvtxs = NULL;
+		Mg.ncon = NULL;
+		Mg.xadj = NULL;
+		Mg.adjncy = NULL;
+		Mg.vwgt = NULL;
+		Mg.vsize = NULL;
+		Mg.adjwgt = NULL;
+		Mg.nparts = NULL;
+		Mg.tpwgts = NULL;
+		Mg.ubvec = NULL;
+		Mg.options = NULL;
+		Mg.objval = NULL;
+		Mg.part = NULL;
+		Mg.edgecut = NULL;
+		Mg.itr = NULL;
+		Mg.numflag = NULL;
+		Mg.wgtflag = NULL;
 	}
 
 	//TODO deconstruct new variables
@@ -292,112 +310,35 @@ public:
 
 	/*! \brief Set the Sub-graph
 	 *
-	 * \param sub_g Sub-graph to set
+	 * \param g Global graph to set
 	 * \param w true if vertices have weights
 	 */
-	void initSubGraph(Graph & sub_g, bool w)
+	void initSubGraph(Graph & g, const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid,gid> & m2g, bool w)
 	{
 		p_id = v_cl.getProcessUnitID();
 
-		// Get the number of vertex
-
-		Mg.nvtxs = new idx_t[1];
-		Mg.nvtxs[0] = sub_g.getNVertex();
+		first = vtxdist.get(p_id);
+		last = vtxdist.get(p_id+1)-1;
+		nvertex = last.id - first.id + 1;
 
-		// Set the number of constrains
-
-		Mg.ncon = new idx_t[1];
-		Mg.ncon[0] = 1;
-
-		// Set to null the weight of the vertex (init after in constructAdjList) (can be removed)
-
-		Mg.vwgt = NULL;
-
-		// Set to null the weight of the edge (init after in constructAdjList) (can be removed)
-
-		Mg.adjwgt = NULL;
+		setDefaultParameters(w);
 
 		// construct the adjacency list
-
-		constructAdjList(sub_g, sub_g);
-
-		// Set the total number of partitions
-
-		Mg.nparts = new idx_t[1];
-		Mg.nparts[0] = nc;
-
-		//! Set option for the graph partitioning (set as default)
-
-		Mg.options = new idx_t[4];
-		Mg.options[0] = 0;
-		Mg.options[1] = 0;
-		Mg.options[2] = 0;
-		Mg.options[3] = 0;
-
-		//! is an output vector containing the partition for each vertex
-
-		Mg.part = new idx_t[sub_g.getNVertex()];
-		for (int i = 0; i < sub_g.getNVertex(); i++)
-			Mg.part[i] = p_id;
-
-		//! adaptiveRepart itr value
-		Mg.itr = new real_t[1];
-		Mg.itr[0] = 1000.0;
-
-		//! init tpwgts to have balanced vertices and ubvec
-
-		Mg.tpwgts = new real_t[Mg.nparts[0]];
-		Mg.ubvec = new real_t[Mg.nparts[0]];
-
-		for (int s = 0; s < Mg.nparts[0]; s++)
-		{
-			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
-			Mg.ubvec[s] = 1.05;
-		}
-
-		Mg.edgecut = new idx_t[1];
-		Mg.edgecut[0] = 0;
-
-		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
-		Mg.numflag = new idx_t[1];
-		Mg.numflag[0] = 0;
-
-		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
-		Mg.wgtflag = new idx_t[1];
-
-		//if(w)
-			Mg.wgtflag[0] = 2;
-		//else
-			//Mg.wgtflag[0] = 0;
+		constructAdjList(g, m2g);
 	}
 
 	/*! \brief Decompose the graph
 	 *
 	 * \tparam i which property store the decomposition
 	 *
-	 *
-	 *
 	 */
 	template<unsigned int i>
-	void decompose(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
+	void decompose(const openfpm::vector<rid> & vtxdist)
 	{
-
 		// Decompose
 
 		ParMETIS_V3_PartKway((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.adjwgt, Mg.wgtflag,
 				Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.options, Mg.edgecut, Mg.part, &comm);
-		/*
-		 ParMETIS_V3_AdaptiveRepart( (idx_t *) vtxdist.getPointer(), Mg.xadj,Mg.adjncy,Mg.vwgt,Mg.vsize,Mg.adjwgt, Mg.wgtflag, Mg.numflag,
-		 Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.itr, Mg.options, Mg.edgecut,
-		 Mg.part, &comm );
-		*/
-
-		// For each vertex store the processor that contain the data
-		for (size_t j = 0, id = 0; j < sub_g.getNVertex(); j++, id++)
-		{
-			sub_g.vertex(j).template get<i>() = Mg.part[id];
-		}
-
 	}
 
 	/*! \brief Refine the graph
@@ -407,20 +348,13 @@ public:
 	 */
 
 	template<unsigned int i>
-	void refine(openfpm::vector<idx_t> & vtxdist, Graph & sub_g)
+	void refine(openfpm::vector<rid> & vtxdist)
 	{
 		// Refine
 
 		ParMETIS_V3_AdaptiveRepart((idx_t *) vtxdist.getPointer(), Mg.xadj, Mg.adjncy, Mg.vwgt, Mg.vsize, Mg.adjwgt,
 				Mg.wgtflag, Mg.numflag, Mg.ncon, Mg.nparts, Mg.tpwgts, Mg.ubvec, Mg.itr, Mg.options, Mg.edgecut,
 				Mg.part, &comm);
-
-		// For each vertex store the processor that contain the data
-
-		for (size_t j = 0, id = 0; j < sub_g.getNVertex(); j++, id++)
-		{
-			sub_g.vertex(j).template get<i>() = Mg.part[id];
-		}
 	}
 
 	/*! \brief Get graph partition vector
@@ -432,10 +366,16 @@ public:
 	}
 
 	/*! \brief Reset graph and reconstruct it
+	 *
+	 * \param g Global graph
 	 *
 	 */
-	void reset(Graph & mainGraph, Graph & sub_g)
+	void reset(Graph & g,const openfpm::vector<rid> & vtxdist, const std::unordered_map<rid,gid> & m2g)
 	{
+		first = vtxdist.get(p_id);
+		last = vtxdist.get(p_id+1)-1;
+		nvertex = last.id - first.id + 1;
+
 		// Deallocate the graph structures
 
 		if (Mg.xadj != NULL)
@@ -464,7 +404,94 @@ public:
 		}
 
 		// construct the adjacency list
-		constructAdjList(mainGraph, sub_g);
+		constructAdjList(g,m2g);
+	}
+
+	/*! \brief Set the default parameters for parmetis
+	 *
+	 *
+	 */
+	void setDefaultParameters(bool w)
+	{
+		Mg.nvtxs = new idx_t[1];
+
+		// Set the number of constrains
+		Mg.ncon = new idx_t[1];
+		Mg.ncon[0] = 1;
+
+		// Set to null the weight of the vertex (init after in constructAdjList) (can be removed)
+		Mg.vwgt = NULL;
+
+		// Set to null the weight of the edge (init after in constructAdjList) (can be removed)
+		Mg.adjwgt = NULL;
+
+		// Set the total number of partitions
+		Mg.nparts = new idx_t[1];
+		Mg.nparts[0] = nc;
+
+		//! Set option for the graph partitioning (set as default)
+
+		Mg.options = new idx_t[4];
+		Mg.options[0] = 0;
+		Mg.options[1] = 0;
+		Mg.options[2] = 0;
+		Mg.options[3] = 0;
+
+		//! is an output vector containing the partition for each vertex
+
+		//! adaptiveRepart itr value
+		Mg.itr = new real_t[1];
+		Mg.itr[0] = 1000.0;
+
+		//! init tpwgts to have balanced vertices and ubvec
+
+		Mg.tpwgts = new real_t[Mg.nparts[0]];
+		Mg.ubvec = new real_t[Mg.nparts[0]];
+
+		for (size_t s = 0; s < (size_t)Mg.nparts[0]; s++)
+		{
+			Mg.tpwgts[s] = 1.0 / Mg.nparts[0];
+			Mg.ubvec[s] = 1.05;
+		}
+
+		Mg.edgecut = new idx_t[1];
+		Mg.edgecut[0] = 0;
+
+		//! This is used to indicate the numbering scheme that is used for the vtxdist, xadj, adjncy, and part arrays. (0 for C-style, start from 0 index)
+		Mg.numflag = new idx_t[1];
+		Mg.numflag[0] = 0;
+
+		//! This is used to indicate if the graph is weighted. wgtflag can take one of four values:
+		Mg.wgtflag = new idx_t[1];
+
+		if(w)
+			Mg.wgtflag[0] = 3;
+		else
+			Mg.wgtflag[0] = 0;
+	}
+
+	const Parmetis<Graph> & operator=(const Parmetis<Graph> & pm)
+	{
+		comm = pm.comm;
+		v_cl = pm.v_cl;
+		p_id = pm.p_id;
+		nc = pm.nc;
+
+		setDefaultParameters(pm.Mg.wgtflag[0] == 3);
+
+		return *this;
+	}
+
+	const Parmetis<Graph> & operator=(Parmetis<Graph> && pm)
+	{
+		comm = pm.comm;
+		v_cl = pm.v_cl;
+		p_id = pm.p_id;
+		nc = pm.nc;
+
+		setDefaultParameters(pm.Mg.wgtflag[0] == 3);
+
+		return *this;
 	}
 
 };
diff --git a/src/Decomposition/MetisDistribution.hpp b/src/Decomposition/MetisDistribution.hpp
deleted file mode 100644
index 667d5645a9ad795f325d56f73afa82cf129f50c5..0000000000000000000000000000000000000000
--- a/src/Decomposition/MetisDistribution.hpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * MetisDistribution.hpp
- *
- *  Created on: Nov 19, 2015
- *      Author: Antonio Leo
- */
-
-#ifndef SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
-#define SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_
-
-#include "SubdomainGraphNodes.hpp"
-#include "metis_util.hpp"
-
-template<unsigned int dim, typename T, template<unsigned int, typename > class Domain = Box>
-class MetisDistribution
-{
-	//! Vcluster
-	Vcluster & v_cl;
-
-	//! Structure that store the cartesian grid information
-	grid_sm<dim, void> gr;
-
-	//! rectangular domain to decompose
-	Domain<dim, T> domain;
-
-	//! Global sub-sub-domain graph
-	Graph_CSR<nm_v, nm_e> gp;
-
-	//! Flag to check if weights are used on vertices
-	bool verticesGotWeights = false;
-
-public:
-
-	//! constructor
-	MetisDistribution(Vcluster & v_cl) :
-			v_cl(v_cl)
-	{
-	}
-
-	/*! \brief Initialize the distribution graph
-	 *
-	 */
-	void init(grid_sm<dim, void> & grid, Domain<dim, T> dom)
-	{
-		// Set grid and domain
-		gr = grid;
-		domain = dom;
-
-		// Create a cartesian grid graph
-		CartesianGraphFactory<dim, Graph_CSR<nm_v, nm_e>> g_factory_part;
-		gp = g_factory_part.template construct<NO_EDGE, nm_v::id, T, dim - 1, 0, 1, 2>(gr.getSize(), domain);
-
-		// Init to 0.0 axis z (to fix in graphFactory)
-		if (dim < 3)
-		{
-			for (size_t i = 0; i < gp.getNVertex(); i++)
-			{
-				gp.vertex(i).template get<nm_v::x>()[2] = 0.0;
-			}
-		}
-
-		for (size_t i = 0; i < gp.getNVertex(); i++)
-		{
-			gp.vertex(i).template get<nm_v::global_id>() = i;
-		}
-	}
-
-	/*! \brief Get the current graph (main)
-	 *
-	 */
-	Graph_CSR<nm_v, nm_e> & getGraph()
-	{
-		return gp;
-	}
-
-	/*! \brief Create first decomposition, it divides the graph in slices and give each slice to a processor
-	 *
-	 */
-	void decompose()
-	{
-		Metis<Graph_CSR<nm_v, nm_e>> met(gp, v_cl.getProcessingUnits(), verticesGotWeights);
-
-		// decompose
-		met.decompose<nm_v::proc_id>();
-	}
-
-	/*! \brief Refine current decomposition (NOT AVAILABLE on Metis)
-	 *
-	 * It has no function
-	 */
-	void refine()
-	{
-	}
-
-	/*! \brief function that return the position of the vertex in the space
-	 *
-	 * \param id vertex id
-	 * \param pos vector that will contain x, y, z
-	 *
-	 */
-	void getVertexPosition(size_t id, T (&pos)[dim])
-	{
-		if (id >= gp.getNVertex())
-			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
-
-		// Copy the geometrical informations inside the pos vector
-		pos[0] = gp.vertex(id).template get<nm_v::x>()[0];
-		pos[1] = gp.vertex(id).template get<nm_v::x>()[1];
-		if (dim == 3)
-			pos[2] = gp.vertex(id).template get<nm_v::x>()[2];
-	}
-
-	/*! \brief function that set the weight of the vertex
-	 *
-	 * \param id vertex id
-	 * \param wieght to give to the vertex
-	 *
-	 */
-	void setVertexWeight(size_t id, size_t weight)
-	{
-		if (!verticesGotWeights)
-			verticesGotWeights = true;
-
-		if (id >= gp.getNVertex())
-			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
-
-		gp.vertex(id).template get<nm_v::computation>() = weight;
-	}
-
-	/*! \brief Checks if weights are used on the vertices
-	 *
-	 */
-	bool weightsAreUsed()
-	{
-		return verticesGotWeights;
-	}
-
-	/*! \brief function that get the weight of the vertex
-	 *
-	 * \param id vertex id
-	 *
-	 */
-	size_t getVertexWeight(size_t id)
-	{
-		if (id >= gp.getNVertex())
-			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
-
-		return gp.vertex(id).template get<nm_v::computation>();
-	}
-
-	/*! \brief Set migration cost of the vertex id
-	 *
-	 * \param id of the vertex to update
-	 * \param migration cost of the migration
-	 */
-	void setMigrationCost(size_t id, size_t migration)
-	{
-		if (id >= gp.getNVertex())
-			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
-
-		gp.vertex(id).template get<nm_v::migration>() = migration;
-	}
-
-	/*! \brief Set communication cost of the edge id
-	 *
-	 * \param v_id Id of the source vertex of the edge
-	  * \param e i child of the vertex
-	 * \param communication Communication value
-	 */
-	void setCommunicationCost(size_t v_id, size_t e, size_t communication)
-	{
-		size_t e_id = v_id + e;
-
-		if (e_id >= gp.getNEdge())
-			std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
-
-		gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
-	}
-
-	/*! \brief Returns total number of sub-sub-domains in the distribution graph
-	 *
-	 */
-	size_t getNSubSubDomains()
-	{
-		return gp.getNVertex();
-	}
-
-	/*! \brief Returns total number of neighbors of the sub-sub-domain id
-	 *
-	 * \param i id of the sub-sub-domain
-	 */
-	size_t getNSubSubDomainNeighbors(size_t id)
-	{
-		if (id >= gp.getNVertex())
-			std::cerr << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
-
-		return gp.getNChilds(id);
-	}
-
-	/*! \brief Print current graph and save it to file with name test_graph_[id]
-	 *
-	 * \param id to attach to the filename
-	 *
-	 */
-	void printCurrentDecomposition(int id)
-	{
-		VTKWriter<Graph_CSR<nm_v, nm_e>, GRAPH> gv2(gp);
-		gv2.write("test_graph_" + std::to_string(id) + ".vtk");
-
-	}
-
-	/*! \brief Compute the unbalance value
-	 *
-	 * \return the unbalance value
-	 */
-	float getUnbalance()
-	{
-		long min, max, sum;
-		std::vector<long> loads(v_cl.getProcessingUnits());
-
-		for (size_t i = 0; i < loads.size(); i++)
-			loads[i] = 0;
-
-		for (size_t i = 0; i < gp.getNVertex(); i++)
-			loads[gp.vertex(i).template get<nm_v::proc_id>()]++;
-
-		max = *std::max_element(loads.begin(), loads.end());
-		min = *std::min_element(loads.begin(), loads.end());
-		sum = std::accumulate(loads.begin(), loads.end(), 0);
-
-		float unbalance = ((float) (max - min)) / (float) sum;
-
-		return unbalance;
-	}
-
-	/*! \brief Compute the processor load counting the total weights of its vertices
-	 *
-	 * \return the computational load of the processor graph
-	 */
-	size_t getProcessorLoad()
-	{
-		size_t load = 0;
-
-		for (size_t i = 0; i < gp.getNVertex(); i++)
-		{
-			if (gp.vertex(i).template get<nm_v::proc_id>() == v_cl.getProcessUnitID())
-				load += gp.vertex(i).template get<nm_v::computation>();
-		}
-
-		return load;
-	}
-};
-
-#endif /* SRC_DECOMPOSITION_METISDISTRIBUTION_HPP_ */
diff --git a/src/Decomposition/common.hpp b/src/Decomposition/common.hpp
index 94f8b5f5abfb7b73a0350e4ed38290d3eb5818ef..b3f0380328e81865f12a70ffd9e80fd939fccf5f 100755
--- a/src/Decomposition/common.hpp
+++ b/src/Decomposition/common.hpp
@@ -13,26 +13,69 @@
 
 #include "Vector/map_vector.hpp"
 
+/*! \brief for each sub-domain box, sub contains the real sub-domain id
+ *
+ * When we apply boundary conditions real sub-domains are copied along the border
+ * sub contains the id of the real sub-domain
+ *
+ * \tparam dim Dimensionality of the box
+ * \tparam T type of space in which this box lives
+ *
+ */
+template<unsigned int dim, typename T>
+struct Box_loc_sub
+{
+	Box<dim,T> bx;
+
+	// Domain id
+	size_t sub;
+
+	// in which sector this sub-domain lives
+	comb<dim> cmb;
+
+	Box_loc_sub()
+	{
+		cmb.zero();
+	};
+
+	Box_loc_sub(const Box<dim,T> & bx, size_t sub, const comb<dim> & cmb)
+	:bx(bx),sub(sub),cmb(cmb)
+	{};
+
+	template <typename Memory> Box_loc_sub(const Box_loc_sub<dim,T> & bls)
+	{
+		bx = bls.bx;
+		this->sub = bls.sub;
+	};
+
+	Box_loc_sub operator=(const Box<dim,T> & box)
+	{
+		bx = box;
 
+		return *this;
+	}
+
+
+};
 
 /*! It contain a box definition and from witch sub-domain it come from (in the local processor)
  * and an unique across adjacent processors (for communication)
  *
  * If the box come from the intersection of an expanded sub-domain and a sub-domain
  *
- * Assuming we are considering the adjacent processor i (0 to getNNProcessors())
+ * Assuming we are considering the near processors i (0 to getNNProcessors())
  *
  * ### external ghost box
  *
- * id = id_exp + N_non_exp + id_non_exp
+ * id = id_exp * N_non_exp + id_non_exp
  *
- * id_exp = the id in the vector proc_adj_box.get(i) of the expanded sub-domain
+ * id_exp = the id in the vector proc_adj_box.get(i) of the expanded sub-domain (sent local sub-domains)
  *
- * id_non_exp = the id in the vector nn_processor_subdomains[i] of the sub-domain
+ * id_non_exp = the id in the vector nn_processor_subdomains[i] of the sub-domain (received sub-domains from near processors)
  *
  * ### internal ghost box
  *
- * id = id_exp + N_non_exp + id_non_exp
+ * id = id_exp * N_non_exp + id_non_exp
  *
  * id_exp = the id in the vector nn_processor_subdomains[i] of the expanded sub-domain
  *
@@ -40,8 +83,10 @@
  *
  */
 template<unsigned int dim, typename T>
-struct Box_sub : public Box<dim,T>
+struct Box_sub
 {
+	Box<dim,T> bx;
+
 	// Domain id
 	size_t sub;
 
@@ -50,7 +95,7 @@ struct Box_sub : public Box<dim,T>
 
 	Box_sub operator=(const Box<dim,T> & box)
 	{
-		::Box<dim,T>::operator=(box);
+		bx = box;
 
 		return *this;
 	}
@@ -60,17 +105,28 @@ struct Box_sub : public Box<dim,T>
 
 //! Particular case for local internal ghost boxes
 template<unsigned int dim, typename T>
-struct Box_sub_k : public Box<dim,T>
+struct Box_sub_k
 {
+	Box<dim,T> bx;
+
 	// Domain id
 	size_t sub;
 
+	// Where this sub_domain live
+	comb<dim> cmb;
+
 	//! k \see getLocalGhostIBoxE
 	long int k;
 
+	Box_sub_k()
+	:k(-1)
+	{
+		cmb.zero();
+	}
+
 	Box_sub_k operator=(const Box<dim,T> & box)
 	{
-		::Box<dim,T>::operator=(box);
+		bx = box;
 
 		return *this;
 	}
@@ -129,8 +185,82 @@ struct N_box
 	// id of the processor in the nn_processor list (local processor id)
 	size_t id;
 
-	// Near processor sub-domains
+	// near processor sub-domains
 	typename openfpm::vector<::Box<dim,T>> bx;
+
+	// near processor sector position (or where they live outside the domain)
+	openfpm::vector<comb<dim>> pos;
+
+	//! Default constructor
+	N_box()
+	:id((size_t)-1)
+	{};
+
+	//! Copy constructor
+	N_box(const N_box<dim,T> & b)
+	{
+		this->operator=(b);
+	}
+
+	//! Move constructor
+	N_box(N_box<dim,T> && b)
+	{
+		this->operator=(b);
+	}
+
+	/*! \brief Copy the element
+	 *
+	 * \param ele element to copy
+	 *
+	 */
+	N_box<dim,T> & operator=(const N_box<dim,T> & ele)
+	{
+		id = ele.id;
+		bx = ele.bx;
+		pos = ele.pos;
+
+		return * this;
+	}
+
+	/*! \brief Move the element
+	 *
+	 * \param ele element to copy
+	 *
+	 */
+	N_box<dim,T> & operator=(N_box<dim,T> && ele)
+	{
+		id = ele.id;
+		bx.swap(ele.bx);
+		pos = ele.pos;
+
+		return * this;
+	}
+
+	/*! \brief Compare two N_box object
+	 *
+	 * \return true if they match
+	 *
+	 */
+	bool operator==(const N_box<dim,T> & ele) const
+	{
+		if (id != ele.id)
+			return false;
+
+		if (pos != ele.pos)
+			return false;
+
+		return bx == ele.bx;
+	}
+
+	/*! \brief Compare two N_box object
+	 *
+	 * \return true if they match
+	 *
+	 */
+	bool operator!=(const N_box<dim,T> & ele) const
+	{
+		return ! this->operator==(ele);
+	}
 };
 
 // It store all the boxes of the near processors in a linear array
@@ -142,9 +272,12 @@ struct p_box
 	::Box<dim,T> box;
 	//! local processor id
 	size_t lc_proc;
-	//! processor id
+	//! processor rank
 	size_t proc;
 
+	//! shift vector id
+	size_t shift_id;
+
 	/*! \brief Check if two p_box are the same
 	 *
 	 * \param pb box to check
diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp
index b92ddbc613b4d21ad1b721380787f7f86bdf180b..d06c61345d2a50e7ade475366ecf4a571db582e9 100755
--- a/src/Decomposition/ie_ghost.hpp
+++ b/src/Decomposition/ie_ghost.hpp
@@ -41,8 +41,135 @@ class ie_ghost
 	//! Cell-list that store the geometrical information of the internal ghost boxes
 	CellList<dim,T,FAST> geo_cell;
 
+	//! shift vectors
+	openfpm::vector<Point<dim,T>> shifts;
+
+	// Temporal buffers to return information for ghost_processorID
+	openfpm::vector<std::pair<size_t,size_t>> ids_p;
+	openfpm::vector<size_t> ids;
+
+
+	/*! \brief Given a local sub-domain i, it give the id of such sub-domain in the sent list
+	 *         for the processor p_id
+	 *
+	 * Processor 5 send its sub-domains to processor 6 and will receive the list from 6
+	 *
+	 * This function searches if a local sub-domain has been sent to a processor p_id; if
+	 * found it returns at which position it is in the list of the sent sub-domains
+	 *
+	 * \param nn_p structure that store the processor graph as near processor
+	 * \param p_id near processor rank
+	 * \param i sub-domain
+	 *
+	 * \return Given a local sub-domain i, it give the id of such sub-domain in the sent list
+	 *         for the processor p_id
+	 *
+	 */
+	inline size_t link_ebx_ibx(const nn_prcs<dim,T> & nn_p, size_t p_id, size_t i)
+	{
+		// Search for the correct id
+		size_t k = 0;
+		size_t p_idp = nn_p.ProctoID(p_id);
+		for (k = 0 ; k < nn_p.getSentSubdomains(p_idp).size() ; k++)
+		{
+			if (nn_p.getSentSubdomains(p_idp).get(k) == i)
+				break;
+		}
+		if (k == nn_p.getSentSubdomains(p_idp).size())
+			std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " sub-domain not found\n";
+
+		return k;
+	}
+
+	/*! \brief This is the external and internal ghost box link formula
+	*
+	* This formula is pretty important and require an extensive explanation
+	*
+	* \verbatim
+
+	+------------+
+	|            |
+	|            +---+---------+
+	| Processor 5|   |         |
+	|            | E | Proc 6  |
+	|  Sub 0     | 0 |         |
+	|            | _ | Sub 9   |
+	|            | 9 |         |
+	|            |   |         |
+	|            +---+---------+
+	|            |
+	+------------+
+
+	* \endverbatim
+	*
+	* E0_6 is an external ghost box from the perspective of processor 5 and an internal
+	* ghost box from the perspective of processor 6. So for every external
+	* ghost box that processor 5 computes, an internal ghost box exists in processor 6
+	*
+	* Here we link this information with an unique id, for processor 5 and 6.
+	* Consider Processor 5 sending to processor 6
+	* its sub-domains, including the one in figure with id 0 in the list, and
+	* receive from processor 6 the sub-domain in figure as id 9. Consider also
+	*  we have 16 processor, E0_9 come from the intersection of the sub-domains
+	* 0 and 9 (Careful the id is related to the send and receive position in the list)
+	*
+	* The id of the external box and (and linked internal) is calculated as
+	*
+	* (0 * (Number of sub-domains received from 6) + 9) * 16 + 6
+	*
+	* \param k sub-domain sent ( 0 )
+	* \param b sub-domain received ( 9 )
+	* \param p_id processor id ( 6 )
+	* \param c sector where the sub-domain b live
+	* \param N_b number of sub-domain received from p_id
+	* \param v_cl Vcluster
+	*
+	* \return id of the external box
+	*
+	*/
+	inline size_t ebx_ibx_form(size_t k, size_t b, size_t p_id, const comb<dim> & c ,size_t N_b, Vcluster & v_cl)
+	{
+		return ((k * N_b + b) * v_cl.getProcessingUnits() + p_id) * openfpm::math::pow(3,dim) + c.lin();
+	}
+
 protected:
 
+	/*! \brief Here we generate the shift vectors
+	 *
+	 * \param domain box that describe the domain
+	 *
+	 */
+	void generateShiftVectors(const Box<dim,T> & domain)
+	{
+		shifts.resize(openfpm::math::pow(3,dim));
+
+		HyperCube<dim> hyp;
+
+		for (long int i = dim-1 ; i >= 0 ; i--)
+		{
+			std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i);
+
+			for (size_t j = 0 ; j < cmbs.size() ; j++)
+			{
+				for (size_t k = 0 ; k < dim ; k++)
+				{
+					switch (cmbs[j][k])
+					{
+					case 1:
+						shifts.get(cmbs[j].lin()).template get<0>()[k] = -domain.getHigh(k);
+						break;
+					case 0:
+						shifts.get(cmbs[j].lin()).template get<0>()[k] = 0;
+						break;
+					case -1:
+						shifts.get(cmbs[j].lin()).template get<0>()[k] = domain.getHigh(k);
+						break;
+					}
+				}
+			}
+		}
+	}
+
 	/*! \brief Initialize the geo cell list structure
 	 *
 	 * The geo cell list structure exist to speed up the labelling the points if they fall on some
@@ -57,8 +184,8 @@ protected:
 
 	/*! \brief Create the box_nn_processor_int (bx part)  structure
 	 *
-	 * This structure store for each sub-domain of this processors enlarged by the ghost size the boxes that
-	 *  come from the intersection with the near processors sub-domains (External ghost box)
+	 * For each sub-domain of the local processor it store the intersection between the enlarged
+	 * sub-domain of the calling processor with the adjacent processors sub-domains (External ghost box)
 	 *
 	 * \param ghost margins
 	 *
@@ -79,22 +206,23 @@ protected:
 			// enlarge the sub-domain with the ghost
 			sub_with_ghost.enlarge(ghost);
 
-			// resize based on the number of adjacent processors
+			// resize based on the number of near processors
 			box_nn_processor_int.get(i).resize(box_nn_processor.get(i).size());
 
-			// For each processor adjacent to this sub-domain
+			// For each processor near to this sub-domain
 			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
 			{
-				// Contiguous processor
+				// near processor
 				size_t p_id = box_nn_processor.get(i).get(j);
 
-				// store the box in proc_int_box storing from which sub-domain they come from
+				// used later
 				Box_dom<dim,T> & proc_int_box_g = proc_int_box.get(nn_p.ProctoID(p_id));
 
-				// get the set of sub-domains of the adjacent processor p_id
-				const openfpm::vector< ::Box<dim,T> > & nn_processor_subdomains_g = nn_p.getExternalAdjSubdomain(p_id).bx;
+				// get the set of sub-domains of the near processor p_id
+				const openfpm::vector< ::Box<dim,T> > & nn_processor_subdomains_g = nn_p.getNearSubdomains(p_id);
+				const openfpm::vector< comb<dim> > & nnpsg_pos = nn_p.getNearSubdomainsPos(p_id);
 
-				// near processor sub-domain intersections
+				// used later
 				openfpm::vector< ::Box<dim,T> > & box_nn_processor_int_gg = box_nn_processor_int.get(i).get(j).bx;
 
 				// for each near processor sub-domain intersect with the enlarged local sub-domain and store it
@@ -111,6 +239,7 @@ protected:
 						pb.box = bi;
 						pb.proc = p_id;
 						pb.lc_proc = nn_p.ProctoID(p_id);
+						pb.shift_id = (size_t)-1;
 
 						//
 						// Updating
@@ -122,24 +251,17 @@ protected:
 						// They all store the same information but organized in different ways
 						// read the description of each for more information
 						//
+
 						vb_ext.add(pb);
 						box_nn_processor_int_gg.add(bi);
 						proc_int_box_g.ebx.add();
 						proc_int_box_g.ebx.last() = bi;
 						proc_int_box_g.ebx.last().sub = i;
 
-						// Search for the correct id
-						size_t k = 0;
-						size_t p_idp = nn_p.ProctoID(p_id);
-						for (k = 0 ; k < nn_p.getInternalAdjSubdomain(p_idp).size() ; k++)
-						{
-							if (nn_p.getInternalAdjSubdomain(p_idp).get(k) == i)
-								break;
-						}
-						if (k == nn_p.getInternalAdjSubdomain(p_idp).size())
-							std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " sub-domain not found\n";
+						// Search where the sub-domain i is in the sent list for processor p_id
+						size_t k = link_ebx_ibx(nn_p,p_id,i);
 
-						proc_int_box_g.ebx.last().id = (k * nn_processor_subdomains_g.size() + b) * v_cl.getProcessingUnits() + p_id;
+						proc_int_box_g.ebx.last().id = ebx_ibx_form(k,b,p_id,nnpsg_pos.get(b),nn_processor_subdomains_g.size(),v_cl);
 					}
 				}
 			}
@@ -178,15 +300,17 @@ protected:
 				size_t p_id = box_nn_processor.get(i).get(j);
 
 				// get the set of sub-domains of the contiguous processor p_id
-				const openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_p.getExternalAdjSubdomain(p_id).bx;
+				const openfpm::vector< ::Box<dim,T> > & nn_p_box = nn_p.getNearSubdomains(p_id);
+
+				// get the sector position for each sub-domain in the list
+				const openfpm::vector< comb<dim> > nn_p_box_pos = nn_p.getNearSubdomainsPos(p_id);
 
 				// get the local processor id
-				size_t lc_proc = nn_p.getAdjacentProcessor(p_id);
+				size_t lc_proc = nn_p.getNearProcessor(p_id);
 
 				// For each near processor sub-domains enlarge and intersect with the local sub-domain and store the result
 				for (size_t k = 0 ; k < nn_p_box.size() ; k++)
 				{
-
 					// enlarge the near-processor sub-domain
 					::Box<dim,T> n_sub = nn_p_box.get(k);
 
@@ -209,6 +333,9 @@ protected:
 						// fill the local processor id
 						b_int.lc_proc = lc_proc;
 
+						// fill the shift id
+						b_int.shift_id = nn_p_box_pos.get(k).lin();
+
 						//
 						// Updating
 						//
@@ -226,24 +353,19 @@ protected:
 						vb_int.add(b_int);
 
 						// store the box in proc_int_box storing from which sub-domain they come from
-						Box_dom<dim,T> & pr_box_int = proc_int_box.get(nn_p.ProctoID(p_id));
 						Box_sub<dim,T> sb;
 						sb = b_int.box;
 						sb.sub = i;
 
-						// Search for the correct id
-						size_t s = 0;
 						size_t p_idp = nn_p.ProctoID(p_id);
-						for (s = 0 ; s < nn_p.getInternalAdjSubdomain(p_idp).size() ; s++)
-						{
-							if (nn_p.getInternalAdjSubdomain(p_idp).get(s) == i)
-								break;
-						}
-						if (s == nn_p.getInternalAdjSubdomain(p_idp).size())
-							std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " sub-domain not found\n";
 
-						sb.id = (k * nn_p.getInternalAdjSubdomain(p_idp).size() + s) * v_cl.getProcessingUnits() + v_cl.getProcessUnitID();
+						// Search where the sub-domain i is in the sent list for processor p_id
+						size_t s = link_ebx_ibx(nn_p,p_id,i);
+
+						// calculate the id of the internal box
+						sb.id = ebx_ibx_form(k,s,v_cl.getProcessUnitID(),nn_p_box_pos.get(k),nn_p.getSentSubdomains(p_idp).size(),v_cl);
 
+						Box_dom<dim,T> & pr_box_int = proc_int_box.get(nn_p.ProctoID(p_id));
 						pr_box_int.ibx.add(sb);
 
 						// update the geo_cell list
@@ -271,6 +393,102 @@ protected:
 
 public:
 
+	//! Default constructor
+	ie_ghost() {};
+
+	//! Copy constructor
+	ie_ghost(const ie_ghost<dim,T> & ie)
+	{
+		this->operator =(ie);
+	}
+
+	//! Move constructor
+	ie_ghost(ie_ghost<dim,T> && ie)
+	{
+		this->operator=(ie);
+	}
+
+	//! Move operator
+	inline ie_ghost<dim,T> & operator=(ie_ghost<dim,T> && ie)
+	{
+		box_nn_processor_int.swap(ie.box_nn_processor_int);
+		proc_int_box.swap(ie.proc_int_box);
+		vb_ext.swap(ie.vb_ext);
+		vb_int.swap(ie.vb_int);
+		geo_cell.swap(ie.geo_cell);
+		shifts.swap(ie.shifts);
+		ids_p.swap(ie.ids_p);
+		ids.swap(ie.ids);
+
+		return *this;
+	}
+
+	//! Copy operator
+	inline ie_ghost<dim,T> & operator=(const ie_ghost<dim,T> & ie)
+	{
+		box_nn_processor_int = ie.box_nn_processor_int;
+		proc_int_box = ie.proc_int_box;
+		vb_ext = ie.vb_ext;
+		vb_int = ie.vb_int;
+		geo_cell = ie.geo_cell;
+		shifts = ie.shifts;
+		ids_p = ie.ids_p;
+		ids = ie.ids;
+
+		return *this;
+	}
+
+	/*! It return the shift vector
+	 *
+	 * Consider a domain with some ghost, at the border of the domain the
+	 * ghost must be treated in a special way, depending on the periodicity
+	 * of the boundary
+	 *
+		\verbatim
+
+															[1,1]
+			+---------+------------------------+---------+
+			| (1,-1)  |                        | (1,1)   |
+			|   |     |    (1,0) --> 7         |   |     |
+			|   v     |                        |   v     |
+			|   6     |                        |   8     |
+			+--------------------------------------------+
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			| (-1,0)  |                        | (1,0)   |
+			|    |    |                        |   |     |
+			|    v    |      (0,0) --> 4       |   v     |
+			|    3    |                        |   5     |
+			|         |                        |         |
+		 B	|         |                        |     A   |
+		*	|         |                        |    *    |
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			+--------------------------------------------+
+			| (-1,-1) |                        | (-1,1)  |
+			|    |    |   (-1,0) --> 1         |    |    |
+			|    v    |                        |    v    |
+			|    0    |                        |    2    |
+			+---------+------------------------+---------+
+
+
+		\endverbatim
+	 *
+	 *
+	 * if a particle is bound in (1,0) linearized to 5, before communicate this particle (A in figure)
+	 * must be shifted on -1.0 on x (B in figure)
+	 *
+	 * This function return the set of shift vectors that determine such shift, for example
+	 * in the example above the shift at position 5 will be (0,-1.0)
+	 *
+	 */
+	const openfpm::vector<Point<dim,T>> & getShiftVectors()
+	{
+		return shifts;
+	}
+
 	/*! \brief Get the number of Internal ghost boxes for one processor
 	 *
 	 * \param id near processor list id (the id go from 0 to getNNProcessor())
@@ -302,7 +520,7 @@ public:
 	 */
 	inline const ::Box<dim,T> & getProcessorIGhostBox(size_t id, size_t j) const
 	{
-		return proc_int_box.get(id).ibx.get(j);
+		return proc_int_box.get(id).ibx.get(j).bx;
 	}
 
 	/*! \brief Get the j External ghost box
@@ -314,7 +532,7 @@ public:
 	 */
 	inline const ::Box<dim,T> & getProcessorEGhostBox(size_t id, size_t j) const
 	{
-		return proc_int_box.get(id).ebx.get(j);
+		return proc_int_box.get(id).ebx.get(j).bx;
 	}
 
 	/*! \brief Get the j Internal ghost box id
@@ -349,7 +567,7 @@ public:
 	 * \return sub-domain at which belong the internal ghost box
 	 *
 	 */
-	inline const size_t getProcessorIGhostSub(size_t id, size_t j) const
+	inline size_t getProcessorIGhostSub(size_t id, size_t j) const
 	{
 		return proc_int_box.get(id).ibx.get(j).sub;
 	}
@@ -361,7 +579,7 @@ public:
 	 * \return sub-domain at which belong the external ghost box
 	 *
 	 */
-	inline const size_t getProcessorEGhostSub(size_t id, size_t j) const
+	inline size_t getProcessorEGhostSub(size_t id, size_t j) const
 	{
 		return proc_int_box.get(id).ebx.get(j).sub;
 	}
@@ -451,12 +669,64 @@ public:
 		return geo_cell.getIterator(geo_cell.getCell(p));
 	}
 
-	openfpm::vector<size_t> ids;
+	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
+	 * (Internal ghost)
+	 *
+	 * if the particle come from an internal ghost from the periodicity of the domain, position must be shifted
+	 * this function return the id of the shift vector
+	 *
+	 * \see getShiftVector
+	 *
+	 * \tparam id type of id to get box_id processor_id lc_processor_id shift_id
+	 * \param p Particle position
+	 * \param opt intersection boxes of the same processor can overlap, so in general the function
+	 *        can produce more entry with the same processor, the UNIQUE option eliminate double entries
+	 *        (UNIQUE) is for particle data (MULTIPLE) is for grid data [default MULTIPLE]
+	 *
+	 * \param return the processor ids (not the rank, the id in the near processor list)
+	 *
+	 */
+	template <typename id1, typename id2> inline const openfpm::vector<std::pair<size_t,size_t>> ghost_processorID_pair(Point<dim,T> & p, const int opt = MULTIPLE)
+	{
+		ids_p.clear();
+
+		// Check with geo-cell if a particle is inside one Cell containing boxes
+
+		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+
+		// For each element in the cell, check if the point is inside the box
+		// if it is, store the processor id
+		while (cell_it.isNext())
+		{
+			size_t bid = cell_it.get();
+
+			if (vb_int.get(bid).box.isInside(p) == true)
+			{
+				ids_p.add(std::pair<size_t,size_t>(id1::id(vb_int.get(bid),bid),id2::id(vb_int.get(bid),bid)));
+			}
+
+			++cell_it;
+		}
+
+		// Make the id unique
+		if (opt == UNIQUE)
+		{
+			ids_p.sort();
+			ids_p.unique();
+		}
+
+		return ids_p;
+	}
 
 	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
 	 * (Internal ghost)
 	 *
-	 * \tparam id type of if to get box_id processor_id lc_processor_id
+	 * if the particle come from an internal ghost from the periodicity of the domain, position must be shifted
+	 * this function return the id of the shift vector
+	 *
+	 * \see getShiftVector
+	 *
+	 * \tparam id type of id to get box_id processor_id lc_processor_id shift_id
 	 * \param p Particle position
 	 * \param opt intersection boxes of the same processor can overlap, so in general the function
 	 *        can produce more entry with the same processor, the UNIQUE option eliminate double entries
@@ -489,11 +759,55 @@ public:
 
 		// Make the id unique
 		if (opt == UNIQUE)
-			ids.unique();
+		{
+			ids.sort();
+			ids.unique();
+		}
 
 		return ids;
 	}
 
+	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
+	 * (Internal ghost)
+	 *
+	 * \tparam id type of id to get box_id processor_id lc_processor_id
+	 * \param p Particle position
+	 *
+	 * \param return the processor ids
+	 *
+	 */
+	template<typename id1, typename id2, typename Mem> inline const openfpm::vector<std::pair<size_t,size_t>> ghost_processorID_pair(const encapc<1,Point<dim,T>,Mem> & p, const int opt = MULTIPLE)
+	{
+		ids_p.clear();
+
+		// Check with geo-cell if a particle is inside one Cell containing boxes
+
+		auto cell_it = geo_cell.getIterator(geo_cell.getCell(p));
+
+		// For each element in the cell, check if the point is inside the box
+		// if it is, store the processor id
+		while (cell_it.isNext())
+		{
+			size_t bid = cell_it.get();
+
+			if (vb_int.get(bid).box.isInside(p) == true)
+			{
+				ids_p.add(std::pair<size_t,size_t>(id1::id(vb_int.get(bid),bid),id2::id(vb_int.get(bid),bid)));
+			}
+
+			++cell_it;
+		}
+
+		// Make the id unique
+		if (opt == UNIQUE)
+		{
+			ids_p.sort();
+			ids_p.unique();
+		}
+
+		return ids_p;
+	}
+
 	/*! \brief Given a position it return if the position belong to any neighborhood processor ghost
 	 * (Internal ghost)
 	 *
@@ -527,7 +841,10 @@ public:
 
 		// Make the id unique
 		if (opt == UNIQUE)
-			ids.unique();
+		{
+			ids.sort();
+			ids.unique();
+		}
 
 		return ids;
 	}
@@ -567,6 +884,125 @@ public:
 
 		return true;
 	}
+
+	/*! \brief Check if the ie_loc_ghosts contain the same information
+	 *
+	 * \param ele Element to check
+	 *
+	 */
+	bool is_equal(ie_ghost<dim,T> & ig)
+	{
+		if (getNEGhostBox() != ig.getNEGhostBox())
+			return false;
+
+		if (getNIGhostBox() != ig.getNIGhostBox())
+			return false;
+
+		for (size_t i = 0 ; i < getNIGhostBox() ; i++)
+		{
+			if (getIGhostBox(i) != ig.getIGhostBox(i))
+				return false;
+			if (getIGhostBoxProcessor(i) != ig.getIGhostBoxProcessor(i))
+				return false;
+		}
+
+		for (size_t i = 0 ; i < proc_int_box.size() ; i++)
+		{
+			if (getProcessorNIGhost(i) != ig.getProcessorNIGhost(i))
+				return false;
+			for (size_t j = 0 ; j < getProcessorNIGhost(i) ; j++)
+			{
+				if (getProcessorIGhostBox(i,j) != ig.getProcessorIGhostBox(i,j))
+					return false;
+				if (getProcessorIGhostId(i,j) != ig.getProcessorIGhostId(i,j))
+					return false;
+				if (getProcessorIGhostSub(i,j) != ig.getProcessorIGhostSub(i,j))
+					return false;
+			}
+		}
+
+		for (size_t i = 0 ; i < getNEGhostBox() ; i++)
+		{
+			if (getEGhostBox(i) != ig.getEGhostBox(i))
+				return false;
+			if (getEGhostBoxProcessor(i) != ig.getEGhostBoxProcessor(i))
+				return false;
+		}
+
+		for (size_t i = 0 ; i < proc_int_box.size() ; i++)
+		{
+			if (getProcessorNEGhost(i) != ig.getProcessorNEGhost(i))
+				return false;
+			for (size_t j = 0 ; j < getProcessorNEGhost(i) ; j++)
+			{
+				if (getProcessorEGhostBox(i,j) != ig.getProcessorEGhostBox(i,j))
+					return false;
+				if (getProcessorEGhostId(i,j) != ig.getProcessorEGhostId(i,j))
+					return false;
+				if (getProcessorEGhostSub(i,j) != ig.getProcessorEGhostSub(i,j))
+					return false;
+			}
+		}
+
+		return true;
+	}
+
+	/*! \brief Check if the ie_loc_ghosts contain the same information with the exception of the ghost part
+	 * It is anyway required that the ghost come from the same sub-domains decomposition
+	 *
+	 * \param ele Element to check
+	 *
+	 */
+	bool is_equal_ng(ie_ghost<dim,T> & ig)
+	{
+		Box<dim,T> bt;
+
+		if (getNEGhostBox() != ig.getNEGhostBox())
+			return false;
+
+		if (getNIGhostBox() != ig.getNIGhostBox())
+			return false;
+
+		for (size_t i = 0 ; i < proc_int_box.size() ; i++)
+		{
+			if (getProcessorNIGhost(i) != ig.getProcessorNIGhost(i))
+				return false;
+			for (size_t j = 0 ; j < getProcessorNIGhost(i) ; j++)
+			{
+				if (getProcessorIGhostBox(i,j).Intersect(ig.getProcessorIGhostBox(i,j),bt) == false)
+					return false;
+				if (getProcessorIGhostId(i,j) != ig.getProcessorIGhostId(i,j))
+					return false;
+				if (getProcessorIGhostSub(i,j) != ig.getProcessorIGhostSub(i,j))
+					return false;
+			}
+			if (getIGhostBox(i).Intersect(ig.getIGhostBox(i),bt) == false)
+				return false;
+			if (getIGhostBoxProcessor(i) != ig.getIGhostBoxProcessor(i))
+				return false;
+		}
+
+		for (size_t i = 0 ; i < proc_int_box.size() ; i++)
+		{
+			if (getProcessorNEGhost(i) != ig.getProcessorNEGhost(i))
+				return false;
+			for (size_t j = 0 ; j < getProcessorNEGhost(i) ; j++)
+			{
+				if (getProcessorEGhostBox(i,j).Intersect(ig.getProcessorEGhostBox(i,j),bt) == false)
+					return false;
+				if (getProcessorEGhostId(i,j) !=  ig.getProcessorEGhostId(i,j))
+					return false;
+				if (getProcessorEGhostSub(i,j) != ig.getProcessorEGhostSub(i,j))
+					return false;
+			}
+			if (getEGhostBox(i).Intersect(ig.getEGhostBox(i),bt) == false)
+				return false;
+			if (getEGhostBoxProcessor(i) != ig.getEGhostBoxProcessor(i))
+				return false;
+		}
+
+		return true;
+	}
 };
 
 
diff --git a/src/Decomposition/ie_loc_ghost.hpp b/src/Decomposition/ie_loc_ghost.hpp
index 06f13c43b246a59f68f2c6d15a1d9e8db7f80901..3772b98871b41ce2733144220f3a70bad42ba3c1 100755
--- a/src/Decomposition/ie_loc_ghost.hpp
+++ b/src/Decomposition/ie_loc_ghost.hpp
@@ -12,7 +12,8 @@
 #include "Space/Ghost.hpp"
 #include "Space/SpaceBox.hpp"
 #include "common.hpp"
-#include "VTKWriter.hpp"
+#include "VTKWriter/VTKWriter.hpp"
+#include "nn_processor.hpp"
 
 /*! \brief structure that store and compute the internal and external local ghost box
  *
@@ -27,10 +28,8 @@ class ie_loc_ghost
 {
 	openfpm::vector<lBox_dom<dim,T>> loc_ghost_box;
 
-	// Save the ghost boundaries
-//	Ghost<dim,T> ghost;
-
-protected:
+	//! temporal added sub-domains
+	openfpm::vector<Box_loc_sub<dim,T>> sub_domains_tmp;
 
 	/*! \brief Create the external local ghost boxes
 	 *
@@ -38,10 +37,10 @@ protected:
 	 * \param local sub-domain
 	 *
 	 */
-	void create_loc_ghost_ebox(Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains)
+	void create_loc_ghost_ebox(Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains, openfpm::vector<Box_loc_sub<dim,T>> & sub_domains_prc)
 	{
-		// Save the ghost
-//		this->ghost = ghost;
+		comb<dim> zero;
+		zero.zero();
 
 		loc_ghost_box.resize(sub_domains.size());
 
@@ -54,30 +53,32 @@ protected:
 			sub_with_ghost.enlarge(ghost);
 
 			// intersect with the other local sub-domains
-			for (size_t j = 0 ; j < sub_domains.size() ; j++)
+			for (size_t j = 0 ; j < sub_domains_prc.size() ; j++)
 			{
-				if (i == j)
+				size_t rj = sub_domains_prc.get(j).sub;
+
+				if (rj == i && sub_domains_prc.get(j).cmb == zero)
 					continue;
 
 				::Box<dim,T> bi;
 
-				bool intersect = sub_with_ghost.Intersect(::SpaceBox<dim,T>(sub_domains.get(j)),bi);
+				bool intersect = sub_with_ghost.Intersect(sub_domains_prc.get(j).bx,bi);
 
 				if (intersect == true)
 				{
 					Box_sub<dim,T> b;
-					b.sub = j;
+					b.sub = rj;
 					b = bi;
 
 					// local external ghost box
 					loc_ghost_box.get(i).ebx.add(b);
 
 					// search this box in the internal box of the sub-domain j
-					for (size_t k = 0; k < loc_ghost_box.get(j).ibx.size() ; k++)
+					for (size_t k = 0; k < loc_ghost_box.get(rj).ibx.size() ; k++)
 					{
-						if (loc_ghost_box.get(j).ibx.get(k).sub == i)
+						if (loc_ghost_box.get(rj).ibx.get(k).sub == i && loc_ghost_box.get(rj).ibx.get(k).cmb == sub_domains_prc.get(j).cmb.operator-())
 						{
-							loc_ghost_box.get(j).ibx.get(k).k = loc_ghost_box.get(i).ebx.size()-1;
+							loc_ghost_box.get(rj).ibx.get(k).k = loc_ghost_box.get(i).ebx.size()-1;
 							break;
 						}
 					}
@@ -91,20 +92,26 @@ protected:
 	 * \param ghost margin to enlarge
 	 *
 	 */
-	void create_loc_ghost_ibox(Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains)
+	void create_loc_ghost_ibox(Ghost<dim,T> & ghost, openfpm::vector<SpaceBox<dim,T>> & sub_domains, openfpm::vector<Box_loc_sub<dim,T>> & sub_domains_prc)
 	{
+		comb<dim> zero;
+		zero.zero();
+
 		loc_ghost_box.resize(sub_domains.size());
 
 		// For each sub-domain
 		for (size_t i = 0 ; i < sub_domains.size() ; i++)
 		{
 			// intersect with the others local sub-domains
-			for (size_t j = 0 ; j < sub_domains.size() ; j++)
+			for (size_t j = 0 ; j < sub_domains_prc.size() ; j++)
 			{
-				if (i == j)
+				SpaceBox<dim,T> sub_with_ghost = sub_domains_prc.get(j).bx;
+				size_t rj = sub_domains_prc.get(j).sub;
+
+				// Avoid to intersect the box with itself
+				if (rj == i && sub_domains_prc.get(j).cmb == zero)
 					continue;
 
-				SpaceBox<dim,T> sub_with_ghost = sub_domains.get(j);
 				// enlarge the sub-domain with the ghost
 				sub_with_ghost.enlarge(ghost);
 
@@ -115,9 +122,10 @@ protected:
 				if (intersect == true)
 				{
 					Box_sub_k<dim,T> b;
-					b.sub = j;
+					b.sub = rj;
 					b = bi;
 					b.k = -1;
+					b.cmb = sub_domains_prc.get(j).cmb;
 
 					loc_ghost_box.get(i).ibx.add(b);
 				}
@@ -125,8 +133,178 @@ protected:
 		}
 	}
 
+	/*! \brief In case of periodic boundary conditions we replicate the sub-domains at the border
+	 *
+	 * \param sub_domains list of sub-domains
+	 * \param domain Domain box
+	 * \param bc boundary conditions
+	 * \param ghost ghost part
+	 *
+	 */
+	void applyBC(openfpm::vector<Box_loc_sub<dim,T>> & sub_domains, const Box<dim,T> & domain, const Ghost<dim,T> & ghost, const size_t (&bc)[dim])
+	{
+		HyperCube<dim> hyp;
+
+		// first we create boxes at the border of the domain used to detect the sub-domain
+		// that must be adjusted, each of this boxes define a shift in case of periodic boundary condition
+		for (long int i = dim-1 ; i >= 0 ; i--)
+		{
+			std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i);
+
+			for (size_t j = 0 ; j < cmbs.size() ; j++)
+			{
+				if (nn_prcs<dim,T>::check_valid(cmbs[j],bc) == false)
+					continue;
+
+				Box<dim,T> bp;
+				Point<dim,T> shift;
+
+				for (size_t k = 0 ; k < dim ; k++)
+				{
+					switch (cmbs[j][k])
+					{
+					case 1:
+						bp.setLow(k,domain.getHigh(k)+ghost.getLow(k));
+						bp.setHigh(k,domain.getHigh(k));
+						shift.get(k) = -domain.getHigh(k)+domain.getLow(k);
+						break;
+					case 0:
+						bp.setLow(k,domain.getLow(k));
+						bp.setHigh(k,domain.getHigh(k));
+						shift.get(k) = 0;
+						break;
+					case -1:
+						bp.setLow(k,domain.getLow(k));
+						bp.setHigh(k,ghost.getHigh(k));
+						shift.get(k) = domain.getHigh(k)-domain.getLow(k);
+						break;
+					}
+				}
+
+				// Detect all the sub-domain involved, shift them and add to the list
+				// Detection is performed intersecting the sub-domains with the ghost
+				// parts near the domain borders
+				for (size_t k = 0 ; k < sub_domains.size() ; k++)
+				{
+					Box<dim,T> sub = sub_domains.get(k).bx;
+					Box<dim,T> b_int;
+
+					if (sub.Intersect(bp,b_int) == true)
+					{
+						sub += shift;
+						add_subdomain(Box_loc_sub<dim,T>(sub,k,cmbs[j]));
+					}
+				}
+			}
+		}
+
+		flush(sub_domains);
+	}
+
+
+
+	/*! \brief add a sub-domain to a temporary list
+	 *
+	 * \param bx Box to add
+	 *
+	 */
+	inline void add_subdomain(const Box_loc_sub<dim,T> & bx)
+	{
+		sub_domains_tmp.add(bx);
+	}
+
+	/*! \brief Flush the temporarily added sub-domains to the sub-domain list
+	 *
+	 *
+	 */
+	void flush(openfpm::vector<Box_loc_sub<dim,T>> & sub_domains)
+	{
+		for (size_t i = 0 ; i < sub_domains_tmp.size() ; i++)
+		{
+			sub_domains.add(sub_domains_tmp.get(i));
+		}
+
+		sub_domains_tmp.clear();
+	}
+
 public:
 
+	/*! \brief Create external and internal local ghosts
+	 *
+	 * \param ghost ghost boundary
+	 * \param sub_domains set of local sub-domains
+	 * \param bc boundary conditions
+	 *
+	 */
+	void create(openfpm::vector<SpaceBox<dim,T>> & sub_domains, Box<dim,T> & domain , Ghost<dim,T> & ghost , const size_t (&bc)[dim] )
+	{
+		// It will store local sub-domains + borders
+		openfpm::vector<Box_loc_sub<dim,T>> sub_domains_prc;
+
+		comb<dim> zero;
+		zero.zero();
+
+		// Copy sub_domains into sub_domains_prc
+		for (size_t i = 0 ; i < sub_domains.size() ; i++)
+		{
+			Box_loc_sub<dim,T> bls(SpaceBox<dim,T>(sub_domains.get(i)),i,zero);
+			sub_domains_prc.add(bls);
+			sub_domains_prc.last().sub = i;
+		}
+
+		applyBC(sub_domains_prc,domain,ghost,bc);
+
+		create_loc_ghost_ibox(ghost,sub_domains,sub_domains_prc);
+		create_loc_ghost_ebox(ghost,sub_domains,sub_domains_prc);
+	}
+
+	//! Default constructor
+	ie_loc_ghost()	{};
+
+	//! Constructor from another ie_loc_ghost
+	ie_loc_ghost(const ie_loc_ghost<dim,T> & ilg)
+	{
+		this->operator=(ilg);
+	};
+
+	//! Constructor from temporal ie_loc_ghost
+	ie_loc_ghost(ie_loc_ghost<dim,T> && ilg)
+	{
+		this->operator=(ilg);
+	}
+
+	/*! \brief copy the ie_loc_ghost
+	 *
+	 * \param ilg object to copy
+	 *
+	 */
+	ie_loc_ghost<dim,T> & operator=(const ie_loc_ghost<dim,T> & ilg)
+	{
+		loc_ghost_box = ilg.loc_ghost_box;
+		return *this;
+	}
+
+	/*! \brief copy the ie_loc_ghost
+	 *
+	 * \param ilg object to copy
+	 *
+	 */
+	ie_loc_ghost<dim,T> & operator=(ie_loc_ghost<dim,T> && ilg)
+	{
+		loc_ghost_box.swap(ilg.loc_ghost_box);
+		return *this;
+	}
+
+	/*! \brief Get the number of local sub-domains
+	 *
+	 * \return the number of local sub-domains
+	 *
+	 */
+	inline size_t getNLocalSub()
+	{
+		return loc_ghost_box.size();
+	}
+
 	/*! \brief Get the number of external local ghost box for each sub-domain
 	 *
 	 * \param id sub-domain id
@@ -179,7 +357,62 @@ public:
 	 */
 	inline const ::Box<dim,T> & getLocalIGhostBox(size_t i, size_t j) const
 	{
-		return loc_ghost_box.get(i).ibx.get(j);
+		return loc_ghost_box.get(i).ibx.get(j).bx;
+	}
+
+	/*! \brief Get the j internal local ghost box boundary position for the i sub-domain of the local processor
+	 *
+	 * \note For the sub-domain i intersected with the sub-domain j enlarged, the associated
+	 *       external ghost box is located in getLocalIGhostBox(j,k) with
+	 *       getLocalIGhostSub(j,k) == i
+	 *
+	 * To get k use getLocalIGhostE
+	 *
+	 * \see getLocalIGhostE
+	 *
+	 * Some of the intersection boxes has special position, because they are at the boundary, this function
+	 * return their position at the border
+	 *
+		\verbatim
+
+															[1,1]
+			+---------+------------------------+---------+
+			| (1,-1)  |                        | (1,1)   |
+			|   |     |    (1,0) --> 7         |   |     |
+			|   v     |                        |   v     |
+			|   6     |                        |   8     |
+			+--------------------------------------------+
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			| (-1,0)  |                        | (1,0)   |
+			|    |    |                        |   |     |
+			|    v    |      (0,0) --> 4       |   v     |
+			|    3    |                        |   5     |
+			|         |                        |         |
+		 	|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			+--------------------------------------------+
+			| (-1,-1) |                        | (-1,1)  |
+			|    |    |   (-1,0) --> 1         |    |    |
+			|    v    |                        |    v    |
+			|    0    |                        |    2    |
+			+---------+------------------------+---------+
+
+
+		\endverbatim
+	 *
+	 * \param i sub-domain
+	 * \param j box
+	 * \return the box
+	 *
+	 */
+	inline const comb<dim> & getLocalIGhostPos(size_t i, size_t j) const
+	{
+		return loc_ghost_box.get(i).ibx.get(j).cmb;
 	}
 
 	/*! \brief Get the j external local ghost box for the local processor
@@ -191,7 +424,7 @@ public:
 	 */
 	inline const ::Box<dim,T> & getLocalEGhostBox(size_t i, size_t j) const
 	{
-		return loc_ghost_box.get(i).ebx.get(j);
+		return loc_ghost_box.get(i).ebx.get(j).bx;
 	}
 
 	/*! \brief Considering that sub-domain has N internal local ghost box identified
@@ -239,19 +472,39 @@ public:
 	 */
 	bool write(std::string output, size_t p_id) const
 	{
-		//! local_internal_ghost_X.vtk internal local ghost boxes for the local processor (X)
-		VTKWriter<openfpm::vector_std<Box_sub_k<dim,T>>,VECTOR_BOX> vtk_box5;
+		// Copy the Box_sub_k into a vector of boxes
+		openfpm::vector<openfpm::vector<Box<dim,T>>> vv5;
+
 		for (size_t p = 0 ; p < loc_ghost_box.size() ; p++)
 		{
-			vtk_box5.add(loc_ghost_box.get(p).ibx);
+			vv5.add();
+			for (size_t i = 0 ; i < loc_ghost_box.get(p).ibx.size() ; i++)
+				vv5.last().add(loc_ghost_box.get(p).ibx.get(i).bx);
+		}
+
+		//! local_internal_ghost_X.vtk internal local ghost boxes for the local processor (X)
+		VTKWriter<openfpm::vector<Box<dim,T>>,VECTOR_BOX> vtk_box5;
+		for (size_t p = 0 ; p < vv5.size() ; p++)
+		{
+			vtk_box5.add(vv5.get(p));
 		}
 		vtk_box5.write(output + std::string("local_internal_ghost_") + std::to_string(p_id) + std::string(".vtk"));
 
-		//! local_external_ghost_X.vtk external local ghost boxes for the local processor (X)
-		VTKWriter<openfpm::vector_std<Box_sub<dim,T>>,VECTOR_BOX> vtk_box6;
+		// Copy the Box_sub_k into a vector of boxes
+		openfpm::vector<openfpm::vector<Box<dim,T>>> vv6;
+
 		for (size_t p = 0 ; p < loc_ghost_box.size() ; p++)
 		{
-			vtk_box6.add(loc_ghost_box.get(p).ebx);
+			vv6.add();
+			for (size_t i = 0 ; i < loc_ghost_box.get(p).ebx.size() ; i++)
+				vv6.last().add(loc_ghost_box.get(p).ebx.get(i).bx);
+		}
+
+		//! local_external_ghost_X.vtk external local ghost boxes for the local processor (X)
+		VTKWriter<openfpm::vector<Box<dim,T>>,VECTOR_BOX> vtk_box6;
+		for (size_t p = 0 ; p < vv6.size() ; p++)
+		{
+			vtk_box6.add(vv6.get(p));
 		}
 		vtk_box6.write(output + std::string("local_external_ghost_") + std::to_string(p_id) + std::string(".vtk"));
 
@@ -273,7 +526,10 @@ public:
 			for (size_t j = 0 ; j < loc_ghost_box.get(i).ibx.size() ; j++)
 			{
 				if (loc_ghost_box.get(i).ibx.get(j).k == -1)
+				{
+					std::cout << "No ibx link" << "\n";
 					return false;
+				}
 			}
 		}
 
@@ -282,10 +538,103 @@ public:
 			for (size_t i = 0 ; i < loc_ghost_box.size() ; i++)
 			{
 				if (loc_ghost_box.get(i).ibx.size() == 0)
+				{
+					std::cout << "Zero ibx" << "\n";
 					return false;
+				}
 				if (loc_ghost_box.get(i).ebx.size() == 0)
+				{
+					std::cout << "Zero ebx" << "\n";
+					return false;
+				}
+			}
+		}
+
+		return true;
+	}
+
+	/*! \brief Check if the ie_loc_ghosts contain the same information
+	 *
+	 * \param ele Element to check
+	 *
+	 */
+	bool is_equal(ie_loc_ghost<dim,T> & ilg)
+	{
+		if (ilg.loc_ghost_box.size() != loc_ghost_box.size())
+			return false;
+
+		// Explore all the subdomains
+		for (size_t i = 0 ; i < loc_ghost_box.size() ; i++)
+		{
+			if (getLocalNIGhost(i) != ilg.getLocalNIGhost(i))
+				return false;
+
+			if (getLocalNEGhost(i) != ilg.getLocalNEGhost(i))
+				return false;
+
+			for (size_t j = 0 ; j < getLocalNIGhost(i) ; j++)
+			{
+				if (getLocalIGhostE(i,j) != ilg.getLocalIGhostE(i,j))
+					return false;
+				if (getLocalIGhostBox(i,j) != ilg.getLocalIGhostBox(i,j))
+					return false;
+				if (getLocalIGhostSub(i,j) != ilg.getLocalIGhostSub(i,j))
 					return false;
 			}
+			for (size_t j = 0 ; j < getLocalNEGhost(i) ; j++)
+			{
+				if (getLocalEGhostBox(i,j) != ilg.getLocalEGhostBox(i,j))
+					return false;
+				if (getLocalEGhostSub(i,j) != ilg.getLocalEGhostSub(i,j))
+					return false;
+			}
+
+		}
+
+		return true;
+	}
+
+
+
+	/*! \brief Check if the ie_loc_ghosts contain the same information
+	 * with the exception of the ghost part
+	 *
+	 * \param ele Element to check
+	 *
+	 */
+	bool is_equal_ng(ie_loc_ghost<dim,T> & ilg)
+	{
+		Box<dim,T> bt;
+
+		if (ilg.loc_ghost_box.size() != loc_ghost_box.size())
+			return false;
+
+		// Explore all the subdomains
+		for (size_t i = 0 ; i < loc_ghost_box.size() ; i++)
+		{
+			if (getLocalNIGhost(i) != ilg.getLocalNIGhost(i))
+				return false;
+
+			if (getLocalNEGhost(i) != ilg.getLocalNEGhost(i))
+				return false;
+
+			for (size_t j = 0 ; j < getLocalNIGhost(i) ; j++)
+			{
+				if (getLocalIGhostE(i,j) != ilg.getLocalIGhostE(i,j))
+					return false;
+				if (getLocalIGhostBox(i,j).Intersect(ilg.getLocalIGhostBox(i,j),bt) == false)
+					return false;
+				if (getLocalIGhostSub(i,j) != ilg.getLocalIGhostSub(i,j))
+					return false;
+			}
+			for (size_t j = 0 ; j < getLocalNEGhost(i) ; j++)
+			{
+				if (getLocalEGhostBox(i,j).Intersect(ilg.getLocalEGhostBox(i,j),bt) == false)
+					return false;
+				if (getLocalEGhostSub(i,j) != ilg.getLocalEGhostSub(i,j))
+					return false;
+			}
+
 		}
 
 		return true;
diff --git a/src/Decomposition/nn_processor.hpp b/src/Decomposition/nn_processor.hpp
index 73ccf5f73c4e80014214f12fcd69aba5a0740275..ddda06c463c591a17ea5159a298b5474b7d6bbc6 100755
--- a/src/Decomposition/nn_processor.hpp
+++ b/src/Decomposition/nn_processor.hpp
@@ -9,6 +9,7 @@
 #define SRC_DECOMPOSITION_NN_PROCESSOR_HPP_
 
 #include "common.hpp"
+#include <unordered_map>
 
 /*! \brief This class store the adjacent processors and the adjacent sub_domains
  *
@@ -27,18 +28,26 @@ class nn_prcs
 	//! List of adjacent processors
 	openfpm::vector<size_t> nn_processors;
 
-	// for each near-processor store the sub-domain of the near processor
+	// for each near processor store the sub-domains of the near processors
 	std::unordered_map<size_t, N_box<dim,T>> nn_processor_subdomains;
 
-	// for each processor store the set of the sub-domains sent to the adjacent processors
+	// newly added boxes are temporarily stored here before being flushed
+	std::unordered_map<size_t, N_box<dim,T>> nn_processor_subdomains_tmp;
+
+	// contain the same information as the member boxes with the difference that
+	// instead of the Box itself, it contain the sub-domain id in the list of the
+	// local sub-domains
 	openfpm::vector<openfpm::vector<size_t>> proc_adj_box;
 
-	//! contain the internal adjacent sub-domains sent to the other processors
+	//! contain the set of sub-domains sent to the other processors
 	openfpm::vector< openfpm::vector< ::SpaceBox<dim,T>> > boxes;
 
 	// Receive counter
 	size_t recv_cnt;
 
+	//! applyBC function is supposed to be called only one time
+	bool aBC;
+
 	/*! \brief Message allocation
 	 *
 	 * \param message size required to receive from i
@@ -57,17 +66,211 @@ class nn_prcs
 		// cast the pointer
 		nn_prcs<dim,T> * cd = static_cast< nn_prcs<dim,T> *>(ptr);
 
-		// Resize the memory
 		cd->nn_processor_subdomains[i].bx.resize(msg_i / sizeof(::Box<dim,T>) );
 
 		// Return the receive pointer
 		return cd->nn_processor_subdomains[i].bx.getPointer();
 	}
 
+	/*! \brief add sub-domains to processor for a near processor i
+	 *
+	 * \param i near processor
+	 * \param bx Box to add
+	 * \param c from which sector the sub-domain come from
+	 *
+	 */
+	inline void add_nn_subdomain(size_t i, const Box<dim,T> & bx, const comb<dim> & c)
+	{
+		N_box<dim,T> & nnpst = nn_processor_subdomains_tmp[i];
+		nnpst.bx.add(bx);
+		nnpst.pos.add(c);
+	}
+
+	/*! \brief In case of periodic boundary conditions we replicate the sub-domains at the border
+	 *
+	 * \param domain Domain box
+	 * \param bc boundary conditions
+	 * \param ghost ghost part
+	 *
+	 */
+	void add_box_periodic(const Box<dim,T> & domain, const Ghost<dim,T> & ghost, const size_t (&bc)[dim])
+	{
+		HyperCube<dim> hyp;
+
+		// first we create boxes at the border of the domain used to detect the sub-domain
+		// that must be adjusted, each of this boxes define a shift in case of periodic boundary condition
+		for (long int i = dim-1 ; i >= 0 ; i--)
+		{
+			std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i);
+
+			for (size_t j = 0 ; j < cmbs.size() ; j++)
+			{
+				if (check_valid(cmbs[j],bc) == false)
+					continue;
+
+				// Calculate the sector box
+				Box<dim,T> bp;
+				Point<dim,T> shift;
+
+				for (size_t k = 0 ; k < dim ; k++)
+				{
+					switch (cmbs[j][k])
+					{
+					case 1:
+						bp.setLow(k,domain.getHigh(k)+ghost.getLow(k));
+						bp.setHigh(k,domain.getHigh(k));
+						shift.get(k) = -domain.getHigh(k)+domain.getLow(k);
+						break;
+					case 0:
+						bp.setLow(k,domain.getLow(k));
+						bp.setHigh(k,domain.getHigh(k));
+						shift.get(k) = 0;
+						break;
+					case -1:
+						bp.setLow(k,domain.getLow(k));
+						bp.setHigh(k,ghost.getHigh(k));
+						shift.get(k) = domain.getHigh(k)-domain.getLow(k);
+						break;
+					}
+				}
+
+				// Detect all the sub-domain involved, shift them and add to the list
+				// Detection is performed intersecting the sub-domains with the ghost
+				// parts near the domain borders
+				for (size_t k = 0 ; k < getNNProcessors() ; k++)
+				{
+					// sub-domains of the near processor
+					const openfpm::vector< ::Box<dim,T> > & nn_sub = getNearSubdomains(IDtoProc(k));
+
+					for (size_t l = 0 ; l < nn_sub.size(); l++)
+					{
+						Box<dim,T> sub = nn_sub.get(l);
+						Box<dim,T> b_int;
+
+						if (sub.Intersect(bp,b_int) == true)
+						{
+							sub += shift;
+							add_nn_subdomain(IDtoProc(k),sub,cmbs[j]);
+						}
+					}
+				}
+			}
+		}
+
+		flush();
+	}
+
+	/*! \brief Flush the temporarily added sub-domains to the processor sub-domains
+	 *
+	 *
+	 */
+	void flush()
+	{
+		for ( auto it = nn_processor_subdomains_tmp.begin(); it != nn_processor_subdomains_tmp.end(); ++it )
+		{
+			const N_box<dim,T> & nnp_bx = it->second;
+
+			for (size_t i = 0 ; i < nnp_bx.bx.size() ; i++)
+			{
+				N_box<dim,T> & nnps = nn_processor_subdomains[it->first];
+				const N_box<dim,T> & nnps_tmp = nn_processor_subdomains_tmp[it->first];
+
+				nnps.bx.add(nnps_tmp.bx.get(i));
+				nnps.pos.add(nnps_tmp.pos.get(i));
+			}
+		}
+
+		nn_processor_subdomains_tmp.clear();
+	}
+
 public:
 
 	nn_prcs(Vcluster & v_cl)
-	:v_cl(v_cl){}
+	:v_cl(v_cl),recv_cnt(0),aBC(false)
+	{}
+
+	//! Constructor from another nn_prcs
+	nn_prcs(const nn_prcs<dim,T> & ilg)
+	:v_cl(ilg.v_cl),recv_cnt(0),aBC(false)
+	{
+		this->operator=(ilg);
+	};
+
+	//! Constructor from temporal ie_loc_ghost
+	nn_prcs(nn_prcs<dim,T> && ilg)
+	:v_cl(ilg.v_cl),recv_cnt(0),aBC(false)
+	{
+		this->operator=(ilg);
+	}
+
+	/*! Check that the combination is valid
+	 *
+	 * \param cmb combination
+	 * \param bc boundary conditions
+	 *
+	 */
+	static bool inline check_valid(comb<dim> cmb,const size_t (& bc)[dim])
+	{
+		// the combination 0 is not valid
+		if (cmb.n_zero() == dim)
+			return false;
+
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (bc[i] == NON_PERIODIC && cmb.getComb()[i] != 0)
+				return false;
+		}
+		return true;
+	}
+
+	/*! \brief Copy the object
+	 *
+	 * \param nnp object to copy
+	 *
+	 */
+	nn_prcs<dim,T> & operator=(const nn_prcs<dim,T> & nnp)
+	{
+		v_cl = nnp.v_cl;
+		nn_processors = nnp.nn_processors;
+		nn_processor_subdomains = nnp.nn_processor_subdomains;
+		proc_adj_box = nnp.proc_adj_box;
+		boxes = nnp.boxes;
+
+		return *this;
+	}
+
+	/*! \brief Copy the object
+	 *
+	 * \param nnp object to copy
+	 *
+	 */
+	nn_prcs<dim,T> & operator=(nn_prcs<dim,T> && nnp)
+	{
+		v_cl = nnp.v_cl;
+		nn_processors.swap(nnp.nn_processors);
+		nn_processor_subdomains.swap(nnp.nn_processor_subdomains);
+		proc_adj_box.swap(nnp.proc_adj_box);
+		boxes = nnp.boxes;
+
+		return *this;
+	}
+
+	/*! \brief Refine the ss_box to have the smallest size on each direction of the local sub-domains and adjacent (from other processor) one
+	 *
+	 * \param ss_box box that store the smallest size of the sub-domain
+	 *
+	 */
+	void refine_ss_box(Box<dim,T> & ss_box)
+	{
+		for (size_t p = 0 ; p < getNNProcessors() ; p++)
+		{
+			const openfpm::vector< ::Box<dim,T> > & list_p_box = getNearSubdomains(IDtoProc(p));
+
+			// Create the smallest box contained in all sub-domain
+			for (size_t b = 0 ; b < list_p_box.size() ; b++)
+				ss_box.contained(list_p_box.get(b));
+		}
+	}
 
 	/*! \brief Create the list of adjacent processors and the list of adjacent sub-domains
 	 *
@@ -76,8 +279,7 @@ public:
 	 */
 	void create(const openfpm::vector<openfpm::vector<long unsigned int> > & box_nn_processor, const openfpm::vector<SpaceBox<dim,T>> & sub_domains)
 	{
-		// produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the
-		// processor list
+		// produce the list of the adjacent processor (nn_processors) list
 		for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
 		{
 			for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
@@ -86,18 +288,19 @@ public:
 			}
 		}
 
-		// make the list sorted and unique
+		// make the processor list sorted and unique
 	    std::sort(nn_processors.begin(), nn_processors.end());
 	    auto last = std::unique(nn_processors.begin(), nn_processors.end());
 	    nn_processors.erase(last, nn_processors.end());
 
-        // produce the list of the contiguous processor (nn_processors) and link nn_processor_subdomains to the
-        // processor list (nn_processors)
+        // link nn_processor_subdomains to nn_processors
+	    // it is used to quickly convert the Processor rank to the position in the list of the
+	    // near processors
         for (size_t i = 0 ;  i < box_nn_processor.size() ; i++)
         {
                 for (size_t j = 0 ; j < box_nn_processor.get(i).size() ; j++)
                 {
-                        // processor id near to this sub-domain
+                        // processor id adjacent to this sub-domain
                         size_t proc_id = box_nn_processor.get(i).get(j);
 
                         size_t k = 0;
@@ -109,13 +312,10 @@ public:
                 }
         }
 
-
-		// create a buffer with the sub-domains of this processor, the informations ( the boxes )
-		// of the sub-domains contiguous to the processor A are sent to the processor A and
-		// the information of the contiguous sub-domains in the near processors are received
-		//
+		// create a buffer with the sub-domains that can have an intersection with
+        // the near processors
 		proc_adj_box.resize(getNNProcessors());
-		boxes.resize(nn_processors.size());
+		boxes.resize(getNNProcessors());
 
 		for (size_t b = 0 ; b < box_nn_processor.size() ; b++)
 		{
@@ -125,19 +325,33 @@ public:
 
 				// id of the processor in the processor list
 				// [value between 0 and the number of the near processors]
-				size_t id = nn_processor_subdomains[prc].id;
+				size_t id = ProctoID(prc);
 
 				boxes.get(id).add(sub_domains.get(b));
 				proc_adj_box.get(id).add(b);
 			}
 		}
 
-		// Intersect all the local sub-domains with the sub-domains of the contiguous processors
+		nn_processor_subdomains.reserve(nn_processors.size());
 
 		// Get the sub-domains of the near processors
 		v_cl.sendrecvMultipleMessagesNBX(nn_processors,boxes,nn_prcs<dim,T>::message_alloc, this ,NEED_ALL_SIZE);
 
+		// Add to all the received sub-domains the information that they live in the central sector
+		for ( auto it = nn_processor_subdomains.begin(); it != nn_processor_subdomains.end(); ++it )
+		{
+			const N_box<dim,T> & nnp_bx = it->second;
+
+			for (size_t i = 0 ; i < nnp_bx.bx.size() ; i++)
+			{
+				comb<dim> c;
+				c.zero();
+
+				N_box<dim,T> & nnps = nn_processor_subdomains[it->first];
 
+				nnps.pos.add(c);
+			}
+		}
 	}
 
 	/*! \brief Get the number of Near processors
@@ -162,73 +376,76 @@ public:
 		return nn_processors.get(id);
 	}
 
-	/*! \brief Get the sub-domain pf an adjacent processor
+	/*! \brief Get the sub-domain of a near processor
 	 *
-	 * \param p_id adjacent processor (id from 0 to getNNProcessors())
+	 * \param p_id near processor rank
 	 *
 	 * \return the sub-domains
 	 *
 	 */
-	inline const openfpm::vector< ::Box<dim,T> > & getAdjacentSubdomain(size_t p_id) const
+	inline const openfpm::vector< ::Box<dim,T> > & getNearSubdomains(size_t p_id) const
 	{
 		auto key = nn_processor_subdomains.find(p_id);
-#ifdef DEBUG
+#ifdef SE_CLASS1
 		if (key == nn_processor_subdomains.end())
 		{
 			std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " error this process rank is not adjacent to the local processor";
 		}
 #endif
+
 		return key->second.bx;
 	}
 
-	/*! \brief Get the adjacent processor id
+	/*! \brief Get the sub-domain of a near processor
 	 *
-	 * \param p_id adjacent processor (id from 0 to getNNProcessors())
+	 * \param p_id near processor rank
 	 *
-	 * \return the processor rank
+	 * \return the sub-domains
 	 *
 	 */
-	inline size_t getAdjacentProcessor(size_t p_id) const
+	inline const openfpm::vector< comb<dim> > & getNearSubdomainsPos(size_t p_id) const
 	{
 		auto key = nn_processor_subdomains.find(p_id);
-#ifdef DEBUG
+#ifdef SE_CLASS1
 		if (key == nn_processor_subdomains.end())
 		{
 			std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " error this process rank is not adjacent to the local processor";
 		}
 #endif
-		return key->second.id;
+		return key->second.pos;
 	}
 
-
-	/*! \brief Get the local sub-domains adjacent to a processor p_id
+	/*! \brief Get the adjacent processor id
 	 *
-	 * \param p_id adjacent processor (id from 0 to getNNProcessors())
+	 * \param p_id adjacent processor rank
 	 *
-	 * \return the sub-domains
-	 *
-	 */
-	inline const openfpm::vector<size_t> & getInternalAdjSubdomain(size_t p_id) const
-	{
-		return proc_adj_box.get(p_id);
-	}
-
-	/*! \brief Get the external sub-domain adjacent to a processor p_id
-	 *
-	 * \param p_id processor rank
-	 * \return the set of adjacent sub-domain comming from the processor p_id
+	 * \return the processor rank
 	 *
 	 */
-	inline const N_box<dim,T> getExternalAdjSubdomain(size_t p_id) const
+	inline size_t getNearProcessor(size_t p_id) const
 	{
 		auto key = nn_processor_subdomains.find(p_id);
-#ifdef DEBUG
+#ifdef SE_CLASS1
 		if (key == nn_processor_subdomains.end())
 		{
 			std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " error this process rank is not adjacent to the local processor";
 		}
 #endif
-		return key->second;
+		return key->second.id;
+	}
+
+
+	/*! \brief For each near processor it give a vector with the id
+	 *         of the local sub-domain sent to that processor
+	 *
+	 * \param p_id adjacent processor (id from 0 to getNNProcessors())
+	 *
+	 * \return a vector of sub-domains id
+	 *
+	 */
+	inline const openfpm::vector<size_t> & getSentSubdomains(size_t p_id) const
+	{
+		return proc_adj_box.get(p_id);
 	}
 
 	/*! \brief Convert the processor rank to the id in the list
@@ -241,7 +458,7 @@ public:
 	inline size_t ProctoID(size_t p) const
 	{
 		auto key = nn_processor_subdomains.find(p);
-#ifdef DEBUG
+#ifdef SE_CLASS1
 		if (key == nn_processor_subdomains.end())
 		{
 			std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " error this process rank is not adjacent to the local processor";
@@ -279,6 +496,60 @@ public:
 		return true;
 	}
 
+	/*! \brief Apply boundary conditions
+	 *
+	 * \param domain The simulation domain
+	 * \param ghost ghost part
+	 * \param bc Boundary conditions
+	 *
+	 */
+	void applyBC(const Box<dim,T> & domain, const Ghost<dim,T> & ghost, const size_t (&bc)[dim])
+	{
+		if (aBC == true)
+		{
+			std::cerr << "Warning " << __FILE__ << ":" << __LINE__ << " apply BC is suppose to be called only one time\n";
+			return;
+		}
+
+		aBC=true;
+
+		return add_box_periodic(domain,ghost,bc);
+	}
+
+	/*! \brief Check if the nn_prcs contain the same information
+	 *
+	 * \param ele Element to check
+	 *
+	 */
+	bool is_equal(nn_prcs<dim,T> & np)
+	{
+		if (np.getNNProcessors() != getNNProcessors())
+			return false;
+
+		for (size_t p = 0 ; p < getNNProcessors() ; p++)
+		{
+			if (getNearSubdomains(IDtoProc(p)) != np.getNearSubdomains(IDtoProc(p)))
+				return false;
+			if (getNearProcessor(IDtoProc(p)) != np.getNearProcessor(IDtoProc(p)))
+				return false;
+			if (getSentSubdomains(p) != np.getSentSubdomains(p))
+				return false;
+		}
+
+		return true;
+	}
+
+	//! Used for testing purposes, do not use
+	std::unordered_map<size_t, N_box<dim,T>> & get_nn_processor_subdomains()
+	{
+		return nn_processor_subdomains;
+	}
+
+	//! Used for testing purposes, do not use
+	openfpm::vector<size_t> & get_nn_processors()
+	{
+		return nn_processors;
+	}
 };
 
 
diff --git a/src/Decomposition/nn_processor_unit_test.hpp b/src/Decomposition/nn_processor_unit_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bae4403cb73d19f20a1fa86507f2b68567eba0f7
--- /dev/null
+++ b/src/Decomposition/nn_processor_unit_test.hpp
@@ -0,0 +1,228 @@
+/*
+ * nn_processor_unit_test.hpp
+ *
+ *  Created on: Dec 16, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_DECOMPOSITION_NN_PROCESSOR_UNIT_TEST_HPP_
+#define SRC_DECOMPOSITION_NN_PROCESSOR_UNIT_TEST_HPP_
+
+#include "VCluster.hpp"
+
+
+BOOST_AUTO_TEST_SUITE( nn_processor_test )
+
+BOOST_AUTO_TEST_CASE( nn_processor_np_test)
+{
+	constexpr unsigned int dim = 2;
+	typedef float T;
+
+	// Adjacent processor for each sub-domain
+	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
+
+	// Vcluster
+	Vcluster & v_cl = *global_v_cluster;
+
+	const size_t bc[dim] = {NON_PERIODIC,NON_PERIODIC};
+
+	SpaceBox<dim,float> domain({0.0,0.0},{1.0,1.0});
+
+	size_t sz[dim] = {8,8};
+	//! Structure that store the cartesian grid information
+	grid_sm<dim,void> gr(sz);
+
+	CellDecomposer_sm<dim,T> cd;
+	cd.setDimensions(domain,sz,0);
+
+	//! Box Spacing
+	T spacing[dim];
+
+	// Calculate the total number of box and and the spacing
+	// on each direction
+	// Get the box containing the domain
+	SpaceBox<2,T> bs = domain.getBox();
+
+	//! the set of all local sub-domain as vector
+	openfpm::vector<SpaceBox<dim,T>> sub_domains;
+
+	/////////// From Cart decomposition ///////////
+
+	for (unsigned int i = 0; i < dim ; i++)
+	{
+		// Calculate the spacing
+		spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
+	}
+
+	// Here we use METIS
+	// Create a cartesian grid graph
+	CartesianGraphFactory<dim,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
+
+	// the graph has only non perdiodic boundary conditions
+	size_t bc_o[dim];
+	for (size_t i = 0 ; i < dim ; i++)
+		bc_o[i] = NON_PERIODIC;
+
+	// sub-sub-domain graph
+	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.template construct<NO_EDGE,T,2-1>(gr.getSize(),domain,bc_o);
+
+	// Get the number of processing units
+	size_t Np = v_cl.getProcessingUnits();
+
+	// Get the processor id
+	long int p_id = v_cl.getProcessUnitID();
+
+	// Convert the graph to metis
+	Metis<Graph_CSR<nm_part_v,nm_part_e>> met(gp,Np);
+
+	// decompose
+	met.decompose<nm_part_v::id>();
+
+	// Optimize the decomposition creating bigger spaces
+	// And reducing Ghost over-stress
+	dec_optimizer<2,Graph_CSR<nm_part_v,nm_part_e>> d_o(gp,gr.getSize());
+
+	// set of Boxes produced by the decomposition optimizer
+	openfpm::vector<::Box<2,size_t>> loc_box;
+
+	// optimize the decomposition
+	d_o.template optimize<nm_part_v::sub_id,nm_part_v::id>(gp,p_id,loc_box,box_nn_processor,bc);
+
+	// Initialize ss_box and bbox
+	if (loc_box.size() >= 0)
+	{
+		SpaceBox<dim,size_t> sub_dc = loc_box.get(0);
+		SpaceBox<dim,T> sub_d(sub_dc);
+		sub_d.mul(spacing);
+		sub_d.expand(spacing);
+
+		// Fixing sub-domains to cover all the domain
+
+		// Fixing sub_d
+		// if (loc_box) is at the boundary we have to ensure that the box spans the full
+		// domain (avoiding rounding off error)
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
+			{
+				sub_d.setHigh(i,domain.getHigh(i));
+			}
+		}
+
+		// add the sub-domain
+		sub_domains.add(sub_d);
+	}
+
+	// convert into sub-domain
+	for (size_t s = 1 ; s < loc_box.size() ; s++)
+	{
+		SpaceBox<dim,size_t> sub_dc = loc_box.get(s);
+		SpaceBox<dim,T> sub_d(sub_dc);
+
+		// re-scale and add spacing (the end is the starting point of the next domain + spacing)
+		sub_d.mul(spacing);
+		sub_d.expand(spacing);
+
+		// Fixing sub-domains to cover all the domain
+
+		// Fixing sub_d
+		// if (loc_box) is at the boundary we have to ensure that the box spans the full
+		// domain (avoiding rounding off error)
+		for (size_t i = 0 ; i < 2 ; i++)
+		{
+			if (sub_dc.getHigh(i) == cd.getGrid().size(i) - 1)
+			{
+				sub_d.setHigh(i,domain.getHigh(i));
+			}
+		}
+
+		// add the sub-domain
+		sub_domains.add(sub_d);
+
+	}
+
+	nn_prcs<dim,T> nnp(v_cl);
+	nnp.create(box_nn_processor, sub_domains);
+
+	if (v_cl.getProcessingUnits() == 1)
+	{
+		BOOST_REQUIRE(nnp.getNNProcessors() == 0);
+	}
+	else if (v_cl.getProcessingUnits() == 2)
+	{
+		BOOST_REQUIRE(nnp.getNNProcessors() == 1);
+	}
+	else
+	{
+		BOOST_REQUIRE(nnp.getNNProcessors() >= 1);
+	}
+}
+
+BOOST_AUTO_TEST_CASE( nn_processor_box_periodic_test)
+{
+	constexpr unsigned int dim = 3;
+	typedef float T;
+
+	Box<dim,T> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
+	Point<dim,T> middle({0.5,0.5,0.5});
+
+	const size_t bc[dim] = {PERIODIC,PERIODIC,PERIODIC};
+
+	// Vcluster
+	Vcluster & v_cl = *global_v_cluster;
+
+	Ghost<dim,T> ghost(0.01);
+
+	//////////////
+
+	nn_prcs<dim,T> nnp(v_cl);
+
+	std::unordered_map<size_t, N_box<dim,T>> & nnp_sub = nnp.get_nn_processor_subdomains();
+	openfpm::vector<size_t> & nnp_np = nnp.get_nn_processors();
+
+	// we add the boxes
+
+	size_t tot_n = 0;
+	HyperCube<dim> hyp;
+
+	for (long int i = dim-1 ; i >= 0 ; i--)
+	{
+		std::vector<comb<dim>> cmbs = hyp.getCombinations_R(i);
+
+		for (size_t j = 0 ; j < cmbs.size() ; j++)
+		{
+			// Create a fake processor number
+			size_t prc = i;
+
+			Point<dim,T> p1 = (middle * toPoint<dim,T>::convert(cmbs[j]) + middle)* 1.0/1.1;
+			Point<dim,T> p2 = p1 + Point<dim,T>({0.1,0.1,0.1}) * 1.0/1.1;
+
+			Box<dim,T> bx(p1,p2);
+			nnp_sub[prc+1].id = prc;
+			nnp_sub[prc+1].bx.add(bx);
+
+			tot_n++;
+		}
+	}
+
+	for (size_t i = 0; i < dim; i++)
+	{
+		nnp_np.add(i+1);
+	}
+
+	// check that nn_processor contain the correct boxes
+
+//	nnp.write("nnp_output_before");
+
+	nnp.applyBC(domain,ghost,bc);
+
+//	nnp.write("nnp_output_after");
+
+	BOOST_REQUIRE_EQUAL(nnp.getNearSubdomains(nnp.IDtoProc(2)).size(),12ul);
+	BOOST_REQUIRE_EQUAL(nnp.getNearSubdomains(nnp.IDtoProc(0)).size(),8ul*8ul);
+	BOOST_REQUIRE_EQUAL(nnp.getNearSubdomains(nnp.IDtoProc(1)).size(),12ul*4ul);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_DECOMPOSITION_NN_PROCESSOR_UNIT_TEST_HPP_ */
diff --git a/src/Graph/CartesianGraphFactory.hpp b/src/Graph/CartesianGraphFactory.hpp
index 2dc6539db237498cc1cd703db3904457d68b33db..358f5e03077005e35e254eb1685f743227fef862 100755
--- a/src/Graph/CartesianGraphFactory.hpp
+++ b/src/Graph/CartesianGraphFactory.hpp
@@ -14,6 +14,8 @@
 #include "Space/Shape/Box.hpp"
 #include "Space/Shape/HyperCube.hpp"
 
+#define NO_VERTEX_ID -1
+
 /*! \brief Operator to fill the property 'prp' with the linearization of indexes
  *
  *  \tparam dim Dimension of the space
@@ -34,7 +36,7 @@ struct fill_id
  *  \tparam G_v Graph
  */
 template<unsigned int dim, typename G_v>
-struct fill_id<dim, G_v, -1>
+struct fill_id<dim, G_v, NO_VERTEX_ID>
 {
 	static inline void fill(G_v & g_v, const grid_key_dx<dim> & gk, const grid_sm<dim, void> & gs)
 	{
@@ -203,7 +205,7 @@ public:
 	{
 		typedef typename boost::fusion::result_of::at<v, boost::mpl::int_<0>>::type t_val;
 
-		g_v.template get<t_val::value>()[T::value] = gk.get(T::value) * szd[T::value];
+		g_v.template get<t_val::value>()[T::value] = gk.get(T::value) * static_cast<float>(szd[T::value]);
 		fill_id<dim, G_v, lin_id>::fill(g_v, gk, gs);
 	}
 };
@@ -259,7 +261,7 @@ class Graph_constructor_impl
 {
 public:
 	//! Construct cartesian graph
-	static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom)
+	static Graph construct(const size_t (& sz)[dim], Box<dim,T> dom, const size_t(& bc)[dim])
 	{
 		// Calculate the size of the hyper-cubes on each dimension
 		T szd[dim];
@@ -316,7 +318,7 @@ public:
 
 			// Get the combinations of dimension d
 
-			for (size_t d = dim - 1; d >= dim_c; d--)
+			for (long int d = dim-1 ; d >= dim_c ; d--)
 			{
 				// create the edges for that dimension
 
@@ -332,16 +334,16 @@ public:
 
 					// for each dimension multiply and reduce
 
-					for (size_t s = 0; s < dim; s++)
-					{
+
+					for (size_t s = 0 ; s < dim ; s++)
 						ele_sz += szd[s] * abs(c[j][s]);
-					}
 
 					// Calculate the end point vertex id
 					// Calculate the start point id
 
 					size_t start_v = g.LinId(key);
-					size_t end_v = g.template LinId<CheckExistence>(key, c[j].getComb());
+
+					size_t end_v = g.template LinId<CheckExistence>(key,c[j].getComb(),bc);
 
 					// Add an edge and set the the edge property to the size of the face (communication weight)
 					gp.template addEdge<CheckExistence>(start_v, end_v).template get<se>() = ele_sz;
@@ -371,7 +373,7 @@ class Graph_constructor_impl<dim, lin_id, Graph, NO_EDGE, T, dim_c, pos...>
 {
 public:
 	//! Construct cartesian graph
-	static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom)
+	static Graph construct(const size_t ( & sz)[dim], Box<dim,T> dom, const size_t(& bc)[dim])
 	{
 		// Calculate the size of the hyper-cubes on each dimension
 
@@ -424,11 +426,11 @@ public:
 
 			// fill properties
 
-			boost::mpl::for_each<boost::mpl::range_c<int, 0, sizeof...(pos)> >(flp);
+			boost::mpl::for_each_ref<boost::mpl::range_c<int, 0, sizeof...(pos)> >(flp);
 
 			// Get the combinations of dimension d
 
-			for (size_t d = dim - 1; d >= dim_c; d--)
+			for (long int d = dim-1 ; d >= dim_c ; d--)
 			{
 				// create the edges for that dimension
 
@@ -438,22 +440,12 @@ public:
 
 				for (size_t j = 0; j < c.size(); j++)
 				{
-					// Calculate the element size
-
-					T ele_sz = 0;
-
-					// for each dimension multiply and reduce
-
-					for (size_t s = 0; s < dim; s++)
-					{
-						ele_sz += szd[s] * abs(c[j][s]);
-					}
-
 					// Calculate the end point vertex id
 					// Calculate the start point id
 
 					size_t start_v = g.LinId(key);
-					size_t end_v = g.template LinId<CheckExistence>(key, c[j].getComb());
+
+					size_t end_v = g.template LinId<CheckExistence>(key,c[j].getComb(),bc);
 
 					// Add an edge and set the the edge property to the size of the face (communication weight)
 					gp.template addEdge<CheckExistence>(start_v, end_v);
@@ -506,11 +498,12 @@ public:
 	 * \tparam pos... (optional)one or more integer indicating the spatial properties
 	 *
 	 */
-	template<int se, typename T, unsigned int dim_c, int ... pos>
-	static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom)
+
+/*	template <int se,typename T, unsigned int dim_c, int... pos>
+	static Graph construct(const size_t (& sz)[dim], Box<dim,T> dom )
 	{
-		return Graph_constructor_impl<dim, -1, Graph, se, T, dim_c, pos...>::construct(sz, dom);
-	}
+		return Graph_constructor_impl<dim,Graph,se,T,dim_c,pos...>::construct(sz,dom,bc);
+	}*/
 
 	/*!
 	 *
@@ -525,21 +518,22 @@ public:
 	 *
 	 * \param sz Vector that store the size of the grid on each dimension
 	 * \param dom Box enclosing the physical domain
+	 * \param bc boundary conditions {PERIODIC and NON_PERIODIC}
 	 *
 	 * \tparam se Indicate which properties fill with the contact size. The
 	 *           contact size is the point, line , surface, d-dimensional object size
 	 *           in contact (in common) between two hyper-cube. NO_EDGE indicate
 	 *           no property will store this information
-	 * \tparam id_prp property 'id' that stores the vertex id
+	 * \tparam id_prp property 'id' that stores the vertex id (with -1 it is skipped)
 	 * \tparam T type of the domain like (int real complex ... )
 	 * \tparam dim_c Connectivity dimension
 	 * \tparam pos... (optional)one or more integer indicating the spatial properties
 	 *
 	 */
 	template<int se, int id_prp, typename T, unsigned int dim_c, int ... pos>
-	static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom)
+	static Graph construct(const size_t (&sz)[dim], Box<dim, T> dom, const size_t (& bc)[dim])
 	{
-		return Graph_constructor_impl<dim, id_prp, Graph, se, T, dim_c, pos...>::construct(sz, dom);
+		return Graph_constructor_impl<dim, id_prp, Graph, se, T, dim_c, pos...>::construct(sz, dom, bc);
 	}
 };
 
diff --git a/src/Graph/CartesianGraphFactory_unit_test.hpp b/src/Graph/CartesianGraphFactory_unit_test.hpp
index 08690f901e68a5a953f866fef741cf5ef8d8a95e..0c4df0ba127339509b353e51111f626719da2f3c 100644
--- a/src/Graph/CartesianGraphFactory_unit_test.hpp
+++ b/src/Graph/CartesianGraphFactory_unit_test.hpp
@@ -40,7 +40,7 @@ const std::string node_cp::attributes::name[] = {"communication","computation","
 
 BOOST_AUTO_TEST_SUITE( CartesianGraphFactory_test )
 
-BOOST_AUTO_TEST_CASE( CartesianGraphFactory_use)
+BOOST_AUTO_TEST_CASE( CartesianGraphFactory_use_np)
 {
 	typedef node_cp node;
 
@@ -52,7 +52,40 @@ BOOST_AUTO_TEST_CASE( CartesianGraphFactory_use)
 	// Box
 	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
 
-	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.construct<node::communication,float,2>(sz,box);
+	// Boundary conditions, non periodic
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
+	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.construct<node::communication,NO_VERTEX_ID,float,2>(sz,box,bc);
+
+	// check that the number of vertices is equal to GS_SIZE^3
+	BOOST_REQUIRE_EQUAL(g.getNVertex(),(size_t)GS_SIZE*GS_SIZE*GS_SIZE);
+
+	// check that the number of edges is equal to the expected edge count
+	BOOST_REQUIRE_EQUAL(g.getNEdge(),(size_t)3*8+4*(GS_SIZE-2)*12+6*(GS_SIZE-2)*(GS_SIZE-2)*5+(GS_SIZE-2)*(GS_SIZE-2)*(GS_SIZE-2)*6);
+}
+
+BOOST_AUTO_TEST_CASE( CartesianGraphFactory_use_p)
+{
+	typedef node_cp node;
+
+	CartesianGraphFactory<3,Graph_CSR<Point_test<float>,Point_test<float>>> g_factory;
+
+	// Cartesian grid
+	size_t sz[3] = {GS_SIZE,GS_SIZE,GS_SIZE};
+
+	// Box
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions, periodic
+	size_t bc[] = {PERIODIC,PERIODIC,PERIODIC};
+
+	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.construct<node::communication,NO_VERTEX_ID,float,2>(sz,box,bc);
+
+	// check that the number of vertices is equal to GS_SIZE^3
+	BOOST_REQUIRE_EQUAL(g.getNVertex(),(size_t)GS_SIZE*GS_SIZE*GS_SIZE);
+
+	// check that the number of edges is equal to the expected edge count
+	BOOST_REQUIRE_EQUAL(g.getNEdge(),(size_t)(GS_SIZE)*(GS_SIZE)*(GS_SIZE)*6);
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/Graph/DistCartesianGraphFactory.hpp b/src/Graph/DistCartesianGraphFactory.hpp
index cff1fd0d608fc6bceb826d5789650606a00d7ffa..2d8040a5b2348c8a4a804cd877eaf52403e2b48e 100755
--- a/src/Graph/DistCartesianGraphFactory.hpp
+++ b/src/Graph/DistCartesianGraphFactory.hpp
@@ -209,7 +209,7 @@ public:
 	{
 		typedef typename boost::fusion::result_of::at<v, boost::mpl::int_<0>>::type t_val;
 
-		g_v.template get<t_val::value>()[T::value] = gk.get(T::value) * szd[T::value];
+		g_v.template get<t_val::value>()[T::value] = gk.get(T::value) * static_cast<float>(szd[T::value]);
 		dist_fill_id<dim, G_v, loc_id, glob_id>::fill(g_v, gk, gs);
 	}
 };
@@ -447,7 +447,7 @@ public:
 		size_t mod_v = g.size() % Np;
 		size_t div_v = g.size() / Np;
 
-		for (int i = 0; i <= Np; i++)
+		for (size_t i = 0; i <= Np; i++)
 		{
 			if (i < mod_v)
 				vtxdist.get(i) = (div_v + 1) * (i);
@@ -480,7 +480,7 @@ public:
 		{
 			size_t v_id = g.LinId(k_it.get());
 
-			if (v_id < vtxdist.get(p_id + 1) && v_id >= vtxdist.get(p_id))
+			if (v_id < (size_t)vtxdist.get(p_id + 1) && v_id >= (size_t)vtxdist.get(p_id))
 			{
 				grid_key_dx<dim> key = k_it.get();
 
diff --git a/src/Graph/DistCartesianGraphFactory_unit_test.hpp b/src/Graph/DistCartesianGraphFactory_unit_test.hpp
index a2beccafa74cead9adfac8faf2038af8c5962ebc..34c356bbc686cc22478bbbfcd18a247da1a6449a 100644
--- a/src/Graph/DistCartesianGraphFactory_unit_test.hpp
+++ b/src/Graph/DistCartesianGraphFactory_unit_test.hpp
@@ -3,11 +3,11 @@
 
 #include "Graph/DistCartesianGraphFactory.hpp"
 #include "Graph/map_graph.hpp"
-#include "Packer.hpp"
-#include "Unpacker.hpp"
+#include "Packer_Unpacker/Packer.hpp"
+#include "Packer_Unpacker/Unpacker.hpp"
 #include "SubdomainGraphNodes.hpp"
 
-#define GS_SIZE 4
+#define DGS_SIZE 4
 
 /*!
  *
@@ -89,6 +89,8 @@ BOOST_AUTO_TEST_SUITE (DistCartesianGraphFactory_test)
 
 BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_3D_use)
 {
+	// Boundary conditions, non periodic
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
 
 	// Vcluster
 	Vcluster & vcl = *global_v_cluster;
@@ -97,7 +99,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_3D_use)
 	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
 
 	// Cartesian grid
-	size_t sz[3] = { GS_SIZE, GS_SIZE, GS_SIZE };
+	size_t sz[3] = { DGS_SIZE, DGS_SIZE, DGS_SIZE };
 
 	// Box
 	Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
@@ -106,7 +108,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_3D_use)
 	CartesianGraphFactory<3,Graph_CSR<Point_test<float>,Point_test<float>>> g_factory;
 
 	// Standard graph
-	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.template construct<NO_EDGE, node::id, float, 3 - 1, 0, 1, 2>(sz, box);
+	Graph_CSR<Point_test<float>,Point_test<float>> g = g_factory.template construct<NO_EDGE, node::id, float, 3 - 1, 0, 1, 2>(sz, box, bc);
 
 	// Distribution vector
 	openfpm::vector<idx_t> vtxdist(vcl.getProcessingUnits() + 1);
@@ -132,7 +134,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_3D_use)
 
 	BOOST_REQUIRE_EQUAL(count, gd.getNVertex());
 
-	for(size_t i = vtxdist.get(vcl.getProcessUnitID()), local_i = 0; i < vtxdist.get(vcl.getProcessUnitID()+1); i++, local_i++)
+	for(size_t i = (size_t)vtxdist.get(vcl.getProcessUnitID()), local_i = 0; i < (size_t)vtxdist.get(vcl.getProcessUnitID()+1); i++, local_i++)
 	{
 		BOOST_REQUIRE_EQUAL(gd.vertex(local_i).template get<node::id>(), g.vertex(i).template get<node::id>());
 		BOOST_REQUIRE_EQUAL(gd.getNChilds(local_i), g.getNChilds(i));
@@ -148,6 +150,8 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_3D_use)
 
 BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 {
+	// Boundary conditions, non periodic
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC};
 
 	// Vcluster
 	Vcluster & vcl = *global_v_cluster;
@@ -159,7 +163,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
 
 	// Cartesian grid
-	size_t sz[2] = { GS_SIZE, GS_SIZE };
+	size_t sz[2] = { DGS_SIZE, DGS_SIZE };
 
 	// Box
 	Box<2, float> box( { 0.0, 0.0}, { 1.0, 1.0} );
@@ -168,7 +172,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 	CartesianGraphFactory<2,Graph_CSR<node, node>> g_factory;
 
 	// Standard graph
-	Graph_CSR<node, node> g = g_factory.template construct<NO_EDGE, node::id, float, 2 - 1, 0, 1, 2>(sz, box);
+	Graph_CSR<node, node> g = g_factory.template construct<NO_EDGE, node::id, float, 2 - 1, 0, 1, 2>(sz, box, bc);
 
 	// Distribution vector
 	openfpm::vector<idx_t> vtxdist(vcl.getProcessingUnits() + 1);
@@ -194,7 +198,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 
 	BOOST_REQUIRE_EQUAL(count, gd.getNVertex());
 
-	for(size_t i = vtxdist.get(vcl.getProcessUnitID()), local_i = 0; i < vtxdist.get(vcl.getProcessUnitID()+1); i++, local_i++)
+	for(size_t i = (size_t)vtxdist.get(vcl.getProcessUnitID()), local_i = 0; i < (size_t)vtxdist.get(vcl.getProcessUnitID()+1); i++, local_i++)
 	{
 		BOOST_REQUIRE_EQUAL(gd.vertex(local_i).template get<node::id>(), g.vertex(i).template get<node::id>());
 		BOOST_REQUIRE_EQUAL(gd.getNChilds(local_i), g.getNChilds(i));
@@ -246,14 +250,14 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 
 		size_t size;
 		Unpacker<size_t,HeapMemory>::unpack(mem,size,ps);
-		BOOST_REQUIRE_EQUAL(size, 3);
+		BOOST_REQUIRE_EQUAL(size, 3ul);
 
 		Unpacker<node,HeapMemory>::unpack(mem,v1,ps);
 		Unpacker<node,HeapMemory>::unpack(mem,v2,ps);
 		Unpacker<node,HeapMemory>::unpack(mem,v3,ps);
-		BOOST_REQUIRE_EQUAL(v1.template get<node::id>(), 0);
-		BOOST_REQUIRE_EQUAL(v2.template get<node::id>(), 1);
-		BOOST_REQUIRE_EQUAL(v3.template get<node::id>(), 2);
+		BOOST_REQUIRE_EQUAL(v1.template get<node::id>(), 0ul);
+		BOOST_REQUIRE_EQUAL(v2.template get<node::id>(), 1ul);
+		BOOST_REQUIRE_EQUAL(v3.template get<node::id>(), 2ul);
 	}
 
 	//! [Exchange n vertices packed]
@@ -307,7 +311,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 		size_t size;
 		Unpacker<size_t,HeapMemory>::unpack(mem,size,ps);
 
-		BOOST_REQUIRE_EQUAL(size, 3);
+		BOOST_REQUIRE_EQUAL(size, 3ul);
 
 		v.resize(size);
 
@@ -316,9 +320,9 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 			Unpacker<node,HeapMemory>::unpack(mem,v_n,ps);
 			v.set(i, v_n);
 		}
-		BOOST_REQUIRE_EQUAL(v.get(0).template get<node::id>(), 0);
-		BOOST_REQUIRE_EQUAL(v.get(1).template get<node::id>(), 1);
-		BOOST_REQUIRE_EQUAL(v.get(2).template get<node::id>(), 2);
+		BOOST_REQUIRE_EQUAL(v.get(0).template get<node::id>(), 0ul);
+		BOOST_REQUIRE_EQUAL(v.get(1).template get<node::id>(), 1ul);
+		BOOST_REQUIRE_EQUAL(v.get(2).template get<node::id>(), 2ul);
 	}
 
 	if(vcl.getProcessUnitID() == 0){
@@ -331,7 +335,7 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 		size_t size;
 		Unpacker<size_t,HeapMemory>::unpack(mem,size,ps);
 
-		BOOST_REQUIRE_EQUAL(size, 3);
+		BOOST_REQUIRE_EQUAL(size, 3ul);
 
 		v.resize(size);
 
@@ -341,9 +345,9 @@ BOOST_AUTO_TEST_CASE( DistCartesianGraphFactory_2D_use)
 			v.set(i, v_n);
 		}
 
-		BOOST_REQUIRE_EQUAL(v.get(0).template get<node::id>(), 8);
-		BOOST_REQUIRE_EQUAL(v.get(1).template get<node::id>(), 9);
-		BOOST_REQUIRE_EQUAL(v.get(2).template get<node::id>(), 10);
+		BOOST_REQUIRE_EQUAL(v.get(0).template get<node::id>(), 8ul);
+		BOOST_REQUIRE_EQUAL(v.get(1).template get<node::id>(), 9ul);
+		BOOST_REQUIRE_EQUAL(v.get(2).template get<node::id>(), 10ul);
 	}
 
 
diff --git a/src/Graph/dist_map_graph.hpp b/src/Graph/dist_map_graph.hpp
index ee67614a6cf448ffd0d5c0803358adbe4aa51f96..9adb97b2c898e108ef3628daf43724077935f7f3 100644
--- a/src/Graph/dist_map_graph.hpp
+++ b/src/Graph/dist_map_graph.hpp
@@ -62,8 +62,8 @@
 #include "Vector/map_vector.hpp"
 #include "Graph/map_graph.hpp"
 #include <unordered_map>
-#include "Packer.hpp"
-#include "Unpacker.hpp"
+#include "Packer_Unpacker/Packer.hpp"
+#include "Packer_Unpacker/Unpacker.hpp"
 #include "VCluster.hpp"
 
 #define NO_EDGE -1
@@ -1170,6 +1170,8 @@ public:
 	DistGraph_CSR<V, E, VertexList, EdgeList, Memory> & operator=(const DistGraph_CSR<V, E, VertexList, EdgeList, Memory> & g)
 	{
 		swap(g.duplicate());
+
+		return *this;
 	}
 
 	/*! \brief operator to access the vertex
diff --git a/src/Graph/ids.hpp b/src/Graph/ids.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..63c3dc79fd78ffaea2b101a227e7dfcab3bd99ac
--- /dev/null
+++ b/src/Graph/ids.hpp
@@ -0,0 +1,130 @@
+/*
+ * ids.hpp
+ *
+ *  Created on: Mar 1, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRAPH_IDS_HPP_
+#define SRC_GRAPH_IDS_HPP_
+
+/*! Here we define the remapped-id
+ *
+ * rid, gid and lid are all unsigned long integer, and can be easily interchanged by mistake
+ *  encapsulating avoid that this could happen. The second is readability, from the definition
+ *  of function/structure we see immediately which id parameter accept/store
+ *
+ */
+struct rid
+{
+	idx_t id;
+
+	inline bool operator<=(const rid & r) const
+	{
+		return id <= r.id;
+	}
+
+	inline bool operator<(const rid & r) const
+	{
+		return id < r.id;
+	}
+
+	inline rid operator-(int i) const
+	{
+		struct rid tmp;
+		tmp.id = id - i;
+		return tmp;
+	}
+
+	inline rid operator-(struct rid i) const
+	{
+		struct rid tmp;
+		tmp.id = id - i.id;
+		return tmp;
+	}
+
+	inline rid operator+(int i) const
+	{
+		struct rid tmp;
+		tmp.id = id + i;
+		return tmp;
+	}
+
+	inline rid & operator+=(const rid & i)
+	{
+		id += i.id;
+		return *this;
+	}
+
+	inline rid & operator++()
+	{
+		id++;
+
+		return *this;
+	}
+
+	inline bool operator==(const rid & r) const
+	{
+		return id == r.id;
+	}
+};
+
+/*! Here we define the global-id (see rid above for the rationale)
+ *
+ * rid, gid and lid are all unsigned long integer, and can be easily interchanged by mistake
+ *  encapsulating avoid that this could happen. The second is readability, from the definition
+ *  of function/structure we see immediately which id parameter accept/store
+ *
+ */
+struct gid
+{
+	size_t id;
+};
+
+/*! Here we define the local-id (see rid above for the rationale)
+ *
+ * rid, gid and lid are all unsigned long integer, and can be easily interchanged by mistake
+ *  encapsulating avoid that this could happen. The second is readability, from the definition
+ *  of function/structure we see immediately which id parameter accept/store
+ *
+ */
+struct lid
+{
+	size_t id;
+};
+
+// define hash map for gid rid and lid
+
+namespace std
+{
+	template <>
+	struct hash<rid>
+	{
+		inline std::size_t operator()(const rid& k) const
+		{
+			return k.id;
+		}
+	};
+
+	template <>
+	struct hash<gid>
+	{
+		inline std::size_t operator()(const gid& k) const
+		{
+			return k.id;
+		}
+	};
+
+	template <>
+	struct hash<lid>
+	{
+		inline std::size_t operator()(const lid& k) const
+		{
+			return k.id;
+		}
+	};
+
+}
+
+
+#endif /* SRC_GRAPH_IDS_HPP_ */
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp
index 6c50b172645e028ce88041088ca60a02ffb894c1..14d068520153f6bd6e12adcbb5180c5b5a3695d6 100644
--- a/src/Grid/grid_dist_id.hpp
+++ b/src/Grid/grid_dist_id.hpp
@@ -7,14 +7,16 @@
 #include "VCluster.hpp"
 #include "Space/SpaceBox.hpp"
 #include "util/mathutil.hpp"
+#include "grid_dist_id_iterator_dec.hpp"
 #include "grid_dist_id_iterator.hpp"
+#include "grid_dist_id_iterator_sub.hpp"
 #include "grid_dist_key.hpp"
 #include "NN/CellList/CellDecomposer.hpp"
 #include "util/object_util.hpp"
 #include "memory/ExtPreAlloc.hpp"
-#include "VTKWriter.hpp"
-#include "Packer.hpp"
-#include "Unpacker.hpp"
+#include "VTKWriter/VTKWriter.hpp"
+#include "Packer_Unpacker/Packer.hpp"
+#include "Packer_Unpacker/Unpacker.hpp"
 
 #define SUB_UNIT_FACTOR 64
 
@@ -39,6 +41,8 @@
  * \snippet grid_dist_id_unit_test.hpp Create and access a distributed grid complex
  * ### Synchronize a distributed grid for complex structures
  * \snippet grid_dist_id_unit_test.hpp Synchronized distributed grid complex
+ * ### Usage of a grid dist iterator sub
+ * \snippet grid_dist_id_unit_test.hpp Usage of a sub_grid iterator
  * ### Construct two grid with the same decomposition
  * \snippet grid_dist_id_unit_test.hpp Construct two grid with the same decomposition
  *
@@ -53,10 +57,10 @@ class grid_dist_id
 	Ghost<dim,St> ghost;
 
 	//! Local grids
-	Vcluster_object_array<device_grid> loc_grid;
+	openfpm::vector<device_grid> loc_grid;
 
 	//! Space Decomposition
-	Decomposition & dec;
+	Decomposition dec;
 
 	//! Extension of each grid: Domain and ghost + domain
 	openfpm::vector<GBoxes<device_grid::dims>> gdb_ext;
@@ -79,6 +83,12 @@ class grid_dist_id
 	// Receiving buffer for particles ghost get
 	openfpm::vector<HeapMemory> recv_mem_gg;
 
+	// Grid information object
+	grid_sm<dim,T> ginfo;
+
+	// Grid information object without type
+	grid_sm<dim,void> ginfo_v;
+
 	/*! \brief Call-back to allocate buffer to receive incoming objects (external ghost boxes)
 	 *
 	 * \param msg_i message size required to receive from i
@@ -349,47 +359,99 @@ class grid_dist_id
 		}
 	}
 
-public:
+	/*! \brief Create the grids on memory
+	 *
+	 */
+	void Create()
+	{
+		Box<dim,St> g_rnd_box;
+		for (size_t i = 0 ; i < dim ; i++)	{g_rnd_box.setHigh(i,0.5); g_rnd_box.setLow(i,-0.5);}
+
+		// Get the number of local grid needed
+		size_t n_grid = dec.getNLocalHyperCube();
+
+		// create gdb
+		create_gdb_ext<dim,Decomposition>(gdb_ext,dec,cd_sm);
+
+		// create local grids for each hyper-cube
+		loc_grid.resize(n_grid);
+
+		// Size of the grid on each dimension
+		size_t l_res[dim];
+
+		// Allocate the grids
+		for (size_t i = 0 ; i < n_grid ; i++)
+		{
+
+			SpaceBox<dim,long int> sp_tg = gdb_ext.get(i).GDbox;
 
-	//! constructor
-	grid_dist_id(Decomposition & dec, const size_t (& g_sz)[dim], const Box<dim,St> & domain, const Ghost<dim,St> & ghost)
-	:domain(domain),ghost(ghost),dec(dec),v_cl(*global_v_cluster)
+			// Get the size of the local grid
+			// The boxes indicate the extension of the index the size
+			// is this extension +1
+			// for example a 1D box (interval) from 0 to 3 in one dimension have
+			// the points 0,1,2,3 = so a total of 4 points
+			for (size_t i = 0 ; i < dim ; i++)
+				l_res[i] = (sp_tg.getHigh(i) >= 0)?(sp_tg.getHigh(i)+1):0;
+
+			// Set the dimensions of the local grid
+			loc_grid.get(i).resize(l_res);
+		}
+	}
+
+	/*! \brief The default copy constructor on this class makes no sense and is unsafe; this definition disables it
+	 *
+	 */
+	grid_dist_id(const grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g)
 	{
-		// Increment the reference counter of the decomposition
-		dec.incRef();
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+	}
 
-		check_size(g_sz);
+	void write_ie_boxes(std::string output)
+	{
+		// Write internal ghost box
+		VTKWriter<openfpm::vector<::Box<dim,size_t>>,VECTOR_BOX> vtk_box1;
 
-		// For a 5x5 grid you have 4x4 Cell
-		size_t c_g[dim];
-		for (size_t i = 0 ; i < dim ; i++)	{c_g[i] = g_sz[i]-1;}
+		openfpm::vector< openfpm::vector< ::Box<dim,size_t> > > boxes;
 
-		// Initialize the cell decomposer
-		cd_sm.setDimensions(domain,c_g,0);
+		//! We must carefully ensure that boxes does not reallocate inside the for loop
+		boxes.reserve(ig_box.size());
 
-		// fill the global size of the grid
-		for (size_t i = 0 ; i < dim ; i++)	{this->g_sz[i] = g_sz[i];}
+		//! Write internal ghost in grid units (Color encoded)
+		for (size_t p = 0 ; p < ig_box.size() ; p++)
+		{
+			boxes.add();
 
-		// Create local grid
-		Create();
+			// Create a vector of boxes
+			for (size_t j = 0 ; j < ig_box.get(p).bid.size() ; j++)
+			{
+				boxes.last().add(ig_box.get(p).bid.get(j).box);
+			}
 
-		// Calculate ghost boxes
-		dec.calculateGhostBoxes();
+			vtk_box1.add(boxes.last());
+		}
+		vtk_box1.write(output + std::string("internal_ghost_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
 	}
 
-	/*! \brief Constrcuctor
+    /*! \brief Initialize the Cell decomposer of the grid enforcing perfect overlap of the cells
 	 *
-	 * \param g_sz array with the grid size on each dimension
-	 * \param domain domain where this grid live
-	 * \param g Ghost
+	 * \param cd_old the CellDecomposer we are trying to match
+	 * \param ext extension of the domain
 	 *
 	 */
-	grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,St> & g)
-	:domain(domain),ghost(g),dec(*new Decomposition(*global_v_cluster)),v_cl(*global_v_cluster)
+	inline void InitializeCellDecomposer(const CellDecomposer_sm<dim,St> & cd_old, const Box<dim,size_t> & ext)
 	{
-		// Increment the reference counter of the decomposition
-		dec.incRef();
+		// Initialize the cell decomposer
+		cd_sm.setDimensions(cd_old,ext);
+	}
 
+    /*! \brief Initialize the Cell decomposer of the grid
+	 *
+	 *
+	 */
+	inline void InitializeCellDecomposer(const size_t (& g_sz)[dim])
+	{
 		// check that the grid has valid size
 		check_size(g_sz);
 
@@ -399,7 +461,15 @@ public:
 
 		// Initialize the cell decomposer
 		cd_sm.setDimensions(domain,c_g,0);
+	}
 
+	/*! \brief Initialize the grid
+	 *
+	 * \param g_sz Global size of the grid
+	 *
+	 */
+	inline void InitializeDecomposition(const size_t (& g_sz)[dim])
+	{
 		// fill the global size of the grid
 		for (size_t i = 0 ; i < dim ; i++)	{this->g_sz[i] = g_sz[i];}
 
@@ -414,85 +484,335 @@ public:
 		for (size_t i = 0 ; i < dim ; i++)
 		{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/dim));}
 
-		// Create the sub-domains
-		dec.setParameters(div,domain,ghost);
+		// boundary conditions
+		size_t bc[dim];
+		for (size_t i = 0 ; i < dim ; i++)
+			bc[i] = NON_PERIODIC;
 
+		// Create the sub-domains
+		dec.setParameters(div,domain,bc,ghost);
 		dec.decompose();
 
+		// Calculate ghost boxes
+		dec.calculateGhostBoxes();
+	}
+
+	/*! \brief Initialize the grid
+	 *
+	 * \param g_sz Global size of the grid
+	 *
+	 */
+	inline void InitializeStructures(const size_t (& g_sz)[dim])
+	{
+		// fill the global size of the grid
+		for (size_t i = 0 ; i < dim ; i++)	{this->g_sz[i] = g_sz[i];}
+
 		// Create local grid
 		Create();
+	}
 
-		// Calculate ghost boxes
-		dec.calculateGhostBoxes();
+protected:
+
+	/*! \brief Get the point where it start the origin of the grid in the sub-domain i
+	 *
+	 * \return the point
+	 *
+	 */
+	Point<dim,St> getOffset(size_t i)
+	{
+		return Point<dim,St>(gdb_ext.get(i).origin) * cd_sm.getCellBox().getP2();
 	}
 
-	/*! \brief Get the object that store the information about the decomposition
+	/*! \brief Given a local sub-domain i with a local grid Domain + ghost return the part of the local grid that is domain
 	 *
-	 * \return the decomposition object
+	 * \return the Box defining the domain in the local grid
 	 *
 	 */
-	Decomposition & getDecomposition()
+	Box<dim,size_t> getDomain(size_t i)
 	{
-		return dec;
+		return gdb_ext.get(i).Dbox;
 	}
 
-	/*! \brief Return the cell decomposer
+public:
+
+	// Which kind of grid the structure store
+	typedef device_grid d_grid;
+
+	// Decomposition used
+	typedef Decomposition decomposition;
+
+	// value_type
+	typedef T value_type;
+
+	/*! \brief Return the total number of points in the grid
 	 *
-	 * \return the cell decomposer
+	 * \return number of points
 	 *
 	 */
-	const CellDecomposer_sm<dim,St> & getCellDecomposer()
+	size_t size() const
 	{
-		return cd_sm;
+		return ginfo_v.size();
 	}
 
-	/*! \brief Create the grids on memory
+	/*! \brief Return the total number of points in the grid
+	 *
+	 * \param i direction
+	 *
+	 * \return number of points on direction i
 	 *
 	 */
-	void Create()
+	size_t size(size_t i) const
 	{
-		Box<dim,St> g_rnd_box;
-		for (size_t i = 0 ; i < dim ; i++)	{g_rnd_box.setHigh(i,0.5); g_rnd_box.setLow(i,-0.5);}
+		return ginfo_v.size(i);
+	}
 
-		// Get the number of local grid needed
-		size_t n_grid = dec.getNLocalHyperCube();
+	static inline Ghost<dim,float> convert_ghost(const Ghost<dim,long int> & gd,const CellDecomposer_sm<dim,St> & cd_sm)
+	{
+		Ghost<dim,float> gc;
 
-		// create local grids for each hyper-cube
-		loc_grid = v_cl.allocate<device_grid>(n_grid);
+		// get the grid spacing
+		Box<dim,St> sp = cd_sm.getCellBox();
 
-		// Size of the grid on each dimension
-		size_t l_res[dim];
+		// enlarge 0.001 of the spacing
+		sp.magnify_fix_P1(1.1);
 
-		// Allocate the grids
-		for (size_t i = 0 ; i < n_grid ; i++)
+		// set the ghost
+		for (size_t i = 0 ; i < dim ; i++)
 		{
-			gdb_ext.add();
+			gc.setLow(i,-sp.getHigh(i));
+			gc.setHigh(i,sp.getHigh(i));
+		}
 
-			// Get the local hyper-cube
-			SpaceBox<dim,St> sp = dec.getLocalHyperCube(i);
-			SpaceBox<dim,St> sp_g = dec.getSubDomainWithGhost(i);
+		return gc;
+	}
 
-			// Convert from SpaceBox<dim,St> to SpaceBox<dim,long int>
-			SpaceBox<dim,long int> sp_t = cd_sm.convertDomainSpaceIntoGridUnits(sp);
-			SpaceBox<dim,long int> sp_tg = cd_sm.convertDomainSpaceIntoGridUnits(sp_g);
+	/*! \brief This constructor is special, it constructs an expanded grid that perfectly overlaps with the previous
+	 *
+	 * The key-word here is "perfectly overlap". Using the default constructor you could create
+	 * something similar, but because of rounding-off error it can happen that it is not perfectly overlapping
+	 *
+	 * \param g previous grid
+	 * \param ext extension of the grid (must be positive on every direction)
+	 *
+	 */
+	grid_dist_id(const grid_dist_id<dim,St,T,Decomposition,Memory,device_grid> & g, Box<dim,size_t> ext)
+	:ghost(g.ghost),dec(g.dec),v_cl(*global_v_cluster)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
 
-			//! Save the origin of the sub-domain of the local grid
-			gdb_ext.last().origin = sp_tg.getP1();
+		this->dec.incRef();
 
-			// save information about the local grid: domain box seen inside the domain + ghost box (see GDBoxes for a visual meaning)
-			// and where the GDBox start, or the origin of the local grid (+ghost) in global coordinate
-			gdb_ext.last().Dbox = sp_t;
-			gdb_ext.last().Dbox -= sp_tg.getP1();
+		InitializeCellDecomposer(g.cd_sm,ext);
 
-			// center to zero
-			sp_tg -= sp_tg.getP1();
+		// Extend the grid by the extension part and calculate the domain
 
-			// Get the size of the local grid
-			for (size_t i = 0 ; i < dim ; i++) {l_res[i] = (sp_tg.getHigh(i) >= 0)?(sp_tg.getHigh(i)+1):0;}
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			g_sz[i] = g.g_sz[i] + ext.getLow(i) + ext.getHigh(i);
 
-			// Set the dimensions of the local grid
-			loc_grid.get(i).resize(l_res);
+			this->domain.setLow(i,g.domain.getLow(i) - ext.getLow(i) * g.spacing(i));
+			this->domain.setHigh(i,g.domain.getHigh(i) + ext.getHigh(i) * g.spacing(i));
 		}
+
+		InitializeStructures(g_sz);
+	}
+
+    //! constructor
+    grid_dist_id(const Decomposition & dec, const size_t (& g_sz)[dim], const Box<dim,St> & domain, const Ghost<dim,St> & ghost)
+    :domain(domain),ghost(ghost),dec(dec),v_cl(*global_v_cluster)
+	{
+		// Increment the reference counter of the decomposition
+		this->dec.incRef();
+
+		InitializeCellDecomposer(g_sz);
+		InitializeStructures(g_sz);
+	}
+
+    //! constructor
+    grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim], const Box<dim,St> & domain, const Ghost<dim,St> & ghost)
+    :domain(domain),ghost(ghost),dec(dec),v_cl(*global_v_cluster)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+
+		InitializeCellDecomposer(g_sz);
+		InitializeStructures(g_sz);
+	}
+
+    /*! \brief Get the spacing of the grid in direction i
+     *
+     * \return the spacing
+     *
+     */
+    inline St spacing(size_t i) const
+    {
+    	return cd_sm.getCellBox().getHigh(i);
+    }
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz array with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost given in grid units
+	 *
+	 */
+	grid_dist_id(const Decomposition & dec, const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,long int> & g)
+	:domain(domain),dec(dec),v_cl(*global_v_cluster),ginfo(g_sz),ginfo_v(g_sz)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+
+		InitializeCellDecomposer(g_sz);
+
+		ghost = convert_ghost(g,cd_sm);
+
+		// Initialize structures
+		InitializeStructures(g_sz);
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz array with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost given in grid units
+	 *
+	 */
+	grid_dist_id(Decomposition && dec, const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,long int> & g)
+	:domain(domain),dec(dec),v_cl(*global_v_cluster),ginfo(g_sz),ginfo_v(g_sz)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+		InitializeCellDecomposer(g_sz);
+
+		ghost = convert_ghost(g,cd_sm);
+
+		// Initialize structures
+		InitializeStructures(g_sz);
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz array with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost
+	 *
+	 */
+	grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,St> & g)
+	:domain(domain),ghost(g),dec(*global_v_cluster),v_cl(*global_v_cluster),ginfo(g_sz),ginfo_v(g_sz)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+		// Increment the reference counter of the decomposition
+		this->dec.incRef();
+
+		InitializeCellDecomposer(g_sz);
+		InitializeDecomposition(g_sz);
+		InitializeStructures(g_sz);
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz array with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost given in grid units
+	 *
+	 */
+	grid_dist_id(const size_t (& g_sz)[dim],const Box<dim,St> & domain, const Ghost<dim,long int> & g)
+	:domain(domain),dec(*global_v_cluster),v_cl(*global_v_cluster),ginfo(g_sz),ginfo_v(g_sz)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,GRID_DIST_EVENT,4);
+#endif
+		InitializeCellDecomposer(g_sz);
+
+		ghost = convert_ghost(g,cd_sm);
+
+		InitializeDecomposition(g_sz);
+		// Initialize structures
+		InitializeStructures(g_sz);
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz std::vector with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost given in grid units
+	 *
+	 */
+	grid_dist_id(const Decomposition & dec, const std::vector<size_t> & g_sz,const Box<dim,St> & domain, const Ghost<dim,long int> & g)
+	:grid_dist_id(dec,*static_cast<const size_t(*) [dim]>(static_cast<const void*>(&g_sz[0])),domain,g)
+	{
+
+	}
+
+	/*! \brief Constructor
+	 *
+	 * \param g_sz std::vector with the grid size on each dimension
+	 * \param domain domain where this grid live
+	 * \param g Ghost given in grid units
+	 *
+	 */
+	grid_dist_id(Decomposition && dec,const std::vector<size_t> & g_sz,const Box<dim,St> & domain, const Ghost<dim,long int> & g)
+	:grid_dist_id(dec, *static_cast<const size_t(*) [dim]>(static_cast<const void*>(&g_sz[0])) , domain, g)
+	{
+	}
+
+	/*! \brief Get an object containing the grid information
+	 *
+	 * \return an information object about this grid
+	 *
+	 */
+	const grid_sm<dim,T> & getGridInfo() const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return ginfo;
+	}
+
+	/*! \brief Get an object containing the grid information without type
+	 *
+	 * \return an information object about this grid
+	 *
+	 */
+	const grid_sm<dim,void> & getGridInfoVoid() const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return ginfo_v;
+	}
+
+	/*! \brief Get the object that store the information about the decomposition
+	 *
+	 * \return the decomposition object
+	 *
+	 */
+	Decomposition & getDecomposition()
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return dec;
+	}
+
+	/*! \brief Return the cell decomposer
+	 *
+	 * \return the cell decomposer
+	 *
+	 */
+	const CellDecomposer_sm<dim,St> & getCellDecomposer()
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return cd_sm;
 	}
 
 	/*! \brief Check that the global grid key is inside the grid domain
@@ -502,6 +822,9 @@ public:
 	 */
 	bool isInside(const grid_key_dx<dim> & gk) const
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
 		for (size_t i = 0 ; i < dim ; i++)
 		{
 			if (gk.get(i) < 0 || gk.get(i) >= (long int)g_sz[i])
@@ -511,6 +834,39 @@ public:
 		return true;
 	}
 
+	/*! \brief Get the size of local domain grids
+	 *
+	 * \return The size of the local domain
+	 *
+	 */
+	size_t getLocalDomainSize()
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		size_t total = 0;
+
+		for (size_t i = 0 ; i < gdb_ext.size() ; i++)
+		{
+			total += gdb_ext.get(i).Dbox.getVolumeKey();
+		}
+
+		return total;
+	}
+
+	/*! \brief It returns the information about the local grids
+	 *
+	 * \return The information about the local grids
+	 *
+	 */
+	const openfpm::vector<GBoxes<device_grid::dims>> & getLocalGridsInfo()
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return gdb_ext;
+	}
+
 	/*! \brief It return an iterator that span the full grid domain (each processor span its local domain)
 	 *
 	 * \return the iterator
@@ -518,7 +874,16 @@ public:
 	 */
 	grid_dist_iterator<dim,device_grid,FREE> getDomainIterator()
 	{
-		grid_dist_iterator<dim,device_grid,FREE> it(loc_grid,gdb_ext);
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+
+		grid_key_dx<dim> stop(ginfo_v.getSize());
+		grid_key_dx<dim> one;
+		one.one();
+		stop = stop - one;
+
+		grid_dist_iterator<dim,device_grid,FREE> it(loc_grid,gdb_ext,stop);
 
 		return it;
 	}
@@ -527,21 +892,60 @@ public:
 	 *
 	 *
 	 */
-	grid_dist_iterator<dim,device_grid,FIXED> getDomainGhostIterator()
+	grid_dist_iterator<dim,device_grid,FIXED> getDomainGhostIterator() const
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
 		grid_dist_iterator<dim,device_grid,FIXED> it(loc_grid,gdb_ext);
 
 		return it;
 	}
 
+	/*! \brief It return an iterator that span the grid domain only in the specified
+	 * part
+	 *
+	 * The key spanned are the one inside the box spanned by the start point and the end
+	 * point included
+	 *
+	 * \param start point
+	 * \param stop point
+	 *
+	 */
+	grid_dist_iterator_sub<dim,device_grid> getSubDomainIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop) const
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		grid_dist_iterator_sub<dim,device_grid> it(start,stop,loc_grid,gdb_ext);
+
+		return it;
+	}
+
+	/*! \brief It return an iterator that span the grid domain only in the specified
+	 * part
+	 *
+	 * The key spanned are the one inside the box spanned by the start point and the end
+	 * point included
+	 *
+	 * \param start point
+	 * \param stop point
+	 *
+	 */
+	grid_dist_iterator_sub<dim,device_grid> getSubDomainIterator(const long int (& start)[dim], const long int (& stop)[dim]) const
+	{
+		grid_dist_iterator_sub<dim,device_grid> it(grid_key_dx<dim>(start),grid_key_dx<dim>(stop),loc_grid,gdb_ext);
+
+		return it;
+	}
+
 	//! Destructor
 	~grid_dist_id()
 	{
+#ifdef SE_CLASS2
+		check_delete(this);
+#endif
 		dec.decRef();
-
-		// if we reach the 0, destroy the object
-		if (dec.ref() == 0)
-			delete &dec;
 	}
 
 	/*! \brief Get the Virtual Cluster machine
@@ -549,12 +953,38 @@ public:
 	 * \return the Virtual cluster machine
 	 *
 	 */
-
 	Vcluster & getVC()
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
 		return v_cl;
 	}
 
+	/*! \brief Indicate that this grid is not staggered
+	 *
+	 * \return false
+	 *
+	 */
+	bool is_staggered()
+	{
+		return false;
+	}
+
+	/*! \brief Get the reference of the selected element
+	 *
+	 * \param p property to get (is an integer)
+	 * \param v1 grid_key that identify the element in the grid
+	 *
+	 */
+	template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) const -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return loc_grid.get(v1.getSub()).template get<p>(v1.getKey());
+	}
+
 	/*! \brief Get the reference of the selected element
 	 *
 	 * \param p property to get (is an integer)
@@ -563,6 +993,9 @@ public:
 	 */
 	template <unsigned int p>inline auto get(const grid_dist_key_dx<dim> & v1) -> typename std::add_lvalue_reference<decltype(loc_grid.get(v1.getSub()).template get<p>(v1.getKey()))>::type
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
 		return loc_grid.get(v1.getSub()).template get<p>(v1.getKey());
 	}
 
@@ -699,6 +1132,10 @@ public:
 	 */
 	template<int... prp> void ghost_get()
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+
 		// Sending property object
 		typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
 
@@ -858,6 +1295,16 @@ public:
 		}
 	}
 
+	/*! \brief Get the spacing on each dimension
+	 *
+	 * \return the spacing of the grid on each dimension
+	 *
+	 */
+	Point<dim,St> getSpacing()
+	{
+		return cd_sm.getCellBox().getP2();
+	}
+
 	/*! \brief Convert a g_dist_key_dx into a global key
 	 *
 	 * \see grid_dist_key_dx
@@ -868,6 +1315,9 @@ public:
 	 */
 	inline grid_key_dx<dim> getGKey(const grid_dist_key_dx<dim> & k)
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
 		// Get the sub-domain id
 		size_t sub_id = k.getSub();
 
@@ -889,41 +1339,59 @@ public:
 	 */
 	bool write(std::string output)
 	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+
 		// Create a writer and write
 		VTKWriter<boost::mpl::pair<device_grid,float>,VECTOR_GRIDS> vtk_g;
 		for (size_t i = 0 ; i < loc_grid.size() ; i++)
 		{
-			Point<dim,St> offset = Point<dim,St>(gdb_ext.get(i).origin) * cd_sm.getCellBox().getP2();
+			Point<dim,St> offset = getOffset(i);
 			vtk_g.add(loc_grid.get(i),offset,cd_sm.getCellBox().getP2(),gdb_ext.get(i).Dbox);
 		}
 		vtk_g.write(output + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk");
 
-		// Write internal ghost box
-		VTKWriter<openfpm::vector<::Box<dim,size_t>>,VECTOR_BOX> vtk_box1;
-
-		openfpm::vector< openfpm::vector< ::Box<dim,size_t> > > boxes;
+		write_ie_boxes(output);
 
-		//! Carefully we have to ensure that boxes does not reallocate inside the for loop
-		boxes.reserve(ig_box.size());
-
-		//! Write internal ghost in grid units (Color encoded)
-		for (size_t p = 0 ; p < ig_box.size() ; p++)
-		{
-			boxes.add();
+		return true;
+	}
 
-			// Create a vector of boxes
-			for (size_t j = 0 ; j < ig_box.get(p).bid.size() ; j++)
-			{
-				boxes.last().add(ig_box.get(p).bid.get(j).box);
-			}
+	/*! \brief Get the i sub-domain grid
+	 *
+	 * \param i sub-domain
+	 *
+	 * \return local grid
+	 *
+	 */
+	device_grid & get_loc_grid(size_t i)
+	{
+		return loc_grid.get(i);
+	}
 
-			vtk_box1.add(boxes.last());
-		}
-		vtk_box1.write(output + std::string("internal_ghost_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
+	/*! \brief Return the number of local grid
+	 *
+	 * \return the number of local grid
+	 *
+	 */
+	size_t getN_loc_grid()
+	{
+		return loc_grid.size();
+	}
 
-		vtk_g.write("vtk_grids.vtk");
 
-		return true;
+	/*! \brief It returns the id of the structure in the allocation list
+	 *
+	 * \see print_alloc and SE_CLASS2
+	 *
+	 */
+	long int who()
+	{
+#ifdef SE_CLASS2
+		return check_whoami(this,8);
+#else
+			return -1;
+#endif
 	}
 };
 
diff --git a/src/Grid/grid_dist_id_iterator.hpp b/src/Grid/grid_dist_id_iterator.hpp
index f7b64159c3773b213d6740fd4cd5f8e5ec878d94..7c32710e6417b25cf0c46335f2e20616705ec02d 100644
--- a/src/Grid/grid_dist_id_iterator.hpp
+++ b/src/Grid/grid_dist_id_iterator.hpp
@@ -85,7 +85,7 @@ class grid_dist_iterator<dim,device_grid,FREE>
 	size_t g_c;
 
 	//! List of the grids we are going to iterate
-	Vcluster_object_array<device_grid> & gList;
+	const openfpm::vector<device_grid> & gList;
 
 	//! Extension of each grid: domain and ghost + domain
 	openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext;
@@ -93,8 +93,8 @@ class grid_dist_iterator<dim,device_grid,FREE>
 	//! Actual iterator
 	grid_key_dx_iterator_sub<dim> a_it;
 
-	//! margin of the grid iterator
-	size_t m;
+	//! stop point (is the grid size)
+	grid_key_dx<dim> stop;
 
 	/*! \brief from g_c increment g_c until you find a valid grid
 	 *
@@ -102,40 +102,24 @@ class grid_dist_iterator<dim,device_grid,FREE>
 	void selectValidGrid()
 	{
 		// When the grid has size 0 potentially all the other informations are garbage
-		while (g_c < gList.size() && (gList[g_c].size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++;
+		while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++;
 
 		// get the next grid iterator
 		if (g_c < gList.size())
 		{
-			a_it.reinitialize(gList[g_c].getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
+			a_it.reinitialize(gList.get(g_c).getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
 		}
 	}
 
 	public:
 
-	/*! \brief Copy operator=
-	*
-	* \param tmp iterator to copy
-	*
-	*/
-	grid_dist_iterator<dim,device_grid,FREE> & operator=(const grid_dist_iterator<dim,device_grid,FREE> & tmp)
-	{
-		g_c = tmp.g_c;
-		gList = tmp.gList;
-		gdb_ext = tmp.gdb_ext;
-		a_it.reinitialize(tmp.a_it);
-		m = tmp.m;
-
-		return *this;
-	}
-
 	/*! \brief Constructor of the distributed grid iterator
 	 *
 	 * \param gk std::vector of the local grid
 	 *
 	 */
-	grid_dist_iterator(Vcluster_object_array<device_grid> & gk, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
-	:g_c(0),gList(gk),gdb_ext(gdb_ext),m(0)
+	grid_dist_iterator(const openfpm::vector<device_grid> & gk, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, grid_key_dx<dim> stop)
+	:g_c(0),gList(gk),gdb_ext(gdb_ext),stop(stop)
 	{
 		// Initialize the current iterator
 		// with the first grid
@@ -196,6 +180,34 @@ class grid_dist_iterator<dim,device_grid,FREE>
 	{
 		return grid_dist_key_dx<dim>(g_c,a_it.get());
 	}
+
+	/*! \brief it return the stop point of the iterator
+	 *
+	 * The stop point of the iterator is just the grid size
+	 *
+	 * \return the stop point
+	 *
+	 */
+	inline grid_key_dx<dim> getStop() const
+	{
+		return stop;
+	}
+
+	/*! \brief it return the start point of the iterator
+	 *
+	 * The start point of the iterator is the point with all coordinates zeros
+	 *
+	 * \return the start point
+	 *
+	 */
+	inline grid_key_dx<dim> getStart() const
+	{
+		grid_key_dx<dim> start;
+
+		start.zero();
+
+		return start;
+	}
 };
 
 
@@ -215,7 +227,7 @@ class grid_dist_iterator<dim,device_grid,FIXED>
 	size_t g_c;
 
 	//! List of the grids we are going to iterate
-	Vcluster_object_array<device_grid> & gList;
+	const openfpm::vector<device_grid> & gList;
 
 	//! Extension of each grid: domain and ghost + domain
 	const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext;
@@ -229,12 +241,12 @@ class grid_dist_iterator<dim,device_grid,FIXED>
 	void selectValidGrid()
 	{
 		// When the grid has size 0 potentially all the other informations are garbage
-		while (g_c < gList.size() && (gList[g_c].size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++;
+		while (g_c < gList.size() && (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false ) ) g_c++;
 
 		// get the next grid iterator
 		if (g_c < gList.size())
 		{
-			a_it.reinitialize(gList[g_c].getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
+			a_it.reinitialize(gList.get(g_c).getIterator(gdb_ext.get(g_c).Dbox.getKP1(),gdb_ext.get(g_c).Dbox.getKP2()));
 		}
 	}
 
@@ -260,7 +272,7 @@ class grid_dist_iterator<dim,device_grid,FIXED>
 	 * \param gk std::vector of the local grid
 	 *
 	 */
-	grid_dist_iterator(Vcluster_object_array<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
+	grid_dist_iterator(const openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
 	:g_c(0),gList(gk),gdb_ext(gdb_ext)
 	{
 		// Initialize the current iterator
diff --git a/src/Grid/grid_dist_id_iterator_dec.hpp b/src/Grid/grid_dist_id_iterator_dec.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1a541a072d17ff44f360190364822f8d1aae9537
--- /dev/null
+++ b/src/Grid/grid_dist_id_iterator_dec.hpp
@@ -0,0 +1,262 @@
+/*
+ * grid_dist_id_iterator_dec.hpp
+ *
+ *  Created on: Jan 27, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_GRID_DIST_ID_ITERATOR_DEC_HPP_
+#define SRC_GRID_GRID_DIST_ID_ITERATOR_DEC_HPP_
+
+#include "grid_dist_id_iterator.hpp"
+#include "grid_dist_util.hpp"
+
+/*! \brief Given the decomposition it create an iterator
+ *
+ * Iterator across the local elements of the distributed grid
+ *
+ * \tparam dec Decomposition type
+ *
+ */
+template<typename Decomposition>
+class grid_dist_id_iterator_dec
+{
+	//! grid list counter
+	size_t g_c;
+
+	//! Extension of each grid: domain and ghost + domain
+	openfpm::vector<GBoxes<Decomposition::dims>> gdb_ext;
+
+	//! Actual iterator
+	grid_key_dx_iterator_sub<Decomposition::dims> a_it;
+
+	//! start key
+	grid_key_dx<Decomposition::dims> start;
+
+	//! stop key
+	grid_key_dx<Decomposition::dims> stop;
+
+	//! Spacing
+	typename Decomposition::stype spacing[Decomposition::dims];
+
+
+	/*! \brief compute the subset where it has to iterate
+	 *
+	 * \param gc Actual grid (note: the body currently uses the member g_c, not this parameter — verify)
+	 * \param start_c adjusted start point for the grid g_c
+	 * \param stop_c adjusted stop point for the grid g_c
+	 *
+	 * \return false if the sub-set does not contain points
+	 *
+	 */
+	bool compute_subset(size_t gc, grid_key_dx<Decomposition::dims> & start_c, grid_key_dx<Decomposition::dims> & stop_c)
+	{
+		// Intersect the grid keys
+
+		for (size_t i = 0 ; i < Decomposition::dims ; i++)
+		{
+			long int start_p = gdb_ext.get(g_c).Dbox.getP1().get(i) + gdb_ext.get(g_c).origin.get(i);
+			long int stop_p = gdb_ext.get(g_c).Dbox.getP2().get(i) + gdb_ext.get(g_c).origin.get(i);
+			if (start.get(i) <= start_p)
+				start_c.set_d(i,gdb_ext.get(g_c).Dbox.getP1().get(i));
+			else if (start.get(i) <= stop_p)
+				start_c.set_d(i,start.get(i) - gdb_ext.get(g_c).origin.get(i));
+			else
+				return false;
+
+			if (stop.get(i) >= stop_p)
+				stop_c.set_d(i,gdb_ext.get(g_c).Dbox.getP2().get(i));
+			else if (stop.get(i) >= start_p)
+				stop_c.set_d(i,stop.get(i) - gdb_ext.get(g_c).origin.get(i));
+			else
+				return false;
+		}
+
+		return true;
+	}
+
+	/*! \brief from g_c increment g_c until you find a valid grid
+	 *
+	 */
+	void selectValidGrid()
+	{
+		// start and stop for the subset grid
+		grid_key_dx<Decomposition::dims> start_c;
+		grid_key_dx<Decomposition::dims> stop_c;
+
+		// When the grid has size 0 potentially all the other informations are garbage
+		while (g_c < gdb_ext.size() &&
+			   (gdb_ext.get(g_c).Dbox.isValid() == false || compute_subset(g_c,start_c,stop_c) == false ))
+		{g_c++;}
+
+		// get the next grid iterator
+		if (g_c < gdb_ext.size())
+		{
+			// Calculate the resolution of the local grid
+			size_t sz[Decomposition::dims];
+			for (size_t i = 0 ; i < Decomposition::dims ; i++)
+				sz[i] = gdb_ext.get(g_c).GDbox.getP2()[i] + 1;
+
+			grid_sm<Decomposition::dims,void> g_sm(sz);
+			a_it.reinitialize(grid_key_dx_iterator_sub<Decomposition::dims>(g_sm,start_c,stop_c));
+		}
+	}
+
+	/*! \brief Get the actual key
+	 *
+	 * \return the actual key
+	 *
+	 */
+	inline grid_dist_key_dx<Decomposition::dims> get_int()
+	{
+		return grid_dist_key_dx<Decomposition::dims>(g_c,a_it.get());
+	}
+
+	public:
+
+	/*! \brief Copy operator=
+	*
+	* \param tmp iterator to copy
+	*
+	*/
+	grid_dist_id_iterator_dec<Decomposition> & operator=(const grid_dist_id_iterator_dec<Decomposition> & tmp)
+	{
+		g_c = tmp.g_c;
+		gdb_ext = tmp.gdb_ext;
+		a_it.reinitialize(tmp.a_it);
+
+		start = tmp.start;
+		stop = tmp.stop;
+
+		return *this;
+	}
+
+	/*! \brief Copy constructor
+	*
+	* \param tmp iterator to copy
+	*
+	*/
+	grid_dist_id_iterator_dec(const grid_dist_id_iterator_dec<Decomposition> & tmp)
+	{
+		this->operator=(tmp);
+	}
+
+	/*! \brief Constructor of the distributed grid iterator
+	 *
+	 * \param dec Decomposition
+	 * \param sz size of the grid
+	 *
+	 */
+	grid_dist_id_iterator_dec(Decomposition & dec, const size_t (& sz)[Decomposition::dims])
+	:g_c(0)
+	{
+		// Initialize start and stop
+		start.zero();
+		for (size_t i = 0 ; i < Decomposition::dims ; i++) stop.set_d(i,sz[i]-1);
+
+		// From the decomposition construct gdb_ext
+		create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);
+
+		// Initialize the current iterator
+		// with the first grid
+		selectValidGrid();
+	}
+
+	/*! \brief Constructor of the distributed grid iterator
+	 *
+	 * \param dec Decomposition
+	 * \param sz size of the grid
+	 * \param start point
+	 * \param stop point
+	 *
+	 */
+	grid_dist_id_iterator_dec(Decomposition & dec, const size_t (& sz)[Decomposition::dims], grid_key_dx<Decomposition::dims> start, grid_key_dx<Decomposition::dims> stop)
+	:g_c(0),start(start),stop(stop)
+	{
+		// From the decomposition construct gdb_ext
+		create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);
+
+		// Initialize the current iterator
+		// with the first grid
+		selectValidGrid();
+	}
+
+	// Destructor
+	~grid_dist_id_iterator_dec()
+	{
+	}
+
+	/*! \brief Get the next element
+	 *
+	 * \return the next grid_key
+	 *
+	 */
+
+	inline grid_dist_id_iterator_dec<Decomposition> operator++()
+	{
+		++a_it;
+
+		// check if a_it is at the end
+
+		if (a_it.isNext() == true)
+			return *this;
+		else
+		{
+			// switch to the new grid
+			g_c++;
+
+			selectValidGrid();
+		}
+
+		return *this;
+	}
+
+	/*! \brief Check if there is the next element
+	 *
+	 * \return true if there is the next, false otherwise
+	 *
+	 */
+	inline bool isNext()
+	{
+		// If there are no other grid stop
+
+		if (g_c >= gdb_ext.size())
+			return false;
+
+		return true;
+	}
+
+	/*! \brief Get the spacing of the grid
+	 *
+	 * \param i
+	 *
+	 */
+	inline typename Decomposition::stype getSpacing(size_t i)
+	{
+		return spacing[i];
+	}
+
+	/*! \brief Get the actual global key of the grid
+	 *
+	 *
+	 * \return the global position in the grid
+	 *
+	 */
+	inline grid_key_dx<Decomposition::dims> get()
+	{
+		const grid_dist_key_dx<Decomposition::dims> & k = get_int();
+
+		// Get the sub-domain id
+		size_t sub_id = k.getSub();
+
+		grid_key_dx<Decomposition::dims> k_glob = k.getKey();
+
+		// shift
+		k_glob = k_glob + gdb_ext.get(sub_id).origin;
+
+		return k_glob;
+	}
+};
+
+
+#endif /* SRC_GRID_GRID_DIST_ID_ITERATOR_DEC_HPP_ */
diff --git a/src/Grid/grid_dist_id_iterator_sub.hpp b/src/Grid/grid_dist_id_iterator_sub.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..244fa2e589faf36f5b46cf65c43de329d0e6a780
--- /dev/null
+++ b/src/Grid/grid_dist_id_iterator_sub.hpp
@@ -0,0 +1,254 @@
+/*
+ * grid_dist_id_iterator_sub.hpp
+ *
+ *  Created on: Oct 14, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_GRID_DIST_ID_ITERATOR_SUB_HPP_
+#define SRC_GRID_GRID_DIST_ID_ITERATOR_SUB_HPP_
+
+
+/*! \brief Distributed grid iterator
+ *
+ * Iterator across the local elements of the distributed grid
+ *
+ * \tparam dim dimensionality of the grid
+ * \tparam device_grid type of basic grid
+ * \tparam impl implementation
+ *
+ */
+template<unsigned int dim, typename device_grid>
+class grid_dist_iterator_sub
+{
+	// sub_set of the grid where to iterate
+	struct sub_set
+	{
+		//! start point where iterate
+		grid_key_dx<dim> start;
+		//! stop point where to iterate
+		grid_key_dx<dim> stop;
+	};
+
+	//! grid list counter
+	size_t g_c;
+
+	//! List of the grids we are going to iterate
+	const openfpm::vector<device_grid> & gList;
+
+	//! Extension of each grid: domain and ghost + domain
+	const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext;
+
+	//! Actual iterator
+	grid_key_dx_iterator_sub<dim> a_it;
+
+	//! start key
+	grid_key_dx<dim> start;
+
+	//! stop key
+	grid_key_dx<dim> stop;
+
+	/*! \brief compute the subset where it has to iterate
+	 *
+	 * \param gc Actual grid (note: the body currently uses the member g_c, not this parameter — verify)
+	 * \param start_c adjusted start point for the grid g_c
+	 * \param stop_c adjusted stop point for the grid g_c
+	 *
+	 * \return false if the sub-set does not contain points
+	 *
+	 */
+	bool compute_subset(size_t gc, grid_key_dx<dim> & start_c, grid_key_dx<dim> & stop_c)
+	{
+		// Intersect the grid keys
+
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			long int start_p = gdb_ext.get(g_c).Dbox.getP1().get(i) + gdb_ext.get(g_c).origin.get(i);
+			long int stop_p = gdb_ext.get(g_c).Dbox.getP2().get(i) + gdb_ext.get(g_c).origin.get(i);
+			if (start.get(i) <= start_p)
+				start_c.set_d(i,gdb_ext.get(g_c).Dbox.getP1().get(i));
+			else if (start.get(i) <= stop_p)
+				start_c.set_d(i,start.get(i) - gdb_ext.get(g_c).origin.get(i));
+			else
+				return false;
+
+			if (stop.get(i) >= stop_p)
+				stop_c.set_d(i,gdb_ext.get(g_c).Dbox.getP2().get(i));
+			else if (stop.get(i) >= start_p)
+				stop_c.set_d(i,stop.get(i) - gdb_ext.get(g_c).origin.get(i));
+			else
+				return false;
+		}
+
+		return true;
+	}
+
+	/*! \brief from g_c increment g_c until you find a valid grid
+	 *
+	 */
+	void selectValidGrid()
+	{
+		// start and stop for the subset grid
+		grid_key_dx<dim> start_c;
+		grid_key_dx<dim> stop_c;
+
+		// When the grid has size 0 potentially all the other informations are garbage
+		while (g_c < gList.size() &&
+			   (gList.get(g_c).size() == 0 || gdb_ext.get(g_c).Dbox.isValid() == false || compute_subset(g_c,start_c,stop_c) == false ))
+		{g_c++;}
+
+		// get the next grid iterator
+		if (g_c < gList.size())
+		{
+			a_it.reinitialize(gList.get(g_c).getIterator(start_c,stop_c));
+		}
+	}
+
+	public:
+
+	/*! \brief Copy operator=
+	*
+	* \param tmp iterator to copy
+	*
+	*/
+	grid_dist_iterator_sub<dim,device_grid> & operator=(const grid_dist_iterator_sub<dim,device_grid> & tmp)
+	{
+		g_c = tmp.g_c;
+		gList = tmp.gList;
+		gdb_ext = tmp.gdb_ext;
+		start = tmp.start;
+		stop = tmp.stop;
+		a_it.reinitialize(tmp.a_it);
+
+		return *this;
+	}
+
+	/*! \brief Copy constructor
+	*
+	* \param tmp iterator to copy
+	*
+	*/
+	grid_dist_iterator_sub(const grid_dist_iterator_sub<dim,device_grid> & tmp)
+	:g_c(tmp.g_c),gList(tmp.gList),gdb_ext(gdb_ext),start(tmp.start),stop(tmp.stop)
+	{
+		// get the next grid iterator
+		if (g_c < gList.size())
+		{
+			a_it.reinitialize(tmp.a_it);
+		}
+	}
+
+	/*! \brief Constructor of the distributed grid iterator
+	 *
+	 * \param start position
+	 * \param stop position
+	 * \param gk std::vector of the local grid
+	 * \param gdb_ext information about the local grids
+	 *
+	 */
+	grid_dist_iterator_sub(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop ,const openfpm::vector<device_grid> & gk, const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
+	:g_c(0),gList(gk),gdb_ext(gdb_ext),start(start),stop(stop)
+	{
+		// Initialize the current iterator
+		// with the first grid
+		selectValidGrid();
+	}
+
+	// Destructor
+	~grid_dist_iterator_sub()
+	{
+	}
+
+	/*! \brief Get the next element
+	 *
+	 * \return the next grid_key
+	 *
+	 */
+
+	inline grid_dist_iterator_sub<dim,device_grid> operator++()
+	{
+		++a_it;
+
+		// check if a_it is at the end
+
+		if (a_it.isNext() == true)
+			return *this;
+		else
+		{
+			// switch to the new grid
+			g_c++;
+
+			selectValidGrid();
+		}
+
+		return *this;
+	}
+
+	/*! \brief Check if there is the next element
+	 *
+	 * \return true if there is the next, false otherwise
+	 *
+	 */
+	inline bool isNext()
+	{
+		// If there are no other grid stop
+
+		if (g_c >= gList.size())
+			return false;
+
+		return true;
+	}
+
+	/*! \brief Get the actual key
+	 *
+	 * \return the actual key
+	 *
+	 */
+	inline grid_dist_key_dx<dim> get()
+	{
+		return grid_dist_key_dx<dim>(g_c,a_it.get());
+	}
+
+	/*! \brief Convert a g_dist_key_dx into a global key
+	 *
+	 * \see grid_dist_key_dx
+	 * \see grid_dist_iterator
+	 *
+	 * \return the global position in the grid
+	 *
+	 */
+	inline grid_key_dx<dim> getGKey(const grid_dist_key_dx<dim> & k)
+	{
+		// Get the sub-domain id
+		size_t sub_id = k.getSub();
+
+		grid_key_dx<dim> k_glob = k.getKey();
+
+		// shift
+		k_glob = k_glob + gdb_ext.get(sub_id).origin;
+
+		return k_glob;
+	}
+
+	/*! \brief Get the starting point of the grid iterator
+	 *
+	 * \return the starting point
+	 *
+	 */
+	inline grid_key_dx<dim> getStart() const
+	{
+		return start;
+	}
+
+	/*! \brief Get the stop point of the grid iterator
+	 *
+	 * \return the stop point
+	 *
+	 */
+	inline grid_key_dx<dim> getStop() const
+	{
+		return stop;
+	}
+};
+
+#endif /* SRC_GRID_GRID_DIST_ID_ITERATOR_SUB_HPP_ */
diff --git a/src/Grid/grid_dist_id_unit_test.hpp b/src/Grid/grid_dist_id_unit_test.hpp
index dfc8d3493d3dc4c3af387f585fe6c4df04c60a78..3d3241dac5a131c3b8c17d6d9fb1dcf712f77aeb 100644
--- a/src/Grid/grid_dist_id_unit_test.hpp
+++ b/src/Grid/grid_dist_id_unit_test.hpp
@@ -3,6 +3,8 @@
 
 #include "grid_dist_id.hpp"
 #include "data_type/scalar.hpp"
+#include "data_type/aggregate.hpp"
+
 
 BOOST_AUTO_TEST_SUITE( grid_dist_id_test )
 
@@ -74,11 +76,138 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_domain_grid_unit_converter_test)
 	}
 }
 
+void Test2D_sub(const Box<2,float> & domain, long int k)
+{
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	// this test is only performed when the number of processor is <= 32
+	if (global_v_cluster->getProcessingUnits() > 32)
+		return;
+
+	print_test( "Testing 2D grid sub iterator k<=",k);
+
+	// 2D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 2D grid k=" << k );
+
+		//! [Create and access a distributed grid]
+
+		// grid size
+		size_t sz[2];
+		sz[0] = k;
+		sz[1] = k;
+
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/2.0f);
+
+		// Ghost
+		Ghost<2,float> g(0.01 / factor);
+
+		// Distributed grid with id decomposition
+		grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g);
+
+		// check the consistency of the decomposition
+		bool val = g_dist.getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(val,true);
+
+		size_t count;
+
+		// Grid sm
+		grid_sm<2,void> info(sz);
+
+		{
+		//! [Usage of a sub_grid iterator]
+
+		grid_key_dx<2> one(1,1);
+		grid_key_dx<2> one_end(k-2,k-2);
+
+		bool check = true;
+		count = 0;
+
+		// get the sub-domain iterator
+		auto dom = g_dist.getSubDomainIterator(one,one_end);
+
+		while (dom.isNext())
+		{
+			auto key = dom.get();
+			auto key_g = g_dist.getGKey(key);
+
+			// key_g should never be 1 or k-1
+			check &= (key_g.get(0) == 0 || key_g.get(0) == k-1)?false:true;
+			check &= (key_g.get(1) == 0 || key_g.get(1) == k-1)?false:true;
+
+			g_dist.template get<0>(key) = info.LinId(key_g);
+
+			// Count the point
+			count++;
+
+			++dom;
+		}
+
+		BOOST_REQUIRE_EQUAL(check,true);
+
+		//! [Usage of a sub_grid iterator]
+
+		}
+
+		// Get the virtual cluster machine
+		Vcluster & vcl = g_dist.getVC();
+
+		// reduce
+		vcl.sum(count);
+		vcl.execute();
+
+		// Check
+		BOOST_REQUIRE_EQUAL(count,(size_t)(k-2)*(k-2));
+
+		// check with a 1x1 square
+
+		{
+
+		grid_key_dx<2> one(k/2,k/2);
+		grid_key_dx<2> one_end(k/2,k/2);
+
+		count = 0;
+
+		// get the sub-domain iterator
+		auto dom = g_dist.getSubDomainIterator(one,one_end);
+
+		while (dom.isNext())
+		{
+			auto key = dom.get();
+			auto key_g = g_dist.getGKey(key);
+
+			// key_g
+			BOOST_REQUIRE_EQUAL(key_g.get(0),k/2);
+			BOOST_REQUIRE_EQUAL(key_g.get(1),k/2);
+
+			auto key_s_it = dom.getGKey(key);
+
+			BOOST_REQUIRE_EQUAL(key_g.get(0),key_s_it.get(0));
+			BOOST_REQUIRE_EQUAL(key_g.get(1),key_s_it.get(1));
+
+			// Count the point
+			count++;
+
+			++dom;
+		}
+
+		// reduce
+		vcl.sum(count);
+		vcl.execute();
+
+		BOOST_REQUIRE_EQUAL(count,1ul);
+		}
+	}
+}
+
 void Test2D(const Box<2,float> & domain, long int k)
 {
 	long int big_step = k / 30;
 	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 1;
+	long int small_step = 21;
 
 	print_test( "Testing 2D grid k<=",k);
 
@@ -137,10 +266,19 @@ void Test2D(const Box<2,float> & domain, long int k)
 		vcl.execute();
 
 		// Check
-		BOOST_REQUIRE_EQUAL(count,k*k);
+		BOOST_REQUIRE_EQUAL(count,(size_t)k*k);
 
 		auto dom2 = g_dist.getDomainIterator();
 
+		grid_key_dx<2> start = dom2.getStart();
+		grid_key_dx<2> stop = dom2.getStop();
+
+		BOOST_REQUIRE_EQUAL((long int)stop.get(0),(long int)g_dist.size(0)-1);
+		BOOST_REQUIRE_EQUAL((long int)stop.get(1),(long int)g_dist.size(1)-1);
+
+		BOOST_REQUIRE_EQUAL(start.get(0),0);
+		BOOST_REQUIRE_EQUAL(start.get(1),0);
+
 		bool match = true;
 
 		// check that the grid store the correct information
@@ -179,15 +317,128 @@ void Test2D(const Box<2,float> & domain, long int k)
 	}
 }
 
+void Test3D_sub(const Box<3,float> & domain, long int k)
+{
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	// this test is only performed when the number of processor is <= 32
+	if (global_v_cluster->getProcessingUnits() > 32)
+		return;
+
+	print_test( "Testing 3D grid sub k<=",k);
+
+	// 3D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 3D grid sub k=" << k );
+
+		// grid size
+		size_t sz[3];
+		sz[0] = k;
+		sz[1] = k;
+		sz[2] = k;
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// Ghost
+		Ghost<3,float> g(0.01 / factor);
+
+		// Distributed grid with id decomposition
+		grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g);
+
+		// check the consistency of the decomposition
+		bool val = g_dist.getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(val,true);
+
+		// Grid sm
+		grid_sm<3,void> info(sz);
+
+		// get the domain iterator
+		size_t count = 0;
+
+		grid_key_dx<3> one(1,1,1);
+		grid_key_dx<3> one_end(k-2,k-2,k-2);
+
+		// Sub-domain iterator
+		auto dom = g_dist.getSubDomainIterator(one,one_end);
+
+		while (dom.isNext())
+		{
+			auto key = dom.get();
+			auto key_g = g_dist.getGKey(key);
+
+			g_dist.template get<0>(key) = info.LinId(key_g);
+
+			// Count the point
+			count++;
+
+			++dom;
+		}
+
+		// Get the virtual cluster machine
+		Vcluster & vcl = g_dist.getVC();
+
+		// reduce
+		vcl.sum(count);
+		vcl.execute();
+
+		// Check
+		BOOST_REQUIRE_EQUAL(count,(size_t)(k-2)*(k-2)*(k-2));
+
+		// check with a 1x1x1 square
+		{
+
+		grid_key_dx<3> one(k/2,k/2,k/2);
+		grid_key_dx<3> one_end(k/2,k/2,k/2);
+
+		count = 0;
+
+		// get the sub-domain iterator
+		auto dom = g_dist.getSubDomainIterator(one,one_end);
+
+		while (dom.isNext())
+		{
+			auto key = dom.get();
+			auto key_g = g_dist.getGKey(key);
+
+			// key_g
+			BOOST_REQUIRE_EQUAL(key_g.get(0),k/2);
+			BOOST_REQUIRE_EQUAL(key_g.get(1),k/2);
+			BOOST_REQUIRE_EQUAL(key_g.get(2),k/2);
+
+			auto key_s_it = dom.getGKey(key);
+
+			BOOST_REQUIRE_EQUAL(key_g.get(0),key_s_it.get(0));
+			BOOST_REQUIRE_EQUAL(key_g.get(1),key_s_it.get(1));
+			BOOST_REQUIRE_EQUAL(key_g.get(2),key_s_it.get(2));
+
+			// Count the point
+			count++;
+
+			++dom;
+		}
+
+		// reduce
+		vcl.sum(count);
+		vcl.execute();
+
+		BOOST_REQUIRE_EQUAL(count,1ul);
+		}
+	}
+}
+
 void Test3D(const Box<3,float> & domain, long int k)
 {
 	long int big_step = k / 30;
 	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 1;
+	long int small_step = 21;
 
 	print_test( "Testing 3D grid k<=",k);
 
-	// 2D test
+	// 3D test
 	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
 	{
 		BOOST_TEST_CHECKPOINT( "Testing 3D grid k=" << k );
@@ -240,7 +491,7 @@ void Test3D(const Box<3,float> & domain, long int k)
 		vcl.execute();
 
 		// Check
-		BOOST_REQUIRE_EQUAL(count,k*k*k);
+		BOOST_REQUIRE_EQUAL(count,(size_t)k*k*k);
 
 		bool match = true;
 
@@ -288,13 +539,170 @@ void Test3D(const Box<3,float> & domain, long int k)
 	}
 }
 
+
+void Test3D_gg(const Box<3,float> & domain, long int k, long int gk)
+{
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+
+	// this test is only performed when the number of processor is <= 32
+	if (global_v_cluster->getProcessingUnits() > 32)
+		return;
+
+	print_test( "Testing 3D grid k<=",k);
+
+	// 3D test
+	for ( ; k > 64 ; k /= 2 )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 3D grid ghost integer k=" << k );
+
+		// grid size
+		size_t sz[3];
+		sz[0] = k;
+		sz[1] = k;
+		sz[2] = k;
+
+		// Ghost
+		Ghost<3,long int> g(gk);
+
+		// Distributed grid with id decomposition
+		grid_dist_id<3, float, scalar<float>, CartDecomposition<3,float>> g_dist(sz,domain,g);
+
+		// check the consistency of the decomposition
+		bool val = g_dist.getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(val,true);
+
+		auto lg = g_dist.getLocalGridsInfo();
+
+		// for each local grid check that the border is 1 point
+		// (Warning this property can only be ensured with k is a multiple of 2)
+		// in the other case it will be mostly like that but cannot be ensured
+
+		for (size_t i = 0 ; i < lg.size() ; i++)
+		{
+			for (size_t j = 0 ; j < 3 ; j++)
+			{
+				BOOST_REQUIRE(lg.get(i).Dbox.getLow(j) >= gk);
+				BOOST_REQUIRE((lg.get(i).GDbox.getHigh(j) - lg.get(i).Dbox.getHigh(j)) >= gk);
+			}
+		}
+	}
+}
+
+/*! \brief Test when the domain is not from 0.0 to 1.0
+ *
+ *
+ */
+
void Test3D_domain(const Box<3,float> & domain, long int k)
{
	// step sizes for shrinking k across iterations
	long int big_step = k / 30;
	big_step = (big_step == 0)?1:big_step;
	long int small_step = 21;

	print_test( "Testing 3D grid k<=",k);

	// 3D test
	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
	{
		BOOST_TEST_CHECKPOINT( "Testing 3D grid k=" << k );

		// grid size
		size_t sz[3];
		sz[0] = k;
		sz[1] = k;
		sz[2] = k;

		// factor to scale the ghost with the number of processors
		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);

		// Ghost
		Ghost<3,float> g(0.01 / factor);

		// Distributed grid with id decomposition
		// property 0: per-processor insertion counter, property 1: global linearized id
		grid_dist_id<3, float, aggregate<long int,long int>, CartDecomposition<3,float>> g_dist(sz,domain,g);

		// check the consistency of the decomposition
		bool val = g_dist.getDecomposition().check_consistency();
		BOOST_REQUIRE_EQUAL(val,true);

		// Grid sm (used to linearize global keys)
		grid_sm<3,void> info(sz);

		// get the domain iterator
		size_t count = 0;

		auto dom = g_dist.getDomainIterator();

		// fill both properties while counting the local points
		while (dom.isNext())
		{
			auto key = dom.get();
			auto key_g = g_dist.getGKey(key);

			g_dist.template get<0>(key) = count;
			g_dist.template get<1>(key) = info.LinId(key_g);

			// Count the point
			count++;

			++dom;
		}

		size_t count2 = count;
		openfpm::vector<size_t> pnt;

		// Get the total size of the local grids on each processors
		// and the total size (collective calls: every processor must reach here)
		Vcluster & v_cl = g_dist.getVC();
		v_cl.sum(count2);
		v_cl.allGather(count,pnt);
		v_cl.execute();
		size_t s_pnt = 0;

		// calculate the starting point for this processor
		// NOTE(review): s_pnt is computed but never used afterwards — dead-code candidate
		for (size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
			s_pnt += pnt.get(i);

		// Check: total number of points must equal the global grid size
		BOOST_REQUIRE_EQUAL(count2,(size_t)k*k*k);

		// sync the ghost
		g_dist.template ghost_get<0,1>();

		bool match = true;

		// check that the communication is correctly completed

		auto domg = g_dist.getDomainGhostIterator();

		// check that the grid with the ghost part stores the correct information
		while (domg.isNext())
		{
			auto key = domg.get();
			auto key_g = g_dist.getGKey(key);

			// In this case the boundary conditions are non periodic
			if (g_dist.isInside(key_g))
			{
				match &= (g_dist.template get<1>(key) == info.LinId(key_g))?true:false;
			}

			++domg;
		}

		BOOST_REQUIRE_EQUAL(match,true);
	}
}
+
+
+
 void Test2D_complex(const Box<2,float> & domain, long int k)
 {
 	typedef Point_test<float> p;
 
 	long int big_step = k / 30;
 	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 1;
+	long int small_step = 21;
 
 	print_test( "Testing 2D complex grid k<=",k);
 
@@ -370,7 +778,7 @@ void Test2D_complex(const Box<2,float> & domain, long int k)
 		vcl.execute();
 
 		// Check
-		BOOST_REQUIRE_EQUAL(count,k*k);
+		BOOST_REQUIRE_EQUAL(count,(size_t)k*k);
 
 		auto dom2 = g_dist.getDomainIterator();
 
@@ -458,7 +866,7 @@ void Test3D_complex(const Box<3,float> & domain, long int k)
 
 	long int big_step = k / 30;
 	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 1;
+	long int small_step = 21;
 
 	print_test( "Testing 3D grid complex k<=",k);
 
@@ -532,7 +940,7 @@ void Test3D_complex(const Box<3,float> & domain, long int k)
 		vcl.execute();
 
 		// Check
-		BOOST_REQUIRE_EQUAL(count,k*k*k);
+		BOOST_REQUIRE_EQUAL(count,(size_t)k*k*k);
 
 		bool match = true;
 
@@ -618,7 +1026,8 @@ void Test3D_dup(const Box<3,float> & domain, long int k)
 {
 	long int big_step = k / 30;
 	big_step = (big_step == 0)?1:big_step;
-	long int small_step = 1;
+	long int small_step = 21;
+	long int k_old = k;
 
 	Vcluster & v_cl = *global_v_cluster;
 
@@ -654,6 +1063,12 @@ void Test3D_dup(const Box<3,float> & domain, long int k)
 
 		//! [Construct two grid with the same decomposition]
 
+		bool ret = g_dist2.getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(ret,true);
+		ret = g_dist2.getDecomposition().is_equal(g_dist2.getDecomposition());
+		BOOST_REQUIRE_EQUAL(ret,true);
+
+
 		auto dom_g1 = g_dist1.getDomainIterator();
 		auto dom_g2 = g_dist2.getDomainIterator();
 
@@ -673,6 +1088,8 @@ void Test3D_dup(const Box<3,float> & domain, long int k)
 		BOOST_REQUIRE_EQUAL(check,true);
 	}
 
+	k = k_old;
+
 	// 3D test
 	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
 	{
@@ -700,16 +1117,159 @@ void Test3D_dup(const Box<3,float> & domain, long int k)
 
 		//! [Construct two grid with the same decomposition]
 
-		BOOST_REQUIRE_EQUAL(g_dist2->getDecomposition().ref(),2);
-
 		delete g_dist1;
 
-		BOOST_REQUIRE_EQUAL(g_dist2->getDecomposition().ref(),1);
-		BOOST_REQUIRE_EQUAL(g_dist2->getDecomposition().getLocalNEGhost(0) != 0, true);
-		BOOST_REQUIRE_EQUAL(g_dist2->getDecomposition().check_consistency(),false);
+		bool ret = g_dist2->getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(ret,true);
+
+		delete g_dist2;
+	}
+}
+
+// Test decomposition grid iterator
+
/*! \brief Check that a grid iterator built from the decomposition visits the
 *         same global keys as the distributed-grid iterators
 *
 * First pass: whole-domain iterator vs grid_dist_id_iterator_dec.
 * Second pass: sub-domain iterator vs a sub-set grid_dist_id_iterator_dec.
 *
 */
void Test3D_decit(const Box<3,float> & domain, long int k)
{
	// k is consumed by the first pass, keep a backup for the second one
	size_t k_bck = k;
	{
		// NOTE(review): this typedef is unused — removal candidate
		typedef Point_test<float> p;

		Vcluster & v_cl = *global_v_cluster;

		// this test is only performed when the number of processors is <= 32
		if ( v_cl.getProcessingUnits() > 32 )
			return;

		long int big_step = k / 30;
		big_step = (big_step == 0)?1:big_step;
		long int small_step = 21;

		print_test( "Testing grid iterator from decomposition k<=",k);

		// 3D test
		for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
		{
			BOOST_TEST_CHECKPOINT( "Testing grid iterator from decomposition k<=" << k );

			// grid size
			size_t sz[3];
			sz[0] = k;
			sz[1] = k;
			sz[2] = k;

			// factor to scale the ghost with the number of processors
			float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);

			// Ghost
			Ghost<3,float> g(0.01 / factor);

			// Distributed grid with id decomposition
			grid_dist_id<3, float, Point_test<float>, CartDecomposition<3,float>> g_dist(sz,domain,g);

			// check the consistency of the decomposition
			bool val = g_dist.getDecomposition().check_consistency();
			BOOST_REQUIRE_EQUAL(val,true);

			// Grid sm
			grid_sm<3,void> info(sz);

			auto dom = g_dist.getDomainIterator();

			bool match = true;

			// create a grid iterator from the decomposition

			grid_dist_id_iterator_dec<CartDecomposition<3,float>> it_dec(g_dist.getDecomposition(),g_dist.getGridInfoVoid().getSize());

			// advance both iterators in lock-step: they must yield the same global keys
			while (dom.isNext())
			{
				auto key = dom.get();
				auto key_g = g_dist.getGKey(key);

				auto key_dec = it_dec.get();

				// Check if the two keys match
				match &= (key_dec == key_g);

				++dom;
				++it_dec;
			}

			BOOST_REQUIRE_EQUAL(match,true);
		}
	}

	// restore k for the second pass
	k = k_bck;

	{
		// NOTE(review): this typedef is unused — removal candidate
		typedef Point_test<float> p;

		Vcluster & v_cl = *global_v_cluster;

		if ( v_cl.getProcessingUnits() > 32 )
			return;

		long int big_step = k / 30;
		big_step = (big_step == 0)?1:big_step;
		long int small_step = 21;

		print_test( "Testing grid iterator from decomposition subset k<=",k);

		// 3D test
		for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
		{
			BOOST_TEST_CHECKPOINT( "Testing grid iterator from decomposition k<=" << k );

			// grid size
			size_t sz[3];
			sz[0] = k;
			sz[1] = k;
			sz[2] = k;

			// factor to scale the ghost with the number of processors
			float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);

			// Ghost
			Ghost<3,float> g(0.01 / factor);

			// Distributed grid with id decomposition
			grid_dist_id<3, float, Point_test<float>, CartDecomposition<3,float>> g_dist(sz,domain,g);

			// check the consistency of the decomposition
			bool val = g_dist.getDecomposition().check_consistency();
			BOOST_REQUIRE_EQUAL(val,true);

			// Grid sm
			grid_sm<3,void> info(sz);

			// iterate only the sub-box [0, sz-2] in each dimension
			auto dom = g_dist.getSubDomainIterator({0,0,0},{(long int)sz[0]-2,(long int)sz[1]-2,(long int)sz[2]-2});

			bool match = true;

			// create a grid iterator from the decomposition over the same sub-box

			grid_dist_id_iterator_dec<CartDecomposition<3,float>> it_dec(g_dist.getDecomposition(),sz,{0,0,0},{sz[0]-2,sz[1]-2,sz[2]-2});

			// advance both iterators in lock-step: they must yield the same global keys
			while (dom.isNext())
			{
				auto key = dom.get();
				auto key_g = g_dist.getGKey(key);

				auto key_dec = it_dec.get();

				// Check if the two keys match
				match &= (key_dec == key_g);

				++dom;
				++it_dec;
			}

			BOOST_REQUIRE_EQUAL(match,true);
		}
	}
}
 
+#include "grid_dist_id_unit_test_ext_dom.hpp"
+
 BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use)
 {
 	// Domain
@@ -730,7 +1290,106 @@ BOOST_AUTO_TEST_CASE( grid_dist_id_iterator_test_use)
 	k = std::pow(k, 1/3.);
 	Test3D(domain3,k);
 	Test3D_complex(domain3,k);
-	//Test3D_dup(domain3,k);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_dup)
+{
+	// Domain
+	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	long int k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+	Test3D_dup(domain3,k);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_sub)
+{
+	// Domain
+	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	long int k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+	Test3D_sub(domain3,k);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_sub_iterator_test_use)
+{
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	long int k = 1024*1024*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/2.);
+
+	Test2D_sub(domain,k);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_with_grid_unit_ghost )
+{
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	long int k = 1024*1024*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/2.);
+
+	// Domain
+	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+	Test3D_gg(domain3,k,1);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_decomposition_iterator )
+{
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	long int k = 1024*1024*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/2.);
+
+	// Domain
+	Box<3,float> domain3({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+	Test3D_decit(domain3,k);
+}
+
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_domain_test_use)
+{
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	// Domain
+	Box<3,float> domain3({0.1,0.1,0.1},{1.1,1.1,1.1});
+
+	long int k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+	Test3D_domain(domain3,k);
+}
+
+BOOST_AUTO_TEST_CASE( grid_dist_id_extended )
+{
+	// Initialize the global VCluster
+	init_global_v_cluster(&boost::unit_test::framework::master_test_suite().argc,&boost::unit_test::framework::master_test_suite().argv);
+
+	// Domain
+	Box<3,float> domain3({0.1,0.1,0.1},{1.1,1.1,1.1});
+
+	long int k = 128*128*128*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+
+	Test3D_extended_grid(domain3,k);
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/Grid/grid_dist_id_unit_test_ext_dom.hpp b/src/Grid/grid_dist_id_unit_test_ext_dom.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbf2f9785d73479ea9ba77ec83800573bb9a753e
--- /dev/null
+++ b/src/Grid/grid_dist_id_unit_test_ext_dom.hpp
@@ -0,0 +1,100 @@
+/*
+ * grid_dist_id_unit_test_ext_dom.hpp
+ *
+ *  Created on: Feb 24, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_GRID_DIST_ID_UNIT_TEST_EXT_DOM_HPP_
+#define SRC_GRID_GRID_DIST_ID_UNIT_TEST_EXT_DOM_HPP_
+
+
// Test the construction of a grid extending an existing one
+
+void Test3D_extended_grid(const Box<3,float> & domain, long int k)
+{
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	Vcluster & v_cl = *global_v_cluster;
+
+	if ( v_cl.getProcessingUnits() > 32 )
+		return;
+
+	print_test( "Testing 3D extended grid k<=",k);
+
+	// 3D test
+	for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 3D extended grid k=" << k );
+
+		// grid size
+		size_t sz[3];
+		sz[0] = k;
+		sz[1] = k;
+		sz[2] = k;
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// Ghost
+		Ghost<3,float> g(0.01 / factor);
+
+		//! [Construct an extended grid]
+
+		// Distributed grid with id decomposition
+		grid_dist_id<3, float, Point_test<float>, CartDecomposition<3,float>> g_dist1(sz,domain,g);
+
+		// Extend the grid by 2 points
+		Box<3,size_t> ext({2,2,2},{2,2,2});
+
+		// another grid perfectly overlapping the previous, extended by 2 points
+		grid_dist_id<3, float, Point_test<float>, CartDecomposition<3,float>> g_dist2(g_dist1,ext);
+
+		//! [Construct an extended grid]
+
+		bool ret = g_dist2.getDecomposition().check_consistency();
+		BOOST_REQUIRE_EQUAL(ret,true);
+
+		// Given an iterator on grid 1
+		auto dom_g1 = g_dist1.getDomainIterator();
+		// And a sub-iterator on grid 2 overlapping grid 1
+		auto dom_g2 = g_dist2.getSubDomainIterator({2,2,2},{k+2-1,k+2-1,k+2-1});
+
+		grid_key_dx<3> kb({2l,2l,2l});
+
+		// the 2 iterator must match
+
+		bool check = true;
+
+		while (dom_g2.isNext())
+		{
+			auto key1 = dom_g1.get();
+			auto key2 = dom_g2.get();
+
+			grid_key_dx<3> g1_k = g_dist1.getGKey(key1);
+			grid_key_dx<3> g2_k = g_dist2.getGKey(key2);
+
+			g2_k = g2_k - kb;
+
+			check &= (g1_k == g2_k)?true:false;
+
+			std::cout << "KEY: " << g1_k.to_string() << "   " << g2_k.to_string() << "\n";
+
+			if (check == false)
+			{
+				std::cout << "ERROR: " << g1_k.to_string() << "   " << g2_k.to_string() << "\n";
+				break;
+			}
+
+			++dom_g1;
+			++dom_g2;
+		}
+
+		BOOST_REQUIRE_EQUAL(check,true);
+	}
+}
+
+
+#endif /* SRC_GRID_GRID_DIST_ID_UNIT_TEST_EXT_DOM_HPP_ */
diff --git a/src/Grid/grid_dist_key.hpp b/src/Grid/grid_dist_key.hpp
index e62d3b0f812234724e9ddcf30733d481475f0816..7d69d5b20551fcc6c8a6c309dd87cf15c0d615f1 100644
--- a/src/Grid/grid_dist_key.hpp
+++ b/src/Grid/grid_dist_key.hpp
@@ -40,6 +40,17 @@ public:
 		return key;
 	}
 
+
	/*! \brief Get a mutable reference to the local grid key
	 *
	 * Unlike getKey(), which returns a copy, this allows the caller to
	 * modify the stored key in place.
	 *
	 * \return reference to the local key
	 *
	 */
	inline grid_key_dx<dim> & getKeyRef()
	{
		return key;
	}
+
 	/* \brief Check if two key are the same
 	 *
 	 * \param key_t key to check
@@ -73,10 +84,39 @@ public:
 		return grid_dist_key_dx<dim>(getSub(),key);
 	}
 
-	inline grid_dist_key_dx(int g_c, grid_key_dx<dim> key)
+	/*! \brief Create a new key moving the old one
+	 *
+	 * \param c where to move for each component
+	 *
+	 * \return new key
+	 *
+	 */
+	inline grid_dist_key_dx<dim> move(const comb<dim> & c)
+	{
+		grid_key_dx<dim> key = getKey();
+		for (size_t i = 0 ; i < dim ; i++)
+			key.set_d(i,key.get(i) + c[i]);
+		return grid_dist_key_dx<dim>(getSub(),key);
+	}
+
	/*! \brief Constructor from sub-domain (local grid) id and local key
	 *
	 * \param g_c sub-domain id
	 * \param key key local to that sub-domain
	 *
	 */
	inline grid_dist_key_dx(int g_c, const grid_key_dx<dim> & key)
	:g_c(g_c),key(key)
	{
	}

	/*! \brief Convert the key into a human-readable string (for debugging)
	 *
	 * \return a string reporting the sub-domain id and each coordinate,
	 *         terminated by a newline
	 *
	 */
	std::string to_string()
	{
		std::stringstream str;

		str << "sub_domain=" << g_c << " ";

		for (size_t i = 0 ; i < dim ; i++)
			str << "x[" << i << "]=" << key.get(i) << " ";

		str << "\n";

		return str.str();
	}
 };
 
 #endif
diff --git a/src/Grid/grid_dist_util.hpp b/src/Grid/grid_dist_util.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ad043f31a8d6ca644954c550b5b2c5f157fb3b31
--- /dev/null
+++ b/src/Grid/grid_dist_util.hpp
@@ -0,0 +1,86 @@
+/*
+ * grid_dist_util.hpp
+ *
+ *  Created on: Jan 28, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_GRID_DIST_UTIL_HPP_
+#define SRC_GRID_GRID_DIST_UTIL_HPP_
+
+#include "NN/CellList/CellDecomposer.hpp"
+
+/*! \brief Create the gdb_ext
+ *
+ * It is a fundamental function, because it create the structure that store the information of the local grids. In
+ * particular from the continuous decomposed domain it calculate the grid that each sub-domain store
+ *
+ * \param gdb_ext output Vector of Boxes that define the local grids extension
+ * \param dec Decomposition
+ * \param cd_sm CellDecomposer the size of cell is equal to the distance between grid points
+ *
+ */
+template<int dim, typename Decomposition> inline void create_gdb_ext(openfpm::vector<GBoxes<Decomposition::dims>> & gdb_ext, Decomposition & dec, CellDecomposer_sm<Decomposition::dims,typename Decomposition::stype> & cd_sm)
+{
+	Box<Decomposition::dims, typename Decomposition::stype> g_rnd_box;
+	for (size_t i = 0 ; i < Decomposition::dims ; i++)	{g_rnd_box.setHigh(i,0.5); g_rnd_box.setLow(i,-0.5);}
+
+	// Get the number of local grid needed
+	size_t n_grid = dec.getNLocalHyperCube();
+
+	// Allocate the grids
+	for (size_t i = 0 ; i < n_grid ; i++)
+	{
+		gdb_ext.add();
+
+		// Get the local hyper-cube
+		SpaceBox<Decomposition::dims, typename Decomposition::stype> sp = dec.getLocalHyperCube(i);
+		SpaceBox<Decomposition::dims, typename Decomposition::stype> sp_g = dec.getSubDomainWithGhost(i);
+
+		// Convert from SpaceBox<dim,St> to SpaceBox<dim,long int>
+		SpaceBox<Decomposition::dims,long int> sp_t = cd_sm.convertDomainSpaceIntoGridUnits(sp);
+		SpaceBox<Decomposition::dims,long int> sp_tg = cd_sm.convertDomainSpaceIntoGridUnits(sp_g);
+
+		//! Save the origin of the sub-domain of the local grid
+		gdb_ext.last().origin = sp_tg.getP1();
+
+		// save information about the local grid: domain box seen inside the domain + ghost box (see GDBoxes for a visual meaning)
+		// and where the GDBox start, or the origin of the local grid (+ghost) in global coordinate
+		gdb_ext.last().Dbox = sp_t;
+		gdb_ext.last().Dbox -= sp_tg.getP1();
+
+		gdb_ext.last().GDbox = sp_tg;
+		gdb_ext.last().GDbox -= sp_tg.getP1();
+	}
+}
+
+/*! \brief Create the gdb_ext
+ *
+ * \param gdb_ext Vector of Boxes that define the local grids extension
+ * \param dec Decomposition
+ * \param sz Global grid grid size
+ * \param domain Domain where the grid is defined
+ * \param spacing Define the spacing of the grid
+ *
+ */
template<int dim, typename Decomposition> inline void create_gdb_ext(openfpm::vector<GBoxes<dim>> & gdb_ext, Decomposition & dec, const size_t (& sz)[dim], const Box<Decomposition::dims,typename Decomposition::stype> & domain, typename Decomposition::stype (& spacing)[dim])
{
	// Create the cell decomposer
	// NOTE(review): this overload mixes dim and Decomposition::dims —
	// presumably they are always equal; verify against callers

	CellDecomposer_sm<Decomposition::dims,typename Decomposition::stype> cd_sm;

	// a grid of sz points per dimension has sz-1 cells
	size_t sz_cell[Decomposition::dims];
	for (size_t i = 0 ; i < dim ; i++)
		sz_cell[i] = sz[i] - 1;

	// Careful: cd_sm requires the number of cells, not the number of points
	cd_sm.setDimensions(domain,sz_cell,0);

	create_gdb_ext<dim,Decomposition>(gdb_ext,dec,cd_sm);

	// fill the spacing: extent of one cell along each dimension
	for (size_t i = 0 ; i < dim ; i++)
		spacing[i] = cd_sm.getCellBox().getP2()[i];
}
+
+#endif /* SRC_GRID_GRID_DIST_UTIL_HPP_ */
diff --git a/src/Grid/staggered_dist_grid.hpp b/src/Grid/staggered_dist_grid.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..75809182460dbe54de6068b3885d680ee136f7d1
--- /dev/null
+++ b/src/Grid/staggered_dist_grid.hpp
@@ -0,0 +1,137 @@
+/*
+ * staggered_grid.hpp
+ *
+ *  Created on: Aug 19, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_STAGGERED_DIST_GRID_HPP_
+#define SRC_GRID_STAGGERED_DIST_GRID_HPP_
+
+#include "Grid/grid_dist_id.hpp"
+#include "staggered_dist_grid_util.hpp"
+#include "VTKWriter/VTKWriter.hpp"
+
+
+/*! \brief Implementation of the staggered grid
+ *
+ * \param dim Dimensionality of the staggered grid
+ * \param ele elements object on each dimensional objects, must be a stag_elements
+ *
+ * \verbatim
+
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+		|     |     |     |     |     |     |
+		#  *  #  *  #  *  #  *  #  *  #  *  #
+		|     |     |     |     |     |     |
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+		|     |     |     |     |     |     |
+		#  *  #  *  #  *  #  *  #  *  #  *  #
+		|     |     |     |     |     |     |
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+		|     |     |     |     |     |     |
+		#  *  #  *  #  *  #  *  #  *  #  *  #
+		|     |     |     |     |     |     |
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+		|     |     |     |     |     |     |
+		#  *  #  *  #  *  #  *  #  *  #  *  #
+		|     |     |     |     |     |     |
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+		|     |     |     |     |     |     |
+		#  *  #  *  #  *  #  *  #  *  #  *  #
+		|     |     |     |     |     |     |
+		+--#--+--#--+--#--+--#--+--#--+--#--+
+
+\endverbatim
+
+		In the case of a 2D staggered grid we have 3 (in general dim+1 ) elements
+
+		+ = vertex
+		# = edge
+		* = volume
+
+        ele = stag_ele<scalar<float>,Point_test<float>,scalar<float>>
+
+        It place a scalar on (*) an object Point_test<float> on (#) and an object scalar<float> on (+)
+
+ *
+ *
+ *
+ */
+template<unsigned int dim, typename St, typename T, typename Decomposition,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T>>
+class staggered_grid_dist : public grid_dist_id<dim,St,T,Decomposition,Memory,device_grid>
+{
+	openfpm::vector<comb<dim>> c_prp[T::max_prop];
+
+public:
+
+	typedef T value_type;
+
+	staggered_grid_dist(const size_t (& g_sz)[dim], const Box<dim,St> & domain, const Ghost<dim,St> & ghost)
+	:grid_dist_id<dim,St,T,Decomposition,Memory,device_grid>(g_sz,domain,ghost)
+	{}
+
+	/*! \brief Get the staggered positions
+	 *
+	 * \return a vector of combination
+	 *
+	 */
+	template<unsigned int p> void setStagPosition(openfpm::vector<comb<dim>> & cmb)
+	{
+#ifdef SE_CLASS1
+		if (extends< typename boost::mpl::at<typename T::type,boost::mpl::int_<p> >::type >::mul() != cmb.size())
+			std::cerr << __FILE__ << ":" << __LINE__ << " error properties has " << extends< typename boost::mpl::at<typename T::type,boost::mpl::int_<p> >::type >::mul() << " components, but " << cmb.size() << "has been defined \n";
+#endif
+		c_prp.get(p) = cmb;
+	}
+
+	/*! \brief It set all the properties defined to be staggered on the default location
+	 *
+	 */
+	void setDefaultStagPosition()
+	{
+		// for each properties
+
+		stag_set_position<dim,typename T::type> ssp(c_prp);
+
+		boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(ssp);
+	}
+
+	/*! \brief Get the staggered positions
+	 *
+	 * \return The vector of the staggered positions
+	 *
+	 */
+	const openfpm::vector<comb<dim>>  (& getStagPositions()) [T::max_prop]
+	{
+		return c_prp;
+	}
+
+	/*! \brief Write a vtk file with the information of the staggered grid
+	 *
+	 * \param str vtk output file
+	 *
+	 */
+	void write(std::string str)
+	{
+		stag_create_and_add_grid<dim,staggered_grid_dist<dim,St,T,Decomposition,Memory,device_grid>,St> sgw(*this, this->getVC().getProcessUnitID());
+
+		boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(sgw);
+	}
+
+	/*! \brief Return if the properties is a staggered property or not
+	 *
+	 * \param prp property to check
+	 *
+	 * \return true if the property is staggered
+	 *
+	 */
+	bool is_staggered_prop(size_t prp)
+	{
+		return c_prp[prp].size() != 0;
+	}
+
+	friend class stag_create_and_add_grid<dim,staggered_grid_dist<dim,St,T,Decomposition,Memory,device_grid>,St>;
+};
+
+#endif /* SRC_GRID_STAGGERED_DIST_GRID_HPP_ */
diff --git a/src/Grid/staggered_dist_grid_util.hpp b/src/Grid/staggered_dist_grid_util.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5a269985abadcdf4efd169f8d55e733100ad1122
--- /dev/null
+++ b/src/Grid/staggered_dist_grid_util.hpp
@@ -0,0 +1,661 @@
+/*
+ * staggered_util.hpp
+ *
+ *  Created on: Aug 19, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_STAGGERED_DIST_GRID_UTIL_HPP_
+#define SRC_GRID_STAGGERED_DIST_GRID_UTIL_HPP_
+
+#include "util/common.hpp"
+#include "VTKWriter/VTKWriter.hpp"
+#include "util/convert.hpp"
+
+
+/*! \brief write a property that has attributes
+ *
+ * \tparam ele object we are writing
+ * \tparam vtk vtk writer
+ * \tparam true in case the basic object has attributes
+ *
+ */
+template<typename ele, typename vtk, bool has_attributes=has_attributes<ele>::value>
+struct vtk_write
+{
+	/*! \brief Add the grid with attributes name
+	 *
+	 * \param vtk_w VTK writer
+	 * \param output where to write
+	 * \param i property to write
+	 *
+	 */
+	vtk_write(vtk vtk_w, const std::string output, const size_t i)
+	{
+		vtk_w.write(output + "_" + ele::attributes::name[i] + ".vtk",ele::attributes::name[i]);
+	}
+};
+
+/*! \brief Add to the vtk writer the key
+ *
+ * \tparam ele object we are writing
+ * \tparam vtk vtk writer
+ * \tparam false in case the basic object has not attributes
+ *
+ */
+template<typename ele, typename vtk>
+struct vtk_write<ele,vtk,false>
+{
+	/*! \brief Add the grid with attributes name
+	 *
+	 * \param vtk_w VTK writer
+	 * \param output where to write
+	 * \param i property to write
+	 *
+	 */
+	vtk_write(vtk vtk_w, const std::string output, const size_t i)
+	{
+		vtk_w.write(output + "_" + std::to_string(i) + ".vtk","attr" + std::to_string(i));
+	}
+};
+
+
/*! \brief Trait to get the number of components of a property type
 *
 * extends<T>::mul() returns the total number of scalar components
 * (the product of all array extents, 1 for a non-array type), while
 * extends<T>::dim() returns the array rank (0 for a non-array type).
 *
 * Generalized: instead of one hand-written specialization per rank
 * (the original stopped at rank 10), a single recursive partial
 * specialization peels one extent per step and supports any rank.
 *
 */
template<typename T>
struct extends
{
	//! a non-array type is a single component ...
	static inline size_t mul()
	{
		return 1;
	}

	//! ... of rank 0
	static inline size_t dim()
	{
		return 0;
	}
};

//! Partial specialization for arrays of any rank
template<typename T,size_t N1>
struct extends<T[N1]>
{
	//! outermost extent times the components of the element type
	static inline size_t mul()
	{
		return N1 * extends<T>::mul();
	}

	//! one dimension more than the element type
	static inline size_t dim()
	{
		return 1 + extends<T>::dim();
	}
};
+
+///////////////////// Copy grid extends
+
+/*! \brief Classes to copy each component into a grid and add to the VTKWriter the grid
+ *
+ * \param T property to write
 * \param dim dimensionality
+ * \param St type of space
+ * \param VTKW VTK writer
+ * \param
+ *
+ */
template<typename T>
struct write_stag
{
	/*! \brief write one scalar staggered property into a freshly added grid
	 *
	 * \tparam p_val property we are going to write
	 * \tparam sg staggered grid type
	 * \tparam v_g vector of grids
	 *
	 * \param st_g staggered grid
	 * \param vg vector of grids (one new grid is appended and filled)
	 * \param lg local grid of the staggered grid we are writing
	 *
	 */
	template<unsigned int p_val, typename sg, typename v_g> static inline void write(sg & st_g, v_g & vg,size_t lg)
	{
		// Add a grid;
		vg.add();
		size_t k = vg.size() - 1;

		// Get the source and destination grid
		auto & g_src = st_g.get_loc_grid(lg);
		auto & g_dst = vg.get(k);

		// Set dimensions and memory
		g_dst.resize(g_src.getGrid().getSize());

		// copy property p_val of every point into property 0 of the new grid

		auto it = vg.get(k).getIterator();

		while(it.isNext())
		{
			g_dst.template get<0>(it.get()) = g_src.template get<p_val>(it.get());

			++it;
		}
	}
};
+
/*! \brief for each component add a grid, fill it, and add it to the VTK writer
 *
 * \param T Property to copy
 * \param N1 number of components
 *
 */
template<typename T,size_t N1>
struct write_stag<T[N1]>
{
	/*! \brief write the staggered grid (one output grid per component)
	 *
	 * \tparam p_val property we are going to write
	 * \tparam sg staggered grid type
	 * \tparam v_g vector of grids
	 *
	 * \param st_g staggered grid
	 * \param vg vector of grids (N1 new grids are appended and filled)
	 * \param lg local grid of the staggered grid we are writing
	 *
	 */
	template<unsigned int p_val, typename sg, typename v_g> static inline void write(sg & st_g, v_g & vg,size_t lg)
	{
		for (size_t i = 0 ; i < N1 ; i++)
		{
			// Add a grid;
			vg.add();
			size_t k = vg.size() - 1;

			// Get the source and destination grid
			auto & g_src = st_g.get_loc_grid(lg);
			auto & g_dst = vg.get(k);

			// Set dimensions and memory
			g_dst.resize(g_src.getGrid().getSize());

			// copy component i of property p_val into property 0 of the new grid
			auto it = vg.get(k).getIterator();

			while(it.isNext())
			{
				g_dst.template get<0>(it.get()) = g_src.template get<p_val>(it.get())[i];

				++it;
			}
		}
	}
};
+
+//! Partial specialization for N=2 2D-Array
+template<typename T,size_t N1,size_t N2>
+struct write_stag<T[N1][N2]>
+{
+	/*! \brief write the staggered grid
+	 *
+	 * \tparam p_val property we are going to write
+	 * \tparam sg staggered grid type
+	 * \tparam v_g vector of grids
+	 *
+	 * \param st_g staggered grid
+	 * \param v_g vector of grids
+	 * \param lg local grid of the staggered grid we are writing
+	 *
+	 */
+	template<unsigned int p_val, typename sg, typename v_g> static inline void write(sg & st_g, v_g & vg,size_t lg)
+	{
+		for (size_t i = 0 ; i < N1 ; i++)
+		{
+			for (size_t j = 0 ; j < N2 ; j++)
+			{
+				// Add a grid;
+				vg.add();
+				size_t k = vg.size() - 1;
+
+				// Set dimensions and memory
+				vg.get(k).resize(st_g.get_loc_grid(lg).getGrid().getSize());
+
+				// copy
+				auto & g_src = st_g.get_loc_grid(lg);
+				auto & g_dst = vg.get(k);
+				auto it = vg.get(k).getIterator();
+
+				while(it.isNext())
+				{
+					g_dst.template get<0>(it.get()) = g_src.template get<p_val>(it.get())[i][j];
+
+					++it;
+				}
+			}
+		}
+	}
+};
+
+///////////////////// Staggered default positioning ////////////////////////
+
+/*! \brief this class is a functor for "for_each" algorithm
+ *
+ * For each element of the boost::vector the operator() is called.
+ * Is mainly used to produce a default position vector for each
+ * property
+ *
+ * \tparam dim dimensionality
+ * \tparam v boost::fusion::vector of properties
+ * \tparam has_posMask case when v has a position mask
+ *
+ */
+
+template<unsigned int dim, typename v, bool has_pM = has_posMask<v>::value>
+class stag_set_position
+{
+	openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value];
+
+public:
+
+	stag_set_position( openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value])
+	:pos_prp(pos_prp)
+	{}
+
+	//! It call the copy function for each property
+	template<typename T>
+	void operator()(T& t) const
+	{
+		// This is the type of the object we have to copy
+		typedef typename boost::mpl::at<v,typename boost::mpl::int_<T::value>>::type prop;
+
+		bool val = prop::stag_pos_mask[T::value];
+
+		if (val == false)
+			return;
+
+		// Dimension of the object
+		size_t dim_prp = extends<prop>::dim();
+
+		// It is a scalar
+		if (dim_prp == 0)
+		{
+			comb<dim> c;
+			c.zero();
+
+			// It stay in the center
+			pos_prp[T::value].add(c);
+		}
+		else if (dim_prp == 1)
+		{
+			// It stay on the object of dimension dim-1 (Negative part)
+			for (size_t i = 0 ; i < dim ; i++)
+			{
+				comb<dim> c;
+				c.zero();
+				c.value(i) = -1;
+
+				pos_prp[T::value].add(c);
+			}
+		}
+		else if (dim_prp == 2)
+		{
+			// Create an hypercube object
+			HyperCube<dim> hyp;
+
+			// Diagonal part live in
+			for (size_t i = 0 ; i < dim ; i++)
+			{
+				comb<dim> c1 = pos_prp[T::value-1].get(i);
+				for (size_t j = 0 ; j < dim ; j++)
+				{
+					comb<dim> c2;
+					c2.zero();
+					c2.value(i) = 1;
+
+					comb<dim> c_res = (c1 + c2) & 0x1;
+
+					pos_prp[T::value].add(c_res);
+				}
+			}
+		}
+		else if (dim_prp > 2)
+		{
+			std::cerr << __FILE__ << ":" << __LINE__ << " Tensor of rank bigger than 2 are not supported";
+		}
+	}
+};
+
+///////////////////// Staggered default positioning ////////////////////////
+
+/*! \brief this class is a functor for "for_each" algorithm
+ *
+ * For each element of the boost::vector the operator() is called.
+ * Is mainly used to produce a default position vector for each
+ * property
+ *
+ * \tparam vector of properties
+ *
+ */
+
+template<unsigned int dim, typename v>
+class stag_set_position<dim,v,false>
+{
+private:
+	openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value];
+
+
+public:
+	stag_set_position( openfpm::vector<comb<dim>> (& pos_prp)[boost::fusion::result_of::size<v>::type::value])
+	:pos_prp(pos_prp)
+	{}
+
+	//! It call the copy function for each property
+	template<typename T>
+	void operator()(T& t) const
+	{
+		// This is the type of the object we have to copy
+		typedef typename boost::mpl::at<v,typename boost::mpl::int_<T::value>>::type prop;
+
+		// Dimension of the object
+		size_t dim_prp = extends<prop>::dim();
+
+		// It is a scalar
+		if (dim_prp == 0)
+		{
+			comb<dim> c;
+			c.zero();
+
+			// It stay in the center
+			pos_prp[T::value].add(c);
+		}
+		else if (dim_prp == 1)
+		{
+			// It stay on the object of dimension dim-1 (Negative part)
+			for (size_t i = 0 ; i < dim ; i++)
+			{
+				comb<dim> c;
+				c.zero();
+				c.getComb()[i] = -1;
+
+				pos_prp[T::value].add(c);
+			}
+		}
+		else if (dim_prp == 2)
+		{
+			// Diagonal part live in
+			for (size_t i = 0 ; i < dim ; i++)
+			{
+				comb<dim> c1 = pos_prp[T::value-1].get(i);
+				for (size_t j = 0 ; j < dim ; j++)
+				{
+					comb<dim> c2;
+					c2.zero();
+					c2.getComb()[j] = 1;
+
+					comb<dim> c_res = (c2 + c1).flip();
+
+					pos_prp[T::value].add(c_res);
+				}
+			}
+		}
+		else if (dim_prp > 2)
+		{
+			std::cerr << __FILE__ << ":" << __LINE__ << " Tensor of rank bigger than 2 are not supported";
+		}
+	}
+};
+
+/*! \brief It create separated grid for each properties to write them into a file
+ *
+ * \tparam dim dimensionality of the grids
+ * \tparam obj type object to print, must be in OpenFPM format
+ *
+ */
+template<unsigned int dim, typename st_grid, typename St>
+class stag_create_and_add_grid
+{
+
+	size_t p_id;
+
+	// staggered grid to write
+	st_grid & st_g;
+
+public:
+
+	/*! \brief Constructor
+	 *
+	 * \param st_g staggered grid
+	 * \param p_id process id
+	 *
+	 */
+	stag_create_and_add_grid(st_grid & st_g, size_t p_id)
+	:p_id(p_id),st_g(st_g)
+	{}
+
+	template<unsigned int p_val> void out_normal()
+	{
+		// property type
+		typedef typename boost::mpl::at< typename st_grid::value_type::type , typename boost::mpl::int_<p_val> >::type ele;
+
+		// create an openfpm format object from the property type
+		typedef object<typename boost::fusion::vector<ele>> d_object;
+
+		VTKWriter<boost::mpl::pair<grid_cpu<dim, d_object >,St>,VECTOR_GRIDS> vtk_w;
+
+		// Create a vector of grids
+
+		openfpm::vector< grid_cpu<dim, d_object > > vg(st_g.getN_loc_grid());
+
+		// for each domain grid
+		for (size_t i = 0 ; i < vg.size() ; i++)
+		{
+			// Set dimensions and memory
+			vg.get(i).resize(st_g.get_loc_grid(i).getGrid().getSize());
+
+			auto & g_src = st_g.get_loc_grid(i);
+			auto & g_dst = vg.get(i);
+
+			auto it = vg.get(i).getIterator();
+
+			while(it.isNext())
+			{
+				object_si_d< decltype(g_src.get_o(it.get())),decltype(g_dst.get_o(it.get())) ,OBJ_ENCAP,p_val>(g_src.get_o(it.get()),g_dst.get_o(it.get()));
+
+				++it;
+			}
+
+			Point<dim,St> offset = st_g.getOffset(i);
+			Point<dim,St> spacing = st_g.getSpacing();
+			Box<dim,size_t> dom = st_g.getDomain(i);
+
+			vtk_w.add(g_dst,offset,spacing,dom);
+		}
+
+		vtk_w.write("vtk_grids_st_" + std::to_string(p_id) + "_" + std::to_string(p_val) + ".vtk");
+	}
+
+	template<unsigned int p_val> void out_staggered()
+	{
+		// property type
+		typedef typename boost::mpl::at< typename st_grid::value_type::type , typename boost::mpl::int_<p_val> >::type ele;
+
+		// Eliminate the extends
+		typedef typename std::remove_all_extents<ele>::type r_ele;
+
+		// create an openfpm format object from the property type
+		typedef object<typename boost::fusion::vector<r_ele>> d_object;
+
+		VTKWriter<boost::mpl::pair<grid_cpu<dim, d_object >,St>,VECTOR_ST_GRIDS> vtk_w;
+
+		// Create a vector of grids
+		openfpm::vector< grid_cpu<dim, d_object > > vg;
+		vg.reserve(st_g.getN_loc_grid() * extends<ele>::mul());
+
+		size_t k = 0;
+
+		// for each domain grid
+		for (size_t i = 0 ; i < st_g.getN_loc_grid() ; i++)
+		{
+			write_stag<ele>::template write<p_val, st_grid,openfpm::vector< grid_cpu<dim, d_object > > >(st_g,vg,i);
+
+			// for each component
+			for ( ; k < vg.size() ; k++)
+			{
+				Point<dim,St> offset = st_g.getOffset(i);
+				Point<dim,St> spacing = st_g.getSpacing();
+				Box<dim,size_t> dom = st_g.getDomain(i);
+
+				vtk_w.add(i,vg.get(k),offset,spacing,dom,st_g.c_prp[p_val].get(k));
+			}
+
+			k = vg.size();
+		}
+
+		vtk_write<typename st_grid::value_type,VTKWriter<boost::mpl::pair<grid_cpu<dim, d_object >,St>,VECTOR_ST_GRIDS>> v(vtk_w,"vtk_grids_st_" + std::to_string(p_id),p_val);
+	}
+
+	//! It call the copy function for each property
+	template<typename T>
+	void operator()(T& t)
+	{
+		if (st_g.is_staggered_prop(T::value) == false)
+			out_normal<T::value>();
+		else
+			out_staggered<T::value>();
+	}
+};
+
+#endif /* SRC_GRID_STAGGERED_DIST_GRID_UTIL_HPP_ */
diff --git a/src/Grid/staggered_grid_dist_unit_test.hpp b/src/Grid/staggered_grid_dist_unit_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbda59d822f692b7919b76c50409b1c6e79b41dc
--- /dev/null
+++ b/src/Grid/staggered_grid_dist_unit_test.hpp
@@ -0,0 +1,60 @@
+/*
+ * staggered_grid_unit_test.hpp
+ *
+ *  Created on: Aug 20, 2015
+ *      Author: i-bird
+ */
+
+#ifndef SRC_GRID_STAGGERED_GRID_DIST_UNIT_TEST_HPP_
+#define SRC_GRID_STAGGERED_GRID_DIST_UNIT_TEST_HPP_
+
+#include "staggered_dist_grid.hpp"
+#include "Point_test.hpp"
+
+BOOST_AUTO_TEST_SUITE( staggered_grid_dist_id_test )
+
+
+BOOST_AUTO_TEST_CASE( staggered_grid_dist_unit_test)
+{
+	typedef Point2D_test<float> p;
+
+	// Domain
+	Box<2,float> domain({0.0,0.0},{1.0,1.0});
+
+	size_t k = 1024;
+
+	// grid size
+	size_t sz[2] = {k,k};
+
+	// Ghost
+	Ghost<2,float> g(0.0);
+
+	staggered_grid_dist<2,float,Point2D_test<float>,CartDecomposition<2,float>> sg(sz,domain,g);
+	sg.setDefaultStagPosition();
+
+	// We check that the staggered position is correct
+	const openfpm::vector<comb<2>> (& cmbs)[6] = sg.getStagPositions();
+
+
+	BOOST_REQUIRE_EQUAL(cmbs[0].size(),1ul);
+	BOOST_REQUIRE_EQUAL(cmbs[1].size(),1ul);
+	BOOST_REQUIRE_EQUAL(cmbs[2].size(),1ul);
+	BOOST_REQUIRE_EQUAL(cmbs[3].size(),1ul);
+	BOOST_REQUIRE_EQUAL(cmbs[4].size(),2ul);
+	BOOST_REQUIRE_EQUAL(cmbs[5].size(),4ul);
+
+	BOOST_REQUIRE(cmbs[0].get(0) == comb<2>({0,0}));
+	BOOST_REQUIRE(cmbs[1].get(0) == comb<2>({0,0}));
+	BOOST_REQUIRE(cmbs[2].get(0) == comb<2>({0,0}));
+	BOOST_REQUIRE(cmbs[3].get(0) == comb<2>({0,0}));
+	BOOST_REQUIRE(cmbs[4].get(0) == comb<2>({0,-1}));
+	BOOST_REQUIRE(cmbs[4].get(1) == comb<2>({-1,0}));
+	BOOST_REQUIRE(cmbs[5].get(0) == comb<2>({0,0}));
+	BOOST_REQUIRE(cmbs[5].get(1) == comb<2>({-1,-1}));
+	BOOST_REQUIRE(cmbs[5].get(2) == comb<2>({-1,-1}));
+	BOOST_REQUIRE(cmbs[5].get(3) == comb<2>({0,0}));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_GRID_STAGGERED_GRID_DIST_UNIT_TEST_HPP_ */
diff --git a/src/Makefile.am b/src/Makefile.am
index f28a9e454f8235f976ef3e8678b48a914d5fdddd..d037d07467cf91958cbf0687e3a4b67a62ec7e54 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,18 +1,23 @@
 LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS)
 
 noinst_PROGRAMS = pdata
-pdata_SOURCES = main.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
-pdata_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -Wno-unused-function
+pdata_SOURCES = main.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
+pdata_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -Wno-unused-local-typedefs
 pdata_CFLAGS = $(CUDA_CFLAGS)
 pdata_LDADD = $(LINKLIBS) -lmetis -lparmetis
 nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/common.hpp Decomposition/Decomposition.hpp  Decomposition/ie_ghost.hpp \
          Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/ORB.hpp \
          Graph/CartesianGraphFactory.hpp \
-         Grid/grid_dist_id.hpp Grid/grid_dist_id_iterator.hpp Grid/grid_dist_key.hpp \
-         Vector/vector_dist.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \
+         Grid/grid_dist_id.hpp Grid/grid_dist_id_iterator_dec.hpp Grid/grid_dist_util.hpp  Grid/grid_dist_id_iterator_sub.hpp Grid/grid_dist_id_iterator.hpp Grid/grid_dist_key.hpp \
+         Vector/vector_dist.hpp Vector/vector_dist_ofb.hpp Vector/vector_dist_iterator.hpp Vector/vector_dist_key.hpp \
          config/config.h \
          example.mk \
-         metis_util.hpp dec_optimizer.hpp SubdomainGraphNodes.hpp
+         Decomposition/Distribution/metis_util.hpp Decomposition/Distribution/parmetis_util.hpp  dec_optimizer.hpp SubdomainGraphNodes.hpp
+
+lib_LIBRARIES = libofpm_pdata.a
+libofpm_pdata_a_SOURCES = lib/pdata.cpp
+libofpm_pdata_a_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I/usr/local/include
+libofpm_pdata_a_CFLAGS =
 
 .cu.o :
 	$(NVCC) $(NVCCFLAGS) -o $@ -c $<
diff --git a/src/SubdomainGraphNodes.hpp b/src/SubdomainGraphNodes.hpp
index 06cf7a8324973a6adb9df2ec6d681a92f8c95fc2..aaf936e1d5f70a17d7130a836cdb1a272b524cd0 100755
--- a/src/SubdomainGraphNodes.hpp
+++ b/src/SubdomainGraphNodes.hpp
@@ -1,6 +1,10 @@
 #ifndef SUBDOMAIN_NODES_HPP
 #define SUBDOMAIN_NODES_HPP
 
+#include <boost/fusion/container/vector.hpp>
+#include <boost/fusion/include/at_c.hpp>
+#include "Grid/Encap.hpp"
+
 /* In a decomposition graph each node represent a sub-domain while an edge represent
  * an interaction between sub-domain (it mean that they have to communicate).
  *
@@ -30,10 +34,7 @@
 struct nm_v
 {
 	//! The node contain 3 unsigned long integer for communication computation memory and id
-	typedef boost::fusion::vector<float[3], size_t, size_t, size_t, size_t, size_t, size_t, size_t> type;
-
-	typedef typename memory_traits_inte<type>::type memory_int;
-	typedef typename memory_traits_lin<type>::type memory_lin;
+	typedef boost::fusion::vector<float[3], size_t, size_t, size_t, size_t, size_t, size_t> type;
 
 	//! type of the positional field
 	typedef float s_type;
@@ -61,11 +62,9 @@ struct nm_v
 	static const unsigned int sub_id = 5;
 	//! proc_id property id in boost::fusion::vector
 	static const unsigned int proc_id = 6;
-	//! fake_v property id in boost::fusion::vector
-	static const unsigned int fake_v = 7;
 
 	//! total number of properties boost::fusion::vector
-	static const unsigned int max_prop = 8;
+	static const unsigned int max_prop = 7;
 
 	//! default constructor
 	nm_v()
@@ -84,7 +83,6 @@ struct nm_v
 		boost::fusion::at_c<4>(data) = boost::fusion::at_c<4>(p.data);
 		boost::fusion::at_c<5>(data) = boost::fusion::at_c<5>(p.data);
 		boost::fusion::at_c<6>(data) = boost::fusion::at_c<6>(p.data);
-		boost::fusion::at_c<7>(data) = boost::fusion::at_c<7>(p.data);
 	}
 
 	template<unsigned int dim, typename Mem> inline nm_v(const encapc<dim, nm_v, Mem> & p)
@@ -103,7 +101,6 @@ struct nm_v
 		boost::fusion::at_c<4>(data) = p.template get<4>();
 		boost::fusion::at_c<5>(data) = p.template get<5>();
 		boost::fusion::at_c<6>(data) = p.template get<6>();
-		boost::fusion::at_c<7>(data) = p.template get<7>();
 
 		return *this;
 	}
@@ -125,8 +122,6 @@ struct nm_v
 
 };
 
-const std::string nm_v::attributes::name[] = { "x", "migration", "computation", "global_id", "id", "sub_id", "proc_id", "fake_v" };
-
 /*! \brief sub-domain edge graph node
  *
  */
@@ -136,9 +131,6 @@ struct nm_e
 	//! The node contain 3 unsigned long integer for comunication computation and memory
 	typedef boost::fusion::vector<size_t, size_t, size_t> type;
 
-	typedef typename memory_traits_inte<type>::type memory_int;
-	typedef typename memory_traits_lin<type>::type memory_lin;
-
 	//! Attributes name
 	struct attributes
 	{
@@ -179,8 +171,6 @@ struct nm_e
 	}
 };
 
-const std::string nm_e::attributes::name[] = { "communication", "srcgid", "dstgid" };
-
 /*! \brief Reduced sub-domain vertex graph node
  *
  * It contain only the processor id for each node
@@ -192,9 +182,6 @@ struct nm_part_v
 	//! The node contain 3 unsigned long integer for comunication computation and memory
 	typedef boost::fusion::vector<size_t, size_t> type;
 
-	typedef typename memory_traits_inte<type>::type memory_int;
-	typedef typename memory_traits_lin<type>::type memory_lin;
-
 	typedef float s_type;
 
 	//! Attributes name
@@ -229,8 +216,6 @@ struct nm_part_v
 
 };
 
-const std::string nm_part_v::attributes::name[] = { "id", "sub_id" };
-
 /*! \brief Reduced edge graph node
  *
  * It contain only the communication between nodes
@@ -242,9 +227,6 @@ struct nm_part_e
 	//! The node contain 3 unsigned long integer for comunication computation and memory
 	typedef boost::fusion::vector<> type;
 
-	typedef typename memory_traits_inte<type>::type memory_int;
-	typedef typename memory_traits_lin<type>::type memory_lin;
-
 	//! The data
 
 	type data;
@@ -259,6 +241,6 @@ struct nm_part_e
 	};
 };
 
-const std::string nm_part_e::attributes::name[] = { "id" };
+
 
 #endif
diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp
index c17f40cd1105f60527fce38e68a583911956843c..85a333f4acc28136857c3fe650dc302f9495863f 100644
--- a/src/Vector/vector_dist.hpp
+++ b/src/Vector/vector_dist.hpp
@@ -19,8 +19,10 @@
 #include "util/common.hpp"
 #include "util/object_util.hpp"
 #include "memory/ExtPreAlloc.hpp"
-#include "CSVWriter.hpp"
+#include "CSVWriter/CSVWriter.hpp"
 #include "Decomposition/common.hpp"
+#include "Grid/grid_dist_id_iterator_dec.hpp"
+#include "Vector/vector_dist_ofb.hpp"
 
 #define V_SUB_UNIT_FACTOR 64
 
@@ -31,8 +33,6 @@
 #define GET	1
 #define PUT 2
 
-#define INTERNAL 0
-
 #define NO_POSITION 1
 #define WITH_POSITION 2
 
@@ -78,204 +78,387 @@ private:
 
 	//! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor
 	//! the second element contain unassigned particles
-	Vcluster_object_array<openfpm::vector<Point<dim,St>>> v_pos;
+	openfpm::vector<Point<dim,St>> v_pos;
 
 	//! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor
 	//! the second element contain unassigned particles
-	Vcluster_object_array<openfpm::vector<prop>> v_prp;
+	openfpm::vector<prop> v_prp;
 
 	//! Virtual cluster
 	Vcluster & v_cl;
 
-public:
+	// definition of the send vector for position
+	typedef  openfpm::vector<Point<dim,St>,ExtPreAlloc<Memory>,openfpm::grow_policy_identity> send_pos_vector;
 
-	/*! \brief Constructor
-	 *
-	 * \param np number of elements
-	 * \param box domain where the vector of elements live
-	 * \param g Ghost margins
-	 *
-	 */
-	vector_dist(size_t np, Box<dim,St> box, Ghost<dim,St> g = Ghost<dim,St>())
-	:dec(*global_v_cluster),v_cl(*global_v_cluster)
-	{
-		// Allocate unassigned particles vectors
-		v_pos = v_cl.template allocate<openfpm::vector<Point<dim,St>>>(1);
-		v_prp = v_cl.template allocate<openfpm::vector<prop>>(1);
+	//////////////////////////////
+	// COMMUNICATION variables
+	//////////////////////////////
 
-		// convert to a local number of elements
-		size_t p_np = np / v_cl.getProcessingUnits();
+	//! It map the processor id with the communication request into map procedure
+	openfpm::vector<size_t> p_map_req;
 
-		// Get non divisible part
-		size_t r = np % v_cl.getProcessingUnits();
+	//! For each near processor, outgoing particle id and shift vector
+	openfpm::vector<openfpm::vector<size_t>> opart;
 
-		// Distribute the remain particles
-		if (v_cl.getProcessUnitID() < r)
-			p_np++;
+	//! For each near processor, particle shift vector
+	openfpm::vector<openfpm::vector<size_t>> oshift;
 
-		// resize the position vector
-		v_pos.get(0).resize(p_np);
+	//! For each adjacent processor the size of the ghost sending buffer
+	openfpm::vector<size_t> ghost_prc_sz;
 
-		// resize the properties vector
-		v_prp.get(0).resize(p_np);
+	//! Sending buffer for the ghost particles properties
+	Memory g_prp_mem;
 
-		// Create a valid decomposition of the space
-		// Get the number of processor and calculate the number of sub-domain
-		// for decomposition
-		size_t n_proc = v_cl.getProcessingUnits();
-		size_t n_sub = n_proc * getDefaultNsubsub();
+	//! Sending buffer for the ghost particles position
+	Memory g_pos_mem;
 
-		// Calculate the maximum number (before merging) of sub-domain on
-		// each dimension
-		size_t div[dim];
-		for (size_t i = 0 ; i < dim ; i++)
-		{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/dim));}
+	//! For each adjacent processor it store the size of the receiving message in byte
+	openfpm::vector<size_t> recv_sz;
 
-		// Create the sub-domains
-		dec.setParameters(div,box,g);
-		dec.decompose();
+	//! For each adjacent processor it store the received message for ghost get
+	openfpm::vector<HeapMemory> recv_mem_gg;
 
-		Point<dim,St> p;
-		p.zero();
-	}
+	//! For each processor it store the received message for global map
+	openfpm::vector<HeapMemory> recv_mem_gm;
 
-	/*! \brief Get the number of minimum sub-domain
+	/*! \brief It stores for each processor the position and properties vector of the particles
 	 *
-	 * \return minimum number
 	 *
 	 */
-	static size_t getDefaultNsubsub()
+	struct pos_prop
 	{
-		return  V_SUB_UNIT_FACTOR;
-	}
+		//! position vector
+		openfpm::vector<Point<dim,St>,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> pos;
+		//! properties vector
+		openfpm::vector<prop,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> prp;
+	};
 
-	/*! \brief return the local size of the vector
+	/*! \brief Label particles for mappings
 	 *
-	 * \return local size
+	 * \param lbl_p Particle labeled
+	 * \param prc_sz For each processor the number of particles to send
+	 * \param opart id of the particles to send
 	 *
 	 */
-	size_t size_local()
+	template<typename obp> void labelParticleProcessor(openfpm::vector<openfpm::vector<size_t>> & lbl_p, openfpm::vector<size_t> & prc_sz, openfpm::vector<size_t> & opart)
 	{
-		return v_pos.get(0).size();
-	}
+		// reset lbl_p
+		lbl_p.resize(v_cl.getProcessingUnits());
+		for (size_t i = 0 ; i < lbl_p.size() ; i++)
+			lbl_p.get(i).clear();
 
-	/*! \brief Get the position of an element
-	 *
-	 * see the vector_dist iterator usage to get an element key
-	 *
-	 * \param vec_key element
-	 *
-	 * \return the position of the element in space
-	 *
-	 */
-	template<unsigned int id> inline auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.get(vec_key.getSub()).template get<id>(vec_key.getKey()))
-	{
-		return v_pos.get(vec_key.getSub()).template get<id>(vec_key.getKey());
+		// resize the label buffer
+		prc_sz.resize(v_cl.getProcessingUnits());
+
+		auto it = v_pos.getIterator();
+
+		// Label all the particles with the processor id where they should go
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Apply the boundary conditions
+			dec.applyPointBC(v_pos.get(key));
+
+			size_t p_id = 0;
+
+			// Check if the particle is inside the domain
+			if (dec.getDomain().isInside(v_pos.get(key)) == true)
+				p_id = dec.processorIDBC(v_pos.get(key));
+			else
+				p_id = obp::out(key,v_cl.getProcessUnitID());
+
+
+			// Particle to move
+			if (p_id != v_cl.getProcessUnitID())
+			{
+				if ((long int)p_id != -1)
+				{
+					prc_sz.get(p_id)++;
+					lbl_p.get(p_id).add(key);
+					opart.add(key);
+				}
+				else
+				{
+					opart.add(key);
+				}
+			}
+
+			// Add processors and add size
+
+			++it;
+		}
 	}
 
-	/*! \brief Get the property of an element
+	/*! \brief Label the particles
 	 *
-	 * see the vector_dist iterator usage to get an element key
+	 * It count the number of particle to send to each processors and save its ids
 	 *
-	 * \tparam id property id
-	 * \param vec_key vector element
+	 * \param prc_sz For each processor the number of particles to send
+	 * \param opart id of the particles to send
+	 * \param shift_id shift correction id
 	 *
-	 * \return return the selected property of the vector element
+	 * \see nn_prcs::getShiftvectors()
 	 *
 	 */
-	template<unsigned int id> inline auto getProp(vect_dist_key_dx vec_key) -> decltype(v_prp.get(vec_key.getSub()).template get<id>(vec_key.getKey()))
+	void labelParticlesGhost()
 	{
-		return v_prp.get(vec_key.getSub()).template get<id>(vec_key.getKey());
+		// Buffer that contain the number of elements to send for each processor
+		ghost_prc_sz.clear();
+		ghost_prc_sz.resize(dec.getNNProcessors());
+
+		// Buffer that contain for each processor the id of the particle to send
+		opart.clear();
+		opart.resize(dec.getNNProcessors());
+
+		// Buffer that contain for each processor the id of the shift vector
+		oshift.clear();
+		oshift.resize(dec.getNNProcessors());
+
+		// Iterate over all particles
+		auto it = v_pos.getIterator();
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			// Given a particle, it return which processor require it (first id) and shift id, second id
+			// For an explanation about shifts vectors please consult getShiftVector in ie_ghost
+			const openfpm::vector<std::pair<size_t,size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key),UNIQUE);
+
+			for (size_t i = 0 ; i < vp_id.size() ; i++)
+			{
+				// processor id
+				size_t p_id = vp_id.get(i).first;
+
+				// add particle to communicate
+				ghost_prc_sz.get(p_id)++;
+				opart.get(p_id).add(key);
+				oshift.get(p_id).add(vp_id.get(i).second);
+			}
+
+			++it;
+		}
 	}
 
-	/*! \brief It store for each processor the position and properties vector of the particles
+	/*! \brief Add local particles based on the boundary conditions
 	 *
+	 * In order to understand what this function use the following
 	 *
-	 */
-	struct pos_prop
-	{
-		//! position vector
-		openfpm::vector<Point<dim,St>,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> pos;
-		//! properties vector
-		openfpm::vector<prop,PreAllocHeapMemory<2>,openfpm::grow_policy_identity> prp;
-	};
-
-	//! It map the processor id with the communication request into map procedure
-	openfpm::vector<size_t> p_map_req;
+		\verbatim
+
+															[1,1]
+			+---------+------------------------+---------+
+			| (1,-1)  |                        | (1,1)   |
+			|   |     |    (1,0) --> 7         |   |     |
+			|   v     |                        |   v     |
+			|   6     |                        |   8     |
+			+--------------------------------------------+
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			| (-1,0)  |                        | (1,0)   |
+			|    |    |                        |   |     |
+			|    v    |      (0,0) --> 4       |   v     |
+			|    3    |                        |   5     |
+			|         |                        |         |
+		 B	|         |                        |     A   |
+		*	|         |                        |    *    |
+			|         |                        |         |
+			|         |                        |         |
+			|         |                        |         |
+			+--------------------------------------------+
+			| (-1,-1) |                        | (-1,1)  |
+			|    |    |   (-1,0) --> 1         |    |    |
+			|    v    |                        |    v    |
+			|    0    |                        |    2    |
+			+---------+------------------------+---------+
+
+
+		\endverbatim
 
-	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
 	 *
-	 * In general this function is called after moving the particles to move the
-	 * elements out the local processor. Or just after initialization if each processor
-	 * contain non local particle
+	 *  The box is the domain, while all boxes at the border (so not (0,0) ) are the
+	 *  ghost part at the border of the domain. If a particle A is in the position in figure
+	 *  a particle B must be created. This function duplicate the particle A, if A and B are
+	 *  local
 	 *
 	 */
-	void map()
+	void add_loc_particles_bc()
 	{
-		// outgoing particles-id
-		openfpm::vector<size_t> opart;
+		// get the shift vectors
+		const openfpm::vector<Point<dim,St>> & shifts = dec.getShiftVectors();
 
-		// Processor communication size
-		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
+		// this map is used to check if a combination is already present
+		std::unordered_map<size_t, size_t> map_cmb;
 
-		// Unassigned particle vector, is always the last vector
-		size_t up_v = v_pos.size()-1;
+		// Add local particles coming from periodic boundary, the only boxes that count are the one
+		// touching the border, filter them
 
-		// Contain the map of the processors, this processors should communicate with
-		openfpm::vector<size_t> p_map(v_cl.getProcessingUnits());
+		// The boxes touching the border of the domain are divided in groups (first vector)
+		// each group contain internal ghost coming from sub-domain of the same section
+		openfpm::vector_std<openfpm::vector_std<Box<dim,St>>> box_f;
+		openfpm::vector_std<comb<dim>> box_cmb;
 
-		// Contain the processor id of each particle (basically where they have to go)
-		openfpm::vector<size_t> lbl_p(v_pos.get(up_v).size());
+		for (size_t i = 0 ; i < dec.getNLocalSub() ; i++)
+		{
+			size_t Nl = dec.getLocalNIGhost(i);
 
-		// It contain the list of the processors this processor should to communicate with
-		openfpm::vector<size_t> p_list;
+			for (size_t j = 0 ; j < Nl ; j++)
+			{
+				// If the ghost does not come from the intersection with an out of
+				// border sub-domain the combination is all zero and n_zero return dim
+				if (dec.getLocalIGhostPos(i,j).n_zero() == dim)
+					continue;
+
+				// Check if we already have boxes with such combination
+				auto it = map_cmb.find(dec.getLocalIGhostPos(i,j).lin());
+				if (it == map_cmb.end())
+				{
+					// we do not have it
+					box_f.add();
+					box_f.last().add(dec.getLocalIGhostBox(i,j));
+					box_cmb.add(dec.getLocalIGhostPos(i,j));
+					map_cmb[dec.getLocalIGhostPos(i,j).lin()] = box_f.size()-1;
+				}
+				else
+				{
+					// we have it
+					box_f.get(it->second).add(dec.getLocalIGhostBox(i,j));
+				}
 
-		auto it = v_pos.get(up_v).getIterator();
 
-		// Label all the particles with the processor id where they should go
-		while (it.isNext())
-		{
-			auto key = it.get();
+			}
+		}
 
-			size_t p_id = dec.processorID(v_pos.get(up_v).get(key));
 
-			lbl_p.get(key) = p_id;
+		if (box_f.size() == 0)
+			return;
+		else
+		{
+			// Label the internal (assigned) particles
+			auto it = v_pos.getIteratorTo(g_m);
 
-			// It has to communicate
-			if (p_id != v_cl.getProcessUnitID())
+			while (it.isNext())
 			{
-				p_map.get(p_id) = 1;
-				prc_sz.get(p_id)++;
+				auto key = it.get();
+
+				// If particles are inside these boxes
+				for (size_t i = 0 ; i < box_f.size() ; i++)
+				{
+					for (size_t j = 0 ; j < box_f.get(i).size() ; j++)
+					{
+						if (box_f.get(i).get(j).isInside(v_pos.get(key)) == true)
+						{
+							Point<dim,St> p = v_pos.get(key);
+							// shift
+							p -= shifts.get(box_cmb.get(i).lin());
+
+							// add this particle shifting its position
+							v_pos.add(p);
+							v_prp.add();
+							v_prp.last() = v_prp.get(key);
+
+							// boxes in one group can be overlapping
+							// we do not have to search for the other
+							// boxes otherwise we will have duplicate particles
+							//
+							// A small note overlap of boxes across groups is fine
+							// (and needed) because each group has different shift
+							// producing non overlapping particles
+							//
+							break;
+						}
+					}
+				}
 
-				opart.add(key);
+				++it;
 			}
+		}
+	}
 
-			// Add processors and add size
+	/*! \brief This function fills the send buffer for the particle positions after the particles have been labeled with labelParticles
+	 *
+	 * \param g_pos_send Send buffer to fill
+	 * \param prAlloc_pos Memory object for the send buffer
+	 *
+	 */
+	void fill_send_ghost_pos_buf(openfpm::vector<send_pos_vector> & g_pos_send, ExtPreAlloc<Memory> * prAlloc_pos)
+	{
+		// get the shift vectors
+		const openfpm::vector<Point<dim,St>> & shifts = dec.getShiftVectors();
 
-			++it;
+		// create a number of send buffers equal to the near processors
+		g_pos_send.resize(ghost_prc_sz.size());
+		for (size_t i = 0 ; i < g_pos_send.size() ; i++)
+		{
+			// set the preallocated memory to ensure contiguity
+			g_pos_send.get(i).setMemory(*prAlloc_pos);
+
+			// resize the sending vector (No allocation is produced)
+			g_pos_send.get(i).resize(ghost_prc_sz.get(i));
 		}
 
-		// resize the map
-		p_map_req.resize(v_cl.getProcessingUnits());
+		// Fill the send buffer
+		for ( size_t i = 0 ; i < opart.size() ; i++ )
+		{
+			for (size_t j = 0 ; j < opart.get(i).size() ; j++)
+			{
+				Point<dim,St> s = v_pos.get(opart.get(i).get(j));
+				s -= shifts.get(oshift.get(i).get(j));
+				g_pos_send.get(i).set(j,s);
+			}
+		}
+	}
 
-		// Create the sz and prc buffer
+	/*! \brief This function fills the send buffer for properties after the particles have been labeled with labelParticles
+	 *
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 * \param g_send_prp Send buffer to fill
+	 * \param prAlloc_prp Memory object for the send buffer
+	 *
+	 */
+	template<typename send_vector,typename prp_object, int... prp> void fill_send_ghost_prp_buf(openfpm::vector<send_vector> & g_send_prp, ExtPreAlloc<Memory> * prAlloc_prp)
+	{
+		// create a number of send buffers equal to the near processors
+		g_send_prp.resize(ghost_prc_sz.size());
+		for (size_t i = 0 ; i < g_send_prp.size() ; i++)
+		{
+			// set the preallocated memory to ensure contiguity
+			g_send_prp.get(i).setMemory(*prAlloc_prp);
 
-		openfpm::vector<size_t> prc_sz_r;
-		openfpm::vector<size_t> prc_r;
+			// resize the sending vector (No allocation is produced)
+			g_send_prp.get(i).resize(ghost_prc_sz.get(i));
+		}
 
-		for (size_t i = 0 ; i < v_cl.getProcessingUnits() ; i++)
+		// Fill the send buffer
+		for ( size_t i = 0 ; i < opart.size() ; i++ )
 		{
-			if (p_map.get(i) == 1)
+			for (size_t j = 0 ; j < opart.get(i).size() ; j++)
 			{
-				p_map_req.get(i) = prc_r.size();
-				prc_r.add(i);
-				prc_sz_r.add(prc_sz.get(i));
+				// source object type
+				typedef encapc<1,prop,typename openfpm::vector<prop>::memory_conf> encap_src;
+				// destination object type
+				typedef encapc<1,prp_object,typename openfpm::vector<prp_object>::memory_conf> encap_dst;
+
+				// Copy only the selected properties
+				object_si_d<encap_src,encap_dst,OBJ_ENCAP,prp...>(v_prp.get(opart.get(i).get(j)),g_send_prp.get(i).get(j));
 			}
 		}
+	}
 
-		// Allocate all the buffers
-
-		openfpm::vector<pos_prop> pb(prc_r.size());
+	/*! \brief allocate and fill the send buffer for the map function
+	 *
+	 * \param prc_r List of processor rank involved in the send
+	 * \param prc_sz_r For each processor in the list the size of the message to send
+	 * \param pb send buffer
+	 *
+	 */
+	void fill_send_map_buf(openfpm::vector<size_t> & prc_r, openfpm::vector<size_t> & prc_sz_r, openfpm::vector<pos_prop> & pb)
+	{
+		pb.resize(prc_r.size());
 
 		for (size_t i = 0 ;  i < prc_r.size() ; i++)
 		{
@@ -297,68 +480,107 @@ public:
 		}
 
 
-		// Run through all the particles and fill pb, the sending buffer
-
-		openfpm::vector<size_t> prc_cnt(prc_r.size());
-		prc_cnt.fill(0);
-
-		it = lbl_p.getIterator();
+		// Run through all the particles and fill the sending buffer
 
-		while (it.isNext())
+		for (size_t i = 0 ; i < opart.size() ; i++)
 		{
-			auto key = it.get();
+			auto it = opart.get(i).getIterator();
+			size_t lbl = p_map_req.get(i);
 
-			size_t lbl = lbl_p.get(key);
-			if (lbl == v_cl.getProcessUnitID())
+			while (it.isNext())
 			{
+				size_t key = it.get();
+				size_t id = opart.get(i).get(key);
+
+				pb.get(lbl).pos.set(key,v_pos.get(id));
+				pb.get(lbl).prp.set(key,v_prp.get(id));
+
 				++it;
-				continue;
 			}
+		}
+	}
 
-			lbl = p_map_req.get(lbl);
+	/*! \brief This function processes the received data for the properties and populates the ghost
+	 *
+	 * \tparam send_vector type used to send data
+	 * \tparam prp_object object containing only the properties to send
+	 * \tparam prp set of properties to send
+	 *
+	 */
+	template<typename send_vector,typename prp_object, int... prp> void process_received_ghost_prp()
+	{
+		// Mark the ghost part
+		g_m = v_prp.size();
 
-			pb.get(lbl).pos.set(prc_cnt.get(lbl),v_pos.get(up_v).get(key));
-			pb.get(lbl).prp.set(prc_cnt.get(lbl),v_prp.get(up_v).get(key));
+		// Process the received data (recv_mem_gg != 0 if you have data)
+		for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++)
+		{
+			// calculate the number of received elements
+			size_t n_ele = recv_sz.get(i) / sizeof(prp_object);
 
-			prc_cnt.get(lbl)++;
+			// add the received particles to the vector
+			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i));
 
-			// Add processors and add size
-			++it;
-		}
+			// create vector representation to a piece of memory already allocated
+			openfpm::vector<prp_object,PtrMemory,openfpm::grow_policy_identity> v2;
 
-		// Create the set of pointers
-		openfpm::vector<void *> ptr(prc_r.size());
-		for (size_t i = 0 ; i < prc_r.size() ; i++)
-		{
-			ptr.get(i) = pb.get(i).pos.getPointer();
+			v2.setMemory(*ptr1);
+
+			// resize with the number of elements
+			v2.resize(n_ele);
+
+			// Add the ghost particle
+			v_prp.template add_prp<prp_object,PtrMemory,openfpm::grow_policy_identity,prp...>(v2);
 		}
+	}
 
-		// convert the particle number to buffer size
-		for (size_t i = 0 ; i < prc_sz_r.size() ; i++)
+	/*! \brief This function processes the received data for the particle positions and populates the ghost
+	 *
+	 */
+	void process_received_ghost_pos()
+	{
+		// Process the received data (recv_mem_gg != 0 if you have data)
+		for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++)
 		{
-			prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(Point<dim,St>));
-		}
+			// calculate the number of received elements
+			size_t n_ele = recv_sz.get(i) / sizeof(Point<dim,St>);
 
-		// Send and receive the particles
+			// add the received particles to the vector
+			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i));
+
+			// create vector representation to a piece of memory already allocated
+
+			openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> v2;
+
+			v2.setMemory(*ptr1);
 
-		recv_cnt = 0;
-		v_cl.sendrecvMultipleMessagesPCX(prc_sz_r.size(),&p_map.get(0), (size_t *)prc_sz_r.getPointer(), (size_t *)prc_r.getPointer() , (void **)ptr.getPointer() , vector_dist::message_alloc_map, this ,NEED_ALL_SIZE);
+			// resize with the number of elements
+			v2.resize(n_ele);
 
-		// overwrite the outcoming particle with the incoming particle and resize the vectors
+			// Add the ghost particle
+			v_pos.template add<PtrMemory,openfpm::grow_policy_identity>(v2);
+		}
+	}
 
-		size_t total_element = 0;
+	/*! \brief Process the received particles
+	 *
+	 * \param out_part list of the out-going particles
+	 *
+	 */
+	void process_received_map(openfpm::vector<size_t> & out_part)
+	{
 		size_t o_p_id = 0;
 
-		for (size_t i = 0 ; i < v_proc.size() ; i++)
+		for (size_t i = 0 ; i < recv_mem_gm.size() ; i++)
 		{
 			// Get the number of elements
 
-			size_t n_ele = v_proc.get(i) / (sizeof(Point<dim,St>) + sizeof(prop));
+			size_t n_ele = recv_mem_gm.get(i).size() / (sizeof(Point<dim,St>) + sizeof(prop));
 
 			// Pointer of the received positions for each near processor
-			void * ptr_pos = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(Point<dim,St>) + sizeof(prop)));
+			void * ptr_pos = (unsigned char *)recv_mem_gm.get(i).getPointer();
 			// Pointer of the received properties for each near processor
-			void * ptr_prp = ((unsigned char *)hp_recv.getPointer()) + (total_element * (sizeof(Point<dim,St>) + sizeof(prop))) + n_ele * sizeof(Point<dim,St>);
+			void * ptr_prp = (unsigned char *)recv_mem_gm.get(i).getPointer() + n_ele * sizeof(Point<dim,St>);
 
 			PtrMemory * ptr1 = new PtrMemory(ptr_pos,n_ele * sizeof(Point<dim,St>));
 			PtrMemory * ptr2 = new PtrMemory(ptr_prp,n_ele * sizeof(prop));
@@ -377,41 +599,239 @@ public:
 			// Add the received particles to v_pos and v_prp
 
 			size_t j = 0;
-			for ( ; j < vpos.size() && o_p_id < opart.size() ; j++, o_p_id++)
+			for ( ; j < vpos.size() && o_p_id < out_part.size() ; j++, o_p_id++)
 			{
-				v_pos.get(0).set(opart.get(o_p_id),vpos.get(j));
-				v_prp.get(0).set(opart.get(o_p_id),vprp.get(j));
+				v_pos.set(out_part.get(o_p_id),vpos.get(j));
+				v_prp.set(out_part.get(o_p_id),vprp.get(j));
 			}
 
 			for ( ; j < vpos.size(); j++)
 			{
-				v_pos.get(0).add();
-				v_pos.get(0).set(v_pos.get(0).size()-1,vpos.get(j));
-				v_prp.get(0).add();
-				v_prp.get(0).set(v_prp.get(0).size()-1,vprp.get(j));
+				v_pos.add();
+				v_pos.set(v_pos.size()-1,vpos.get(j));
+				v_prp.add();
+				v_prp.set(v_prp.size()-1,vprp.get(j));
 			}
+		}
+
+		// remove the (out-going) particles from the vector
+
+		v_pos.remove(out_part,o_p_id);
+		v_prp.remove(out_part,o_p_id);
+	}
+
+	/*! \brief Calculate send buffers total size and allocation
+	 *
+	 * \tparam prp_object object containing only the properties to send
+	 *
+	 * \param size_byte_prp total size for the property buffer
+	 * \param size_byte_pos total size for the position buffer
+	 * \param pap_prp allocation sequence for the property buffer
+	 * \param pap_pos allocation sequence for the position buffer
+	 *
+	 */
+	template<typename prp_object> void calc_send_ghost_buf(size_t & size_byte_prp, size_t & size_byte_pos, std::vector<size_t> & pap_prp, std::vector<size_t> & pap_pos)
+	{
+		// Calculate the total size required for the sending buffer
+		for ( size_t i = 0 ; i < ghost_prc_sz.size() ; i++ )
+		{
+			size_t alloc_ele = openfpm::vector<prp_object,HeapMemory,openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i),0);
+			pap_prp.push_back(alloc_ele);
+			size_byte_prp += alloc_ele;
+
+			alloc_ele = openfpm::vector<Point<dim,St>,HeapMemory,openfpm::grow_policy_identity>::calculateMem(ghost_prc_sz.get(i),0);
+			pap_pos.push_back(alloc_ele);
+			size_byte_pos += alloc_ele;
+		}
+	}
+
+public:
+
+	/*! \brief Constructor
+	 *
+	 * \param np number of elements
+	 * \param box domain where the vector of elements live
+	 * \param boundary conditions
+	 * \param g Ghost margins
+	 *
+	 */
+	vector_dist(size_t np, Box<dim,St> box, const size_t (& bc)[dim] ,const Ghost<dim,St> & g)
+	:dec(*global_v_cluster),v_cl(*global_v_cluster)
+	{
+#ifdef SE_CLASS2
+		check_new(this,8,VECTOR_DIST_EVENT,4);
+#endif
+
+		// convert to a local number of elements
+		size_t p_np = np / v_cl.getProcessingUnits();
+
+		// Get non divisible part
+		size_t r = np % v_cl.getProcessingUnits();
+
+		// Distribute the remain particles
+		if (v_cl.getProcessUnitID() < r)
+			p_np++;
+
+		// resize the position vector
+		v_pos.resize(p_np);
+
+		// resize the properties vector
+		v_prp.resize(p_np);
+
+		g_m = p_np;
+
+		// Create a valid decomposition of the space
+		// Get the number of processor and calculate the number of sub-domain
+		// for decomposition
+		size_t n_proc = v_cl.getProcessingUnits();
+		size_t n_sub = n_proc * getDefaultNsubsub();
+
+		// Calculate the maximum number (before merging) of sub-domain on
+		// each dimension
+		size_t div[dim];
+		for (size_t i = 0 ; i < dim ; i++)
+		{div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/dim));}
+
+		// Create the sub-domains
+		dec.setParameters(div,box,bc,g);
+		dec.decompose();
+
+		// and create the ghost boxes
+		dec.calculateGhostBoxes();
+	}
+
+	~vector_dist()
+	{
+#ifdef SE_CLASS2
+		check_delete(this);
+#endif
+	}
+
+	/*! \brief Get the number of minimum sub-domain
+	 *
+	 * \return minimum number
+	 *
+	 */
+	static size_t getDefaultNsubsub()
+	{
+		return  V_SUB_UNIT_FACTOR;
+	}
+
+	/*! \brief return the local size of the vector
+	 *
+	 * \return local size
+	 *
+	 */
+	size_t size_local()
+	{
+		return g_m;
+	}
+
+	/*! \brief Get the position of an element
+	 *
+	 * see the vector_dist iterator usage to get an element key
+	 *
+	 * \param vec_key element
+	 *
+	 * \return the position of the element in space
+	 *
+	 */
+	template<unsigned int id> inline auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get<id>(vec_key.getKey()))
+	{
+		return v_pos.template get<id>(vec_key.getKey());
+	}
+
+	/*! \brief Get the property of an element
+	 *
+	 * see the vector_dist iterator usage to get an element key
+	 *
+	 * \tparam id property id
+	 * \param vec_key vector element
+	 *
+	 * \return return the selected property of the vector element
+	 *
+	 */
+	template<unsigned int id> inline auto getProp(vect_dist_key_dx vec_key) -> decltype(v_prp.template get<id>(vec_key.getKey()))
+	{
+		return v_prp.template get<id>(vec_key.getKey());
+	}
+
+	/*! \brief It move all the particles that does not belong to the local processor to the respective processor
+	 *
+	 * \tparam obp out-of-bound policy, it specifies what to do when the particles are detected out of bound
+	 *
+	 * In general this function is called after moving the particles to move the
+	 * elements out of the local processor. Or just after initialization if each processor
+	 * contains non-local particles
+	 *
+	 */
+	template<typename obp=KillParticle> void map()
+	{
+		// outgoing particles-id
+		openfpm::vector<size_t> out_part;
+
+		// Processor communication size
+		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
+
+		// It contain the list of the processors this processor should to communicate with
+		openfpm::vector<size_t> p_list;
+
+		// map completely reset the ghost part
+		v_pos.resize(g_m);
+		v_prp.resize(g_m);
+
+		// Contain the processor id of each particle (basically where they have to go)
+		labelParticleProcessor<obp>(opart,prc_sz,out_part);
+
+		// Calculate the sending buffer size for each processor, put this information in
+		// a contiguous buffer
+		p_map_req.resize(v_cl.getProcessingUnits());
+		openfpm::vector<size_t> prc_sz_r;
+		openfpm::vector<size_t> prc_r;
+
+		for (size_t i = 0 ; i < v_cl.getProcessingUnits() ; i++)
+		{
+			if (prc_sz.get(i) != 0)
+			{
+				p_map_req.get(i) = prc_r.size();
+				prc_r.add(i);
+				prc_sz_r.add(prc_sz.get(i));
+			}
+		}
+
+		// Allocate the send buffers
+
+		openfpm::vector<pos_prop> pb;
 
-			// increment the total number of element counter
-			total_element += n_ele;
+		// fill the send buffers
+		fill_send_map_buf(prc_r,prc_sz_r,pb);
+
+		// Create the set of pointers
+		openfpm::vector<void *> ptr(prc_r.size());
+		for (size_t i = 0 ; i < prc_r.size() ; i++)
+		{
+			ptr.get(i) = pb.get(i).pos.getPointer();
 		}
 
-		// remove the hole (out-going particles) in the vector
+		// convert the particle number to buffer size
+		for (size_t i = 0 ; i < prc_sz_r.size() ; i++)
+		{
+			prc_sz_r.get(i) = prc_sz_r.get(i)*(sizeof(prop) + sizeof(Point<dim,St>));
+		}
 
-		v_pos.get(0).remove(opart,o_p_id);
-		v_prp.get(0).remove(opart,o_p_id);
-	}
+		// Send and receive the particles
 
-	//! For each adjacent processor outgoing particles-ids
-	openfpm::vector<openfpm::vector<size_t>> opart;
+		recv_mem_gm.clear();
+		v_cl.sendrecvMultipleMessagesNBX(prc_sz_r.size(), (size_t *)prc_sz_r.getPointer(), (size_t *)prc_r.getPointer() , (void **)ptr.getPointer() , vector_dist::message_alloc_map, this ,NEED_ALL_SIZE);
 
-	//! For each adjacent processor the size of the ghost sending buffer
-	openfpm::vector<size_t> ghost_prc_sz;
+		// Process the incoming particles
 
-	//! Sending buffer for the ghost particles properties
-	Memory g_prp_mem;
+		process_received_map(out_part);
 
-	//! Sending buffer for the ghost particles position
-	Memory g_pos_mem;
+		// mark the ghost part
+
+		g_m = v_pos.size();
+	}
 
 	/*! \brief It synchronize the properties and position of the ghost particles
 	 *
@@ -425,198 +845,67 @@ public:
 		typedef object<typename object_creator<typename prop::type,prp...>::type> prp_object;
 
 		// send vector for each processor
-		typedef  openfpm::vector<prp_object,ExtPreAlloc<Memory>> send_vector;
-
-		// Buffer that contain the number of elements to send for each processor
-		ghost_prc_sz.clear();
-		ghost_prc_sz.resize(dec.getNNProcessors());
-		// Buffer that contain for each processor the id of the particle to send
-		opart.clear();
-		opart.resize(dec.getNNProcessors());
-
-		// Label the internal (assigned) particles
-		auto it = v_pos.get(INTERNAL).getIterator();
-
-		// Label all the particles with the processor id, where they should go
-		while (it.isNext())
-		{
-			auto key = it.get();
-
-			const openfpm::vector<size_t> & vp_id = dec.template ghost_processorID<typename Decomposition::lc_processor_id>(v_pos.get(INTERNAL).get(key),UNIQUE);
-
-			for (size_t i = 0 ; i < vp_id.size() ; i++)
-			{
-				// processor id
-				size_t p_id = vp_id.get(i);
+		typedef  openfpm::vector<prp_object,ExtPreAlloc<Memory>,openfpm::grow_policy_identity> send_vector;
 
-				// add particle to communicate
-				ghost_prc_sz.get(p_id)++;
+		// reset the ghost part
+		v_pos.resize(g_m);
+		v_prp.resize(g_m);
 
-				opart.get(p_id).add(key);
-			}
+		// Label all the particles
+		labelParticlesGhost();
 
-			++it;
-		}
+		// Calculate memory and allocation for the send buffers
 
-		// Send buffer size in byte ( one buffer for all processors )
+		// Total size
 		size_t size_byte_prp = 0;
 		size_t size_byte_pos = 0;
 
-		// sequence of pre-allocation pattern for property and position send buffer
+		// allocation patterns for property and position send buffer
 		std::vector<size_t> pap_prp;
 		std::vector<size_t> pap_pos;
 
-		// Calculate the total size required for the sending buffer
-		for ( size_t i = 0 ; i < ghost_prc_sz.size() ; i++ )
-		{
-			size_t alloc_ele = openfpm::vector<prp_object>::calculateMem(ghost_prc_sz.get(i),0);
-			pap_prp.push_back(alloc_ele);
-			size_byte_prp += alloc_ele;
+		calc_send_ghost_buf<prp_object>(size_byte_prp,size_byte_pos,pap_prp,pap_pos);
 
-			alloc_ele = openfpm::vector<Point<dim,St>>::calculateMem(ghost_prc_sz.get(i),0);
-			pap_pos.push_back(alloc_ele);
-			size_byte_pos += alloc_ele;
-		}
+		// Create memory for the send buffer
 
-		// resize the property buffer memory
 		g_prp_mem.resize(size_byte_prp);
-		// resize the position buffer memory
 		if (opt != NO_POSITION) g_pos_mem.resize(size_byte_pos);
 
-		// Create an object of preallocated memory for properties
-		ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(pap_prp,g_prp_mem);
-
-		ExtPreAlloc<Memory> * prAlloc_pos;
-		// Create an object of preallocated memory for position
-		if (opt != NO_POSITION) prAlloc_pos = new ExtPreAlloc<Memory>(pap_pos,g_pos_mem);
+		// Create and fill send buffer for particle properties
 
-		// create a vector of send vector (ExtPreAlloc warrant that all the created vector are contiguous)
+		ExtPreAlloc<Memory> * prAlloc_prp = new ExtPreAlloc<Memory>(pap_prp,g_prp_mem);
 		openfpm::vector<send_vector> g_send_prp;
+		fill_send_ghost_prp_buf<send_vector,prp_object,prp...>(g_send_prp,prAlloc_prp);
 
-		// create a number of send buffers equal to the near processors
-		g_send_prp.resize(ghost_prc_sz.size());
-		for (size_t i = 0 ; i < g_send_prp.size() ; i++)
-		{
-			// set the preallocated memory to ensure contiguity
-			g_send_prp.get(i).setMemory(*prAlloc_prp);
-
-			// resize the sending vector (No allocation is produced)
-			g_send_prp.get(i).resize(ghost_prc_sz.get(i));
-		}
-
-		// Fill the send buffer
-		for ( size_t i = 0 ; i < opart.size() ; i++ )
-		{
-			for (size_t j = 0 ; j < opart.get(i).size() ; j++)
-			{
-				// source object type
-				typedef encapc<1,prop,typename openfpm::vector<prop>::memory_conf> encap_src;
-				// destination object type
-				typedef encapc<1,prp_object,typename openfpm::vector<prp_object>::memory_conf> encap_dst;
-
-				// Copy only the selected properties
-				object_si_d<encap_src,encap_dst,OBJ_ENCAP,prp...>(v_prp.get(INTERNAL).get(opart.get(i).get(j)),g_send_prp.get(i).get(j));
-			}
-		}
-
-		// Create the buffer for particle position
-
-		// definition of the send vector for position for each processor
-		typedef  openfpm::vector<Point<dim,St>,ExtPreAlloc<Memory>> send_pos_vector;
+		// Create and fill the send buffer for the particle position
 
+		ExtPreAlloc<Memory> * prAlloc_pos;
 		openfpm::vector<send_pos_vector> g_pos_send;
 		if (opt != NO_POSITION)
 		{
-			// create a number of send buffers equal to the near processors
-			g_pos_send.resize(ghost_prc_sz.size());
-			for (size_t i = 0 ; i < g_pos_send.size() ; i++)
-			{
-				// set the preallocated memory to ensure contiguity
-				g_pos_send.get(i).setMemory(*prAlloc_pos);
-
-				// resize the sending vector (No allocation is produced)
-				g_pos_send.get(i).resize(ghost_prc_sz.get(i));
-			}
-
-			// Fill the send buffer
-			for ( size_t i = 0 ; i < opart.size() ; i++ )
-			{
-				for (size_t j = 0 ; j < opart.get(i).size() ; j++)
-				{
-					g_pos_send.get(i).set(j,v_pos.get(INTERNAL).get(opart.get(i).get(j)));
-				}
-			}
+			prAlloc_pos = new ExtPreAlloc<Memory>(pap_pos,g_pos_mem);
+			fill_send_ghost_pos_buf(g_pos_send,prAlloc_pos);
 		}
 
-		// Create processor buffer pattern
-
+		// Create processor list
 		openfpm::vector<size_t> prc;
 		for (size_t i = 0 ; i < opart.size() ; i++)
-		{
 			prc.add(dec.IDtoProc(i));
-		}
 
-		// Send receive the particles properties information
+		// Send/receive the particle properties information
 		v_cl.sendrecvMultipleMessagesNBX(prc,g_send_prp,msg_alloc_ghost_get,this);
+		process_received_ghost_prp<send_vector,prp_object,prp...>();
 
-		// Mark the ghost part
-		g_m = v_prp.get(INTERNAL).size();
-
-		// Process the received data (recv_mem_gg != 0 if you have data)
-		for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++)
-		{
-			// calculate the number of received elements
-			size_t n_ele = recv_sz.get(i) / sizeof(prp_object);
-
-			// add the received particles to the vector
-			PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i));
-
-			// create vector representation to a piece of memory already allocated
-			openfpm::vector<prp_object,PtrMemory,openfpm::grow_policy_identity> v2;
-
-			v2.setMemory(*ptr1);
-
-			// resize with the number of elements
-			v2.resize(n_ele);
-
-			// Add the ghost particle
-			v_prp.get(INTERNAL).template add_prp<prp_object,PtrMemory,openfpm::grow_policy_identity,prp...>(v2);
-		}
 
 		if (opt != NO_POSITION)
 		{
-			// Send receive the particles properties information
+			// Send/receive the particle position information
 			v_cl.sendrecvMultipleMessagesNBX(prc,g_pos_send,msg_alloc_ghost_get,this);
-
-			// Process the received data (recv_mem_gg != 0 if you have data)
-			for (size_t i = 0 ; i < dec.getNNProcessors() && recv_mem_gg.size() != 0 ; i++)
-			{
-				// calculate the number of received elements
-				size_t n_ele = recv_sz.get(i) / sizeof(Point<dim,St>);
-
-				// add the received particles to the vector
-				PtrMemory * ptr1 = new PtrMemory(recv_mem_gg.get(i).getPointer(),recv_sz.get(i));
-
-				// create vector representation to a piece of memory already allocated
-
-				openfpm::vector<Point<dim,St>,PtrMemory,openfpm::grow_policy_identity> v2;
-
-				v2.setMemory(*ptr1);
-
-				// resize with the number of elements
-				v2.resize(n_ele);
-
-				// Add the ghost particle
-				v_pos.get(INTERNAL).template add<PtrMemory,openfpm::grow_policy_identity>(v2);
-			}
+			process_received_ghost_pos();
 		}
-	}
-
-	//! For each adjacent processor it store the size of the receiving message in byte
-	openfpm::vector<size_t> recv_sz;
 
-	//! For each adjacent processot it store the received message
-	openfpm::vector<HeapMemory> recv_mem_gg;
+		add_loc_particles_bc();
+	}
 
 	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
 	 *
@@ -646,15 +935,6 @@ public:
 		return v->recv_mem_gg.get(lc_id).getPointer();
 	}
 
-	//! Receive buffer for global communication
-	HeapMemory hp_recv;
-
-	//! For each message contain the processor from which processor come from
-	openfpm::vector<size_t> v_proc;
-
-	//! Total size of the received buffer
-	size_t recv_cnt;
-
 	/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
 	 *
 	 * \param msg_i size required to receive the message from i
@@ -673,61 +953,274 @@ public:
 		// cast the pointer
 		vector_dist<dim,St,prop,Decomposition,Memory> * vd = static_cast<vector_dist<dim,St,prop,Decomposition,Memory> *>(ptr);
 
-		// Resize the receive buffer, and the size of each message buffer
-		vd->hp_recv.resize(total_msg);
-		vd->v_proc.resize(total_p);
+		vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits());
+		vd->recv_mem_gm.get(i).resize(msg_i);
 
-		// Return the receive pointer
-		void * recv_ptr = (unsigned char *)vd->hp_recv.getPointer() + vd->recv_cnt;
+		return vd->recv_mem_gm.get(i).getPointer();
+	}
 
-		// increment the receive pointer
-		vd->recv_cnt += msg_i;
+	/*! \brief Add local particle
+	 *
+	 * It adds a local particle; with "local" we mean on this processor.
+	 * the particle can be also created out of the processor domain, in this
+	 * case a call to map is required. Added particles are always created at the
+	 * end and can be accessed with getLastPos and getLastProp
+	 *
+	 */
+	void add()
+	{
+		v_prp.insert(g_m);
+		v_pos.insert(g_m);
 
-		// Save the processor message size
-		vd->v_proc.get(ri) = msg_i;
+		g_m++;
+	}
 
-		return recv_ptr;
+	/*! \brief Get the position of the last element
+	 *
+	 * \return the position of the element in space
+	 *
+	 */
+	template<unsigned int id> inline auto getLastPos() -> decltype(v_pos.template get<id>(0))
+	{
+		return v_pos.template get<id>(g_m-1);
 	}
 
+	/*! \brief Get the property of the last element
+	 *
+	 * see the vector_dist iterator usage to get an element key
+	 *
+	 * \tparam id property id
+	 * \param vec_key vector element
+	 *
+	 * \return return the selected property of the vector element
+	 *
+	 */
+	template<unsigned int id> inline auto getLastProp() -> decltype(v_prp.template get<id>(0))
+	{
+		return v_prp.template get<id>(g_m-1);
+	}
 
-	/*! \brief Get the iterator across the position of the particles
+	/*! \brief Construct a cell list starting from the stored particles
 	 *
-	 * \return an iterator
+	 * \tparam CellL CellList type to construct
+	 *
+	 * \param r_cut interaction radius, or size of each cell
+	 *
+	 * \return the Cell list
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<Point<dim,St>>> getIterator()
+	template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > CellL getCellList(St r_cut)
 	{
-		return vector_dist_iterator<openfpm::vector<Point<dim,St>>>(v_pos);
+		return getCellList(r_cut,dec.getGhost());
 	}
 
-	/*! \brief Get the iterator across the position of the ghost particles
+	/*! \brief Construct a cell list starting from the stored particles
+	 *
+	 * It differs from getCellList by an additional parameter: in case the
+	 * domain + ghost is not big enough to contain additional padding particles, a cell list
+	 * with bigger space can be created
+	 * (padding particles in general are particles added by the user out of the domain)
+	 *
+	 * \tparam CellL CellList type to construct
+	 *
+	 * \param r_cut interaction radius, or size of each cell
+	 * \param enlarge In case of padding particles the cell list must be enlarged; like a ghost, this parameter says how much it must be enlarged
+	 *
+	 */
+	template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > CellL getCellList(St r_cut, const Ghost<dim,St> & enlarge)
+	{
+		CellL cell_list;
+
+		// calculate the parameters of the cell list
+
+		// get the processor bounding box
+		Box<dim,St> pbox = dec.getProcessorBounds();
+		// extend by the ghost
+		pbox.enlarge(enlarge);
+
+		Box<dim,St> cell_box;
+
+		size_t div[dim];
+
+		// Calculate the division array and the cell box
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			div[i] = static_cast<size_t>((pbox.getP2().get(i) - pbox.getP1().get(i))/ r_cut);
+			div[i]++;
+
+			cell_box.setLow(i,0.0);
+			cell_box.setHigh(i,div[i]*r_cut);
+		}
+
+		cell_list.Initialize(cell_box,div,pbox.getP1());
+
+		// for each particle add the particle to the cell list
+
+		auto it = getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			cell_list.add(this->template getPos<0>(key),key.getKey());
+
+			++it;
+		}
+
+		return cell_list;
+	}
+
+	/*! \brief for each particle get the verlet list
+	 *
+	 * \param verlet output verlet list for each particle
+	 * \param r_cut cut-off radius
+	 *
+	 */
+	void getVerlet(openfpm::vector<openfpm::vector<size_t>> & verlet, St r_cut)
+	{
+		// resize verlet to store the number of particles
+		verlet.resize(size_local());
+
+		// get the cell-list
+		auto cl = getCellList(r_cut);
+
+		// square of the cutting radius
+		St r_cut2 = r_cut*r_cut;
+
+		// iterate the particles
+	    auto it_p = this->getDomainIterator();
+	    while (it_p.isNext())
+	    {
+	    	// key
+	    	vect_dist_key_dx key = it_p.get();
+
+	    	// Get the position of the particles
+	    	Point<dim,St> p = this->template getPos<0>(key);
+
+	    	// Clear the neighborhood of the particle
+	    	verlet.get(key.getKey()).clear();
+
+	    	// Get the neighborhood of the particle
+	    	auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p));
+	    	while(NN.isNext())
+	    	{
+	    		auto nnp = NN.get();
+
+	    		// p != q
+	    		if (nnp == key.getKey())
+	    		{
+	    			++NN;
+	    			continue;
+	    		}
+
+	    		Point<dim,St> q = this->template getPos<0>(nnp);
+
+	    		if (p.distance2(q) < r_cut2)
+	    			verlet.get(key.getKey()).add(nnp);
+
+	    		// Next particle
+	    		++NN;
+	    	}
+
+	    	// next particle
+	    	++it_p;
+	    }
+	}
+
+	/*! \brief It returns the number of particles contained by the previous processors
+	 *
+	 * \warning It only works with the initial decomposition
+	 *
+	 * Given 1000 particles and 3 processors, you will get
+	 *
+	 * * Processor 0: 0
+	 * * Processor 1: 334
+	 * * Processor 2: 667
+	 *
+	 * \param np initial number of particles
+	 *
+	 */
+	size_t init_size_accum(size_t np)
+	{
+		size_t accum = 0;
+
+		// convert to a local number of elements
+		size_t p_np = np / v_cl.getProcessingUnits();
+
+		// Get non divisible part
+		size_t r = np % v_cl.getProcessingUnits();
+
+		accum = p_np * v_cl.getProcessUnitID();
+
+		// Distribute the remain particles
+		if (v_cl.getProcessUnitID() <= r)
+			accum += v_cl.getProcessUnitID();
+		else
+			accum += r;
+
+		return accum;
+	}
+
+	/*! \brief Get an iterator that traverse domain and ghost particles
 	 *
 	 * \return an iterator
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<Point<dim,St>>> getGhostIterator()
+	vector_dist_iterator getIterator()
+	{
+		return vector_dist_iterator(0,v_pos.size());
+	}
+
+	/*! \brief Get a grid Iterator
+	 *
+	 * Useful function to place particles on a grid or grid-like (grid + noise)
+	 *
+	 * \return a Grid iterator
+	 *
+	 */
+	inline grid_dist_id_iterator_dec<Decomposition> getGridIterator(const size_t (& sz)[dim])
 	{
-		return vector_dist_iterator<openfpm::vector<Point<dim,St>>>(v_pos,g_m);
+		size_t sz_g[dim];
+		grid_key_dx<dim> start;
+		grid_key_dx<dim> stop;
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			start.set_d(i,0);
+			if (dec.isPeriodic(i) == PERIODIC)
+			{
+				sz_g[i] = sz[i];
+				stop.set_d(i,sz_g[i]-2);
+			}
+			else
+			{
+				sz_g[i] = sz[i];
+				stop.set_d(i,sz_g[i]-1);
+			}
+		}
+
+		grid_dist_id_iterator_dec<Decomposition> it_dec(dec,sz_g,start,stop);
+		return it_dec;
 	}
 
-	/*! \brief Get the iterator across the properties of the particles
+	/*! \brief Get the iterator across the position of the ghost particles
 	 *
 	 * \return an iterator
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<prop>> getPropIterator()
+	vector_dist_iterator getGhostIterator()
 	{
-		return vector_dist_iterator<openfpm::vector<prop>>(v_prp);
+		return vector_dist_iterator(g_m,v_pos.size());
 	}
 
-	/*! \brief Get the iterator across the properties of the ghost particles
+
+	/*! \brief Get an iterator that traverse the particles in the domain
 	 *
 	 * \return an iterator
 	 *
 	 */
-	vector_dist_iterator<openfpm::vector<prop>> getGhostPropIterator()
+	vector_dist_iterator getDomainIterator()
 	{
-		return vector_dist_iterator<openfpm::vector<prop>>(v_prp,g_m);
+		return vector_dist_iterator(0,g_m);
 	}
 
 	/*! \brief Get the decomposition
@@ -756,7 +1249,35 @@ public:
 		std::string output = std::to_string(out + std::to_string(v_cl.getProcessUnitID()) + std::to_string(".csv"));
 
 		// Write the CSV
-		return csv_writer.write(output,v_pos.get(INTERNAL),v_prp.get(INTERNAL));
+		return csv_writer.write(output,v_pos,v_prp);
+	}
+
+	/*! \brief It returns the id of the structure in the allocation list
+	 *
+	 * \see print_alloc and SE_CLASS2
+	 *
+	 */
+	long int who()
+	{
+#ifdef SE_CLASS2
+		return check_whoami(this,8);
+#else
+		return -1;
+#endif
+	}
+
+	/*! \brief Get the Virtual Cluster machine
+	 *
+	 * \return the Virtual cluster machine
+	 *
+	 */
+
+	Vcluster & getVC()
+	{
+#ifdef SE_CLASS2
+		check_valid(this,8);
+#endif
+		return v_cl;
 	}
 };
 
diff --git a/src/Vector/vector_dist_iterator.hpp b/src/Vector/vector_dist_iterator.hpp
index 7ac4c34501cc158daf5ecce4805f0ef39cb38c93..795e4290b9c2d24067599d891cefe8f3842007e1 100644
--- a/src/Vector/vector_dist_iterator.hpp
+++ b/src/Vector/vector_dist_iterator.hpp
@@ -11,18 +11,13 @@
 #include "vector_dist_key.hpp"
 #include "VCluster.hpp"
 
-template<typename device_v>
 class vector_dist_iterator
 {
-	//! vector list counter
-	size_t v_c;
-
-	//! List of the grids we are going to iterate
-	Vcluster_object_array<device_v> & vList;
-
 	//! Actual iterator
 	size_t v_it;
 
+	size_t stop;
+
 	public:
 
 	/*! \brief Constructor of the distributed grid
@@ -31,11 +26,9 @@ class vector_dist_iterator
 	 * \param offset iterator starting point
 	 *
 	 */
-	vector_dist_iterator(Vcluster_object_array<device_v> & gk, size_t offset = 0)
-	:v_c(0),vList(gk),v_it(offset)
+	vector_dist_iterator(size_t start, size_t stop)
+	:v_it(start),stop(stop)
 	{
-		if ( offset >= vList.get(0).size() )
-			v_c++;
 	}
 
 	// Destructor
@@ -43,46 +36,16 @@ class vector_dist_iterator
 	{
 	}
 
-	/*! \brief operator=
-	 *
-	 * assign
-	 *
-	 */
-	vector_dist_iterator<device_v> & operator=(const vector_dist_iterator<device_v> & vdi)
-	{
-		v_c = vdi.v_c;
-		vList = vdi.vList;
-		v_it = vdi.v_it;
-
-		return *this;
-	}
-
 	/*! \brief Get the next element
 	 *
 	 * \return the next grid_key
 	 *
 	 */
 
-	vector_dist_iterator<device_v> operator++()
+	vector_dist_iterator operator++()
 	{
 		++v_it;
 
-		// check if a_it is at the end
-
-		if (v_it < vList.get(v_c).size())
-			return *this;
-		else
-		{
-			// switch to the new grid
-
-			v_c++;
-
-			// get the next grid iterator
-
-			if (v_c < vList.size())
-				v_it = 0;
-		}
-
 		return *this;
 	}
 
@@ -96,7 +59,7 @@ class vector_dist_iterator
 	{
 		// If there are no other grid stop
 
-		if (v_c >= vList.size())
+		if (v_it >= stop)
 			return false;
 
 		return true;
@@ -109,7 +72,7 @@ class vector_dist_iterator
 	 */
 	vect_dist_key_dx get()
 	{
-		return vect_dist_key_dx(v_c,v_it);
+		return vect_dist_key_dx(v_it);
 	}
 };
 
diff --git a/src/Vector/vector_dist_key.hpp b/src/Vector/vector_dist_key.hpp
index 227ee708528592772d344dc5dc3e7eab7e23e9c3..80f77580f6b0306c392ecde39a385e8b79d6b0a8 100644
--- a/src/Vector/vector_dist_key.hpp
+++ b/src/Vector/vector_dist_key.hpp
@@ -18,24 +18,20 @@
 
 class vect_dist_key_dx
 {
-	//! grid list counter
-
-	size_t v_c;
-
 	//! Local grid iterator
 
 	size_t key;
 
 public:
 
-	/*! \brief Get the local grid
+	/*! \brief set the key
 	 *
-	 * \return the id of the local grid
+	 * \return the local key
 	 *
 	 */
-	size_t getSub()
+	inline void setKey(size_t key)
 	{
-		return v_c;
+		this->key = key;
 	}
 
 	/*! \brief Get the key
@@ -43,7 +39,7 @@ public:
 	 * \return the local key
 	 *
 	 */
-	size_t getKey()
+	inline size_t getKey()
 	{
 		return key;
 	}
@@ -60,8 +56,12 @@ public:
 		return ts.str();
 	}
 
-	vect_dist_key_dx(int v_c, size_t key)
-	:v_c(v_c),key(key)
+	inline vect_dist_key_dx(size_t key)
+	:key(key)
+	{
+	}
+
+	inline vect_dist_key_dx()
 	{
 	}
 };
diff --git a/src/Vector/vector_dist_ofb.hpp b/src/Vector/vector_dist_ofb.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb9923766e43ee7be8c76dab98fc3166d506ac94
--- /dev/null
+++ b/src/Vector/vector_dist_ofb.hpp
@@ -0,0 +1,78 @@
+/*
+ * vector_dist_ofb.hpp
+ *
+ *  Created on: Jan 13, 2016
+ *      Author: i-bird
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_OFB_HPP_
+#define SRC_VECTOR_VECTOR_DIST_OFB_HPP_
+
+/*! \brief Out of bound policy it detect out of bound particles and decide what to do
+ *
+ */
+
+
+struct KillParticle
+{
+	/*! \brief It decides what to do when a particle is out of bound
+	 *
+	 * \param pp_id particle id
+	 * \param p_id processor id
+	 * \return (size_t)-1, the particle is killed
+	 */
+	static size_t out(size_t pp_id, size_t p_id)
+	{
+		return -1;
+	}
+};
+
+struct KillParticleWithWarning
+{
+	/*! \brief It decides what to do when a particle is out of bound
+	 *
+	 * \param p_id processor id
+	 * \return (size_t)-1, the particle is killed after a warning is printed
+	 *
+	 */
+	static size_t out(size_t p_id)
+	{
+		std::cerr << "Warning: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected ";
+
+		return -1;
+	}
+};
+
+struct Nothing
+{
+	/*! \brief It decides what to do when a particle is out of bound
+	 *
+	 * \param p_id processor id
+	 * \return p_id unchanged, the particle is kept
+	 *
+	 */
+	static size_t out(size_t p_id)
+	{
+		return p_id;
+	}
+};
+
+struct Error
+{
+	/*! \brief It decides what to do when a particle is out of bound
+	 *
+	 * \param p_id processor id
+	 * \return never returns, the program exits with an error
+	 *
+	 */
+	static size_t out(size_t p_id)
+	{
+		std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " out of bound particle detected ";
+
+		exit(-1);
+
+		return -1;
+	}
+};
+
+#endif /* SRC_VECTOR_VECTOR_DIST_OFB_HPP_ */
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index b3e45434f31aa5808506c2c6ef0ec0343687ba6f..8a5c813cbe7959adf4e5db810a512b10de223b44 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -11,8 +11,93 @@
 #include <random>
 #include "Vector/vector_dist.hpp"
 
+/*! \brief Count the total number of particles
+ *
+ * \param vd distributed vector
+ * \param bc boundary conditions
+ *
+ */
+template<unsigned int dim> size_t total_n_part_lc(vector_dist<dim,float, Point_test<float>, CartDecomposition<dim,float> > & vd, size_t (& bc)[dim])
+{
+	typedef Point<dim,float> s;
+
+	Vcluster & v_cl = vd.getVC();
+	auto it2 = vd.getDomainIterator();
+	const CartDecomposition<dim,float> & ct = vd.getDecomposition();
+
+	bool noOut = true;
+
+	size_t cnt = 0;
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		noOut &= ct.isLocal(vd.template getPos<s::x>(key));
+
+		cnt++;
+
+		++it2;
+	}
+
+	BOOST_REQUIRE_EQUAL(noOut,true);
+
+	//
+	v_cl.sum(cnt);
+	v_cl.execute();
+
+	return cnt;
+}
+
+/*! \brief Count local and non local
+ *
+ * \param vd distributed vector
+ * \param it iterator
+ * \param bc boundary conditions
+ * \param box domain box
+ * \param dom_ext domain + ghost box
+ * \param l_cnt local particles counter
+ * \param nl_cnt non local particles counter
+ * \param n_out out of domain + ghost particles counter
+ *
+ */
+template<unsigned int dim> inline void count_local_n_local(vector_dist<dim,float, Point_test<float>, CartDecomposition<dim,float> > & vd, vector_dist_iterator & it, size_t (& bc)[dim] , Box<dim,float> & box, Box<dim,float> & dom_ext, size_t & l_cnt, size_t & nl_cnt, size_t & n_out)
+{
+	typedef Point<dim,float> s;
+	const CartDecomposition<dim,float> & ct = vd.getDecomposition();
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+		// Check if it is in the domain
+		if (box.isInsideNP(vd.template getPos<s::x>(key)) == true)
+		{
+			// Check if local
+			if (ct.isLocalBC(vd.template getPos<s::x>(key),bc) == true)
+				l_cnt++;
+			else
+				nl_cnt++;
+		}
+		else
+		{
+			nl_cnt++;
+		}
+
+		// Check that all particles are inside the Domain + Ghost part
+		if (dom_ext.isInside(vd.template getPos<s::x>(key)) == false)
+				n_out++;
+
+		++it;
+	}
+}
+
 BOOST_AUTO_TEST_SUITE( vector_dist_test )
 
+void print_test(std::string test, size_t sz)
+{
+	if (global_v_cluster->getProcessUnitID() == 0)
+		std::cout << test << " " << sz << "\n";
+}
+
 BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 {
 	// Communication object
@@ -60,8 +145,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	// set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
+	Ghost<2,float> g(spacing.get(0) - spacing.get(0) * 0.0001);
 
+	// Boundary conditions
+	size_t bc[2]={NON_PERIODIC,NON_PERIODIC};
+
 	// Vector of particles
-	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(g_info.size(),box,g);
+	vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(g_info.size(),box,bc,g);
 
 	// size_t
 	size_t cobj = 0;
@@ -98,11 +186,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	// redistribute the particles according to the decomposition
 	vd.map();
 
-	v_it = vd.getIterator();
+	auto v_it2 = vd.getIterator();
 
-	while (v_it.isNext())
+	while (v_it2.isNext())
 	{
-		auto key = v_it.get();
+		auto key = v_it2.get();
 
 		// fill with the processor ID where these particle live
 		vd.template getProp<p::s>(key) = vd.getPos<s::x>(key)[0] + vd.getPos<s::x>(key)[1] * 16;
@@ -110,7 +198,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 		vd.template getProp<p::v>(key)[1] = v_cl.getProcessUnitID();
 		vd.template getProp<p::v>(key)[2] = v_cl.getProcessUnitID();
 
-		++v_it;
+		++v_it2;
 	}
 
 	// do a ghost get
@@ -127,6 +215,8 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 	// Get the ghost iterator
 	auto g_it = vd.getGhostIterator();
 
+	size_t n_part = 0;
+
 	// Check if the ghost particles contain the correct information
 	while (g_it.isNext())
 	{
@@ -139,10 +229,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 		size_t b = 0;
 		size_t lb = 0;
 
-		// check if the received data is in one of the ghost boxes
+		// check if the received data are in one of the ghost boxes
 		for ( ; b < dec.getNEGhostBox() ; b++)
 		{
-			if (dec.getEGhostBox(b).isInside(vd.getPos<s::x>(key)) == true)
+			if (dec.getEGhostBox(b).isInside(vd.getPos<s::x>(key)) == true )
 			{
 				is_in = true;
 
@@ -156,9 +246,12 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 		// Check that the particle come from the correct processor
 		BOOST_REQUIRE_EQUAL(vd.getProp<p::v>(key)[0],dec.getEGhostBoxProcessor(lb));
 
+		n_part++;
 		++g_it;
 	}
 
+	BOOST_REQUIRE(n_part != 0);
+
     CellDecomposer_sm<2,float> cd(SpaceBox<2,float>(box),g_div,0);
 
 	for (size_t i = 0 ; i < vb.size() ; i++)
@@ -166,7 +259,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost )
 		// Calculate how many particle should be in the box
 		size_t n_point = cd.getGridPoints(dec.getEGhostBox(i)).getVolumeKey();
 
-		BOOST_REQUIRE_EQUAL(n_point,vb.get(i));
+		if (n_point != vb.get(i))
+		{
+			std::cout << n_point << "  " << dec.getEGhostBoxProcessor(i) << "  " << v_cl.getProcessUnitID() << dec.getEGhostBox(i).toString() << "\n";
+		}
+		//BOOST_REQUIRE_EQUAL(n_point,vb.get(i));
 	}
 }
 
@@ -202,9 +299,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d )
     std::default_random_engine eg;
     std::uniform_real_distribution<float> ud(0.0f, 1.0f);
 
-    long int k = 4096 * v_cl.getProcessingUnits();
+    long int k = 524288 * v_cl.getProcessingUnits();
 
-	long int big_step = k / 30;
+	long int big_step = k / 4;
 	big_step = (big_step == 0)?1:big_step;
 
 	print_test_v( "Testing 2D vector k<=",k);
@@ -217,7 +314,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d )
 		//! [Create a vector of random elements on each processor 2D]
 
 		Box<2,float> box({0.0,0.0},{1.0,1.0});
-		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box);
+
+		// Boundary conditions
+		size_t bc[2]={NON_PERIODIC,NON_PERIODIC};
+
+		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box,bc,Ghost<2,float>(0.0));
 
 		auto it = vd.getIterator();
 
@@ -238,24 +339,24 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_2d )
 		// Check if we have all the local particles
 		size_t cnt = 0;
 		const CartDecomposition<2,float> & ct = vd.getDecomposition();
-		it = vd.getIterator();
+		auto it2 = vd.getIterator();
 
-		while (it.isNext())
+		while (it2.isNext())
 		{
-			auto key = it.get();
+			auto key = it2.get();
 
 			// Check if local
 			BOOST_REQUIRE_EQUAL(ct.isLocal(vd.template getPos<s::x>(key)),true);
 
 			cnt++;
 
-			++it;
+			++it2;
 		}
 
 		//
 		v_cl.sum(cnt);
 		v_cl.execute();
-		BOOST_REQUIRE_EQUAL(cnt,k);
+		BOOST_REQUIRE_EQUAL((long int)cnt,k);
 	}
 }
 
@@ -271,9 +372,9 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d )
     std::default_random_engine eg;
     std::uniform_real_distribution<float> ud(0.0f, 1.0f);
 
-    long int k = 4096 * v_cl.getProcessingUnits();
+    long int k = 524288 * v_cl.getProcessingUnits();
 
-	long int big_step = k / 30;
+	long int big_step = k / 4;
 	big_step = (big_step == 0)?1:big_step;
 
 	print_test_v( "Testing 3D vector k<=",k);
@@ -286,7 +387,11 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d )
 		//! [Create a vector of random elements on each processor 3D]
 
 		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
-		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box);
+
+		// Boundary conditions
+		size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,Ghost<3,float>(0.0));
 
 		auto it = vd.getIterator();
 
@@ -308,24 +413,745 @@ BOOST_AUTO_TEST_CASE( vector_dist_iterator_test_use_3d )
 		// Check if we have all the local particles
 		size_t cnt = 0;
 		const CartDecomposition<3,float> & ct = vd.getDecomposition();
-		it = vd.getIterator();
+		auto it2 = vd.getIterator();
 
-		while (it.isNext())
+		while (it2.isNext())
 		{
-			auto key = it.get();
+			auto key = it2.get();
 
 			// Check if local
 			BOOST_REQUIRE_EQUAL(ct.isLocal(vd.template getPos<s::x>(key)),true);
 
 			cnt++;
 
-			++it;
+			++it2;
 		}
 
 		//
 		v_cl.sum(cnt);
 		v_cl.execute();
-		BOOST_REQUIRE_EQUAL(cnt,k);
+		BOOST_REQUIRE_EQUAL(cnt,(size_t)k);
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_2d )
+{
+	typedef Point<2,float> s;
+
+	Vcluster & v_cl = *global_v_cluster;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+    long int k = 524288 * v_cl.getProcessingUnits();
+
+	long int big_step = k / 4;
+	big_step = (big_step == 0)?1:big_step;
+
+	print_test_v( "Testing 2D periodic vector k<=",k);
+
+	// 2D test
+	for ( ; k >= 2 ; k-= decrement(k,big_step) )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 2D periodic vector k=" << k );
+
+		//! [Create a vector of random elements on each processor 2D]
+
+		Box<2,float> box({0.0,0.0},{1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[2]={PERIODIC,PERIODIC};
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// ghost
+		Ghost<2,float> ghost(0.01 / factor);
+
+		// ghost2 (a little bigger because of round off error)
+		Ghost<2,float> ghost2(0.05001 / factor);
+
+		// Distributed vector
+		vector_dist<2,float, Point_test<float>, CartDecomposition<2,float> > vd(k,box,bc,ghost);
+
+		auto it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// sync the ghost, only the property zero
+		vd.ghost_get<0>();
+
+		//! [Create a vector of random elements on each processor 2D]
+
+		// Domain + ghost box
+		Box<2,float> dom_ext = box;
+		dom_ext.enlarge(ghost2);
+
+		// Iterate on all particles domain + ghost
+		size_t l_cnt = 0;
+		size_t nl_cnt = 0;
+		size_t n_out = 0;
+
+
+		auto it2 = vd.getIterator();
+		count_local_n_local(vd,it2,bc,box,dom_ext,l_cnt,nl_cnt,n_out);
+
+		// No particles should be out of domain + ghost
+		BOOST_REQUIRE_EQUAL(n_out,0ul);
+
+		// Ghost must be populated because we synchronized them
+		if (k > 524288)
+		{
+			BOOST_REQUIRE(nl_cnt != 0);
+			BOOST_REQUIRE(l_cnt > nl_cnt);
+		}
+
+		// Sum all the particles inside the domain
+		v_cl.sum(l_cnt);
+		v_cl.execute();
+
+		// count that they are equal to the initial total number
+		BOOST_REQUIRE_EQUAL((long int)l_cnt,k);
+
+		l_cnt = 0;
+		nl_cnt = 0;
+
+		// Iterate only on the ghost particles
+		auto itg = vd.getGhostIterator();
+		count_local_n_local(vd,itg,bc,box,dom_ext,l_cnt,nl_cnt,n_out);
+
+		// No particle on the ghost must be inside the domain
+		BOOST_REQUIRE_EQUAL(l_cnt,0ul);
+
+		// Ghost must be populated
+		if (k > 524288)
+		{
+			BOOST_REQUIRE(nl_cnt != 0);
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_use_3d )
+{
+	typedef Point<3,float> s;
+
+	Vcluster & v_cl = *global_v_cluster;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+    long int k = 524288 * v_cl.getProcessingUnits();
+
+	long int big_step = k / 4;
+	big_step = (big_step == 0)?1:big_step;
+
+	print_test_v( "Testing 3D periodic vector k<=",k);
+
+	// 3D test
+	for ( ; k >= 2 ; k-= decrement(k,big_step) )
+	{
+		BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector k=" << k );
+
+		//! [Create a vector of random elements on each processor 3D]
+
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// ghost
+		Ghost<3,float> ghost(0.05 / factor);
+
+		// ghost2 (a little bigger because of round off error)
+		Ghost<3,float> ghost2(0.05001 / factor);
+
+		// Distributed vector
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,ghost);
+
+		auto it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+			vd.template getPos<s::x>(key)[2] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// sync the ghost
+		vd.ghost_get<0>();
+
+		//! [Create a vector of random elements on each processor 3D]
+
+		// Domain + ghost
+		Box<3,float> dom_ext = box;
+		dom_ext.enlarge(ghost2);
+
+		// Iterate on all particles domain + ghost
+		size_t l_cnt = 0;
+		size_t nl_cnt = 0;
+		size_t n_out = 0;
+
+		auto it2 = vd.getIterator();
+		count_local_n_local(vd,it2,bc,box,dom_ext,l_cnt,nl_cnt,n_out);
+
+		// No particles should be out of domain + ghost
+		BOOST_REQUIRE_EQUAL(n_out,0ul);
+
+		// Ghost must be populated because we synchronized them
+		if (k > 524288)
+		{
+			BOOST_REQUIRE(nl_cnt != 0);
+			BOOST_REQUIRE(l_cnt > nl_cnt);
+		}
+
+		// Sum all the particles inside the domain
+		v_cl.sum(l_cnt);
+		v_cl.execute();
+		BOOST_REQUIRE_EQUAL(l_cnt,(size_t)k);
+
+		l_cnt = 0;
+		nl_cnt = 0;
+
+		// Iterate only on the ghost particles
+		auto itg = vd.getGhostIterator();
+		count_local_n_local(vd,itg,bc,box,dom_ext,l_cnt,nl_cnt,n_out);
+
+		// No particle on the ghost must be inside the domain
+		BOOST_REQUIRE_EQUAL(l_cnt,0ul);
+
+		// Ghost must be populated
+		if (k > 524288)
+		{
+			BOOST_REQUIRE(nl_cnt != 0);
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_random_walk )
+{
+	typedef Point<3,float> s;
+
+	Vcluster & v_cl = *global_v_cluster;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+	
+	size_t nsz[] = {0,32,4};
+	nsz[0] = 65536 * v_cl.getProcessingUnits();
+
+	print_test_v( "Testing 3D random walk vector k<=",nsz[0]);
+
+	// 3D test
+	for (size_t i = 0 ; i < 3 ; i++ )
+	{
+		size_t k = nsz[i];
+
+		BOOST_TEST_CHECKPOINT( "Testing 3D random walk vector k=" << k );
+
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// ghost
+		Ghost<3,float> ghost(0.01 / factor);
+
+		// Distributed vector
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,ghost);
+
+		auto it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+			vd.template getPos<s::x>(key)[2] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// 10 step random walk
+
+		for (size_t j = 0 ; j < 4 ; j++)
+		{
+			auto it = vd.getDomainIterator();
+
+			while (it.isNext())
+			{
+				auto key = it.get();
+
+				vd.template getPos<s::x>(key)[0] += 0.02 * ud(eg);
+				vd.template getPos<s::x>(key)[1] += 0.02 * ud(eg);
+				vd.template getPos<s::x>(key)[2] += 0.02 * ud(eg);
+
+				++it;
+			}
+
+			vd.map();
+
+			vd.ghost_get<0>();
+
+			// Count the local particles and check that the total number is consistent
+			size_t cnt = total_n_part_lc(vd,bc);
+
+			BOOST_REQUIRE_EQUAL((size_t)k,cnt);
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_periodic_map )
+{
+	typedef Point<3,float> s;
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+	// factor
+	float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+	// ghost
+	Ghost<3,float> ghost(0.05 / factor);
+
+	// Distributed vector
+	vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(1,box,bc,ghost);
+
+	// put particles at 1.0, check that they go to 0.0 (periodic wrap)
+
+	auto it = vd.getIterator();
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+
+		vd.template getPos<s::x>(key)[0] = 1.0;
+		vd.template getPos<s::x>(key)[1] = 1.0;
+		vd.template getPos<s::x>(key)[2] = 1.0;
+
+		++it;
+	}
+
+	vd.map();
+
+	auto it2 = vd.getIterator();
+
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		float f = vd.template getPos<s::x>(key)[0];
+		BOOST_REQUIRE_EQUAL(f, 0.0);
+		f = vd.template getPos<s::x>(key)[1];
+		BOOST_REQUIRE_EQUAL(f, 0.0);
+		f = vd.template getPos<s::x>(key)[2];
+		BOOST_REQUIRE_EQUAL(f, 0.0);
+
+		++it2;
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_not_periodic_map )
+{
+	typedef Point<3,float> s;
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
+	// factor
+	float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+	// ghost
+	Ghost<3,float> ghost(0.05 / factor);
+
+	// Distributed vector
+	vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(1,box,bc,ghost);
+
+	// put particles at 1.0, check that they stay at 1.0 (no periodic wrap)
+
+	auto it = vd.getIterator();
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+
+		vd.template getPos<s::x>(key)[0] = 1.0;
+		vd.template getPos<s::x>(key)[1] = 1.0;
+		vd.template getPos<s::x>(key)[2] = 1.0;
+
+		++it;
+	}
+
+	vd.map();
+
+	auto it2 = vd.getIterator();
+
+	while (it2.isNext())
+	{
+		auto key = it2.get();
+
+		float f = vd.template getPos<s::x>(key)[0];
+		BOOST_REQUIRE_EQUAL(f, 1.0);
+		f = vd.template getPos<s::x>(key)[1];
+		BOOST_REQUIRE_EQUAL(f, 1.0);
+		f = vd.template getPos<s::x>(key)[2];
+		BOOST_REQUIRE_EQUAL(f, 1.0);
+
+		++it2;
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_out_of_bound_policy )
+{
+	Vcluster & v_cl = *global_v_cluster;
+
+	if (v_cl.getProcessingUnits() > 8)
+		return;
+
+	typedef Point<3,float> s;
+
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions
+	size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
+	// factor
+	float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+	// ghost
+	Ghost<3,float> ghost(0.05 / factor);
+
+	// Distributed vector
+	vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(100,box,bc,ghost);
+
+	// put particles out of the boundary, they must be detected and killed
+
+	auto it = vd.getIterator();
+
+	size_t cnt = 0;
+
+	while (it.isNext())
+	{
+		auto key = it.get();
+
+		if (cnt < 1)
+		{
+			vd.template getPos<s::x>(key)[0] = -0.06;
+			vd.template getPos<s::x>(key)[1] = -0.06;
+			vd.template getPos<s::x>(key)[2] = -0.06;
+		}
+		else
+		{
+			vd.template getPos<s::x>(key)[0] = 0.06;
+			vd.template getPos<s::x>(key)[1] = 0.06;
+			vd.template getPos<s::x>(key)[2] = 0.06;
+		}
+
+		cnt++;
+		++it;
+	}
+
+	vd.map();
+
+	// Particles out of the boundary are killed
+
+	size_t cnt_l = vd.size_local();
+
+	v_cl.sum(cnt_l);
+	v_cl.execute();
+
+	BOOST_REQUIRE_EQUAL(cnt_l,100-v_cl.getProcessingUnits());
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_periodic_test_interacting_particles )
+{
+
+	typedef Point<3,float> s;
+
+	Vcluster & v_cl = *global_v_cluster;
+
+	if (v_cl.getProcessingUnits() > 8)
+		return;
+
+    // set the seed
+	// create the random generator engine
+	std::srand(v_cl.getProcessUnitID());
+    std::default_random_engine eg;
+    std::uniform_real_distribution<float> ud(0.0f, 1.0f);
+
+	size_t nsz[] = {0,32,4};
+	nsz[0] = 65536 * v_cl.getProcessingUnits();
+
+	print_test_v("Testing 3D random walk interacting particles vector k=", nsz[0]);
+
+	// 3D test
+	for (size_t i = 0 ; i < 3 ; i++ )
+	{
+		size_t k = nsz[i];
+
+		BOOST_TEST_CHECKPOINT( "Testing 3D random walk interacting particles vector k=" << k );
+
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// factor
+		float factor = pow(global_v_cluster->getProcessingUnits()/2.0f,1.0f/3.0f);
+
+		// interaction radius
+		float r_cut = 0.01 / factor;
+
+		// ghost
+		Ghost<3,float> ghost(r_cut);
+
+		// Distributed vector
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(k,box,bc,ghost);
+
+		auto it = vd.getIterator();
+
+		while (it.isNext())
+		{
+			auto key = it.get();
+
+			vd.template getPos<s::x>(key)[0] = ud(eg);
+			vd.template getPos<s::x>(key)[1] = ud(eg);
+			vd.template getPos<s::x>(key)[2] = ud(eg);
+
+			++it;
+		}
+
+		vd.map();
+
+		// 4 step random walk
+
+		for (size_t j = 0 ; j < 4 ; j++)
+		{
+			auto it = vd.getDomainIterator();
+
+			// Move the particles
+
+			while (it.isNext())
+			{
+				auto key = it.get();
+
+				vd.template getPos<s::x>(key)[0] += 0.02 * ud(eg);
+				vd.template getPos<s::x>(key)[1] += 0.02 * ud(eg);
+				vd.template getPos<s::x>(key)[2] += 0.02 * ud(eg);
+
+				++it;
+			}
+
+			vd.map();
+
+			vd.ghost_get<0>();
+
+			// get the cell list with a cutoff radius
+
+			bool error = false;
+
+			auto NN = vd.getCellList(0.01 / factor);
+
+			// iterate across the domain particle
+
+			auto it2 = vd.getDomainIterator();
+
+			while (it2.isNext())
+			{
+				auto p = it2.get();
+
+				Point<3,float> xp = vd.getPos<0>(p);
+
+				auto Np = NN.getIterator(NN.getCell(vd.getPos<0>(p)));
+
+				while (Np.isNext())
+				{
+					auto q = Np.get();
+
+					// repulsive
+
+					Point<3,float> xq = vd.getPos<0>(q);
+					Point<3,float> f = (xp - xq);
+
+					float distance = f.norm();
+
+					// Particle should be inside 2 * r_cut range
+
+					if (distance > 2*r_cut*sqrt(2))
+						error = true;
+
+					++Np;
+				}
+
+				++it2;
+			}
+
+			// Error
+
+			BOOST_REQUIRE_EQUAL(error,false);
+
+			// Count the local particles and check that the total number is consistent
+			size_t cnt = total_n_part_lc(vd,bc);
+
+			BOOST_REQUIRE_EQUAL((size_t)k,cnt);
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_cell_verlet_test )
+{
+	long int k = 64*64*64*global_v_cluster->getProcessingUnits();
+	k = std::pow(k, 1/3.);
+
+	long int big_step = k / 30;
+	big_step = (big_step == 0)?1:big_step;
+	long int small_step = 21;
+
+	print_test( "Testing cell and verlet list k<=",k);
+
+	// 3D test
+	for ( ; k > 8*big_step ; k-= (k > 2*big_step)?big_step:small_step )
+	{
+		typedef Point<3,float> s;
+
+		Vcluster & v_cl = *global_v_cluster;
+
+		const size_t Ng = k;
+
+		// we create a 128x128x128 Grid iterator
+		size_t sz[3] = {Ng,Ng,Ng};
+
+		Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+		// Boundary conditions
+		size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+		// ghost
+		Ghost<3,float> ghost(1.0/(Ng-2));
+
+		// Distributed vector
+		vector_dist<3,float, Point_test<float>, CartDecomposition<3,float> > vd(0,box,bc,ghost);
+
+		// Put particles on a grid creating a Grid iterator
+		auto it = vd.getGridIterator(sz);
+
+		while (it.isNext())
+		{
+			vd.add();
+
+			auto key = it.get();
+
+			vd.template getLastPos<s::x>()[0] = key.get(0) * it.getSpacing(0);
+			vd.template getLastPos<s::x>()[1] = key.get(1) * it.getSpacing(1);
+			vd.template getLastPos<s::x>()[2] = key.get(2) * it.getSpacing(2);
+
+			++it;
+		}
+
+		// distribute particles and sync ghost
+		vd.map();
+
+		// Check that the sum of all the particles is the grid size
+		size_t total = vd.size_local();
+		v_cl.sum(total);
+		v_cl.execute();
+
+		BOOST_REQUIRE_EQUAL(total,(Ng-1) * (Ng-1) * (Ng-1));
+
+		vd.ghost_get<0>();
+
+		// calculate the distance of the first, second and third neighborhood particle
+		// Consider that they are on a regular grid
+
+		float spacing = it.getSpacing(0);
+		float first_dist = spacing;
+		float second_dist = sqrt(2.0*spacing*spacing);
+		float third_dist = sqrt(3.0 * spacing*spacing);
+
+		// add a 5% to dist
+
+		first_dist += first_dist * 0.05;
+		second_dist += second_dist * 0.05;
+		third_dist += third_dist * 0.05;
+
+		// Create a verlet list for each particle
+
+		openfpm::vector<openfpm::vector<size_t>> verlet;
+
+		vd.getVerlet(verlet,third_dist);
+
+		bool correct = true;
+
+		// for each particle
+		for (size_t i = 0 ; i < verlet.size() ; i++)
+		{
+			// first NN
+			size_t first_NN = 0;
+			size_t second_NN = 0;
+			size_t third_NN = 0;
+
+			Point<3,float> p = vd.getPos<0>(i);
+
+			// for each neighborhood particle
+			for (size_t j = 0 ; j < verlet.get(i).size() ; j++)
+			{
+				auto & NN = verlet.get(i);
+
+				Point<3,float> q = vd.getPos<0>(NN.get(j));
+
+				float dist = p.distance(q);
+
+				if (dist <= first_dist)
+					first_NN++;
+				else if (dist <= second_dist)
+					second_NN++;
+				else
+					third_NN++;
+			}
+
+			correct &= (first_NN == 6);
+			correct &= (second_NN == 12);
+			correct &= (third_NN == 8);
+		}
+
+		BOOST_REQUIRE_EQUAL(correct,true);
 	}
 }
 
diff --git a/src/dec_optimizer.hpp b/src/dec_optimizer.hpp
index dc65243a41fbbc32323531772accf94b4eb95346..20604488016616d487839f1cedd1e59ec563e0a3 100644
--- a/src/dec_optimizer.hpp
+++ b/src/dec_optimizer.hpp
@@ -1,6 +1,8 @@
 #ifndef DEC_OPTIMIZER_HPP
 #define DEC_OPTIMIZER_HPP
 
+#include "Grid/iterators/grid_key_dx_iterator_sub.hpp"
+
 /*! \brief this class represent a wavefront of dimension dim
  *
  * \dim Dimensionality of the wavefront (dimensionality of the space
@@ -111,6 +113,7 @@ private:
 	 * \param v_w wavefronts
 	 * \param w_comb wavefront expansion combinations
 	 * \param d direction of expansion
+	 *
 	 *
 	 */
 	void expand_one_wf(openfpm::vector<wavefront<dim>> & v_w, std::vector<comb<dim>> & w_comb , size_t d)
@@ -234,9 +237,10 @@ private:
 	 * \param w_comb hyper-cube combinations
 	 * \param p_id processor id
 	 * \param box_nn_processor list of neighborhood processors for the box
+	 * \param bc Boundary conditions
 	 *
 	 */
-	template<unsigned int p_sub, unsigned int p_id> void add_to_queue(openfpm::vector<size_t> & domains, openfpm::vector<wavefront<dim>> & v_w, Graph & graph,  std::vector<comb<dim>> & w_comb, long int pr_id, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor)
+	template<unsigned int p_sub, unsigned int p_id> void add_to_queue(openfpm::vector<size_t> & domains, openfpm::vector<wavefront<dim>> & v_w, Graph & graph,  std::vector<comb<dim>> & w_comb, long int pr_id, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t(& bc)[dim])
 	{
 		// create a new queue
 		openfpm::vector<size_t> domains_new;
@@ -268,7 +272,7 @@ private:
 		for (size_t d = 0 ; d < v_w.size() ; d++)
 		{
 			// Create a sub-grid iterator
-			grid_key_dx_iterator_sub<dim,do_not_print_warning_on_adjustment<dim>> g_sub(gh,v_w.template get<wavefront<dim>::start>(d),v_w.template get<wavefront<dim>::stop>(d));
+			grid_key_dx_iterator_sub_bc<dim,do_not_print_warning_on_adjustment<dim>> g_sub(gh,v_w.template get<wavefront<dim>::start>(d),v_w.template get<wavefront<dim>::stop>(d),bc);
 
 			// iterate through all grid points
 
@@ -290,12 +294,13 @@ private:
 					box_nn_processor.get(box_nn_processor.size()-1).add(pp_id);
 				}
 
+				// if the sub-sub-domain is not assigned
 				if (pid < 0)
 				{
-					// ... and the p_id different from -1
+					// ... and we are not processing the full graph
 					if (pr_id != -1)
 					{
-						// ... and the processor id of the sub-sub-domain match p_id, add to the queue
+						// ... and the processor id of the sub-sub-domain match the part we are processing, add to the queue
 
 						if ( pr_id == pp_id)
 							domains_new.add(gh.LinId(gk));
@@ -561,7 +566,7 @@ public:
 	 * \param graph we are processing
 	 *
 	 */
-	template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph)
+	template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph, const size_t (& bc)[dim])
 	{
 		// temporal vector
 		openfpm::vector<Box<dim,size_t>> tmp;
@@ -570,7 +575,7 @@ public:
 		openfpm::vector< openfpm::vector<size_t> > box_nn_processor;
 
 		// optimize
-		optimize<p_sub,p_id>(start_p,graph,-1,tmp, box_nn_processor);
+		optimize<p_sub,p_id>(start_p,graph,-1,tmp, box_nn_processor,bc);
 	}
 
 	/*! \brief optimize the graph
@@ -587,13 +592,13 @@ public:
 	 * \param list of sub-domain boxes
 	 *
 	 */
-	template <unsigned int p_sub, unsigned int p_id> void optimize(Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor)
+	template <unsigned int p_sub, unsigned int p_id> void optimize(Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t (& bc)[dim])
 	{
 		// search for the first seed
 		grid_key_dx<dim> key_seed = search_first_seed<p_id>(graph,pr_id);
 
 		// optimize
-		optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor);
+		optimize<p_sub,p_id>(key_seed,graph,pr_id,lb,box_nn_processor,bc);
 	}
 
 	/*! \brief optimize the graph
@@ -611,9 +616,10 @@ public:
 	 * \param p_id Processor id (if p_id == -1 the optimization is done for all the processors)
 	 * \param list of sub-domain boxes produced by the algorithm
 	 * \param box_nn_processor for each box it list all the neighborhood processor
+	 * \param bc Boundary condition
 	 *
 	 */
-	template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor )
+	template <unsigned int p_sub, unsigned int p_id> void optimize(grid_key_dx<dim> & start_p, Graph & graph, long int pr_id, openfpm::vector<Box<dim,size_t>> & lb, openfpm::vector< openfpm::vector<size_t> > & box_nn_processor, const size_t (& bc)[dim])
 	{
 		// sub-domain id
 		size_t sub_id =  0;
@@ -660,7 +666,7 @@ public:
 			fill_domain<p_sub>(graph,box,sub_id);
 
 			// add the surrounding sub-domain to the queue
-			add_to_queue<p_sub,p_id>(v_q,v_w,graph,w_comb,pr_id,box_nn_processor);
+			add_to_queue<p_sub,p_id>(v_q,v_w,graph,w_comb,pr_id,box_nn_processor,bc);
 
 			// increment the sub_id
 			sub_id++;
diff --git a/src/dec_optimizer_unit_test.hpp b/src/dec_optimizer_unit_test.hpp
index bcfbc4a8e428724643a7acd9c239fa6443707193..7a2a8e8e3e7dc9fb6b15dfeafa3decd0cda47671 100644
--- a/src/dec_optimizer_unit_test.hpp
+++ b/src/dec_optimizer_unit_test.hpp
@@ -11,7 +11,7 @@
 
 #include "Graph/CartesianGraphFactory.hpp"
 #include "Graph/map_graph.hpp"
-#include "metis_util.hpp"
+#include "Decomposition/Distribution/metis_util.hpp"
 #include "dec_optimizer.hpp"
 
 
@@ -20,7 +20,7 @@
 
 BOOST_AUTO_TEST_SUITE( dec_optimizer_test )
 
-BOOST_AUTO_TEST_CASE( dec_optimizer_test_use)
+BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_np)
 {
 	CartesianGraphFactory<3,Graph_CSR<nm_v,nm_e>> g_factory;
 	CartesianGraphFactory<3,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
@@ -31,11 +31,14 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use)
 	// Box
 	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
 
+	// Boundary conditions, non periodic
+	size_t bc[] = {NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
+
 	// Graph to decompose
-	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,float,2,0,1,2>(sz,box);
+	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,NO_VERTEX_ID,float,2,0,1,2>(sz,box,bc);
 
 	// Processor graph
-	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,float,2>(sz,box);
+	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,NO_VERTEX_ID,float,2>(sz,box,bc);
 
 	// Convert the graph to metis
 	Metis<Graph_CSR<nm_v,nm_e>> met(g,16);
@@ -48,13 +51,98 @@ BOOST_AUTO_TEST_CASE( dec_optimizer_test_use)
 	dec_optimizer<3,Graph_CSR<nm_v,nm_e>> d_o(g,sz);
 
 	grid_key_dx<3> keyZero(0,0,0);
-	d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g);
+	d_o.optimize<nm_v::sub_id,nm_v::id>(keyZero,g,bc);
+}
+
+BOOST_AUTO_TEST_CASE( dec_optimizer_test_use_p)
+{
+	CartesianGraphFactory<3,Graph_CSR<nm_v,nm_e>> g_factory;
+	CartesianGraphFactory<3,Graph_CSR<nm_part_v,nm_part_e>> g_factory_part;
+
+	// Cartesian grid
+	size_t sz[3] = {GS_SIZE,GS_SIZE,GS_SIZE};
+
+	//! Grid info
+	grid_sm<3,void> gs(sz);
+
+	// Box
+	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
+
+	// Boundary conditions, periodic
+	size_t bc[] = {PERIODIC,PERIODIC,PERIODIC};
+
+	// Graph to decompose
+	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,NO_VERTEX_ID,float,2,0,1,2>(sz,box,bc);
+
+	// Processor graph
+	Graph_CSR<nm_part_v,nm_part_e> gp = g_factory_part.construct<NO_EDGE,NO_VERTEX_ID,float,2>(sz,box,bc);
+
+	bool p[3];
+
+	// Divide in 8 parts your graph
+
+	// decompose
+	for (size_t i = 0 ; i < GS_SIZE ; i++)
+	{
+		p[0] = (i < GS_SIZE/2)?false:true;
+		for (size_t j = 0 ; j < GS_SIZE ; j++)
+		{
+			p[1] = (j < GS_SIZE/2)?false:true;
+			for (size_t k = 0 ; k < GS_SIZE ; k++)
+			{
+				p[2] = (k < GS_SIZE/2)?false:true;
+				size_t id = 4*p[2] + 2*p[1] + p[0];
+
+				grid_key_dx<3> key(i,j,k);
+				gp.vertex(gs.LinId(key)).template get<nm_part_v::id>() = id;
+				g.vertex(gs.LinId(key)).template get<nm_v::id>() = id;
+			}
+		}
+	}
+
+	// optimize
+	dec_optimizer<3,Graph_CSR<nm_v,nm_e>> d_o(g,sz);
+
+	grid_key_dx<3> keyZero(0,0,0);
+
+	// Set of sub-domain produced by dec-optimizer
+	openfpm::vector<Box<3,size_t>> dec_o;
+
+	// For each sub-domain check the neighborhood processors
+	openfpm::vector< openfpm::vector<size_t> > box_nn_processor;
+
+	// key
+	grid_key_dx<3> zero;
+	zero.zero();
+
+	// gp,p_id,loc_box,box_nn_processor,bc
+	d_o.optimize<nm_v::sub_id,nm_v::id>(zero,g,-1,dec_o,box_nn_processor,bc);
+
+	BOOST_REQUIRE_EQUAL(box_nn_processor.size(),8ul);
+
+	for(size_t i = 0 ; i < box_nn_processor.size() ; i++)
+	{
+		bool nn[] = {false,false,false,false,false,false,false,false};
+		BOOST_REQUIRE_EQUAL(box_nn_processor.get(i).size(),7ul);
+		for (size_t j = 0 ; j < box_nn_processor.get(i).size(); j++)
+		{
+			BOOST_REQUIRE(box_nn_processor.get(i).get(j) < 8);
+			nn[box_nn_processor.get(i).get(j)] = true;
+		}
+
+		// search the neighborhood
+
+		size_t cnt = 0;
+		for(size_t i = 0 ; i < 8 ; i++)
+		{
+			if (nn[i] == false)
+				cnt++;
+		}
+
+		BOOST_REQUIRE_EQUAL(cnt,1ul);
+	}
 
-	// Write the VTK file
-	VTKWriter<Graph_CSR<nm_part_v,nm_part_e>,GRAPH> vtk(gp);
-	vtk.write("vtk_partition.vtk");
-	VTKWriter<Graph_CSR<nm_v,nm_e>,GRAPH> vtk2(g);
-	vtk2.write("vtk_partition2.vtk");
+	// check
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/gargabe.hpp b/src/gargabe.hpp
index ec8d815fc1ba402a1fd622001b8a9ebe12ec6efd..20e091ad5b9e72b1b994f9a9e3143e9fc5eeb4f2 100644
--- a/src/gargabe.hpp
+++ b/src/gargabe.hpp
@@ -389,4 +389,236 @@
 
 
 >>>>>>> Jenkin script for taurus
+
+
+/*! \brief Allocate a set of objects
+ *
+ * \tparam obj
+ * \param n number of object
+ *
+ * \return an object representing an array of objects
+ *
+ */
+/*	template <typename obj> Vcluster_object_array<obj> allocate(size_t n)
+{
+	// Vcluster object array
+	Vcluster_object_array<obj> vo;
+
+	// resize the array
+	vo.resize(n);
+
+	// Create the object on memory and return a Vcluster_object_array
+	return vo;
+}*/
+
+
+/*template<typename T>
+class Vcluster_object_array : public VObject
+{
+	std::vector<T> objects;
+
+public:*/
+
+	/*! \brief Constructor of object array
+	 *
+	 */
+/*	Vcluster_object_array()
+	{
+
+	}*/
+
+	/*! \brief Return the size of the objects array
+	 *
+	 * \return the size of the array
+	 *
+	 */
+/*	size_t size() const
+	{
+		return objects.size();
+	}*/
+
+	/*! \brief Return the element i
+	 *
+	 * \return a reference to the object i
+	 *
+	 */
+
+/*	T & get(unsigned int i)
+	{
+		return objects[i];
+	}*/
+
+	/*! \brief Return the element i
+	 *
+	 * \return a reference to the object i
+	 *
+	 */
+/*	const T & get(unsigned int i) const
+	{
+		return objects[i];
+	}*/
+
+	/*! \brief Check if this Object is an array
+	 *
+	 * \return true, it is an array
+	 *
+	 */
+/*	bool isArray()
+	{
+		return true;
+	}*/
+
+	/*! \brief Destroy the object
+	 *
+	 */
+/*	virtual void destroy()
+	{
+		// Destroy the objects
+		objects.clear();
+	}*/
+
+	/*! \brief Get the size of the memory needed to pack the object
+	 *
+	 * \return the size of the message to pack the object
+	 *
+	 */
+/*	size_t packObjectSize()
+	{
+		size_t message = 0;
+
+		// Destroy each objects
+		for (size_t i = 0 ; i < objects.size() ; i++)
+		{
+			message += objects[i].packObjectSize();
+		}
+
+		return message;
+	}*/
+
+
+	/*! \brief Get the size of the memory needed to pack the object
+	 *
+	 * \param Memory where to write the packed object
+	 *
+	 * \return the size of the message to pack the object
+	 *
+	 */
+/*	size_t packObject(void * mem)
+	{
+		// Pointer is zero
+		size_t ptr = 0;
+		unsigned char * m = (unsigned char *)mem;
+
+		// pack each object
+		for (size_t i = 0 ; i < objects.size() ; i++)
+		{
+			ptr += objects[i].packObject(&m[ptr]);
+		}
+
+#ifdef DEBUG
+		if (ptr != packObjectSize())
+		{
+			std::cerr << "Error " << __FILE__ << " " << __LINE__ << " the pack object size does not match the message" << "\n";
+		}
+#endif
+
+		return ptr;
+	}*/
+
+	/*! \brief Calculate the size to pack an object in the array
+	 *
+	 * \param array object index
+	 *
+	 */
+/*	size_t packObjectInArraySize(size_t i)
+	{
+		return objects[i].packObjectSize();
+	}*/
+
+	/*! \brief pack the object in the array (the message produced can be used to move one)
+	 * object from one processor to another
+	 *
+	 * \param i index of the object to pack
+	 * \param p Memory of the packed object message
+	 *
+	 */
+/*	size_t packObjectInArray(size_t i, void * p)
+	{
+		return objects[i].packObject(p);
+	}*/
+
+	/*! \brief Destroy an object from the array
+	 *
+	 * \param i object to destroy
+	 *
+	 */
+/*	void destroy(size_t i)
+	{
+		objects.erase(objects.begin() + i);
+	}*/
+
+	/*! \brief Return the object j in the array
+	 *
+	 * \param j element j
+	 *
+	 */
+/*	T & operator[](size_t j)
+	{
+		return objects[j];
+	}*/
+
+	/*! \brief Return the object j in the array
+	 *
+	 * \param j element j
+	 *
+	 */
+/*	const T & operator[](size_t j) const
+	{
+		return objects[j];
+	}*/
+
+	/*! \brief Resize the array
+	 *
+	 * \param size
+	 *
+	 */
+/*	void resize(size_t n)
+	{
+		objects.resize(n);
+	}
+};*/
+
+/*! \brief VObject
+ *
+ * Any object produced by the Virtual cluster (MUST) inherit this class
+ *
+ */
+
+/*class VObject
+{
+public:
+
+	// Check if this Object is an array
+	virtual bool isArray() = 0;
+
+	// destroy the object
+	virtual void destroy() = 0;
+
+	// get the size of the memory needed to pack the object
+	virtual size_t packObjectSize() = 0;
+
+	// pack the object
+	virtual size_t packObject(void *) = 0;
+
+	// get the size of the memory needed to pack the object in the array
+	virtual size_t packObjectInArraySize(size_t i) = 0;
+
+	// pack the object in the array (the message produced can be used to move one)
+	// object from one processor to another
+	virtual size_t packObjectInArray(size_t i, void * p) = 0;
+
+	// destroy an element from the array
+	virtual void destroy(size_t n) = 0;
+};*/
+
 #endif /* GARGABE_HPP_ */
diff --git a/src/lib/pdata.cpp b/src/lib/pdata.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9697504a586b6f656df9a72c555c43f1da5bd5fb
--- /dev/null
+++ b/src/lib/pdata.cpp
@@ -0,0 +1,16 @@
+/*
+ * pdata.cpp
+ *
+ *  Created on: Feb 5, 2016
+ *      Author: Pietro Incardona
+ */
+
+#include "SubdomainGraphNodes.hpp"
+
+const std::string nm_v::attributes::name[] = {"x","migration","computation","global_id","id","sub_id","proc_id","id","fake_v"};
+const std::string nm_e::attributes::name[] = {"communication","srcgid","dstgid"};
+const std::string nm_part_v::attributes::name[] = {"id","sub_id"};
+const std::string nm_part_e::attributes::name[] = {"id"};
+
+
+
diff --git a/src/main.cpp b/src/main.cpp
index a1415740dc263d7084cc82b586331611153ce37d..877907ca777e7ab84b40c58d2e2969e585519a00 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -20,15 +20,18 @@
 #include "util.hpp"
 
 #include "unit_test_init_cleanup.hpp"
+#include "Graph/CartesianGraphFactory_unit_test.hpp"
 #include "Decomposition/CartDecomposition_unit_test.hpp"
 #include "Decomposition/ORB_unit_test.hpp"
-#include "Graph/CartesianGraphFactory_unit_test.hpp"
 #include "Graph/DistCartesianGraphFactory_unit_test.hpp"
-#include "metis_util_unit_test.hpp"
+#include "Decomposition/Distribution/metis_util_unit_test.hpp"
 #include "dec_optimizer_unit_test.hpp"
 #include "Grid/grid_dist_id_unit_test.hpp"
 #include "Vector/vector_dist_unit_test.hpp"
+#include "Decomposition/Distribution/Distribution_unit_tests.hpp"
 //#include "Decomposition/DLB_unit_test.hpp"
 #include "Graph/dist_map_graph_unit_test.hpp"
 #include "Graph/DistGraphFactory.hpp"
 #include "parmetis_dist_util_unit_test.hpp"
+//#include "Decomposition/nn_processor_unit_test.hpp"
+#include "Grid/staggered_grid_dist_unit_test.hpp"
diff --git a/src/test_multiple_o.cpp b/src/test_multiple_o.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e45e1d284739bb0f6820b12744ba2de72cd08a2
--- /dev/null
+++ b/src/test_multiple_o.cpp
@@ -0,0 +1,35 @@
+/*
+ * test_multiple_o.cpp
+ *
+ *  Created on: Feb 5, 2016
+ *      Author: i-bird
+ *
+ *
+ *  It just tests that compilation with multiple translation units (*.o) does not
+ *  produce errors; if there are duplicated symbols across translation units we will get an error
+ *
+ */
+
+#include "Vector/vector_dist.hpp"
+#include "Grid/grid_dist_id.hpp"
+#include "data_type/aggregate.hpp"
+#include "Decomposition/CartDecomposition.hpp"
+
+void f()
+{
+	// Ghost
+	Ghost<3,float> g(0.01);
+
+	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
+
+	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
+	size_t sz[3];
+	sz[0] = 100;
+	sz[1] = 100;
+	sz[2] = 100;
+
+	vector_dist<3,float, aggregate<float>, CartDecomposition<3,float> > vd(4096,domain,bc,g);
+	grid_dist_id<3, float, aggregate<float[3]>, CartDecomposition<3,float>> g_dist(sz,domain,g);
+}
+
+
diff --git a/vtk/CartDecomposition_gen_vtk.cpp b/vtk/CartDecomposition_gen_vtk.cpp
index 1c2f6d5d4e2632ccff6deb9cc2e46ca1e0606cd8..5315d45f4ad42973b967b264aed24ef3cbfd93e7 100644
--- a/vtk/CartDecomposition_gen_vtk.cpp
+++ b/vtk/CartDecomposition_gen_vtk.cpp
@@ -27,8 +27,11 @@ int main(int argc, char ** argv)
 	// Define ghost
 	Ghost<2,float> g(0.01);
 
+	// boundary conditions
+	size_t bc[2] = {PERIODIC,PERIODIC};
+
 	// Decompose and write the decomposed graph
-	dec.setParameters(div,box,g);
+	dec.setParameters(div,box,bc,g);
 
 	// create a ghost border
 	dec.calculateGhostBoxes();
diff --git a/vtk/Makefile.am b/vtk/Makefile.am
index edf8462bcc47ae8dbe26cc23d9dad0c32114f164..efa8de45a1cac44d1c8a7b145ce419f462ee3f13 100644
--- a/vtk/Makefile.am
+++ b/vtk/Makefile.am
@@ -2,18 +2,18 @@
 LINKLIBS = $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS)
 
 noinst_PROGRAMS = cart_dec metis_dec dom_box
-cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
-cart_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
+cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
+cart_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
 cart_dec_CFLAGS = $(CUDA_CFLAGS)
 cart_dec_LDADD = $(LINKLIBS) -lmetis
 
-metis_dec_SOURCES = Metis_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
-metis_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
+metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
+metis_dec_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
 metis_dec_CFLAGS = $(CUDA_CFLAGS)
 metis_dec_LDADD = $(LINKLIBS) -lmetis
 
-dom_box_SOURCES = domain_gen_vtk.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
-dom_box_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function
+dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
+dom_box_CXXFLAGS = $(CUDA_CFLAGS) $(INCLUDES_PATH) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
 dom_box_CFLAGS = $(CUDA_CFLAGS)
 dom_box_LDADD = $(LINKLIBS)
 
diff --git a/vtk/Metis_gen_vtk.cpp b/vtk/Metis_gen_vtk.cpp
index 08101d9eedf8c294b0df1ccaad399a18f45f6e97..fab84b629b64ed764f6e86a55be4db0f1fa7b96d 100644
--- a/vtk/Metis_gen_vtk.cpp
+++ b/vtk/Metis_gen_vtk.cpp
@@ -11,7 +11,7 @@
 #include <iostream>
 #include "Graph/CartesianGraphFactory.hpp"
 #include "Graph/map_graph.hpp"
-#include "metis_util.hpp"
+#include "Decomposition/Distribution/metis_util.hpp"
 #include "SubdomainGraphNodes.hpp"
 
 int main(int argc, char ** argv)
@@ -24,9 +24,11 @@ int main(int argc, char ** argv)
 	// Box
 	Box<2,float> box({0.0,0.0},{1.0,1.0});
 
+	const size_t bc[] = {NON_PERIODIC,NON_PERIODIC};
+
 	// Graph to decompose
 
-	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,float,1,0,1>(sz,box);
+	Graph_CSR<nm_v,nm_e> g = g_factory.construct<nm_e::communication,NO_VERTEX_ID,float,1,0,1>(sz,box,bc);
 
 	// Convert the graph to metis
 
@@ -38,7 +40,7 @@ int main(int argc, char ** argv)
 
 	// Write the decomposition
 
-	VTKWriter<Graph_CSR<nm_v,nm_e>,GRAPH> vtk(g);
+	VTKWriter<Graph_CSR<nm_v,nm_e>,VTK_GRAPH> vtk(g);
 	vtk.write("Metis/vtk_partition.vtk");
 }
 
diff --git a/vtk/domain_gen_vtk.cpp b/vtk/domain_gen_vtk.cpp
index 8d4bbcd9f8e8ddc5d987ef54cd7272e2d5b52655..70a4a27275999a855560ba3f008463a64fa979a7 100644
--- a/vtk/domain_gen_vtk.cpp
+++ b/vtk/domain_gen_vtk.cpp
@@ -8,7 +8,7 @@
 #include <iostream>
 #include "Space/Shape/Box.hpp"
 #include "Vector/map_vector.hpp"
-#include "VTKWriter.hpp"
+#include "VTKWriter/VTKWriter.hpp"
 
 int main(int argc, char ** argv)
 {