diff --git a/CHANGELOG.md b/CHANGELOG.md
index cc7d1d3242ce0f89a6bb3373c2593d9a77763734..8f9b375ab449f0a5523d6989877a9cd8d490f012 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,17 +1,34 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
-## [2.0.0] December 2018
+## [2.0.0] December 2018 (Codename Elisa)
 
 ### Added
 
-- Adding GPU support (see example 3_molecular_dynamic_gpu)
+- Adding GPU support (see example 1_gpu_first_step
+				  3_molecular_dynamic_gpu 
+                                  7_sph_dlb_gpu 
+                                  7_sph_dlb_gpu_opt)
+
+### Fixed 
+
+- Detection of clang 10.0.0 on mac-osx mojave
+- In VTK binary format all 64 bit types are cast to 32 bit. Either long/unsigned long are bugged in the Paraview version we tested, or I do not understand how they work.
 
 ### Changed
 
 - The type Vcluster now is templated and the standard Vcluster is Vcluster<>
+           You will most probably have to change Vcluster to Vcluster<> in your code
+
+## [1.1.1] December 2018 (Codename Poisson)
+
+### Fixed 
+
+- Detection of clang 10.0.0 on mac-osx mojave
+
+## [1.0.X] End of life (These versions are no longer supported)
 
-## [1.1.0] February 2018
+## [1.1.0] February 2018 (Codename Ring)
 
 ### Added
 
diff --git a/Jenkinsfile_numerics_mpi b/Jenkinsfile_numerics_mpi
index c730e4622975a28abea65fe713695870472dca87..c32ec2c1f0d461b9d60fae86c4eb96273211480a 100644
--- a/Jenkinsfile_numerics_mpi
+++ b/Jenkinsfile_numerics_mpi
@@ -70,12 +70,8 @@ parallel (
 
                     stage ('run mac')
                     {
-                      parallel (
-                      "1" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics"},
-                      "2" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics"},
-                      "3" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 3 0 0 numerics"},
-                      "4" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 4 0 0 numerics"}
-                      )
+                      sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics"
+                      sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics"
                       sh "./success.sh 2 sbalzarini-mac-15 openfpm_numerics"
                     }
                   }
diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp
index b96ee352117245a5c74134acacd04740de6f90a1..f1270d56353f475f54603f8fa0886157c87c0ac2 100644
--- a/example/Vector/0_simple/main.cpp
+++ b/example/Vector/0_simple/main.cpp
@@ -4,8 +4,10 @@
  * \subpage Vector_1_celllist
  * \subpage Vector_1_ghost_get
  * \subpage Vector_1_HDF5
+ * \subpage Vector_1_gpu_first_step
  * \subpage Vector_2_expression
  * \subpage Vector_3_md
+ * \subpage Vector_3_md_dyn_gpu
  * \subpage Vector_4_reo_root
  * \subpage Vector_4_cp
  * \subpage Vector_4_mp_cl
@@ -14,6 +16,7 @@
  * \subpage Vector_6_complex_usage
  * \subpage Vector_7_sph_dlb
  * \subpage Vector_7_sph_dlb_opt
+ * \subpage Vector_7_sph_dlb_gpu_opt
  * \subpage Vector_8_DEM
  *
  */
diff --git a/example/Vector/1_HDF5_save_load/main.cpp b/example/Vector/1_HDF5_save_load/main.cpp
index 1f01e4bc402ff8d4d73b79c013967c2f634bdb80..71606b30bb5f9dac7fdc68f3f40bad16035b08a2 100644
--- a/example/Vector/1_HDF5_save_load/main.cpp
+++ b/example/Vector/1_HDF5_save_load/main.cpp
@@ -1,5 +1,5 @@
 /*!
- * \page Vector_1_HDF5 HDF5 save and load
+ * \page Vector_1_HDF5 Vector 1 HDF5 save and load
  *
  *
  * [TOC]
diff --git a/example/Vector/7_SPH_dlb_gpu/main.cu b/example/Vector/7_SPH_dlb_gpu/main.cu
index 70c49aa52f98fc08741c77b523aa79ae67b65ad6..4d0a58f751cc78aee628ffc74576005a04bcdf38 100644
--- a/example/Vector/7_SPH_dlb_gpu/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu/main.cu
@@ -1,5 +1,4 @@
-/*!
- * \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balacing on GPU
+/*! \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balancing on Multi-GPU
  *
  *
  * [TOC]
@@ -8,50 +7,26 @@
  * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu}
  *
  *
- * This example show the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. With
- * Load balancing and Dynamic load balancing we indicate the possibility of the system to re-adapt the domain
- * decomposition to keep all the processor load and reduce idle time.
+ * This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
+ * \ref{SPH_dlb} is that here we use the GPU and 1.2 million particles.
  *
  * \htmlonly
  * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
  * <div style="display:none" id="vector-video-3">
- * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video>
+ * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video>
  * </div>
  * <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br>
  * <div style="display:none" id="vector-video-4">
- * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video>
+ * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video>
  * </div>
- * <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br>
+ * <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br>
  * <div style="display:none" id="vector-video-15">
- * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br>
- * <div style="display:none" id="vector-video-16">
- * <video id="vid16" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br>
- * <div style="display:none" id="vector-video-17">
- * <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br>
- * <div style="display:none" id="vector-video-18">
- * <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br>
- * <div style="display:none" id="vector-video-19">
- * <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video>
+ * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video>
  * </div>
  * \endhtmlonly
  *
- * \htmlonly
- * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/>
- * \endhtmlonly
- *
- * ## GPU ## {#e7_sph_inclusion}
- *
- * This example does not differ from the example in \ref{SPH_dlb}
  *
- * \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion
+ * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
  *
  */
 
@@ -550,7 +525,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
 
     // Check if the particle go out of range in space and in density
     if (vd.getPos(a)[0] <  0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
-        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
+        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
 		vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
     {vd.template getProp<red>(a) = 1;}
     else
@@ -629,7 +604,7 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
 
     // Check if the particle go out of range in space and in density
     if (vd.getPos(a)[0] <  0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
-        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
+        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
 		vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
     {vd.template getProp<red>(a) = 1;}
     else
diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
index 95bb5db2bbdf3c8052350acbd639a424caab64ff..b0177e4612e5b1ee5f98fe17c8acbe0cfbd54ea6 100644
--- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
@@ -1,57 +1,37 @@
-/*!
- * \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balacing on GPU
+/*! \page Vector_7_sph_dlb_gpu_opt Vector 7 SPH Dam break simulation with Dynamic load balancing on Multi-GPU (optimized version)
  *
  *
  * [TOC]
  *
  *
- * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu}
+ * # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu_opt}
  *
  *
- * This example show the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. With
- * Load balancing and Dynamic load balancing we indicate the possibility of the system to re-adapt the domain
- * decomposition to keep all the processor load and reduce idle time.
+ * This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
+ * \ref{SPH_dlb} is that here we use the GPU and 1.2 million particles. Simulating 1.5 seconds should be doable on a 1050Ti within a couple
+ * of hours.
  *
  * \htmlonly
  * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
  * <div style="display:none" id="vector-video-3">
- * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video>
+ * <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video>
  * </div>
  * <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br>
  * <div style="display:none" id="vector-video-4">
- * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video>
+ * <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video>
  * </div>
- * <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br>
+ * <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br>
  * <div style="display:none" id="vector-video-15">
- * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br>
- * <div style="display:none" id="vector-video-16">
- * <video id="vid16" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br>
- * <div style="display:none" id="vector-video-17">
- * <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br>
- * <div style="display:none" id="vector-video-18">
- * <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video>
- * </div>
- * <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br>
- * <div style="display:none" id="vector-video-19">
- * <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video>
+ * <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video>
  * </div>
  * \endhtmlonly
  *
- * \htmlonly
- * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/>
- * \endhtmlonly
  *
  * ## GPU ## {#e7_sph_inclusion}
  *
- * This example does not differ from the example in \ref{SPH_dlb}
+ * This example is the port on GPU of the following example \ref{SPH_dlb}
  *
- * \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion
+ * \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
  *
  */
 
@@ -163,7 +143,7 @@ const int red = 8;
 const int red2 = 9;
 
 // Type of the vector containing particles
-typedef vector_dist_gpu<3,real_number,aggregate<size_t,real_number,  real_number,    real_number,     real_number,     real_number[3], real_number[3], real_number[3], real_number, real_number>> particles;
+typedef vector_dist_gpu<3,real_number,aggregate<unsigned int,real_number,  real_number,    real_number,     real_number,     real_number[3], real_number[3], real_number[3], real_number, real_number>> particles;
 //                                              |          |             |               |                |                |               |               |               |            |
 //                                              |          |             |               |                |                |               |               |               |            |
 //                                             type      density       density        Pressure          delta            force          velocity        velocity        reduction     another
@@ -509,8 +489,8 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
 	vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
 
     // Check if the particle go out of range in space and in density
-    if (vd.getPos(a)[0] <  0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
-        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
+    if (vd.getPos(a)[0] <  0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
+        vd.getPos(a)[0] >  1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
 		vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
     {vd.template getProp<red>(a) = 1;}
     else
@@ -588,8 +568,8 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
    	vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
 
     // Check if the particle go out of range in space and in density
-    if (vd.getPos(a)[0] <  0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
-        vd.getPos(a)[0] >  0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
+    if (vd.getPos(a)[0] <  0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
+        vd.getPos(a)[0] >  1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
 		vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
     {vd.template getProp<red>(a) = 1;}
     else
@@ -718,8 +698,8 @@ int main(int argc, char* argv[])
 	probes.add({0.754,0.31,0.02});
 
 	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
-	Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.5025});
-	size_t sz[3] = {413,179,131};
+	Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511});
+	size_t sz[3] = {413,179,133};
 
 	// Fill W_dap
 	W_dap = 1.0/Wab(H/1.5);
@@ -939,7 +919,7 @@ int main(int argc, char* argv[])
 			vd.deviceToHostPos();
 			vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
 
-			vd.write_frame("Geometry",write);
+			vd.write_frame("Geometry",write,VTK_WRITER | FORMAT_BINARY);
 			write++;
 
 			if (v_cl.getProcessUnitID() == 0)
diff --git a/install b/install
index 2cb62bd9e016043d766c6821c0b9742def07e595..2d3eb3ccd88d5f77347bda1a1adb9753dc170d2d 100755
--- a/install
+++ b/install
@@ -411,7 +411,7 @@ fi
 
 ### Create example.mk
 install_base=$(cat install_dir)
-openmp_flags="$(cat openmp_flags) $(cat openfpm_flags)"
+openmp_flags="$(cat openmp_flags)"
 
 if [ -d "$i_dir/HDF5/lib" ]; then
   hdf5_lib=$i_dir/HDF5/lib
@@ -419,7 +419,7 @@ elif [ -d "$i_dir/HDF5/lib64" ]; then
   hdf5_lib=$i_dir/HDF5/lib64
 fi
 
-echo "INCLUDE_PATH= $openmp_flags  -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include  $lin_alg_inc" > example.mk
+echo "INCLUDE_PATH=-Wno-deprecated-declarations $openmp_flags  -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include  $lin_alg_inc" > example.mk
 echo "LIBS_PATH= $openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib  -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib  -L$i_dir/BOOST/lib -L$hdf5_lib -L$i_dir/LIBHILBERT/lib  $lin_alg_dir" >> example.mk
 if [ x"$gpu_support" == x"1" ]; then
     echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $(cat cuda_lib) $lin_alg_lib" >> example.mk
@@ -428,7 +428,7 @@ else
     echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert  $lin_alg_lib" >> example.mk
     echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk
 fi
-echo "INCLUDE_PATH_NVCC=$(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2887  --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include  $lin_alg_inc" >> example.mk
+echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885  --diag_suppress=2886 --diag_suppress=2887  --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include  $lin_alg_inc" >> example.mk
 cp example.mk src/example.mk
 cp example.mk example/example.mk
 
diff --git a/openfpm_pdata.doc b/openfpm_pdata.doc
index e356c1f644c459d47927b845e8cb1a8d06b43103..0c4e8ff295dd62a02fb6a5039839908f1de63d93 100644
--- a/openfpm_pdata.doc
+++ b/openfpm_pdata.doc
@@ -38,7 +38,7 @@ PROJECT_NAME           = "OpenFPM_pdata"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 1.1.0
+PROJECT_NUMBER         = 2.0.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -763,7 +763,8 @@ INPUT_ENCODING         = UTF-8
 # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
 # *.qsf, *.as and *.js.
 
-FILE_PATTERNS          = *.hpp *.cpp *.h
+FILE_PATTERNS          = *.cu *.cuh *.hpp *.cpp *.h
+EXTENSION_MAPPING = cu=c++ cuh=c++
 
 # The RECURSIVE tag can be used to specify whether or not subdirectories should
 # be searched for input files as well.
@@ -1927,6 +1928,7 @@ INCLUDE_PATH           =
 
 INCLUDE_FILE_PATTERNS  =
 
+
 # The PREDEFINED tag can be used to specify one or more macro names that are
 # defined before the preprocessor is started (similar to the -D option of e.g.
 # gcc). The argument of the tag is a list of macros of the form: name or
diff --git a/script/install_Parmetis.sh b/script/install_Parmetis.sh
index 91b8ec0d8ecf02e65cd083ef3d3cd0cde0d31d5a..e155f7608b5b798cb418231b4717faba1939e54b 100755
--- a/script/install_Parmetis.sh
+++ b/script/install_Parmetis.sh
@@ -12,6 +12,7 @@ fi
 
 ## Remove old download
 rm -rf parmetis-4.0.3
+rm parmetis-4.0.3.tar.gz
 
 wget http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.3.tar.gz
 tar -xf parmetis-4.0.3.tar.gz
diff --git a/src/Decomposition/Domain_NN_calculator_cart.hpp b/src/Decomposition/Domain_NN_calculator_cart.hpp
index 3f2c144e16b5048768a6ff47e3230b5f50947568..d18180432cf165cea8ecc9e4d23924b8610a563c 100644
--- a/src/Decomposition/Domain_NN_calculator_cart.hpp
+++ b/src/Decomposition/Domain_NN_calculator_cart.hpp
@@ -217,8 +217,8 @@ class domain_nn_calculator_cart
 		anom_lin.clear();
 		for (size_t i = 0 ; i < anom.size() ; i++)
 		{
-			anom_lin.add();
 			grid_key_dx<dim> tmp = anom.get(i).subsub + shift;
+			anom_lin.add();
 			anom_lin.last().subsub = gs.LinId(tmp);
 
 			long int self_cell = -1;
@@ -281,7 +281,7 @@ public:
 			dom_cells_lin.clear();
 			for (size_t i = 0 ; i < dom_cells.size() ; i++)
 			{
-				grid_key_dx<dim> tmp = dom_cells.get(i) + shift;
+				grid_key_dx<dim> tmp =  dom_cells.get(i) + shift;
 				dom_cells_lin.add(gs.LinId(tmp));
 			}
 
diff --git a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh
index 841232874a804d72517edad36013ca6fd6d8aea6..3b6aba61774ce98cec1084beac71c3aafc904ed8 100644
--- a/src/Vector/cuda/vector_dist_comm_util_funcs.cuh
+++ b/src/Vector/cuda/vector_dist_comm_util_funcs.cuh
@@ -277,7 +277,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
 		<<<ite.wthr,ite.thr>>>
 		(box_f_dev.toKernel(),box_f_sv.toKernel(),
 		 v_pos.toKernel(),v_prp.toKernel(),
-		 starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old);
+		 starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old,g_m);
 
 #else
 		std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
diff --git a/src/Vector/cuda/vector_dist_cuda_func_test.cu b/src/Vector/cuda/vector_dist_cuda_func_test.cu
index 3219e05cfa45c424e2e818a167a4d33c1a54d6bc..d78b4a294d7d3f6b7f59421893a2e4db9ecd213d 100644
--- a/src/Vector/cuda/vector_dist_cuda_func_test.cu
+++ b/src/Vector/cuda/vector_dist_cuda_func_test.cu
@@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
 	<<<ite.wthr,ite.thr>>>
 	(box_f_dev.toKernel(),box_f_sv.toKernel(),
 	 v_pos.toKernel(),v_prp.toKernel(),
-	 starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old);
+	 starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,v_pos.size());
 
 	v_pos.deviceToHost<0>();
 	o_part_loc2.deviceToHost<0,1>();
diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh
index 74b42c0a827161523e21a0a89d10cc3d5f383890..76a52e4805e194cab4b6143ac262598397c278ae 100644
--- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh
+++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh
@@ -222,12 +222,12 @@ template<unsigned int dim, typename St,
 __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_f_sv,
 		                              vector_type_pos v_pos, vector_type_prp v_prp,
 		                              start_type start, shifts_type shifts,
-		                              output_type output, unsigned int offset)
+		                              output_type output, unsigned int offset,unsigned int g_m)
 {
 	unsigned int old_shift = (unsigned int)-1;
 	int p = threadIdx.x + blockIdx.x * blockDim.x;
 
-    if (p >= v_pos.size()) return;
+    if (p >= g_m) return;
 
     Point<dim,St> xp = v_pos.template get<0>(p);
 
@@ -251,15 +251,8 @@ __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_
     			v_pos.template get<0>(base+n)[j] = xp.get(j) - shifts.template get<0>(shift_actual)[j];
     		}
 
-    		if (base_o + n < output.size())
-    		{
-    			output.template get<0>(base_o+n) = p;
-    			output.template get<1>(base_o+n) = shift_actual;
-    		}
-    		else
-    		{
-    			printf("OVERFLOW \n");
-    		}
+    		output.template get<0>(base_o+n) = p;
+    		output.template get<1>(base_o+n) = shift_actual;
 
     		v_prp.set(base+n,v_prp.get(p));
 
@@ -290,7 +283,7 @@ struct _max_: mgpu::maximum_t<red_type>
 {};
 
 template<unsigned int prp, template <typename> class op, typename vector_type>
-auto reduce(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type
+auto reduce_local(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type
 {
 	typedef typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type reduce_type;
 
diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu
index 2be62fa2334642f29e0eb0ea08cb2e3f1ea70cab..8bb80a1e34c1968498fbfa79d7db29c97e3774a9 100644
--- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu
+++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu
@@ -736,20 +736,20 @@ BOOST_AUTO_TEST_CASE(vector_dist_reduce)
 
 	vd.template hostToDeviceProp<0,1,2,3>();
 
-	float redf = reduce<0,_add_>(vd);
-	double redd = reduce<1,_add_>(vd);
-	int redi = reduce<2,_add_>(vd);
-	size_t reds = reduce<3,_add_>(vd);
+	float redf = reduce_local<0,_add_>(vd);
+	double redd = reduce_local<1,_add_>(vd);
+	int redi = reduce_local<2,_add_>(vd);
+	size_t reds = reduce_local<3,_add_>(vd);
 
 	BOOST_REQUIRE_EQUAL(redf,(vd.size_local()+1.0)*(vd.size_local())/2.0);
 	BOOST_REQUIRE_EQUAL(redd,(vd.size_local()+1.0)*(vd.size_local())/2.0);
 	BOOST_REQUIRE_EQUAL(redi,(vd.size_local()+1)*(vd.size_local())/2);
 	BOOST_REQUIRE_EQUAL(reds,(vd.size_local()+1)*(vd.size_local())/2);
 
-	float redf2 = reduce<0,_max_>(vd);
-	double redd2 = reduce<1,_max_>(vd);
-	int redi2 = reduce<2,_max_>(vd);
-	size_t reds2 = reduce<3,_max_>(vd);
+	float redf2 = reduce_local<0,_max_>(vd);
+	double redd2 = reduce_local<1,_max_>(vd);
+	int redi2 = reduce_local<2,_max_>(vd);
+	size_t reds2 = reduce_local<3,_max_>(vd);
 
 	BOOST_REQUIRE_EQUAL(redf2,vd.size_local());
 	BOOST_REQUIRE_EQUAL(redd2,vd.size_local());
diff --git a/src/Vector/vector_dist_comm.hpp b/src/Vector/vector_dist_comm.hpp
index 3f5a9c464175ff5cde6c9dab6cd3fa8604da9054..45fa45e77f4373024c6dcb06641cf284368211fd 100644
--- a/src/Vector/vector_dist_comm.hpp
+++ b/src/Vector/vector_dist_comm.hpp
@@ -1567,6 +1567,12 @@ public:
 	 */
 	template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt)
 	{
+		if (opt & RUN_ON_DEVICE)
+		{
+			std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " map_list is unsupported on device (coming soon)" << std::endl;
+			return;
+		}
+
 		typedef KillParticle obp;
 
 		// Processor communication size