diff --git a/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu b/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu
index 8d20134e5772b9ff99171c246059cf54d45988fe..791963be4adc60d137f91cee0f4697655a0acf62 100644
--- a/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu
+++ b/example/Vector/3_molecular_dynamic_gpu_opt/main_gpu.cu
@@ -262,7 +262,7 @@ template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggr
 
 	//! \cond [calc_force_sorted] \endcond
 
-	CUDA_LAUNCH(calc_force_gpu,it2.wthr,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2);
+	CUDA_LAUNCH(calc_force_gpu,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2);
 
 	//! \cond [calc_force_sorted] \endcond
 
diff --git a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
index 7e7eab9d99f8c2b10a2aa04e124e098e5cbe0891..bb4757617af0646f583365875fea074239a3f32a 100644
--- a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
@@ -62,7 +62,6 @@
 #include "Vector/vector_dist.hpp"
 #include <math.h>
 #include "Draw/DrawParticles.hpp"
-#include <cuda_profiler_api.h>
 
 
 
@@ -324,16 +323,16 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN
 	Point<3,real_number> xa = vd.getPos(a);
 
 	// Type of the particle
-	unsigned int typea = vd.getProp<type>(a);
+	unsigned int typea = vd.template getProp<type>(a);
 
 	// Get the density of the of the particle a
-	real_number rhoa = vd.getProp<rho>(a);
+	real_number rhoa = vd.template getProp<rho>(a);
 
 	// Get the pressure of the particle a
-	real_number Pa = vd.getProp<Pressure>(a);
+	real_number Pa = vd.template getProp<Pressure>(a);
 
 	// Get the Velocity of the particle a
-	Point<3,real_number> va = vd.getProp<velocity>(a);
+	Point<3,real_number> va = vd.template getProp<velocity>(a);
 
 	Point<3,real_number> force_;
 	force_.get(0) = 0.0f;
@@ -356,12 +355,12 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN
 		// if (p == q) skip this particle this condition should be done in the r^2 = 0
 		//if (a == b)	{++Np; continue;};
 
-        unsigned int typeb = vd.getProp<type>(b);
+        unsigned int typeb = vd.template getProp<type>(b);
 
         real_number massb = (typeb == FLUID)?MassFluid:MassBound;
-        Point<3,real_number> vb = vd.getProp<velocity>(b);
-        real_number Pb = vd.getProp<Pressure>(b);
-        real_number rhob = vd.getProp<rho>(b);
+        Point<3,real_number> vb = vd.template getProp<velocity>(b);
+        real_number Pb = vd.template getProp<Pressure>(b);
+        real_number rhob = vd.template getProp<rho>(b);
 
 		// Get the distance between p and q
 		Point<3,real_number> dr = xa - xb;
@@ -395,7 +394,7 @@ __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN
 		++Np;
 	}
 
-	vd.getProp<red>(a) = max_visc;
+	vd.template getProp<red>(a) = max_visc;
 
 	vd.template getProp<force>(a)[0] = force_.get(0);
 	vd.template getProp<force>(a)[1] = force_.get(1);
@@ -417,10 +416,10 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord,
 	Point<3,real_number> xa = vd.getPos(a);
 
 	// Type of the particle
-	unsigned int typea = vd.getProp<type>(a);
+	unsigned int typea = vd.template getProp<type>(a);
 
 	// Get the Velocity of the particle a
-	Point<3,real_number> va = vd.getProp<velocity>(a);
+	Point<3,real_number> va = vd.template getProp<velocity>(a);
 
 	real_number drho_ = 0.0f;
 
@@ -439,10 +438,10 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord,
 		// if (p == q) skip this particle this condition should be done in the r^2 = 0
 		//if (a == b)	{++Np; continue;};
 
-        unsigned int typeb = vd.getProp<type>(b);
+        unsigned int typeb = vd.template getProp<type>(b);
 
         real_number massb = (typeb == FLUID)?MassFluid:MassBound;
-        Point<3,real_number> vb = vd.getProp<velocity>(b);
+        Point<3,real_number> vb = vd.template getProp<velocity>(b);
 
 		// Get the distance between p and q
 		Point<3,real_number> dr = xa - xb;
@@ -467,7 +466,7 @@ __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord,
 		++Np;
 	}
 
-	vd.getProp<red>(a) = max_visc;
+	vd.template getProp<red>(a) = max_visc;
 
 	vd.template getProp<drho>(a) = drho_;
 }
@@ -519,11 +518,11 @@ __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
 {
 	auto a = GET_PARTICLE(vd);
 
-	Point<3,real_number> acc(vd.getProp<force>(a));
-	vd.getProp<red>(a) = norm(acc);
+	Point<3,real_number> acc(vd.template getProp<force>(a));
+	vd.template getProp<red>(a) = norm(acc);
 
-	Point<3,real_number> vel(vd.getProp<velocity>(a));
-	vd.getProp<red2>(a) = norm(vel);
+	Point<3,real_number> vel(vd.template getProp<velocity>(a));
+	vd.template getProp<red2>(a) = norm(vel);
 }
 
 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
@@ -638,7 +637,7 @@ void verlet_int(particles & vd, real_number dt)
 	real_number dt205 = dt*dt*0.5;
 	real_number dt2 = dt*2.0;
 
-	verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205);
+	CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
 
 	// remove the particles marked
 	remove_marked<red>(vd);
@@ -790,7 +789,7 @@ inline void sensor_pressure(Vector & vd,
         // if the probe is inside the processor domain
 		if (vd.getDecomposition().isLocal(probes.get(i)) == true)
 		{
-			sensor_pressure_gpu<<<1,1>>>(vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
+			CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
 
 			vd.merge<Pressure>(NN);
 
@@ -818,7 +817,9 @@ int main(int argc, char* argv[])
 	openfpm::vector_gpu<aggregate<int>> fluid_ids;
 	openfpm::vector_gpu<aggregate<int>> border_ids;
 
+#ifdef CUDIFY_USE_CUDA
 	cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+#endif
 
 	// It contain for each time-step the value detected by the probes
 	openfpm::vector<openfpm::vector<real_number>> press_t;
diff --git a/example/Vector/9_gpu_cuda_interop/main.cu b/example/Vector/9_gpu_cuda_interop/main.cu
index a9cc33b72a72bc6e7bbe95e02e484df06e955a32..b8e3a0b009a586cdea5c1174295d5e3516a94834 100644
--- a/example/Vector/9_gpu_cuda_interop/main.cu
+++ b/example/Vector/9_gpu_cuda_interop/main.cu
@@ -229,7 +229,7 @@ int main(int argc, char* argv[])
 
 	vd.template hostToDeviceProp<0,1,2>();
 
-	print_data_particle_50<<<100,1>>>((float *)vd.getPropVector().template getDeviceBuffer<0>(),
+	CUDA_LAUNCH_DIM3(print_data_particle_50,100,1,(float *)vd.getPropVector().template getDeviceBuffer<0>(),
 			               (float *)vd.getPropVector().template getDeviceBuffer<1>(),
 			               (float *)vd.getPropVector().template getDeviceBuffer<2>(),
 			               vd.getPropVector().capacity());
diff --git a/install b/install
index 6f83ff2674450869af5d8a26e7eef3b97be30ad4..91591d5e927a9c31e288544bcf1c4fc82e843a19 100755
--- a/install
+++ b/install
@@ -90,6 +90,16 @@ while getopts di:v:smghc:nul FLAG; do
 	esac
 done
 
+
+if [[ $configure_options == *"--with-cuda-on-backend=CUDA"* ]]; then
+  # configure_options already selects the CUDA backend: align the flag, do not append it twice
+  gpu_support=1
+elif [ x"$gpu_support" == x"1" ]; then
+  # append the CUDA backend only when gpu_support is exactly 1, never for an unset/other value
+  configure_options=" $configure_options --with-cuda-on-backend=CUDA"
+fi
+
+
 ### Configure options
 
 i_dir_old=$i_dir
@@ -298,10 +308,6 @@ configure_options=" $configure_options --with-parmetis=$i_dir/PARMETIS "
 ./script/install_Metis.sh $i_dir $CC $CXX $ncore
 configure_options=" $configure_options --with-metis=$i_dir/METIS "
 
-if [ x"$gpu_support" == x"1" ]; then
-	configure_options=" $configure_options --with-cuda-on-backend=CUDA"
-fi
-
 MPI_installed=0
 MPI_System_prv=1
 METIS_installed=1