diff --git a/example/Vector/0_simple/main.cpp b/example/Vector/0_simple/main.cpp
index aea942c9ff33014ec4f9502acc96fd8b62183ac8..cb4059b2b9e83058adb008c589b98370743db236 100644
--- a/example/Vector/0_simple/main.cpp
+++ b/example/Vector/0_simple/main.cpp
@@ -19,6 +19,7 @@
  * \subpage Vector_7_sph_dlb_opt
  * \subpage Vector_7_sph_dlb_gpu
  * \subpage Vector_7_sph_dlb_gpu_opt
+ * \subpage Vector_7_sph_dlb_gpu_more_opt
  * \subpage Vector_8_DEM
  * \subpage Vector_9_gpu_cuda_interop
  *
diff --git a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
index 99f011623aaa8a3cceefb5677aa62a661bdf6e19..2d823d7f4178b32fdf5aa3e3e74896ea73074fb3 100644
--- a/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_more_opt/main.cu
@@ -1,15 +1,17 @@
-/*! \page Vector_7_sph_dlb_gpu_opt Vector 7 SPH Dam break simulation with Dynamic load balacing on Multi-GPU (optimized version)
+/*! \page Vector_7_sph_dlb_gpu_more_opt Vector 7 SPH Dam break simulation with Dynamic load balancing on Multi-GPU (more optimized version)
  *
  *
  * [TOC]
  *
  *
- * # SPH with Dynamic load Balancing on GPU (Optimized) # {#SPH_dlb_gpu_opt}
+ * # SPH with Dynamic load Balancing on GPU (More Optimized) # {#SPH_dlb_gpu_more_opt}
  *
  *
  * This example show the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
- * \ref{SPH_dlb} is that here we use GPU and 1.2 Millions particles. Simulate 1.5 second should be duable on a 1050Ti within a couple
- * of hours.
+ * \ref{SPH_dlb_gpu_opt} is that here we use two kernels to calculate forces: one for fluid and one for boundaries. Also we use the function
+ * get_indexes_by_type to get the indexes of the fluid and boundary particles and use these two sets to launch two distinct kernels
+ * (one over fluid and one over boundary) to calculate forces and density change. Simulating 1.5 seconds should be doable on a mobile
+ *  1050Ti in about 1 hour and 7 minutes
  *
  * \htmlonly
  * <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
@@ -27,10 +29,19 @@
  * \endhtmlonly
  *
  *
- * ## GPU ## {#e7_sph_inclusion}
+ * ## get_indexes_by_type ## {#e7_sph_more_opt_gibt}
  *
- * This example is an optimization of the example \ref SPH_dlb_gpu all the optimization operated on this example has been explained
- * here \ref e3_md_gpu_opt so we will not go into the details
+ * This function can be used to get the indexes of a certain type in a particle set and save such indexes in an openfpm::vector<aggregate<unsigned int>>.
+ * The constructed set of indexes can be used to run a kernel on a specific set of particles.
+ *
+ * \snippet Vector/7_SPH_dlb_gpu_more_opt/main.cu get indexes by type
+ *
+ * The function get_indexes_by_type has four arguments: the first is the vector of the properties of the particles. In
+ * this case, because we use the sorted particles to calculate forces, we have to get the indexes for the sorted
+ * particles with vd.getPropVectorSort(). In case we want to use the non-sorted ones we use vd.getPropVector(). The second
+ * argument is the output containing the indexes of the particle types we want to get. Because the vector can contain
+ * ghost particles and real particles, with the third argument we indicate that we want only real particles and no ghost particles.
+ * The last argument is the GPU context handle.
  *
  * we report the full code here
  *
@@ -480,19 +491,22 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
 	// Update the cell-list
 	vd.updateCellList<type,rho,Pressure,velocity>(NN);
 
+	//! \cond [get indexes by type] \endcond
+
 	// get the particles fluid ids
 	get_indexes_by_type<type,type_is_fluid>(vd.getPropVectorSort(),fluid_ids,vd.size_local(),vd.getVC().getmgpuContext());
 
 	// get the particles fluid ids
 	get_indexes_by_type<type,type_is_border>(vd.getPropVectorSort(),border_ids,vd.size_local(),vd.getVC().getmgpuContext());
 
-
 	auto part = fluid_ids.getGPUIterator(96);
 	CUDA_LAUNCH(calc_forces_fluid_gpu,part,vd.toKernel_sorted(),fluid_ids.toKernel(),NN.toKernel(),W_dap,cbar);
 
 	part = border_ids.getGPUIterator(96);
 	CUDA_LAUNCH(calc_forces_border_gpu,part,vd.toKernel_sorted(),border_ids.toKernel(),NN.toKernel(),W_dap,cbar);
 
+	//! \cond [get indexes by type] \endcond
+
 	vd.merge_sort<force,drho,red>(NN);
 
 	max_visc = reduce_local<red,_max_>(vd);
diff --git a/script/discover_package_manager b/script/discover_package_manager
index f8a9ebfd39b58867a4ebfe4a768bcb4365e0ae7c..f0002597ccf14fc91076a1a4ef50604e23d9b814 100755
--- a/script/discover_package_manager
+++ b/script/discover_package_manager
@@ -15,16 +15,6 @@ if [ x"$1" == x"osx" ]; then
             discover_package_ret="sudo brew"
             echo 'Package Manager: '"$discover_package_manager_ret"
             return
-        else
-            if [ ! -w $brew_idir ]; then
-                echo -e "\033[43;30;1;5mWARNING: \033[0m $brew_idir is not writtable, brew require that $brew_idir is writtable and $brew_idir/bin is in your PATH, otherwise it will be not possible to install with brew"
-                commands[0]="sudo chown -R $USER $brew_idir && chmod -R u+w $brew_idir"
-                possible_solutions "${commands[@]}"
-            fi
-        fi
-        if [ ! -w  ]; then
-            echo -e "\033[43,33;5mWARNING: \033[0m $brew_idir is not writtable, brew require that $brew_idir is writtable and $brew_idir/bin is in your PATH, otherwise it will be not possible to install with brew"
-            sleep 10
         fi
         echo echo 'Package Manager: '"$discover_package_manager_ret"
         return
diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp
index c3a061d3f6f84c62daed9334927bbb18eb73f463..2a11a37972a2507c3418370018e102d28b92811f 100644
--- a/src/Grid/grid_dist_id_comm.hpp
+++ b/src/Grid/grid_dist_id_comm.hpp
@@ -137,7 +137,7 @@ template<unsigned int dim, typename St, typename T, typename Decomposition = Car
 class grid_dist_id_comm
 {
 	//! VCluster
-	Vcluster<> & v_cl;
+	Vcluster<Memory> & v_cl;
 
 	//! Maps the processor id with the communication request into map procedure
 	openfpm::vector<size_t> p_map_req;
@@ -1108,14 +1108,14 @@ public:
 		queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);
 
 		for (size_t i = 0 ; i < loc_grid.size() ; i++)
-		{loc_grid.get(i).removeCopyReset();}
+		{loc_grid.get(i).removeAddUnpackReset();}
 
 		ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def);
 
 		merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);
 
 		for (size_t i = 0 ; i < loc_grid.size() ; i++)
-		{loc_grid.get(i).removeCopyFinalize(v_cl.getmgpuContext());}
+		{loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getmgpuContext());}
 	}
 
 	/*! \brief It merge the information in the ghost with the
@@ -1249,7 +1249,7 @@ public:
 	 *
 	 */
 	grid_dist_id_comm()
-	:v_cl(create_vcluster())
+	:v_cl(create_vcluster<Memory>())
 	{
 
 	}
diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
index 3ef536e5b9438b34bfcd161d915d78bde45f5224..eb1ecd75206570df67df8af9c2e87327ebaaf9f9 100644
--- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
+++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
@@ -220,9 +220,12 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
 	gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
 
 	gdist.template deviceToHost<0>();
-//	gdist.write("broken");
+	gdist.write("before_ghost");
 
-//	gdist.template ghost_get<0>(RUN_ON_DEVICE);
+	gdist.template ghost_get<0>(RUN_ON_DEVICE);
+
+	gdist.template deviceToHost<0>();
+	gdist.write("after_ghost");
 }