diff --git a/src/Grid/grid_base_implementation.hpp b/src/Grid/grid_base_implementation.hpp
index 8409c6bd688e7466f1c5fcaa6976f20b2d113557..799e81fa0b578be96f528248660d2071a7d38bbb 100644
--- a/src/Grid/grid_base_implementation.hpp
+++ b/src/Grid/grid_base_implementation.hpp
@@ -752,7 +752,7 @@ public:
 		{
 			auto key = it.get();
 
-			if (this->get_o(key) != this->get_o(key))
+			if (this->get_o(key) != g.get_o(key))
 				return false;
 
 			++it;
diff --git a/src/Grid/grid_key.hpp b/src/Grid/grid_key.hpp
index 106f7ee5f4835ebc97d91e9c4697810cbace22fe..90fab130df3b04587ad44f6bbd62a96ef1d5960d 100644
--- a/src/Grid/grid_key.hpp
+++ b/src/Grid/grid_key.hpp
@@ -397,7 +397,7 @@ public:
 	 *
 	 */
 	template<typename a, typename ...T>
-	inline void set(a v, T...t)
+	__device__ __host__ inline void set(a v, T...t)
 	{
 #ifdef SE_CLASS1
 		if (sizeof...(t) != dim -1)
diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp
index ed96216e14832599a3f2a1a1cddc1871c9679ced..fa611b06a6c5fe61e001d56bfffbc71137f1a2ef 100644
--- a/src/Grid/grid_sm.hpp
+++ b/src/Grid/grid_sm.hpp
@@ -186,7 +186,7 @@ class grid_sm
 	 *
 	 */
 
-	inline void Initialize(const size_t sz)
+	__device__ __host__ inline void Initialize(const size_t sz)
 	{
 		//! Initialize the basic structure for each dimension
 		sz_s[0] = sz;
@@ -217,7 +217,7 @@ class grid_sm
 	 *
 	 */
 
-	inline void Initialize(const size_t (& sz)[N])
+	__device__ __host__ inline void Initialize(const size_t (& sz)[N])
 	{
 		//! Initialize the basic structure for each dimension
 		sz_s[0] = sz[0];
@@ -353,7 +353,8 @@ public:
 	 *
 	 */
 
-	template<typename S> inline grid_sm(const grid_sm<N,S> & g)
+	template<typename S>
+	__device__ __host__ inline grid_sm(const grid_sm<N,S> & g)
 	{
 		size_tot = g.size_tot;
 
@@ -380,7 +381,7 @@ public:
 
 	// Static element to calculate total size
 
-	inline size_t totalSize(const size_t (& sz)[N])
+	__device__ __host__ inline size_t totalSize(const size_t (& sz)[N])
 	{
 		size_t tSz = 1;
 
@@ -415,7 +416,7 @@ public:
 	 *
 	 */
 
-	inline grid_sm(const size_t (& sz)[N])
+	__device__ __host__ inline grid_sm(const size_t (& sz)[N])
 	: size_tot(totalSize(sz))
 	{
 		Initialize(sz);
@@ -625,7 +626,7 @@ public:
 	}
 
 	//! Destructor
-	~grid_sm() {};
+	__device__ __host__ ~grid_sm() {};
 
 	/*! \brief Return the size of the grid
 	 *
@@ -713,7 +714,7 @@ public:
 	 *
 	 */
 
-	inline size_t size_s(unsigned int i) const
+	__device__ __host__ inline size_t size_s(unsigned int i) const
 	{
 		return sz_s[i];
 	}
@@ -737,11 +738,21 @@ public:
 	 * \return get the size of the grid as an array
 	 *
 	 */
-	inline const size_t (& getSize() const)[N]
+	__device__ __host__ inline const size_t (& getSize() const)[N]
 	{
 		return sz;
 	}
 
+	/*! \brief Returns the size of the grid in the passed array
+	 *
+	 * \return the size of the grid in the passed array
+	 *
+	 */
+	__device__ __host__ inline void getSize(size_t (&size) [N]) const
+	{
+		for (size_t i = 0; i < N; ++i) size[i] = sz[i];
+	}
+
 	/*! \brief Return a sub-grid iterator
 	 *
 	 * Return a sub-grid iterator, to iterate through the grid
diff --git a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh
index fc108d88969ee087844aa85a389dd66527a1847e..3981ce07658ac506f4c771afccb78736a1da76ff 100644
--- a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh
+++ b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh
@@ -29,30 +29,71 @@ class CellDecomposer_gpu_ker
 	//! transformation
 	transform t;
 
+	//! Unit box of the Cell list
+	SpaceBox<dim,T> box_unit;
+
+	//! Grid structure of the Cell list
+	grid_sm<dim,void> gr_cell;
+
+	//! cell_shift
+	Point<dim,long int> cell_shift;
 public:
 
 	__device__ __host__ CellDecomposer_gpu_ker()
 	{}
 
-	__device__ __host__ CellDecomposer_gpu_ker(openfpm::array<T,dim,cnt_type> & spacing_c,
-	         	 	 	  openfpm::array<ids_type,dim,cnt_type> & div_c,
-	         	 	 	  openfpm::array<ids_type,dim,cnt_type> & off,
-	         	 	 	  const transform & t)
-	:spacing_c(spacing_c),div_c(div_c),off(off),t(t)
+	__device__ __host__ CellDecomposer_gpu_ker(
+		openfpm::array<T,dim,cnt_type> & spacing_c,
+		openfpm::array<ids_type,dim,cnt_type> & div_c,
+		openfpm::array<ids_type,dim,cnt_type> & off,
+		const transform & t)
+	: spacing_c(spacing_c),div_c(div_c),off(off),t(t)
 	{}
 
-	__host__ grid_sm<dim,void> getGrid()
+	__device__ __host__ CellDecomposer_gpu_ker(
+		openfpm::array<T,dim,cnt_type> & spacing_c,
+		openfpm::array<ids_type,dim,cnt_type> & div_c,
+		openfpm::array<ids_type,dim,cnt_type> & off,
+		const transform & t,
+		SpaceBox<dim,T> box_unit,
+		grid_sm<dim,void> gr_cell,
+		Point<dim,long int> cell_shift)
+	: spacing_c(spacing_c),div_c(div_c),off(off),t(t),
+	box_unit(box_unit), gr_cell(gr_cell), cell_shift(cell_shift)
+	{}
+
+	__device__ __host__ grid_sm<dim,void> getGrid()
 	{
 		size_t sz[dim];
 
 		for (size_t i = 0 ; i < dim ; i++)
 		{
-			sz[i] = div_c[i] + 2*off[i];
+			sz[i] = div_c[i];
 		}
 
 		return grid_sm<dim,void> (sz);
 	}
 
+	__device__ __host__ void getGridSize(size_t (& sz)[dim]) const
+	{
+		for (size_t i = 0 ; i < dim ; i++)
+		{
+			sz[i] = div_c[i] + 2*off[i];
+		}
+	}
+
+	template<typename ids_type2>
+	__device__ __host__ mem_id getGridLinId(const grid_key_dx<dim,ids_type2> & gk) const
+	{
+		mem_id lid = gk.get(0);
+		for (mem_id i = 1 ; i < dim ; i++)
+		{
+			lid += gk.get(i) * (div_c[i-1] + 2*off[i-1]);
+		}
+
+		return lid;
+	}
+
 	__device__ __host__ inline grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const
 	{
 		return cid_<dim,cnt_type,ids_type,transform>::get_cid_key(spacing_c,off,t,xp);
@@ -82,6 +123,46 @@ public:
 	{
 		return t;
 	}
+
+	__device__ __host__ inline grid_key_dx<dim> getCellGrid(const T (& pos)[dim]) const
+	{
+		grid_key_dx<dim> key;
+		key.set_d(0,ConvertToID(pos,0));
+
+		for (size_t s = 1 ; s < dim ; s++)
+		{
+			key.set_d(s,ConvertToID(pos,s));
+		}
+
+		return key;
+	}
+
+	__device__ __host__ inline grid_key_dx<dim> getCellGrid(const Point<dim,T> & pos) const
+	{
+		grid_key_dx<dim> key;
+		key.set_d(0,ConvertToID(pos,0));
+
+		for (size_t s = 1 ; s < dim ; s++)
+		{
+			key.set_d(s,ConvertToID(pos,s));
+		}
+
+		return key;
+	}
+
+	__device__ __host__ inline size_t ConvertToID(const T (&x)[dim] ,size_t s) const
+	{
+		size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s];
+		id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
+		return id;
+	}
+
+	__device__ __host__ inline size_t ConvertToID(const Point<dim,T> & x ,size_t s, size_t sc = 0) const
+	{
+		size_t id = openfpm::math::size_t_floor(t.transform(x,s) / box_unit.getHigh(s)) + off[s];
+		id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
+		return id;
+	}
 };
 
 #endif /* CELLDECOMPOSER_GPU_KER_HPP_ */
diff --git a/src/NN/CellList/cuda/CellList_gpu.hpp b/src/NN/CellList/cuda/CellList_gpu.hpp
index c76235835f37b501dd4ddebabd8363cdb7926c64..defee388cdefe38e20acf4e8a7397863c6ff2b70 100644
--- a/src/NN/CellList/cuda/CellList_gpu.hpp
+++ b/src/NN/CellList/cuda/CellList_gpu.hpp
@@ -44,7 +44,10 @@ struct CellList_gpu_ker_selector
 																			 openfpm::array<ids_type,dim,cnt_type> & div_c,
 																			 openfpm::array<ids_type,dim,cnt_type> & off,
 																			 const transform & t,
-																			 unsigned int g_m)
+																			 unsigned int g_m,
+																			 const SpaceBox<dim,T>& box_unit,
+																			 const grid_sm<dim,void>& gr_cell,
+																			 const Point<dim,long int>& cell_shift)
 	{
 		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
 																			sorted_to_not_sorted.toKernel(),
@@ -54,7 +57,10 @@ struct CellList_gpu_ker_selector
 																			div_c,
 																			off,
 																			t,
-																			g_m);
+																			g_m,
+																			box_unit,
+																			gr_cell,
+																			cell_shift);
 	}
 };
 
@@ -73,10 +79,14 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector
 			 vector_cnt_type & dprt,
 			 openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
 			 openfpm::array<T,dim,cnt_type> & spacing_c,
-	         openfpm::array<ids_type,dim,cnt_type> & div_c,
-	         openfpm::array<ids_type,dim,cnt_type> & off,
-	         const transform & t,
-	         unsigned int g_m)
+			 openfpm::array<ids_type,dim,cnt_type> & div_c,
+			 openfpm::array<ids_type,dim,cnt_type> & off,
+			 const transform & t,
+			 unsigned int g_m,
+			 const SpaceBox<dim,T>& box_unit,
+			 const grid_sm<dim,void>& gr_cell,
+			 const Point<dim,long int>& cell_shift)
+
 	{
 		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
 																		cell_nn_list.toKernel(),
@@ -86,7 +96,11 @@ struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector
 																		spacing_c,
 																		div_c,
 																		off,
-																		t,g_m);
+																		t,
+																		g_m,
+																		box_unit,
+																		gr_cell,
+																		cell_shift);
 	}
 };
 
@@ -648,14 +662,17 @@ public:
 				cells_nn,
 				cells_nn_list,
 				cl_sparse,
-		    	sorted_to_not_sorted,
-		    	sorted_domain_particles_ids,
-		    	nnc_rad,
-		        spacing_c,
-		        div_c,
-		        off,
-		        this->getTransform(),
-		        g_m);
+				sorted_to_not_sorted,
+				sorted_domain_particles_ids,
+				nnc_rad,
+				spacing_c,
+				div_c,
+				off,
+				this->getTransform(),
+				g_m,
+				this->box_unit,
+				this->gr_cell,
+				this->cell_shift);
 	}
 
 	/*! \brief Clear the structure
diff --git a/src/NN/CellList/cuda/CellList_gpu_ker.cuh b/src/NN/CellList/cuda/CellList_gpu_ker.cuh
index 004716a7dd1872a5fef8c8adb0876ed5ae1f1808..becb50c5e3f7c12a5e04921fd3971218e0df11fc 100644
--- a/src/NN/CellList/cuda/CellList_gpu_ker.cuh
+++ b/src/NN/CellList/cuda/CellList_gpu_ker.cuh
@@ -449,11 +449,14 @@ public:
 					 openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
 					 openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells,
 					 openfpm::array<T,dim,cnt_type> & spacing_c,
-			         openfpm::array<ids_type,dim,cnt_type> & div_c,
-			         openfpm::array<ids_type,dim,cnt_type> & off,
-			         const transform & t,
-			         unsigned int g_m)
-	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),
+					 openfpm::array<ids_type,dim,cnt_type> & div_c,
+					 openfpm::array<ids_type,dim,cnt_type> & off,
+					 const transform & t,
+					 unsigned int g_m,
+					 SpaceBox<dim,T> box_unit,
+					 grid_sm<dim,void> gr_cell,
+					 Point<dim,long int> cell_shift)
+	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift),
 	 starts(starts),srt(srt),dprt(dprt),rad_cells(rad_cells),g_m(g_m)
 	{
 	}
@@ -616,12 +619,16 @@ public:
 					 openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
 					 openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
 					 openfpm::array<T,dim,cnt_type> & spacing_c,
-			         openfpm::array<ids_type,dim,cnt_type> & div_c,
-			         openfpm::array<ids_type,dim,cnt_type> & off,
-			         const transform & t,
-			         unsigned int g_m)
-	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),
-	 cl_sparse(cl_sparse),g_m(g_m)
+					 openfpm::array<ids_type,dim,cnt_type> & div_c,
+					 openfpm::array<ids_type,dim,cnt_type> & off,
+					 const transform & t,
+					 unsigned int g_m,
+					 SpaceBox<dim,T> box_unit,
+					 grid_sm<dim,void> gr_cell,
+					 Point<dim,long int> cell_shift)
+
+	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t,box_unit,gr_cell,cell_shift),
+	cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),cl_sparse(cl_sparse),g_m(g_m)
 	{
 	}
 
diff --git a/src/Space/Shape/Point.hpp b/src/Space/Shape/Point.hpp
index 6391bb54123ddd76ecf97cce676f9c0d3a7c492a..1fb4a801bed16802869e22a9815259f86a21300c 100644
--- a/src/Space/Shape/Point.hpp
+++ b/src/Space/Shape/Point.hpp
@@ -416,7 +416,7 @@ template<unsigned int dim ,typename T> class Point
 	 * \return the reference
 	 *
 	 */
-	T & value(size_t i)
+	__device__ __host__ T & value(size_t i)
 	{
 		return get(i);
 	}
diff --git a/src/Vector/cuda/map_vector_std_cuda.hpp b/src/Vector/cuda/map_vector_std_cuda.hpp
index 319add7ba4037341e89664ff30ab578c0dbe1bec..d384a51f598adfab38d575a7d0884de46ab0bdfa 100644
--- a/src/Vector/cuda/map_vector_std_cuda.hpp
+++ b/src/Vector/cuda/map_vector_std_cuda.hpp
@@ -277,8 +277,11 @@ public:
 	 *
 	 */
 	vector(const std::initializer_list<T> & v)
-	:base(v)
+	// :base(v)
 	{
+		// causes error if simply passed to base
+		for (T t: v)
+			add(t);
 	}
 
 	//! Constructor from another vector
@@ -288,6 +291,16 @@ public:
 		base.swap(v.base);
 	}
 
+	//! Constructor from iterators
+	template< class InputIt >
+	vector(InputIt first, InputIt last)
+	{
+		while(first != last) {
+			add(*first);
+			++first;
+		}
+	}
+
 	//! destructor
 	~vector() noexcept
 	{
diff --git a/src/Vector/cuda/map_vector_std_cuda_ker.cuh b/src/Vector/cuda/map_vector_std_cuda_ker.cuh
index cb1b054f1b15e1271ef8cd78ad1e05acc1970b79..ea58382657d70965a7de5f2a18e0362be019fd03 100644
--- a/src/Vector/cuda/map_vector_std_cuda_ker.cuh
+++ b/src/Vector/cuda/map_vector_std_cuda_ker.cuh
@@ -126,6 +126,27 @@ public:
 		return base.template get<0>(id);
 	}
 
+	/*! \brief Return the pointer to the chunk of memory
+	 *
+	 * \return the pointer to the chunk of memory
+	 *
+	 */
+	__device__ __host__ void * getPointer()
+	{
+		return &base.template get<0>(0);
+	}
+
+	/*! \brief Return the pointer to the chunk of memory
+	 *
+	 * \return the pointer to the chunk of memory
+	 *
+	 */
+	__device__ __host__ const void * getPointer() const
+	{
+		return &base.template get<0>(0);
+	}
+
+
 	vector_custd_ker()
 	{}
 
diff --git a/src/Vector/map_vector_std.hpp b/src/Vector/map_vector_std.hpp
index 2d4a875fcec92d3d7870efc5a6cea211d0d9be7f..c600e49d425fa74584990b6a7152a9486f4ee03c 100644
--- a/src/Vector/map_vector_std.hpp
+++ b/src/Vector/map_vector_std.hpp
@@ -644,6 +644,16 @@ public:
 		base.swap(v.base);
 	}
 
+	//! Constructor from iterators
+	template< class InputIt >
+	vector(InputIt first, InputIt last)
+	{
+		while(first != last) {
+			add(*first);
+			++first;
+		}
+	}
+
 	//! destructor
 	~vector() noexcept
 	{
diff --git a/src/util/mathutil.hpp b/src/util/mathutil.hpp
index 1b05785a90aeac2155b2bae58745656c1a23daf4..e813f30b29097a444903dc365d19acbef57f811b 100644
--- a/src/util/mathutil.hpp
+++ b/src/util/mathutil.hpp
@@ -19,7 +19,7 @@ namespace openfpm
 		 *
 		 */
 		template <typename T>
-		int sgn(T val)
+		__host__ __device__ int sgn(T val)
 		{
 			return (T(0) < val) - (val < T(0));
 		}
@@ -35,7 +35,8 @@ namespace openfpm
 		 * \return the factorial
 		 *
 		 */
-		static inline size_t factorial(size_t f)
+		__host__ __device__ static inline
+		size_t factorial(size_t f)
 		{
 			size_t fc = 1;
 
@@ -60,7 +61,8 @@ namespace openfpm
 		 * \param k
 		 *
 		 */
-		static inline size_t C(size_t n, size_t k)
+		__host__ __device__ static inline
+		size_t C(size_t n, size_t k)
 		{
 			return factorial(n)/(factorial(k)*factorial(n-k));
 		}
@@ -75,7 +77,8 @@ namespace openfpm
 		 *
 		 *
 		 */
-		static inline size_t round_big_2(size_t n)
+		__host__ __device__ static inline
+		size_t round_big_2(size_t n)
 		{
 			n--;
 			n |= n >> 1;   // Divide by 2^k for consecutive doublings of k up to 32,
@@ -102,14 +105,15 @@ namespace openfpm
 		 */
 
 		template<class T>
-		inline constexpr size_t pow(const T base, unsigned const exponent)
+		__host__ __device__ inline constexpr
+		size_t pow(const T base, unsigned const exponent)
 		{
 			// (parentheses not required in next line)
 			return (exponent == 0) ? 1 : (base * pow(base, exponent-1));
 		}
 
         template<class T>
-        double intpowlog(const T x, unsigned const e)
+        __host__ __device__ double intpowlog(const T x, unsigned const e)
         {
             if (e == 0) return 1.0;
             if (e % 2 == 0)
@@ -135,7 +139,8 @@ namespace openfpm
 		 * \param n modulo
 		 *
 		 */
-		static inline long int positive_modulo(long int i, long int n)
+		__host__ __device__ static inline
+		long int positive_modulo(long int i, long int n)
 		{
 		    return (i % n + n) % n;
 		}
@@ -155,7 +160,9 @@ namespace openfpm
 		 * \return the bound number
 		 *
 		 */
-		template<typename T> static inline T periodic(const T & pos, const T & p2, const T & p1)
+		template<typename T>
+		__host__ __device__ static inline
+		T periodic(const T & pos, const T & p2, const T & p1)
 		{
 			T pos_tmp;
 
@@ -182,7 +189,9 @@ namespace openfpm
 		 * \return the bound number
 		 *
 		 */
-		template<typename T> __device__ __host__ static inline T periodic_l(const T & pos, const T & p2, const T & p1)
+		template<typename T>
+		__device__ __host__ static inline
+		T periodic_l(const T & pos, const T & p2, const T & p1)
 		{
 			T pos_tmp = pos;
 
@@ -207,7 +216,7 @@ namespace openfpm
 		 *
 		 *
 		 */
-		inline long int size_t_floor(double x)
+		__host__ __device__ inline long int size_t_floor(double x)
 		{
 		  size_t i = (long int)x; /* truncate */
 		  return i - ( i > x ); /* convert trunc to floor */
@@ -217,7 +226,7 @@ namespace openfpm
 		 *
 		 *
 		 */
-		inline long int size_t_floor(float x)
+		__host__ __device__ inline long int size_t_floor(float x)
 		{
 		  size_t i = (long int)x; /* truncate */
 		  return i - ( i > x ); /* convert trunc to floor */
@@ -258,7 +267,7 @@ namespace openfpm
 		 * \param value
 		 *
 		 */
-		inline int log2_64 (uint64_t value)
+		__host__ __device__ inline int log2_64 (uint64_t value)
 		{
 		    value |= value >> 1;
 		    value |= value >> 2;
@@ -276,7 +285,7 @@ namespace openfpm
 		 *
 		 *
 		 */
-		inline long int size_t_floor(boost::multiprecision::float128 x)
+		__host__ __device__ inline long int size_t_floor(boost::multiprecision::float128 x)
 		{
 		  size_t i = (long int)x; /* truncate */
 		  return i - ( i > x ); /* convert trunc to floor */