Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mosaic/software/parallel-computing/openfpm/openfpm_data
  • argupta/openfpm_data
2 results
Show changes
Commits on Source (427)
Showing
with 641 additions and 74 deletions
...@@ -36,7 +36,10 @@ find_package(Vc REQUIRED) ...@@ -36,7 +36,10 @@ find_package(Vc REQUIRED)
if (CUDA_ON_BACKEND STREQUAL "HIP" AND NOT HIP_FOUND) if (CUDA_ON_BACKEND STREQUAL "HIP" AND NOT HIP_FOUND)
find_package(HIP) find_package(HIP)
endif() endif()
find_package(OpenMP) if (NOT CUDA_ON_BACKEND STREQUAL "HIP")
find_package(OpenMP)
endif()
###### CONFIG.h FILE ###### ###### CONFIG.h FILE ######
......
...@@ -93,6 +93,8 @@ public: ...@@ -93,6 +93,8 @@ public:
} }
} }
static bool noPointers() {return true;}
#ifdef __NVCC__ #ifdef __NVCC__
//Constructors from dim3 and uint3 objects //Constructors from dim3 and uint3 objects
__host__ __device__ grid_smb(const dim3 blockDimensions) __host__ __device__ grid_smb(const dim3 blockDimensions)
...@@ -340,6 +342,11 @@ public: ...@@ -340,6 +342,11 @@ public:
return sz; return sz;
} }
__host__ __device__ inline indexT getBlockEgdeSize() const
{
return blockEdgeSize;
}
__host__ __device__ inline indexT getBlockSize() const __host__ __device__ inline indexT getBlockSize() const
{ {
return blockSize; return blockSize;
......
...@@ -141,6 +141,11 @@ public: ...@@ -141,6 +141,11 @@ public:
grid_smb<dim,blockEdgeSize,indexT>::swap(other); grid_smb<dim,blockEdgeSize,indexT>::swap(other);
} }
__host__ __device__ inline indexT getBlockEgdeSize() const
{
return blockEdgeSize;
}
__host__ __device__ inline indexT size() const __host__ __device__ inline indexT size() const
{ {
return grid_smb<dim,blockEdgeSize,indexT>::size(); return grid_smb<dim,blockEdgeSize,indexT>::size();
......
...@@ -34,7 +34,7 @@ template<unsigned int dim> ...@@ -34,7 +34,7 @@ template<unsigned int dim>
struct comb struct comb
{ {
//! Array that store the combination //! Array that store the combination
char c[dim]; signed char c[dim];
/*! \brief check if it is a valid combination /*! \brief check if it is a valid combination
* *
...@@ -105,7 +105,7 @@ struct comb ...@@ -105,7 +105,7 @@ struct comb
* \return Result combination * \return Result combination
* *
*/ */
inline comb<dim> operator&(char c_) inline comb<dim> operator&(signed char c_)
{ {
comb<dim> ret; comb<dim> ret;
...@@ -246,7 +246,7 @@ struct comb ...@@ -246,7 +246,7 @@ struct comb
* *
*/ */
inline char operator[](int i) const inline signed char operator[](int i) const
{ {
return c[i]; return c[i];
} }
...@@ -257,7 +257,7 @@ struct comb ...@@ -257,7 +257,7 @@ struct comb
* *
*/ */
inline char * getComb() inline signed char * getComb()
{ {
return c; return c;
} }
...@@ -268,7 +268,7 @@ struct comb ...@@ -268,7 +268,7 @@ struct comb
* *
*/ */
inline const char * getComb() const inline const signed char * getComb() const
{ {
return c; return c;
} }
...@@ -282,7 +282,7 @@ struct comb ...@@ -282,7 +282,7 @@ struct comb
* \return value of the i index * \return value of the i index
* *
*/ */
inline char value(int i) const inline signed char value(int i) const
{ {
return c[i]; return c[i];
} }
...@@ -314,10 +314,10 @@ struct comb ...@@ -314,10 +314,10 @@ struct comb
* \param c list of numbers * \param c list of numbers
* *
*/ */
comb(std::initializer_list<char> c) comb(std::initializer_list<signed char> c)
{ {
size_t i = 0; size_t i = 0;
for(char x : c) for(signed char x : c)
{this->c[c.size() - i - 1] = x;i++;} {this->c[c.size() - i - 1] = x;i++;}
} }
...@@ -397,7 +397,7 @@ template<> ...@@ -397,7 +397,7 @@ template<>
struct comb<0> struct comb<0>
{ {
//! FIX //! FIX
char c[0]; signed char c[0];
/*! \brief check if it is a valid combination /*! \brief check if it is a valid combination
* *
...@@ -467,7 +467,7 @@ struct comb<0> ...@@ -467,7 +467,7 @@ struct comb<0>
* *
*/ */
inline char operator[](int i) inline signed char operator[](int i)
{ {
return 0; return 0;
} }
...@@ -478,7 +478,7 @@ struct comb<0> ...@@ -478,7 +478,7 @@ struct comb<0>
* *
*/ */
inline char * getComb() inline signed char * getComb()
{ {
return c; return c;
} }
...@@ -492,7 +492,7 @@ struct comb<0> ...@@ -492,7 +492,7 @@ struct comb<0>
* \return value of the i index * \return value of the i index
* *
*/ */
inline char value(int i) const inline signed char value(int i) const
{ {
return c[i]; return c[i];
} }
......
...@@ -94,12 +94,12 @@ __global__ void copy_ndim_grid_device(grid_type src, grid_type dst) ...@@ -94,12 +94,12 @@ __global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
#endif #endif
template<bool inte_or_lin,unsigned int dim, typename T> template<bool inte_or_lin, typename base_grid, unsigned int dim, typename T>
struct grid_toKernelImpl struct grid_toKernelImpl
{ {
template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_lin,typename grid_type::linearizer_type> toKernel(grid_type & gc) template<typename grid_type> static base_grid toKernel(grid_type & gc)
{ {
grid_gpu_ker<dim,T,memory_traits_lin,typename grid_type::linearizer_type> g(gc.getGrid()); /*grid_gpu_ker<dim,T,memory_traits_lin,typename grid_type::linearizer_type>*/base_grid g(gc.getGrid());
g.get_data_().disable_manage_memory(); g.get_data_().disable_manage_memory();
g.get_data_().mem = gc.get_internal_data_().mem; g.get_data_().mem = gc.get_internal_data_().mem;
...@@ -112,12 +112,12 @@ struct grid_toKernelImpl ...@@ -112,12 +112,12 @@ struct grid_toKernelImpl
} }
}; };
template<unsigned int dim, typename T> template<typename base_grid, unsigned int dim, typename T>
struct grid_toKernelImpl<true,dim,T> struct grid_toKernelImpl<true,base_grid,dim,T>
{ {
template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte, typename grid_type::linearizer_type> toKernel(grid_type & gc) template<typename grid_type> static base_grid toKernel(grid_type & gc)
{ {
grid_gpu_ker<dim,T,memory_traits_inte, typename grid_type::linearizer_type> g(gc.getGrid()); /*grid_gpu_ker<dim,T,memory_traits_inte, typename grid_type::linearizer_type>*/ base_grid g(gc.getGrid());
copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type, copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_()); typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_());
......
...@@ -160,6 +160,70 @@ BOOST_AUTO_TEST_CASE (gpu_computation) ...@@ -160,6 +160,70 @@ BOOST_AUTO_TEST_CASE (gpu_computation)
#endif #endif
} }
BOOST_AUTO_TEST_CASE (gpu_computation_lambda)
{
#ifdef CUDA_GPU
{
size_t sz[3] = {64,64,64};
grid_gpu<3, aggregate<float,float[2],float[2][2]> > c3(sz);
c3.setMemory();
// Assign
auto c3_k = c3.toKernel();
auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
{
grid_key_dx<3,int> p({blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y,
blockIdx.z * blockDim.z + threadIdx.z});
c3_k.template get<0>(p) = 5.0;
c3_k.template get<1>(p)[0] = 5.0;
c3_k.template get<1>(p)[1] = 5.0;
c3_k.template get<2>(p)[0][0] = 5.0;
c3_k.template get<2>(p)[0][1] = 5.0;
c3_k.template get<2>(p)[1][0] = 5.0;
c3_k.template get<2>(p)[1][1] = 5.0;
};
auto ite = c3.getGPUIterator({0,0,0},{63,63,63});
CUDA_LAUNCH_LAMBDA(ite,lamb);
c3.deviceToHost<0,1,2>();
auto it = c3.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c3.template get<0>(key) == 5.0;
good &= c3.template get<1>(key)[0] == 5.0;
good &= c3.template get<1>(key)[1] == 5.0;
good &= c3.template get<2>(key)[0][0] == 5.0;
good &= c3.template get<2>(key)[0][1] == 5.0;
good &= c3.template get<2>(key)[1][0] == 5.0;
good &= c3.template get<2>(key)[1][1] == 5.0;
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
#endif
}
BOOST_AUTO_TEST_CASE (gpu_computation_stencil) BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
{ {
#ifdef CUDA_GPU #ifdef CUDA_GPU
......
...@@ -158,7 +158,7 @@ class grid_gpu_ker ...@@ -158,7 +158,7 @@ class grid_gpu_ker
typedef typename layout_base<T_>::type layout; typedef typename layout_base<T_>::type layout;
//! layout data //! layout data
layout data_; mutable layout data_;
...@@ -264,14 +264,32 @@ public: ...@@ -264,14 +264,32 @@ public:
* \return the const reference of the element * \return the const reference of the element
* *
*/ */
template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get_c<p>(data_,g1,grid_key_dx<dim>()))> template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get<p>(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline const r_type get(const grid_key_dx<dim,ids_type> & v1) const __device__ __host__ inline r_type get_debug(const grid_key_dx<dim,ids_type> & v1) const
{ {
#ifdef SE_CLASS1 #ifdef SE_CLASS1
if (check_bound(v1) == false) if (check_bound(v1) == false)
{fill_grid_error_array_overflow<dim,p>(this->template getPointer<p>(),v1);} {fill_grid_error_array_overflow<dim,p>(this->template getPointer<p>(),v1);}
#endif #endif
return layout_base<T_>::template get_c<p>(data_,g1,v1);
return layout_base<T_>::template get<p>(data_,g1,v1);
}
/*! \brief Get the const reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
*
* \return the const reference of the element
*
*/
template <unsigned int p, typename ids_type, typename r_type=decltype(layout_base<T_>::template get<p>(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get(const grid_key_dx<dim,ids_type> & v1) const
{
#ifdef SE_CLASS1
if (check_bound(v1) == false)
{fill_grid_error_array_overflow<dim,p>(this->template getPointer<p>(),v1);}
#endif
return layout_base<T_>::template get<p>(data_,g1,v1);
} }
/*! \brief Get the reference of the selected element /*! \brief Get the reference of the selected element
......
...@@ -620,7 +620,7 @@ private: ...@@ -620,7 +620,7 @@ private:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
} }
...@@ -705,8 +705,8 @@ public: ...@@ -705,8 +705,8 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
g.base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(g); g.base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(g);
#endif #endif
...@@ -726,7 +726,7 @@ public: ...@@ -726,7 +726,7 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
...@@ -809,7 +809,7 @@ public: ...@@ -809,7 +809,7 @@ public:
* \param stop end point * \param stop end point
* *
*/ */
struct ite_gpu<dim> getGPUIterator(grid_key_dx<dim,long int> & key1, grid_key_dx<dim,long int> & key2, size_t n_thr = default_kernel_wg_threads_) const struct ite_gpu<dim> getGPUIterator(const grid_key_dx<dim,long int> & key1, const grid_key_dx<dim,long int> & key2, size_t n_thr = default_kernel_wg_threads_) const
{ {
return getGPUIterator_impl<dim>(g1,key1,key2,n_thr); return getGPUIterator_impl<dim>(g1,key1,key2,n_thr);
} }
...@@ -850,12 +850,25 @@ public: ...@@ -850,12 +850,25 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
} }
/*! \brief Return the memory object
*
* Return the memory object
*
* \tparam p array to retrieve
*
*/
template<unsigned int p>
auto getMemory() -> decltype(boost::fusion::at_c<p>(data_).getMemory())
{
return boost::fusion::at_c<p>(data_).getMemory();
}
/*! \brief Set the object that provide memory from outside /*! \brief Set the object that provide memory from outside
* *
* An external allocator is useful with allocator like PreAllocHeapMem * An external allocator is useful with allocator like PreAllocHeapMem
...@@ -880,7 +893,7 @@ public: ...@@ -880,7 +893,7 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
} }
...@@ -909,7 +922,7 @@ public: ...@@ -909,7 +922,7 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
} }
...@@ -1506,7 +1519,7 @@ public: ...@@ -1506,7 +1519,7 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
#endif #endif
} }
...@@ -1536,8 +1549,8 @@ public: ...@@ -1536,8 +1549,8 @@ public:
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP) #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
grid.base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(grid); grid.base_gpu = grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(grid);
#endif #endif
} }
...@@ -1876,7 +1889,7 @@ public: ...@@ -1876,7 +1889,7 @@ public:
*/ */
grid_gpu_ker<dim,T_,layout_base,linearizer_type> toKernel() grid_gpu_ker<dim,T_,layout_base,linearizer_type> toKernel()
{ {
return grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); return grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
} }
/*! \brief Convert the grid into a data-structure compatible for computing into GPU /*! \brief Convert the grid into a data-structure compatible for computing into GPU
...@@ -1886,7 +1899,7 @@ public: ...@@ -1886,7 +1899,7 @@ public:
*/ */
const grid_gpu_ker<dim,T_,layout_base,linearizer_type> toKernel() const const grid_gpu_ker<dim,T_,layout_base,linearizer_type> toKernel() const
{ {
return grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,dim,T_>::toKernel(*this); return grid_toKernelImpl<is_layout_inte<layout_base<T_>>::value,grid_gpu_ker<dim,T_,layout_base,linearizer_type>,dim,T_>::toKernel(*this);
} }
#endif #endif
......
...@@ -108,12 +108,12 @@ struct ite_gpu ...@@ -108,12 +108,12 @@ struct ite_gpu
grid_key_dx<dim,int> start; grid_key_dx<dim,int> start;
grid_key_dx<dim,int> stop; grid_key_dx<dim,int> stop;
size_t nblocks() size_t nblocks() const
{ {
return wthr.x * wthr.y * wthr.z; return wthr.x * wthr.y * wthr.z;
} }
size_t nthrs() size_t nthrs() const
{ {
return thr.x * thr.y * thr.z; return thr.x * thr.y * thr.z;
} }
...@@ -365,6 +365,25 @@ public: ...@@ -365,6 +365,25 @@ public:
} }
} }
/*! \brief copy constructor
*
* \param g grid info
*
* construct a grid from another grid. As a copy constructor can't be template,
* the one above won't work for const grid_sm<N,S> & g, where S=T
*
*/
__device__ __host__ inline grid_sm(const grid_sm<N,T> & g)
{
size_tot = g.size_tot;
for (size_t i = 0 ; i < N ; i++)
{
sz[i] = g.sz[i];
sz_s[i] = g.sz_s[i];
}
}
// Static element to calculate total size // Static element to calculate total size
inline size_t totalSize(const size_t sz) inline size_t totalSize(const size_t sz)
...@@ -432,7 +451,7 @@ public: ...@@ -432,7 +451,7 @@ public:
*/ */
template<typename check=NoCheck, typename ids_type> template<typename check=NoCheck, typename ids_type>
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N]) const inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N]) const
{ {
mem_id lid; mem_id lid;
...@@ -468,7 +487,7 @@ public: ...@@ -468,7 +487,7 @@ public:
*/ */
template<typename check=NoCheck,typename ids_type> template<typename check=NoCheck,typename ids_type>
inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const char sum_id[N], const size_t (&bc)[N]) const inline mem_id LinId(const grid_key_dx<N,ids_type> & gk, const signed char sum_id[N], const size_t (&bc)[N]) const
{ {
mem_id lid; mem_id lid;
......
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
* *
* *
*/ */
template <typename T, bool has_noPointers> template <typename T, int has_noPointers>
struct Pack_selector_unknown_type_impl struct Pack_selector_unknown_type_impl
{ {
enum enum
...@@ -53,7 +53,7 @@ struct Pack_selector_unknown_type_impl ...@@ -53,7 +53,7 @@ struct Pack_selector_unknown_type_impl
}; };
template <typename T> template <typename T>
struct Pack_selector_unknown_type_impl<T,false> struct Pack_selector_unknown_type_impl<T,0>
{ {
enum enum
{ {
...@@ -61,6 +61,28 @@ struct Pack_selector_unknown_type_impl<T,false> ...@@ -61,6 +61,28 @@ struct Pack_selector_unknown_type_impl<T,false>
}; };
}; };
/*! \brief Pack selector for unknown type
*
*
*/
template <typename T>
struct Pack_selector_unknown_type_impl<T,2>
{
enum
{
value = PACKER_ARRAY_CP_PRIMITIVE
};
};
template <typename T>
struct Pack_selector_unknown_type_impl<T,3>
{
enum
{
value = PACKER_ARRAY_CP_PRIMITIVE
};
};
/*! \brief Pack selector for unknown type /*! \brief Pack selector for unknown type
* *
* *
...@@ -70,7 +92,7 @@ struct Pack_selector_known_type_impl ...@@ -70,7 +92,7 @@ struct Pack_selector_known_type_impl
{ {
enum enum
{ {
value = Pack_selector_unknown_type_impl<T, has_noPointers<T>::value >::value value = Pack_selector_unknown_type_impl<T, has_noPointers<T>::value + 2*std::is_array<T>::value >::value
}; };
}; };
......
...@@ -228,7 +228,7 @@ template<unsigned int dim ,typename T> class Point ...@@ -228,7 +228,7 @@ template<unsigned int dim ,typename T> class Point
* \return the norm of the vector * \return the norm of the vector
* *
*/ */
__device__ __host__ T norm() __device__ __host__ T norm() const
{ {
T n = 0.0; T n = 0.0;
......
...@@ -14,6 +14,29 @@ using BlockTypeOf = typename std::remove_reference<typename boost::fusion::resul ...@@ -14,6 +14,29 @@ using BlockTypeOf = typename std::remove_reference<typename boost::fusion::resul
template<typename AggregateT, unsigned int p> template<typename AggregateT, unsigned int p>
using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType; using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;
template<typename T>
struct meta_copy_set_bck
{
template<typename destType>
inline static void set(destType & bP ,T & backgroundValue, int j)
{
bP[j] = backgroundValue;
}
};
template<unsigned int N, typename T>
struct meta_copy_set_bck<T[N]>
{
template<typename destType>
inline static void set(destType & bP ,T * backgroundValue, int j)
{
for (int i = 0 ; i < N ; i++)
{
bP[i][j] = backgroundValue[i];
}
}
};
template<typename AggregateBlockT, unsigned int threadBlockSize=128, typename indexT=long int, template<typename> class layout_base=memory_traits_inte> template<typename AggregateBlockT, unsigned int threadBlockSize=128, typename indexT=long int, template<typename> class layout_base=memory_traits_inte>
class BlockMapGpu class BlockMapGpu
{ {
...@@ -280,19 +303,21 @@ public: ...@@ -280,19 +303,21 @@ public:
* \tparam p property p * \tparam p property p
* *
*/ */
template<unsigned int p> template<unsigned int p, typename TypeBck>
void setBackgroundValue(ScalarTypeOf<AggregateBlockT, p> backgroundValue) void setBackgroundValue(TypeBck backgroundValue)
{ {
// NOTE: Here we assume user only passes Blocks and not scalars in the templated aggregate type // NOTE: Here we assume user only passes Blocks and not scalars in the templated aggregate type
typedef BlockTypeOf<AggregateInternalT, p> BlockT; typedef BlockTypeOf<AggregateInternalT, p> BlockT;
typedef typename std::remove_all_extents<BlockTypeOf<AggregateInternalT, p>>::type BlockT_noarr;
typedef BlockTypeOf<AggregateInternalT, pMask> BlockM; typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;
BlockT bP; BlockT bP;
BlockM bM; BlockM bM;
for (unsigned int i = 0; i < BlockT::size; ++i) for (unsigned int i = 0; i < BlockT_noarr::size; ++i)
{ {
bP[i] = backgroundValue; meta_copy_set_bck<TypeBck>::set(bP,backgroundValue,i);
//meta_copy<TypeBck>::meta_copy_(backgroundValue,bP[][i]);
bM[i] = 0; bM[i] = 0;
} }
......
...@@ -718,6 +718,8 @@ public: ...@@ -718,6 +718,8 @@ public:
typedef sparse_grid_gpu_index<self> base_key; typedef sparse_grid_gpu_index<self> base_key;
typedef indexT indexT_;
typedef decltype(std::declval<BMG>().toKernel().insertBlock(0)) insert_encap; typedef decltype(std::declval<BMG>().toKernel().insertBlock(0)) insert_encap;
/*! \brief return the size of the grid /*! \brief return the size of the grid
...@@ -1045,6 +1047,7 @@ private: ...@@ -1045,6 +1047,7 @@ private:
} }
template <typename stencil, typename... Args> template <typename stencil, typename... Args>
void applyStencilInPlaceNoShared(const Box<dim,int> & box, StencilMode & mode,Args... args) void applyStencilInPlaceNoShared(const Box<dim,int> & box, StencilMode & mode,Args... args)
{ {
...@@ -1795,6 +1798,11 @@ public: ...@@ -1795,6 +1798,11 @@ public:
void setMemory() void setMemory()
{} {}
auto insertBlockFlush(size_t block) -> decltype(BMG::insertBlockFlush(block))
{
return BMG::insertBlockFlush(block);
}
/*! \brief Return the grid information object /*! \brief Return the grid information object
* *
* \return grid information object * \return grid information object
...@@ -2628,6 +2636,28 @@ public: ...@@ -2628,6 +2636,28 @@ public:
applyStencils< SparseGridGpuKernels::stencil_func_conv2_b<dim,nLoop,prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...); applyStencils< SparseGridGpuKernels::stencil_func_conv2_b<dim,nLoop,prop_src1,prop_src2,prop_dst1,prop_dst2,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...);
} }
/*! \brief Apply a free type convolution using blocks
*
*
*/
template<unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_src3,
unsigned int prop_dst1 , unsigned int prop_dst2, unsigned int prop_dst3,
unsigned int stencil_size, typename lambda_f, typename ... ArgsT >
void conv3_b(grid_key_dx<dim> start, grid_key_dx<dim> stop , lambda_f func, ArgsT ... args)
{
Box<dim,int> box;
for (int i = 0 ; i < dim ; i++)
{
box.setLow(i,start.get(i));
box.setHigh(i,stop.get(i));
}
constexpr unsigned int nLoop = UIntDivCeil<(IntPow<blockEdgeSize + 2, dim>::value), (blockSize)>::value;
applyStencils< SparseGridGpuKernels::stencil_func_conv3_b<dim,nLoop,prop_src1,prop_src2,prop_src3,prop_dst1,prop_dst2,prop_dst3,stencil_size> >(box,STENCIL_MODE_INPLACE,func, args ...);
}
/*! \brief Apply a free type convolution using blocks /*! \brief Apply a free type convolution using blocks
* *
* *
...@@ -2753,12 +2783,37 @@ public: ...@@ -2753,12 +2783,37 @@ public:
typedef BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base> BMG; typedef BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base> BMG;
auto block_data = this->insertBlockFlush(block_id); auto block_data = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::insertBlockFlush(block_id);
block_data.template get<BMG::pMask>()[local_id] = 1; block_data.template get<BMG::pMask>()[local_id] = 1;
return block_data.template get<p>()[local_id]; return block_data.template get<p>()[local_id];
} }
/*! \brief Insert the point on host side and flush directly
*
* First you have to move everything on host with deviceToHost, insertFlush and than move to GPU again
*
* \param grid point where to insert
*
* \return a reference to the data to fill
*
*
*/
template<typename CoordT>
auto insertBlockFlush(const grid_key_dx<dim,CoordT> &coord, indexT & local_id) -> decltype(BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::insertBlockFlush(0))
{
auto lin = gridGeometry.LinId(coord);
indexT block_id = lin / blockSize;
local_id = lin % blockSize;
typedef BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base> BMG;
auto block_data = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::insertBlockFlush(block_id);
block_data.template get<BMG::pMask>()[local_id] = 1;
return block_data;
}
/*! \brief Insert the point on host side and flush directly /*! \brief Insert the point on host side and flush directly
* *
* First you have to move everything on host with deviceToHost, insertFlush and than move to GPU again * First you have to move everything on host with deviceToHost, insertFlush and than move to GPU again
...@@ -2779,7 +2834,7 @@ public: ...@@ -2779,7 +2834,7 @@ public:
typedef BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base> BMG; typedef BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base> BMG;
auto block_data = this->insertBlockFlush(block_id); auto block_data = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::insertBlockFlush(block_id);
block_data.template get<BMG::pMask>()[local_id] = 1; block_data.template get<BMG::pMask>()[local_id] = 1;
return block_data.template get<p>()[local_id]; return block_data.template get<p>()[local_id];
...@@ -2817,15 +2872,105 @@ public: ...@@ -2817,15 +2872,105 @@ public:
* *
*/ */
template<unsigned int p> template<unsigned int p>
void setBackgroundValue(ScalarTypeOf<AggregateBlockT, p> backgroundValue) void setBackgroundValue(typename boost::mpl::at<typename AggregateT::type,boost::mpl::int_<p>>::type backgroundValue)
{ {
bck.template get<p>() = backgroundValue; meta_copy<typename boost::mpl::at<typename AggregateT::type,boost::mpl::int_<p>>::type>::meta_copy_(backgroundValue,bck.template get<p>());
BMG::template setBackgroundValue<p>(backgroundValue); BMG::template setBackgroundValue<p,typename boost::mpl::at<typename AggregateT::type,boost::mpl::int_<p>>::type>(backgroundValue);
} }
/////////////////////////////////// DISTRIBUTED INTERFACE /////////////////////// /////////////////////////////////// DISTRIBUTED INTERFACE ///////////////////////
//Functions to check if the packing object is complex
static bool pack()
{
return true;
}
//Functions to check if the packing object is complex
static bool packRequest()
{
return true;
}
/*! \brief Asking to pack a SparseGrid GPU without GPU context pack the grid on CPU and host memory
*
*
*/
template<int ... prp> inline
void packRequest(size_t & req) const
{
// To fill
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
indexBuffer.template packRequest<prp ...>(req);
dataBuffer.template packRequest<prp ...>(req);
Packer<decltype(gridGeometry),HeapMemory>::packRequest(req);
}
/*! \brief Pack the object into the memory
*
* \tparam prp properties to pack
*
* \param mem preallocated memory where to pack the objects
*
* \param sts pack statistic
*
*/
template<int ... prp> void pack(ExtPreAlloc<HeapMemory> & mem,
Pack_stat & sts) const
{
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
// To fill
indexBuffer.template pack<prp ...>(mem,sts);
dataBuffer.template pack<prp ...>(mem,sts);
Packer<decltype(gridGeometry),HeapMemory>::pack(mem,gridGeometry,sts);
}
/*! \brief Unpack the object into the memory
*
* \tparam prp properties to pack
*
* \param mem preallocated memory where to pack the objects
*
* \param sts pack statistic
*
*/
template<int ... prp> void unpack(ExtPreAlloc<HeapMemory> & mem,
Unpack_stat & ps)
{
auto & indexBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getIndexBuffer();
auto & dataBuffer = BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::blockMap.getDataBuffer();
// To fill
indexBuffer.template unpack<prp ...>(mem,ps);
dataBuffer.template unpack<prp ...>(mem,ps);
Unpacker<decltype(gridGeometry),HeapMemory>::unpack(mem,gridGeometry,ps);
}
/*! \brief Unpack the object into the memory
*
* \tparam prp properties to pack
*
* \param mem preallocated memory where to pack the objects
*
* \param sts pack statistic
*
*/
template<int ... prp> void unpack(ExtPreAlloc<CudaMemory> & mem,
Unpack_stat & ps)
{
if (mem.size() != 0)
{std::cout << __FILE__ << ":" << __LINE__ << " not implemented: " << std::endl;}
}
/*! \brief memory requested to pack this object /*! \brief memory requested to pack this object
* *
* \param req request * \param req request
...@@ -3253,6 +3398,8 @@ public: ...@@ -3253,6 +3398,8 @@ public:
*/ */
void swap(self & gr) void swap(self & gr)
{ {
gridGeometry.swap(gr.gridGeometry);
BMG::swap(gr); BMG::swap(gr);
} }
...@@ -3422,6 +3569,7 @@ public: ...@@ -3422,6 +3569,7 @@ public:
result.allocate(sizeof(int)); result.allocate(sizeof(int));
if (pointers.size())
CUDA_LAUNCH_DIM3((SparseGridGpuKernels::unpack_headers<decltype(std::declval<self>().toKernel())>),1,pointers.size(), CUDA_LAUNCH_DIM3((SparseGridGpuKernels::unpack_headers<decltype(std::declval<self>().toKernel())>),1,pointers.size(),
pointers.toKernel(), pointers.toKernel(),
headers.toKernel(), headers.toKernel(),
......
...@@ -158,6 +158,19 @@ namespace SparseGridGpuKernels ...@@ -158,6 +158,19 @@ namespace SparseGridGpuKernels
{ {
f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1],coord[2]); f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
} }
template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
__device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord ,
CpBlockType & cpb1,
CpBlockType & cpb2,
CpBlockType & cpb3,
DataBlockWrapperT & DataBlockLoad,
int offset,
lambda_func f,
ArgsT ... args)
{
f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
}
}; };
template<> template<>
...@@ -204,6 +217,19 @@ namespace SparseGridGpuKernels ...@@ -204,6 +217,19 @@ namespace SparseGridGpuKernels
{ {
f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1]); f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1]);
} }
template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
__device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord ,
CpBlockType & cpb1,
CpBlockType & cpb2,
CpBlockType & cpb3,
DataBlockWrapperT & DataBlockLoad,
int offset,
lambda_func f,
ArgsT ... args)
{
f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1]);
}
}; };
template<unsigned int dim, unsigned int n_loop, unsigned int p_src, unsigned int p_dst, unsigned int stencil_size> template<unsigned int dim, unsigned int n_loop, unsigned int p_src, unsigned int p_dst, unsigned int stencil_size>
...@@ -424,6 +450,86 @@ namespace SparseGridGpuKernels ...@@ -424,6 +450,86 @@ namespace SparseGridGpuKernels
} }
}; };
template<unsigned int dim, unsigned int n_loop,
unsigned int p_src1, unsigned int p_src2, unsigned int p_src3,
unsigned int p_dst1, unsigned int p_dst2, unsigned int p_dst3,
unsigned int stencil_size>
struct stencil_func_conv3_b
{
typedef NNStar<dim> stencil_type;
static constexpr unsigned int supportRadius = stencil_size;
template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
static inline __device__ void stencil(
SparseGridT & sparseGrid,
const unsigned int dataBlockId,
openfpm::sparse_index<unsigned int> dataBlockIdPos,
unsigned int offset,
grid_key_dx<dim, int> & pointCoord,
DataBlockWrapperT & dataBlockLoad,
DataBlockWrapperT & dataBlockStore,
unsigned char curMask,
lambda_func f,
ArgT ... args)
{
typedef typename SparseGridT::AggregateBlockType AggregateT;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT2;
typedef ScalarTypeOf<AggregateT, p_src1> ScalarT3;
constexpr unsigned int enlargedBlockSize = IntPow<
SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;
__shared__ ScalarT1 enlargedBlock1[enlargedBlockSize];
__shared__ ScalarT2 enlargedBlock2[enlargedBlockSize];
__shared__ ScalarT3 enlargedBlock3[enlargedBlockSize];
// fill with background
typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;
cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1);
cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2);
cp_block<ScalarT3,stencil_size,vmpl_sizes,dim> cpb3(enlargedBlock3);
sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1);
sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2);
sparseGrid.template loadGhostBlock<p_src3>(dataBlockLoad, dataBlockIdPos, enlargedBlock3);
__syncthreads();
ScalarT1 res1 = 0;
ScalarT2 res2 = 0;
ScalarT3 res3 = 0;
if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
{
int coord[dim];
unsigned int linIdTmp = offset;
for (unsigned int d = 0; d < dim; ++d)
{
coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
linIdTmp /= SparseGridT::blockEdgeSize_;
}
stencil_conv_func_impl<dim>::stencil3_block(res1,res2,res3,coord,cpb1,cpb2,cpb3,dataBlockLoad,offset,f,args...);
dataBlockStore.template get<p_dst1>()[offset] = res1;
dataBlockStore.template get<p_dst2>()[offset] = res2;
dataBlockStore.template get<p_dst3>()[offset] = res3;
}
}
template <typename SparseGridT, typename CtxT>
static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
{
// No flush
}
};
template<unsigned int dim, unsigned int n_loop, unsigned int p_src1, unsigned int p_src2, unsigned int p_dst1, unsigned int p_dst2, unsigned int stencil_size> template<unsigned int dim, unsigned int n_loop, unsigned int p_src1, unsigned int p_src2, unsigned int p_dst1, unsigned int p_dst2, unsigned int stencil_size>
struct stencil_func_conv2 struct stencil_func_conv2
{ {
...@@ -966,7 +1072,7 @@ namespace SparseGridGpuKernels ...@@ -966,7 +1072,7 @@ namespace SparseGridGpuKernels
auto dataBlockLoad = dataBuffer.get(dataBlockPos); // Avoid binary searches as much as possible auto dataBlockLoad = dataBuffer.get(dataBlockPos); // Avoid binary searches as much as possible
// todo: Add management of RED-BLACK stencil application! :) // todo: Add management of RED-BLACK stencil application! :)
const unsigned int dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos); const auto dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos);
grid_key_dx<dim, int> pointCoord = sparseGrid.getCoord(dataBlockId * blockSize + offset); grid_key_dx<dim, int> pointCoord = sparseGrid.getCoord(dataBlockId * blockSize + offset);
unsigned char curMask; unsigned char curMask;
......
...@@ -172,6 +172,11 @@ namespace openfpm ...@@ -172,6 +172,11 @@ namespace openfpm
return v_size; return v_size;
} }
__host__ __device__ size_t size_local() const
{
return size();
}
/*! \brief return the maximum capacity of the vector before reallocation /*! \brief return the maximum capacity of the vector before reallocation
* *
* \return the capacity of the vector * \return the capacity of the vector
...@@ -204,6 +209,38 @@ namespace openfpm ...@@ -204,6 +209,38 @@ namespace openfpm
return base.template get<p>(key); return base.template get<p>(key);
} }
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/
template <unsigned int p>
__device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
return this->get<p>(id);
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/
template <unsigned int p, typename key_type>
__device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{
return this->get<p>(id.getKey());
}
/*! \brief Get an element of the vector /*! \brief Get an element of the vector
* *
...@@ -488,7 +525,22 @@ namespace openfpm ...@@ -488,7 +525,22 @@ namespace openfpm
return base.getGPUIterator(start,stop,n_thr); return base.getGPUIterator(start,stop,n_thr);
} }
/*! \brief Get a domain iterator for the GPU
*
*
*/
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
return getGPUIterator(n_thr);
}
//Stub for some expression
void init() const {}
__host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0)))
{
return get<0>(p);
}
/*! \brief Get an iterator for the GPU /*! \brief Get an iterator for the GPU
* *
* *
...@@ -501,6 +553,7 @@ namespace openfpm ...@@ -501,6 +553,7 @@ namespace openfpm
return base.getGPUIterator(start,stop_,n_thr); return base.getGPUIterator(start,stop_,n_thr);
} }
/*! \brief operator= this operator absorb the pointers, consider that this object wrap device pointers /*! \brief operator= this operator absorb the pointers, consider that this object wrap device pointers
* *
* \param object to copy * \param object to copy
...@@ -514,6 +567,16 @@ namespace openfpm ...@@ -514,6 +567,16 @@ namespace openfpm
return *this; return *this;
} }
__device__ __host__ vector_gpu_ker<T,layout_base> & getVector()
{
return *this;
}
__device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
{
return *this;
}
/*! \brief Return the base /*! \brief Return the base
* *
* \return the base * \return the base
...@@ -596,6 +659,11 @@ namespace openfpm ...@@ -596,6 +659,11 @@ namespace openfpm
return vref.size(); return vref.size();
} }
__host__ __device__ size_t size_local() const
{
return size();
}
__device__ __host__ unsigned int capacity() const __device__ __host__ unsigned int capacity() const
{ {
return vref.capacity; return vref.capacity;
...@@ -692,6 +760,16 @@ namespace openfpm ...@@ -692,6 +760,16 @@ namespace openfpm
return vref.getGPUItertatorTo(stop,n_thr); return vref.getGPUItertatorTo(stop,n_thr);
} }
vector_gpu_ker<T,layout_base> & getVector()
{
return *this;
}
const vector_gpu_ker<T,layout_base> & getVector() const
{
return *this;
}
__host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v) __host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
{ {
vref.operator=(v); vref.operator=(v);
......
...@@ -336,7 +336,7 @@ namespace openfpm ...@@ -336,7 +336,7 @@ namespace openfpm
* *
* \return the size * \return the size
* *
*/ */ //remove host device
size_t size_local() const size_t size_local() const
{ {
return v_size; return v_size;
...@@ -1359,7 +1359,21 @@ namespace openfpm ...@@ -1359,7 +1359,21 @@ namespace openfpm
return base.get_o(key); return base.get_o(key);
} }
/*! \brief Get an element of the vector
*
* Get an element of the vector
*
* \tparam p Property to get
* \param id Element to get
*
* \return the element value requested
*
*/ //remove device host
template <unsigned int p>
inline auto getProp(const unsigned int & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{ //uncomment this
return this->template get<p>(id);
}
/*! \brief Get an element of the vector /*! \brief Get an element of the vector
* *
* Get an element of the vector * Get an element of the vector
...@@ -1369,12 +1383,12 @@ namespace openfpm ...@@ -1369,12 +1383,12 @@ namespace openfpm
* *
* \return the element value requested * \return the element value requested
* *
*/ *///remove host device
template <unsigned int p,typename KeyType>
template <unsigned int p,typename KeyType> inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
inline auto getProp(const KeyType & id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{ {
return this->template get<p>(id.getKey()); //uncomment this
return this->template get<p>(id.getKey());
} }
/*! \brief Get an element of the vector /*! \brief Get an element of the vector
...@@ -1386,10 +1400,10 @@ namespace openfpm ...@@ -1386,10 +1400,10 @@ namespace openfpm
* *
* \return the element value requested * \return the element value requested
* *
*/ */ //remove device host
template <unsigned int p, typename keyType> template <unsigned int p, typename keyType>
inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) inline auto getProp(const keyType & id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
{ { //uncomment this
return this->template get<p>(id.getKey()); return this->template get<p>(id.getKey());
} }
...@@ -1930,6 +1944,7 @@ namespace openfpm ...@@ -1930,6 +1944,7 @@ namespace openfpm
return base.getGPUIterator(start,stop_,n_thr); return base.getGPUIterator(start,stop_,n_thr);
} }
#endif #endif
/*! \brief Get the vector elements iterator /*! \brief Get the vector elements iterator
...@@ -1987,6 +2002,14 @@ namespace openfpm ...@@ -1987,6 +2002,14 @@ namespace openfpm
return base.getGPUIterator(start,stop,n_thr); return base.getGPUIterator(start,stop,n_thr);
} }
/*! \brief Get a domain iterator for the GPU
*
*
*/
ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
{
return getGPUIterator(n_thr);
}
#endif #endif
/*! \brief Return the size of the message needed to pack this object /*! \brief Return the size of the message needed to pack this object
...@@ -2071,6 +2094,19 @@ namespace openfpm ...@@ -2071,6 +2094,19 @@ namespace openfpm
return 1; return 1;
} }
/*! \brief Return the memory object
*
* Return the memory object
*
* \tparam p array to retrieve
*
*/
template<unsigned int p>
auto getMemory() -> decltype(base.template getMemory<p>())
{
return base.template getMemory<p>();
}
/*! \brief Set the memory of the base structure using an object /*! \brief Set the memory of the base structure using an object
* *
* \param mem Memory object to use for allocation * \param mem Memory object to use for allocation
......
...@@ -1016,6 +1016,18 @@ public: ...@@ -1016,6 +1016,18 @@ public:
return base_type::get(id); return base_type::get(id);
} }
/*! \brief Get an element of the vector
*
* \param id element to get
*
* \return the element reference
*
*/
inline T & operator[](size_t id)
{
return base_type::get(id);
}
/*! \brief Get an element of the vector /*! \brief Get an element of the vector
* *
* \param id element to get * \param id element to get
......
...@@ -339,7 +339,7 @@ template<int ... prp> inline void pack(ExtPreAlloc<HeapMemory> & mem, Pack_stat ...@@ -339,7 +339,7 @@ template<int ... prp> inline void pack(ExtPreAlloc<HeapMemory> & mem, Pack_stat
* \param mem preallocated memory from where to unpack the vector * \param mem preallocated memory from where to unpack the vector
* \param ps unpack-stat info * \param ps unpack-stat info
*/ */
template<int ... prp> inline void unpack(ExtPreAlloc<HeapMemory> & mem, Unpack_stat & ps) template<int ... prp, typename MemType> inline void unpack(ExtPreAlloc<MemType> & mem, Unpack_stat & ps)
{ {
//if all of the aggregate properties are simple (don't have "pack()" member) //if all of the aggregate properties are simple (don't have "pack()" member)
if (has_pack_agg<T,prp...>::result::value == false) if (has_pack_agg<T,prp...>::result::value == false)
...@@ -353,7 +353,7 @@ template<int ... prp> inline void unpack(ExtPreAlloc<HeapMemory> & mem, Unpack_s ...@@ -353,7 +353,7 @@ template<int ... prp> inline void unpack(ExtPreAlloc<HeapMemory> & mem, Unpack_s
{ {
//Unpack a size of a source vector //Unpack a size of a source vector
size_t u2 = 0; size_t u2 = 0;
Unpacker<size_t, HeapMemory>::unpack(mem,u2,ps); Unpacker<size_t, MemType>::unpack(mem,u2,ps);
//Resize a destination vector //Resize a destination vector
this->resize(u2); this->resize(u2);
...@@ -361,7 +361,7 @@ template<int ... prp> inline void unpack(ExtPreAlloc<HeapMemory> & mem, Unpack_s ...@@ -361,7 +361,7 @@ template<int ... prp> inline void unpack(ExtPreAlloc<HeapMemory> & mem, Unpack_s
for (size_t i = 0 ; i < this->size() ; i++) for (size_t i = 0 ; i < this->size() ; i++)
{ {
//Call an unpacker in nested way //Call an unpacker in nested way
call_aggregateUnpack<decltype(this->get(i)),HeapMemory,prp ... >::call_unpack(this->get(i),mem,ps); call_aggregateUnpack<decltype(this->get(i)),MemType,prp ... >::call_unpack(this->get(i),mem,ps);
} }
} }
} }
......
...@@ -24,11 +24,14 @@ ...@@ -24,11 +24,14 @@
#define __CUDACC__ #define __CUDACC__
#define __CUDA__ #define __CUDA__
#else #else
#include "util/cuda/moderngpu/kernel_merge.hxx" #include <thrust/merge.h>
#include <thrust/execution_policy.h>
#endif #endif
#endif #endif
#else #else
#include "util/cuda/moderngpu/kernel_merge.hxx" #include <thrust/merge.h>
#include <thrust/execution_policy.h>
// #include "util/cuda/moderngpu/kernel_merge.hxx"
#endif #endif
#include "util/cuda/ofp_context.hxx" #include "util/cuda/ofp_context.hxx"
...@@ -98,7 +101,14 @@ ...@@ -98,7 +101,14 @@
#else #else
mgpu::merge(a_keys,a_vals,a_count,b_keys,b_vals,b_count,c_keys,c_vals,comp,context); // It seems broken on some CUDA on some hardware. Anyway is not anymore supported
// on some hardware ... we move to thrust
// mgpu::merge(a_keys,a_vals,a_count,b_keys,b_vals,b_count,c_keys,c_vals,comp,context);
thrust::merge_by_key(thrust::device, a_keys,a_keys + a_count,
b_keys,b_keys + b_count,
a_vals,b_vals,
c_keys,c_vals,comp);
#endif #endif
......
...@@ -27,7 +27,8 @@ ...@@ -27,7 +27,8 @@
#else #else
// Here we have old CUDA // Here we have old CUDA
#include "cub_old/cub.cuh" #include "cub_old/cub.cuh"
#include "util/cuda/moderngpu/kernel_reduce.hxx" //#include "util/cuda/moderngpu/kernel_reduce.hxx"
#define REDUCE_WITH_CUB
#endif #endif
#include "util/cuda/ofp_context.hxx" #include "util/cuda/ofp_context.hxx"
...@@ -98,4 +99,4 @@ namespace openfpm ...@@ -98,4 +99,4 @@ namespace openfpm
#endif #endif
#endif /* REDUCE_OFP_HPP_ */ #endif /* REDUCE_OFP_HPP_ */
\ No newline at end of file