Commit 8d7bbbfa authored by incardon

Adding vector_gpu test + adding compatibility for nested vector of vector

parent 0261ed4b
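The core of the nested-compatibility change shows up in the map_grid_cuda_ker.hpp hunks further down: copy_switch_memory_c_no_cpy is generalized from a single aggregate parameter T to independent source and destination layout types, so the host-side layout and the kernel-side layout no longer have to be the same type. The snippet below is a minimal standalone sketch of that pattern, not OpenFPM code; bind_properties, host_prop and device_prop are hypothetical names, and plain boost::mpl::for_each is used in place of OpenFPM's for_each_ref helper. It shows a per-property functor templated on separate source/destination boost::fusion sequences, driven over the property indices with a boost::mpl range_c just as the diff does.

// Minimal standalone sketch (hypothetical names, not the OpenFPM API) of a copy
// functor templated on separate source and destination fusion sequences.
#include <boost/fusion/include/vector.hpp>
#include <boost/fusion/include/at_c.hpp>
#include <boost/mpl/range_c.hpp>
#include <boost/mpl/for_each.hpp>
#include <iostream>

struct host_prop   { int * ptr; };            // stands in for the host-side layout element
struct device_prop { int * ptr = nullptr; };  // stands in for the kernel-side layout element

template<typename T_src, typename T_dst>
struct bind_properties
{
    const T_src & src;   // source sequence (host layout)
    T_dst & dst;         // destination sequence (kernel layout)

    bind_properties(const T_src & src, T_dst & dst)
    :src(src),dst(dst) {}

    // invoked once per property index by boost::mpl::for_each
    template<typename T>
    void operator()(T) const
    {
        boost::fusion::at_c<T::value>(dst).ptr = boost::fusion::at_c<T::value>(src).ptr;
    }
};

int main()
{
    int a = 1, b = 2;
    boost::fusion::vector<host_prop, host_prop>     src(host_prop{&a}, host_prop{&b});
    boost::fusion::vector<device_prop, device_prop> dst;

    // same idea as the for_each_ref over range_c<int,0,T::max_prop> in the diff
    bind_properties<decltype(src), decltype(dst)> bp(src, dst);
    boost::mpl::for_each< boost::mpl::range_c<int, 0, 2> >(bp);

    std::cout << *boost::fusion::at_c<0>(dst).ptr << " "
              << *boost::fusion::at_c<1>(dst).ptr << std::endl;   // prints: 1 2
    return 0;
}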
@@ -76,7 +76,7 @@ struct grid_toKernelImpl
// Increment the reference of mem
g.data_.mem->incRef();
g.data_.mem_r.bind_ref(gc.get_internal_data_().mem_r);
g.data_.switchToDevicePtrNoCopy();
g.data_.switchToDevicePtr();
return g;
}
@@ -88,7 +88,8 @@ struct grid_toKernelImpl<true,dim,T>
template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte> toKernel(grid_type & gc)
{
grid_gpu_ker<dim,T,memory_traits_inte> g(gc.getGrid());
copy_switch_memory_c_no_cpy<T> cp_mc(gc.get_internal_data_(),g.data_);
copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
typename std::remove_reference<decltype(g.data_)>::type> cp_mc(gc.get_internal_data_(),g.data_);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);
......
@@ -21,7 +21,7 @@ BOOST_AUTO_TEST_CASE (gpu_computation_func)
#ifdef CUDA_GPU
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
grid_key_dx<3> k1({1,1,1});
grid_key_dx<3> k2({62,62,62});
@@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE (gpu_computation)
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
c3.setMemory();
test_layout_gridNd<3>(c3,sz[0]);
@@ -136,8 +136,8 @@ BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
grid_gpu<3, Point_aggr_test > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> key2({62,62,62});
@@ -216,8 +216,8 @@ BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
grid_gpu<3, Point_aggr_test > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> zero({0,0,0});
grid_key_dx<3> key2({62,62,62});
@@ -301,8 +301,8 @@ BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
grid_gpu<3, Point_aggr_test > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> zero({0,0,0});
grid_key_dx<3> key2({62,62,62});
@@ -372,8 +372,8 @@ BOOST_AUTO_TEST_CASE (gpu_swap_vector)
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_gpu<3, Point_aggr_test > c3(sz);
grid_gpu<3, Point_aggr_test > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> zero({0,0,0});
grid_key_dx<3> key2({62,62,62});
@@ -482,7 +482,7 @@ void gpu_copy_device_test()
for (size_t i = 0 ; i < dim ; i++)
{sz[i] = 13;}
grid_gpu<dim, Point_test<float> > c3(sz);
grid_gpu<dim, Point_aggr_test > c3(sz);
grid_sm<dim,void> g(sz);
c3.setMemory();
......
@@ -2,8 +2,9 @@
#include <Grid/map_grid.hpp>
#include "Point_test.hpp"
#include <stdio.h>
#include "cuda_grid_unit_tests_func.cuh"
__global__ void grid_gradient_vector(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
__global__ void grid_gradient_vector(grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g1, grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
@@ -12,7 +13,7 @@ __global__ void grid_gradient_vector(grid_gpu_ker<3,Point_test<float>,memory_tra
g2.template get<4>(key)[2] = (g1.template get<0>(key.move(2,1)) - g1.template get<0>(key.move(2,-1))) / 2.0;
}
__global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
__global__ void grid_fill_vector(grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
@@ -21,7 +22,7 @@ __global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>,memory_traits_
g1.template get<4>(key)[2] = 3.0;
}
__global__ void grid_fill_vector2(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
__global__ void grid_fill_vector2(grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
@@ -31,7 +32,7 @@ __global__ void grid_fill_vector2(grid_gpu_ker<3,Point_test<float>,memory_traits
}
__global__ void compute_stencil_grid(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
__global__ void compute_stencil_grid(grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g1, grid_gpu_ker<3,Point_aggr_test,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
@@ -73,7 +74,7 @@ __global__ void fill_count(float * prp_0,int sz)
// call compute
void gpu_grid_3D_one(grid_gpu<3,Point_test<float>> & g)
void gpu_grid_3D_one(grid_gpu<3,Point_aggr_test> & g)
{
// Setup execution parameters
dim3 threads(8,8,8);
@@ -86,7 +87,7 @@ void gpu_grid_3D_one(grid_gpu<3,Point_test<float>> & g)
// call compute
void gpu_grid_3D_compute(grid_gpu<3,Point_test<float>> & g)
void gpu_grid_3D_compute(grid_gpu<3,Point_aggr_test> & g)
{
// Setup execution parameters
dim3 threads(8,8,8);
@@ -97,7 +98,7 @@ void gpu_grid_3D_compute(grid_gpu<3,Point_test<float>> & g)
fill_count<<< grid, threads >>>(prp_0,64);
}
void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2,
void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
// Setup execution parameters
@@ -110,7 +111,7 @@ void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,
compute_stencil<<< gpu_it.thr, gpu_it.wthr >>>(prp_0,prp_1,64,start,stop);
}
void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2,
void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
auto gpu_it = g2.getGPUIterator(start,stop);
@@ -121,21 +122,21 @@ void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_test<float>> & g1, grid_g
compute_stencil_grid<<< gpu_it.thr, gpu_it.wthr >>>(g1k,g2k,gpu_it);
}
void gpu_grid_fill_vector(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
void gpu_grid_fill_vector(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
auto gpu_it = g1.getGPUIterator(start,stop);
grid_fill_vector<<< gpu_it.thr, gpu_it.wthr >>>(g1.toKernel(),gpu_it);
}
void gpu_grid_fill_vector2(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
void gpu_grid_fill_vector2(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
auto gpu_it = g1.getGPUIterator(start,stop);
grid_fill_vector2<<< gpu_it.thr, gpu_it.wthr >>>(g1.toKernel(),gpu_it);
}
void gpu_grid_gradient_vector(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop)
void gpu_grid_gradient_vector(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
auto gpu_it = g1.getGPUIterator(start,stop);
......
@@ -8,19 +8,20 @@
#ifndef OPENFPM_DATA_SRC_GRID_CUDA_GPU_COMPUTE_CUH_
#define OPENFPM_DATA_SRC_GRID_CUDA_GPU_COMPUTE_CUH_
typedef aggregate<float, float, float, float, float [3], float [3][3]> Point_aggr_test;
void gpu_grid_3D_compute(grid_gpu<3,Point_test<float>> & g);
void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2,
void gpu_grid_3D_compute(grid_gpu<3,Point_aggr_test> & g);
void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
grid_key_dx<3> & key1, grid_key_dx<3> & key2);
void gpu_grid_3D_one(grid_gpu<3,Point_test<float>> & g);
void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2,
void gpu_grid_3D_one(grid_gpu<3,Point_aggr_test> & g);
void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_fill_vector(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_fill_vector(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_fill_vector2(grid_gpu<3,Point_test<float>> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_fill_vector2(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_gradient_vector(grid_gpu<3,Point_test<float>> & g1, grid_gpu<3,Point_test<float>> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop);
void gpu_grid_gradient_vector(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop);
#endif /* OPENFPM_DATA_SRC_GRID_CUDA_GPU_COMPUTE_CUH_ */
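The test and kernel hunks above now instantiate their grids over the Point_aggr_test aggregate declared in this header instead of Point_test<float>. The short sketch below only restates that usage in one place; it assumes the OpenFPM headers already pulled in by the test translation units (Grid/map_grid.hpp) and uses exactly the declarations and property accesses that appear in the diff.

// Usage sketch only, assuming the OpenFPM headers used by the tests above.
#include <Grid/map_grid.hpp>

// properties 0..3: scalars, property 4: float[3] vector, property 5: float[3][3] tensor
typedef aggregate<float, float, float, float, float [3], float [3][3]> Point_aggr_test;

int main()
{
    size_t sz[3] = {64,64,64};
    grid_gpu<3, Point_aggr_test > c3(sz);   // GPU grid with the interleaved (memory_traits_inte) layout
    c3.setMemory();

    grid_key_dx<3> k1({1,1,1});
    c3.get<0>(k1)    = 1.0f;                // scalar property
    c3.get<4>(k1)[2] = 3.0f;                // z component of the float[3] property

    return 0;
}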
@@ -9,6 +9,7 @@
#define MAP_GRID_CUDA_KER_HPP_
#include "Grid/grid_base_impl_layout.hpp"
#include "util/tokernel_transformation.hpp"
/*! \brief this class is a functor for "for_each" algorithm
*
@@ -21,13 +22,13 @@
*
*/
template<typename T_type>
template<typename T_type_src,typename T_type_dst>
struct copy_switch_memory_c_no_cpy
{
//! encapsulated source object
const typename memory_traits_inte<T_type>::type & src;
const T_type_src & src;
//! encapsulated destination object
typename memory_traits_inte<T_type>::type & dst;
T_type_dst & dst;
/*! \brief constructor
@@ -36,8 +37,8 @@ struct copy_switch_memory_c_no_cpy
* \param dst destination encapsulated object
*
*/
inline copy_switch_memory_c_no_cpy(const typename memory_traits_inte<T_type>::type & src,
typename memory_traits_inte<T_type>::type & dst)
inline copy_switch_memory_c_no_cpy(const T_type_src & src,
T_type_dst & dst)
:src(src),dst(dst)
{
};
@@ -45,13 +46,13 @@ struct copy_switch_memory_c_no_cpy
//! It calls the copy function for each property
template<typename T>
inline void operator()(T& t) const
inline void operator()(T& t)
{
boost::fusion::at_c<T::value>(dst).mem = boost::fusion::at_c<T::value>(src).mem;
// Increment the reference of mem
boost::fusion::at_c<T::value>(dst).mem->incRef();
boost::fusion::at_c<T::value>(dst).mem_r.bind_ref(boost::fusion::at_c<T::value>(src).mem_r);
boost::fusion::at_c<T::value>(dst).switchToDevicePtrNoCopy();
boost::fusion::at_c<T::value>(dst).switchToDevicePtr();
}
};
@@ -60,7 +61,7 @@ struct grid_gpu_ker_constructor_impl
{
template<typename ggk_type> static inline void construct(const ggk_type & cpy,ggk_type & this_)
{
copy_switch_memory_c_no_cpy<T> bp_mc(cpy.data_,this_.data_);
copy_switch_memory_c_no_cpy<decltype(cpy.data_),decltype(this_.data_)> bp_mc(cpy.data_,this_.data_);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(bp_mc);
}
@@ -75,7 +76,7 @@ struct grid_gpu_ker_constructor_impl<false,T>
// Increment the reference of mem
this_.data_.mem->incRef();
this_.data_.mem_r.bind_ref(cpy.data_.mem_r);
this_.data_.switchToDevicePtrNoCopy();
this_.data_.switchToDevicePtr();
}
};
@@ -87,11 +88,14 @@ struct grid_gpu_ker_constructor_impl<false,T>
template<unsigned int dim, typename T, template <typename> class layout_base>
struct grid_gpu_ker
{
//! Type T
typedef typename apply_transform<layout_base,T>::type T_;
//! grid information
grid_sm<dim,void> g1;
//! type of layout of the structure
typedef typename layout_base<T>::type layout;
typedef typename layout_base<T_>::type layout;
//! layout data
layout data_;
@@ -107,7 +111,7 @@ struct grid_gpu_ker
grid_gpu_ker(const grid_gpu_ker & cpy)
:g1(cpy.g1)
{
grid_gpu_ker_constructor_impl<is_layout_inte<layout_base<T>>::value,T>::construct(cpy,*this);
grid_gpu_ker_constructor_impl<is_layout_inte<layout_base<T_>>::value,T_>::construct(cpy,*this);
}
/*! \brief Return the internal grid information
@@ -129,10 +133,10 @@ struct grid_gpu_ker
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T_>,layout,grid_sm<dim,T_>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get(const grid_key_dx<dim> & v1)
{
return mem_get<p,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
return mem_get<p,layout_base<T_>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the const reference of the selected element
@@ -142,10 +146,10 @@ struct grid_gpu_ker
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_c(data_,g1,grid_key_dx<dim>()))>
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T_>,layout,grid_sm<dim,T_>,grid_key_dx<dim>>::get_c(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline const r_type get(const grid_key_dx<dim> & v1) const
{
return mem_get<p,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get_c(data_,g1,v1);
return mem_get<p,layout_base<T_>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get_c(data_,g1,v1);
}
/*! \brief Get the reference of the selected element
@@ -155,10 +159,10 @@ struct grid_gpu_ker
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T_>,layout,grid_sm<dim,T_>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline r_type get(const size_t lin_id)
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
return mem_get<p,memory_traits_inte<T_>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
/*! \brief Get the const reference of the selected element
@@ -168,10 +172,10 @@ struct grid_gpu_ker
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T_>,layout,grid_sm<dim,T_>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline const r_type get(size_t lin_id) const
{
return mem_get<p,layout_base<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
return mem_get<p,layout_base<T_>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
/*! \brief Get the selected element as a boost::fusion::vector
@@ -185,9 +189,9 @@ struct grid_gpu_ker
* \return an encap_c that is the representation of the object (careful: it is not the object)
*
*/
__device__ inline encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1)
__device__ inline encapc<dim,T_,layout> get_o(const grid_key_dx<dim> & v1)
{
return mem_geto<dim,T,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
return mem_geto<dim,T_,layout_base<T_>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the selected element as a boost::fusion::vector
@@ -201,13 +205,13 @@ struct grid_gpu_ker
* \return an encap_c that is the representation of the object (careful: it is not the object)
*
*/
__device__ inline const encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1) const
__device__ inline const encapc<dim,T_,layout> get_o(const grid_key_dx<dim> & v1) const
{
return mem_geto<dim,T,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(const_cast<decltype(this->data_) &>(data_),g1,v1);
return mem_geto<dim,T,layout_base<T_>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(const_cast<decltype(this->data_) &>(data_),g1,v1);
}
__device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T,layout_base> & g, const grid_key_dx<dim> & key2)
__device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T_,layout_base> & g, const grid_key_dx<dim> & key2)
{
this->get_o(key1) = g.get_o(key2);
}
......
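The new T_ typedef in grid_gpu_ker above (typename apply_transform<layout_base,T>::type, backed by the newly included util/tokernel_transformation.hpp) is what gives the commit its "nested vector of vector" compatibility: the aggregate's property types are transformed before layout_base computes the layout. The transformation itself is not part of this diff, so the following is only a conceptual sketch of such a recursive type metafunction; host_vector, kernel_vector and to_kernel_type are hypothetical stand-ins, not the OpenFPM names.

// Conceptual sketch only: apply_transform / tokernel_transformation.hpp are not shown
// in this diff, so the names below are hypothetical stand-ins illustrating how a
// recursive type metafunction can map nested host containers to nested kernel views.
#include <type_traits>

template<typename T> struct host_vector   { };   // owning, host-side container
template<typename T> struct kernel_vector { };   // non-owning, device-usable view

// Base case: plain property types (float, float[3], ...) are left unchanged.
template<typename T>
struct to_kernel_type { using type = T; };

// Recursive case: a nested host container becomes a kernel view of the
// transformed element type.
template<typename T>
struct to_kernel_type<host_vector<T>>
{
    using type = kernel_vector<typename to_kernel_type<T>::type>;
};

static_assert(std::is_same<to_kernel_type<float>::type, float>::value,
              "plain types are unchanged");
static_assert(std::is_same<to_kernel_type<host_vector<host_vector<float>>>::type,
                           kernel_vector<kernel_vector<float>>>::value,
              "nested containers are transformed recursively");

int main() { return 0; }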
@@ -312,6 +312,90 @@ private:
#endif
void resize_impl_device(const size_t (& sz)[dim],grid_base_impl<dim,T,S,layout,layout_base> & grid_new)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
grid_key_dx<dim> start;
grid_key_dx<dim> stop;
for (size_t i = 0 ; i < dim ; i++)
{
start.set_d(i,0);
stop.set_d(i,g1.size(i)-1);
}
// if (dim == 1)
// {
// copy_fast_1d_device_memory<is_layout_mlin<layout_base<T>>::value,decltype(grid_new.data_),S> cf1dm(data_,grid_new.data_);
// boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(cf1dm);
// }
if (dim <= 3)
{
auto ite = this->getGPUIterator(start,stop);
copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());
}
else
{
grid_key_dx<1> start;
start.set_d(0,0);
grid_key_dx<1> stop({});
stop.set_d(0,this->g1.size());
size_t sz[1];
sz[0]= this->g1.size();
grid_sm<1,void> g_sm_copy(sz);
auto ite = getGPUIterator_impl<1,void>(g_sm_copy,start,stop);
copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());
}
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: the function resize requires the launch of a kernel, but it seems that this" <<
" file (grid_base_implementation.hpp) has not been compiled with NVCC " << std::endl;
#endif
}
void resize_impl_host(const size_t (& sz)[dim], grid_base_impl<dim,T,S,layout,layout_base> & grid_new)
{
size_t sz_c[dim];
for (size_t i = 0 ; i < dim ; i++)
{sz_c[i] = (g1.size(i) < sz[i])?g1.size(i):sz[i];}
grid_sm<dim,void> g1_c(sz_c);
//! create a source grid iterator
grid_key_dx_iterator<dim> it(g1_c);
while(it.isNext())
{
// get the grid key
grid_key_dx<dim> key = it.get();
// create a copy element
grid_new.get_o(key) = this->get_o(key);
++it;
}
}
void resize_impl_memset(grid_base_impl<dim,T,S,layout,layout_base> & grid_new)
{
//! Set the allocator and allocate the memory
if (isExternal == true)
{
mem_setext<typename std::remove_reference<decltype(grid_new)>::type,S,layout_base<T>,decltype(data_)>::set(grid_new,*this,this->data_);
}
else
grid_new.setMemory();
}
public:
// Implementation of packer and unpacker for grid
@@ -826,18 +910,7 @@
grid_base_impl<dim,T,S,layout,layout_base> grid_new(sz);
//! Set the allocator and allocate the memory
if (isExternal == true)
{
/* grid_new.setMemory(static_cast<S&>(data_.getMemory()));
// Create an empty memory allocator for the actual structure
setMemory();*/
mem_setext<decltype(grid_new),S,layout_base<T>,decltype(data_)>::set(grid_new,*this,this->data_);
}
else
grid_new.setMemory();
resize_impl_memset(grid_new);
// We know that, if it is 1D we can safely copy the memory
@@ -853,82 +926,40 @@ public:
//! N-D copy
if (opt & DATA_ON_HOST)
{
size_t sz_c[dim];
for (size_t i = 0 ; i < dim ; i++)
{sz_c[i] = (g1.size(i) < sz[i])?g1.size(i):sz[i];}
grid_sm<dim,void> g1_c(sz_c);
//! create a source grid iterator
grid_key_dx_iterator<dim> it(g1_c);
while(it.isNext())
{
// get the grid key
grid_key_dx<dim> key = it.get();
// create a copy element
grid_new.get_o(key) = this->get_o(key);
++it;
}
}
{resize_impl_host(sz,grid_new);}
if (opt & DATA_ON_DEVICE && S::isDeviceHostSame() == false)
{
#if defined(CUDA_GPU) && defined(__NVCC__)
grid_key_dx<dim> start;
grid_key_dx<dim> stop;
for (size_t i = 0 ; i < dim ; i++)
{
start.set_d(i,0);
stop.set_d(i,g1.size(i)-1);
}
// if (dim == 1)
// {
// copy_fast_1d_device_memory<is_layout_mlin<layout_base<T>>::value,decltype(grid_new.data_),S> cf1dm(data_,grid_new.data_);
// boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(cf1dm);
// }
if (dim <= 3)
{
auto ite = this->getGPUIterator(start,stop);
copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());
}
else
{
grid_key_dx<1> start;
start.set_d(0,0);
grid_key_dx<1> stop({});
stop.set_d(0,this->g1.size());
size_t sz[1];
sz[0]= this->g1.size();
{resize_impl_device(sz,grid_new);}
grid_sm<1,void> g_sm_copy(sz);
auto ite = getGPUIterator_impl<1,void>(g_sm_copy,start,stop);
// }
copy_ndim_grid_device<dim,decltype(grid_new.toKernel())><<<ite.wthr,ite.thr>>>(this->toKernel(),grid_new.toKernel());
}
#else
// copy grid_new to the base
std::cout << __FILE__ << ":" << __LINE__ << " error: the function resize requires the launch of a kernel, but it seems that this" <<
" file (grid_base_implementation.hpp) has not been compiled with NVCC " << std::endl;
this->swap(grid_new);
}
/*! \brief Resize the space
*
* Resize the space to a new grid, the elements are retained on the new grid,
* if the new grid is bigger the new elements are now initialized, if it is smaller
* the data are cropped
*
* \param sz reference to an array of dimension dim
* \param opt options for resize. In case we know that the data are only on device memory we can use DATA_ONLY_DEVICE,
* In case we know that the data are only on host memory we can use DATA_ONLY_HOST
*
*/
void resize_no_device(const size_t (& sz)[dim])
{
#ifdef SE_CLASS2
check_valid(this,8);
#endif
//! Create a completely new grid with sz
}
// }
grid_base_impl<dim,T,S,layout,layout_base> grid_new(sz);