Commit e3dbbe76 authored by incardon

Testing resize ...

parent 66fdd0bd
......@@ -247,7 +247,7 @@ have_quad_head=no
AC_CHECK_LIB(quadmath, sinq, [have_quad_lib=yes], [])
AC_CHECK_HEADER(quadmath.h,[have_quad_head=yes],[])
if [x"have_quad_math" == x"yes" $&& x"have_quad_math" == x"yes" ]; then
if test x"have_quad_math" == x"yes"; then
AC_DEFINE(HAVE_LIBQUADMATH,[],[Have quad math lib])
LIBQUADMATH=" -lquadmath "
fi
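As a side note, a minimal sketch of consuming this check's result (the function name quad_sin is illustrative; HAVE_LIBQUADMATH, sinq, and LIBQUADMATH come from the check above):

#ifdef HAVE_LIBQUADMATH
#include <quadmath.h>

/* sinq is the very symbol AC_CHECK_LIB probes for; link with $LIBQUADMATH */
static double quad_sin(double x)
{
	return (double)sinq((__float128)x);
}
#endif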
......
......@@ -358,7 +358,7 @@ public:
* \return itself
*
*/
inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec)
__device__ inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec)
{
copy_cpu_encap_encap<encapc<dim,T,Mem>,encapc<dim,T,Mem>> cp(ec,*this);
......@@ -374,7 +374,7 @@ public:
* \return itself
*
*/
inline encapc<dim,T,Mem> & operator=(const T & obj)
__device__ inline encapc<dim,T,Mem> & operator=(const T & obj)
{
copy_fusion_vector<typename T::type> cp(obj.data,data_c);
......
......@@ -10,6 +10,84 @@
#include "Grid/iterators/grid_key_dx_iterator.hpp"
//////////////////////////////////// Functor to copy 1D grid in device memory ////////////
/*! \brief This class is a functor for the "for_each" algorithm
 *
 * This class is a functor for the "for_each" algorithm: operator() is
 * called for each element of a boost::fusion vector. It is mainly used
 * to copy one device memory buffer into another, property by property.
 *
 * \tparam lin_or_inte true for a linear (single-buffer) layout, false for an interleaved (buffer-per-property) layout
 * \tparam data_type type holding the per-property data
 * \tparam S device memory type (e.g. CudaMemory)
 *
 */
template<bool lin_or_inte,typename data_type,typename S>
struct copy_fast_1d_device_memory
{
	//! source property buffers
	data_type & data_src;

	//! destination property buffers
	data_type & data_dst;
	/*! \brief constructor
	 *
	 * \param data_src source buffers
	 * \param data_dst destination buffers
	 *
	 */
inline copy_fast_1d_device_memory(data_type & data_src, data_type & data_dst)
:data_src(data_src),data_dst(data_dst)
{};
	//! It calls the copy function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// device-to-device copy of the buffer behind property T::value
		static_cast<S *>(boost::fusion::at_c<T::value>(data_dst).mem)->copyDeviceToDevice(*static_cast<S *>(boost::fusion::at_c<T::value>(data_src).mem));
	}
};
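A minimal driving sketch (data_src/data_dst and CudaMemory as S are assumed names; the for_each_ref pattern matches the one used by grid_toKernelImpl further down):

// drive the functor over every property of a Point_test<float> aggregate
copy_fast_1d_device_memory<false,decltype(data_src),CudaMemory> cp(data_src,data_dst);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,Point_test<float>::max_prop> >(cp);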
/*! \brief This class is a functor for the "for_each" algorithm (linear-layout specialization)
 *
 * This class is a functor for the "for_each" algorithm: operator() is
 * called for each element of a boost::fusion vector. It is mainly used
 * to copy one device memory buffer into another; with a linear layout
 * all properties share a single buffer, so only one copy is needed.
 *
 * \tparam data_type type holding the per-property data
 * \tparam S device memory type (e.g. CudaMemory)
 *
 */
template<typename data_type,typename S>
struct copy_fast_1d_device_memory<true,data_type,S>
{
	//! source property buffers
	data_type & data_src;

	//! destination property buffers
	data_type & data_dst;
	/*! \brief constructor
	 *
	 * \param data_src source buffers
	 * \param data_dst destination buffers
	 *
	 */
inline copy_fast_1d_device_memory(data_type & data_src, data_type & data_dst)
:data_src(data_src),data_dst(data_dst)
{};
	//! It calls the copy function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// linear layout: every property lives in the same buffer,
		// so issue the device-to-device copy exactly once
		if (T::value == 0)
		{
			static_cast<S *>(data_dst.mem)->copyDeviceToDevice(*static_cast<S *>(data_src.mem));
		}
	}
};
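Design note: the specialization exists because a linear layout keeps all properties in one contiguous allocation, so a single copyDeviceToDevice is both necessary and sufficient, whereas the general interleaved case above must touch one buffer per property.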
///////////////////////////////////////////////////////////////////////////////////////////
/*! \brief This is a way to quickly copy a grid into another grid
*
*
......@@ -476,12 +554,6 @@ struct pack_with_iterator
*/
static void pack(grid & gr, it & sub_it, dtype & dest)
{
// Sending property object
typedef object<typename object_creator<
boost_vct,
prp...>::type
> prp_object;
size_t id = 0;
// Packing the information
......@@ -525,12 +597,6 @@ struct pack_with_iterator<true,3,grid,encap_src,encap_dst,boost_vct,it,dtype,prp
*/
static void pack(grid & gr, it & sub_it, dtype & dest)
{
// Sending property object
typedef object<typename object_creator<
boost_vct,
prp...>::type
> prp_object;
size_t id = 0;
size_t lin_src = 0;
......@@ -589,12 +655,6 @@ struct pack_with_iterator<true,2,grid,encap_src,encap_dst,boost_vct,it,dtype,prp
*/
static void pack(grid & gr, it & sub_it, dtype & dest)
{
// Sending property object
typedef object<typename object_creator<
boost_vct,
prp...>::type
> prp_object;
size_t id = 0;
size_t lin_src = 0;
......@@ -931,12 +991,6 @@ struct pack_with_iterator<true,1,grid,encap_src,encap_dst,boost_vct,it,dtype,prp
*/
static void pack(grid & gr, it & sub_it, dtype & dest)
{
// Sending property object
typedef object<typename object_creator<
boost_vct,
prp...>::type
> prp_object;
size_t id = 0;
size_t lin_src = 0;
......@@ -992,9 +1046,6 @@ struct unpack_with_iterator
{
size_t id = 0;
// Sending property object
typedef object<typename object_creator<boost_vct,prp...>::type> prp_object;
// unpacking the information
while (sub_it.isNext())
{
......@@ -1043,9 +1094,6 @@ struct unpack_with_iterator<3,grid,
{
size_t id = 0;
// Sending property object
typedef object<typename object_creator<boost_vct,prp...>::type> prp_object;
size_t lin_dst = 0;
auto & gs_dst = gr.getGrid();
......
/*
* cuda_grid_gpu_funcs.cuh
*
* Created on: Aug 20, 2018
* Author: i-bird
*/
#ifndef CUDA_GRID_GPU_FUNCS_CUH_
#define CUDA_GRID_GPU_FUNCS_CUH_
#include "map_grid_cuda_ker.cuh"
#if defined(CUDA_GPU) && defined(__NVCC__)
template<unsigned int dim, typename grid_type>
struct copy_ndim_grid_impl
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;

		// guard: the launch can overshoot the grid, so skip out-of-range threads
		if (i >= src.getGrid().size()) {return;}

		auto key_src = src.getGrid().InvLinId(i);

		dst.get_o(key_src) = src.get_o(key_src);
	}
};
template<typename grid_type>
struct copy_ndim_grid_impl<2,grid_type>
{
static __device__ void copy(grid_type & src, grid_type & dst)
{
grid_key_dx<2> key_src;
key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
dst.get_o(key_src) = src.get_o(key_src);
}
};
template<typename grid_type>
struct copy_ndim_grid_impl<3,grid_type>
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		grid_key_dx<3> key_src;
		key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
		key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
		key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);

		dst.get_o(key_src) = src.get_o(key_src);
	}
};
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
{
copy_ndim_grid_impl<dim,grid_type>::copy(src,dst);
}
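A host-side launch sketch under stated assumptions (a toKernel() member that dispatches to grid_toKernelImpl below, and the wthr/thr fields exercised by the unit tests; src and dst are illustrative grid objects):

// one thread per grid point; getGPUIterator picks the block/grid decomposition
auto ite = src.getGPUIterator(start,stop);
copy_ndim_grid_device<3,decltype(src.toKernel())><<<ite.wthr,ite.thr>>>(src.toKernel(),dst.toKernel());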
#endif
template<bool inte_or_lin,unsigned int dim, typename T>
struct grid_toKernelImpl
{
template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_lin> toKernel(grid_type & gc)
{
grid_gpu_ker<dim,T,memory_traits_lin> g(gc.getGrid());
g.data_.mem = gc.get_internal_data_().mem;
// Increment the reference of mem
g.data_.mem->incRef();
g.data_.mem_r.bind_ref(gc.get_internal_data_().mem_r);
g.data_.switchToDevicePtrNoCopy();
return g;
}
};
template<unsigned int dim, typename T>
struct grid_toKernelImpl<true,dim,T>
{
template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte> toKernel(grid_type & gc)
{
grid_gpu_ker<dim,T,memory_traits_inte> g(gc.getGrid());
copy_switch_memory_c_no_cpy<T> cp_mc(gc.get_internal_data_(),g.data_);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);
return g;
}
};
#endif /* CUDA_GRID_GPU_FUNCS_CUH_ */
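Design note on the two toKernel paths: the kernel wrapper aliases the grid's device allocation instead of copying it, so the linear-layout path increments the buffer's reference count (incRef) to keep the memory alive for the wrapper's lifetime, while the interleaved path performs the equivalent pointer switch once per property through copy_switch_memory_c_no_cpy.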
......@@ -3,7 +3,7 @@
#include "Point_test.hpp"
#include <stdio.h>
__global__ void grid_gradient_vector(grid_gpu_ker<3,Point_test<float>> g1, grid_gpu_ker<3,Point_test<float>> g2, ite_gpu<3> ite_gpu)
__global__ void grid_gradient_vector(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
......@@ -12,7 +12,7 @@ __global__ void grid_gradient_vector(grid_gpu_ker<3,Point_test<float>> g1, grid_
g2.template get<4>(key)[2] = (g1.template get<0>(key.move(2,1)) - g1.template get<0>(key.move(2,-1))) / 2.0;
}
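(For reference: this is a second-order central difference, df/dz ~ (f(z+1) - f(z-1)) / 2 in grid units; the x and y components, elided by the hunk, follow the same pattern on axes 0 and 1.)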
__global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>> g1, ite_gpu<3> ite_gpu)
__global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
......@@ -21,7 +21,7 @@ __global__ void grid_fill_vector(grid_gpu_ker<3,Point_test<float>> g1, ite_gpu<
g1.template get<4>(key)[2] = 3.0;
}
__global__ void grid_fill_vector2(grid_gpu_ker<3,Point_test<float>> g1, ite_gpu<3> ite_gpu)
__global__ void grid_fill_vector2(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
......@@ -31,7 +31,7 @@ __global__ void grid_fill_vector2(grid_gpu_ker<3,Point_test<float>> g1, ite_gpu
}
__global__ void compute_stencil_grid(grid_gpu_ker<3,Point_test<float>> g1, grid_gpu_ker<3,Point_test<float>> g2, ite_gpu<3> ite_gpu)
__global__ void compute_stencil_grid(grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g1, grid_gpu_ker<3,Point_test<float>,memory_traits_inte> g2, ite_gpu<3> ite_gpu)
{
GRID_ID_3(ite_gpu);
......
/*
* cuda_gpu_func.cpp
*
* Created on: Jun 3, 2018
* Author: i-bird
*/
#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "Grid/map_grid.hpp"
#include "Point_test.hpp"
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )
BOOST_AUTO_TEST_CASE (gpu_computation_func)
{
#ifdef CUDA_GPU
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_key_dx<3> k1({1,1,1});
grid_key_dx<3> k2({62,62,62});
c3.setMemory();
auto gcf = c3.getGPUIterator(k1,k2);
BOOST_REQUIRE_EQUAL(gcf.thr.x,16ul);
BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.thr.z,8ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.x,4ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.z,8ul);
grid_key_dx<3> k3({50,50,50});
grid_key_dx<3> k4({62,62,62});
grid_key_dx<3> k5({60,61,62});
auto gcf2 = c3.getGPUIterator(k3,k4);
BOOST_REQUIRE_EQUAL(gcf2.thr.x,13ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,8ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,2ul);
gcf2 = c3.getGPUIterator(k3,k4,511);
BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);
gcf2 = c3.getGPUIterator(k3,k4,1);
BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.x,13ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,13ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);
gcf2 = c3.getGPUIterator(k3,k5,32);
BOOST_REQUIRE_EQUAL(gcf2.thr.x,4ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,4ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.x,3ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,3ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,7ul);
gcf2 = c3.getGPUIterator(k3,k5,1);
BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.x,11ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,12ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);
#endif
}
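A worked example of the decomposition these assertions encode (the ceiling-division rule is inferred from the asserted values, not from getGPUIterator's source):

// k3..k4 spans 62-50+1 = 13 points per axis; capped at 511 threads the
// iterator picks an 8x8x4 block (256 threads), so
// wthr = (ceil(13/8), ceil(13/8), ceil(13/4)) = (2,2,4).
// With a limit of 1 thread the block is 1x1x1 and wthr = (13,13,13),
// i.e. one block per grid point.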
BOOST_AUTO_TEST_CASE (gpu_computation)
{
#ifdef CUDA_GPU
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
c3.setMemory();
test_layout_gridNd<3>(c3,sz[0]);
gpu_grid_3D_compute(c3);
c3.deviceToHost<0>();
auto it = c3.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c3.getGrid().LinId(key) == c3.template get<0>(key);
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
#endif
}
BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
{
#ifdef CUDA_GPU
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> key2({62,62,62});
c3.setMemory();
c2.setMemory();
test_layout_gridNd<3>(c3,sz[0]);
test_layout_gridNd<3>(c2,sz[0]);
gpu_grid_3D_one(c2);
// Check that property 0 is 1.0
c2.deviceToHost<0>();
{
auto it = c2.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c2.get<0>(key) == 1.0;
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
gpu_grid_3D_compute(c3);
c3.deviceToHost<0>();
{
auto it = c3.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c3.getGrid().LinId(key) == c3.get<0>(key);
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
gpu_grid_3D_compute_stencil(c3,c2,key1,key2);
c2.deviceToHost<0>();
auto it = c2.getIterator(key1,key2);
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c2.get<0>(key) == 0;
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
#endif
}
BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
{
#ifdef CUDA_GPU
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> zero({0,0,0});
grid_key_dx<3> key2({62,62,62});
grid_key_dx<3> keyl({63,63,63});
c3.setMemory();
c2.setMemory();
test_layout_gridNd<3>(c3,sz[0]);
test_layout_gridNd<3>(c2,sz[0]);
gpu_grid_3D_one(c2);
// Check that property 0 is 1.0
c2.deviceToHost<0>();
{
auto it = c2.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c2.get<0>(key) == 1.0;
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
gpu_grid_3D_compute(c3);
c3.deviceToHost<0>();
{
auto it = c3.getIterator();
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c3.getGrid().LinId(key) == c3.get<0>(key);
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
}
gpu_grid_3D_compute_grid_stencil(c3,c2,key1,key2);
c2.deviceToHost<0>();
auto it = c2.getIterator(key1,key2);
bool good = true;
while(it.isNext())
{
auto key = it.get();
good &= c2.get<0>(key) == 0;
++it;
}
BOOST_REQUIRE_EQUAL(good,true);
// We also try to fill a vectorial quantity
gpu_grid_fill_vector(c3,zero,keyl);
}
#endif
}
BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)
{
#ifdef CUDA_GPU
{
size_t sz[3] = {64,64,64};
grid_gpu<3, Point_test<float> > c3(sz);
grid_gpu<3, Point_test<float> > c2(sz);
grid_key_dx<3> key1({1,1,1});
grid_key_dx<3> zero({0,0,0});