Commit 46ea1553 authored by incardon

vector of vector tested

parent 28385eaa
@@ -228,6 +228,30 @@ struct grid_gpu_ker
{
this->get_o(key1) = obj;
}
/*! \brief Get the pointer for the property p
*
* \tparam p property
*
*/
template<unsigned int p> __device__ void * getPointer()
{
return mem_getpointer<decltype(data_),layout_base<T>>::template getPointer<p>(data_);
}
/*! \brief operator= absorbs the pointers; consider that this object wraps device pointers
*
* \param g object to copy
*
*/
grid_gpu_ker<dim,T,layout_base> & operator=(const grid_gpu_ker<dim,T,layout_base> & g)
{
g1 = g.g1;
grid_gpu_ker_constructor_impl<is_layout_inte<layout_base<T_>>::value,T_>::construct(g,*this);
return *this;
}
};
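A hypothetical usage sketch of the new getPointer<p>() accessor (the kernel and type names below are illustrative, not part of this commit, and property 0 is assumed to be a float): since the accessor is __device__, a kernel can read the raw buffer of a property directly.

template<typename grid_ker_type>
__global__ void read_prop0(grid_ker_type grid, float * out, int n)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n)	return;

	// getPointer<0>() exposes the raw device buffer of property 0
	float * prop0 = static_cast<float *>(grid.template getPointer<0>());
	out[i] = prop0[i];
}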
......
@@ -280,7 +280,7 @@ struct mem_swap<T,layout,data_type,grid_type,1>
template<typename data_type, typename layout, unsigned int sel = 2*is_layout_mlin<layout>::value + is_layout_inte<layout>::value>
struct mem_getpointer
{
template<unsigned int d> static void * getPointer(data_type & data_)
template<unsigned int d> __device__ __host__ static void * getPointer(data_type & data_)
{
return data_.mem_r.get_pointer();
}
@@ -289,7 +289,7 @@ struct mem_getpointer
template<typename data_type, typename layout>
struct mem_getpointer<data_type,layout,1>
{
template<unsigned int p> static void * getPointer(data_type & data_)
template<unsigned int p> __device__ __host__ static void * getPointer(data_type & data_)
{
return boost::fusion::at_c<p>(data_).mem_r.get_pointer();
}
......
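The third template parameter of mem_getpointer encodes the layout as an integer (sel = 2*is_layout_mlin + is_layout_inte, so 2 for the linear layout and 1 for the interleaved one), and the right getPointer is selected by partial specialization. A minimal self-contained sketch of that selection pattern, with hypothetical traits standing in for is_layout_mlin / is_layout_inte:

#include <iostream>

struct linear_layout {};
struct interleaved_layout {};

// Hypothetical traits standing in for is_layout_mlin / is_layout_inte
template<typename layout> struct is_mlin { enum { value = 0 }; };
template<typename layout> struct is_inte { enum { value = 0 }; };
template<> struct is_mlin<linear_layout> { enum { value = 1 }; };
template<> struct is_inte<interleaved_layout> { enum { value = 1 }; };

// The primary template handles the linear layout (sel == 2),
// the partial specialization the interleaved one (sel == 1)
template<typename layout, unsigned int sel = 2*is_mlin<layout>::value + is_inte<layout>::value>
struct pointer_getter
{
	static const char * which() { return "one buffer for all properties"; }
};

template<typename layout>
struct pointer_getter<layout,1>
{
	static const char * which() { return "one buffer per property"; }
};

int main()
{
	std::cout << pointer_getter<linear_layout>::which() << std::endl;      // linear
	std::cout << pointer_getter<interleaved_layout>::which() << std::endl; // interleaved
}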
@@ -8,6 +8,8 @@
#ifndef OPENFPM_DATA_SRC_GRID_GRID_COMMON_HPP_
#define OPENFPM_DATA_SRC_GRID_GRID_COMMON_HPP_
#include "util/tokernel_transformation.hpp"
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
@@ -32,9 +34,30 @@ struct host_to_dev_all_prp
}
};
template<typename T, bool is_vector>
template<typename T, typename T_ker, template<typename> class layout_base , bool is_vector>
struct call_recursive_host_device_if_vector
{
template<typename mem_type, typename obj_type> static void transform(mem_type * mem, obj_type & obj, size_t start, size_t stop)
{
// The type on the device and the type on the host do not match (in general),
// so we have to convert before the transfer
T * ptr = static_cast<T *>(obj.get_pointer());
mem_type tmp;
tmp.allocate(mem->size());
T_ker * ptr_tt = static_cast<T_ker *>(tmp.getPointer());
for(size_t i = start ; i < stop ; i++)
{
ptr_tt[i] = ptr[i].toKernel();
}
mem->hostToDevice(tmp);
}
//! It is a vector; recursively call deviceToHost
template<typename obj_type>
static void call(obj_type & obj, size_t start, size_t stop)
@@ -50,9 +73,14 @@ struct call_recursive_host_device_if_vector
}
};
template<typename T>
struct call_recursive_host_device_if_vector<T,false>
template<typename T, typename T_ker ,template<typename> class layout_base>
struct call_recursive_host_device_if_vector<T,T_ker,layout_base,false>
{
template<typename mem_type,typename obj_type> static void transform(mem_type * mem, obj_type & obj, size_t start, size_t stop)
{
mem->hostToDevice();
}
//! It is not a vector; nothing to do
template<typename obj_type>
static void call(obj_type & obj, size_t start, size_t stop) {}
......
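The transform() added above implements a stage-and-upload scheme: when the element type is itself a vector, the host representation (T) and the device representation (T_ker) differ, so each element is converted with toKernel() into a temporary host buffer, which is then transferred in a single hostToDevice call. A minimal self-contained sketch of the same idea, with a mock memory class standing in for CudaMemory and a hypothetical element type:

#include <cstddef>
#include <vector>

// Mock memory class standing in for CudaMemory (hypothetical, host-only)
struct mock_memory
{
	std::vector<unsigned char> buf;                  // stands in for the device allocation
	void allocate(size_t sz) { buf.resize(sz); }
	size_t size() const { return buf.size(); }
	void * getPointer() { return buf.data(); }
	void hostToDevice(mock_memory & src) { buf = src.buf; } // stands in for the actual copy
};

// Hypothetical host element whose kernel representation is a plain int
struct host_elem { int payload; int toKernel() const { return payload; } };

void stage_and_upload(mock_memory * mem, host_elem * ptr, size_t start, size_t stop)
{
	mock_memory tmp;
	tmp.allocate(mem->size());
	int * ptr_tt = static_cast<int *>(tmp.getPointer());
	for (size_t i = start ; i < stop ; i++)
		ptr_tt[i] = ptr[i].toKernel();               // host -> kernel representation
	mem->hostToDevice(tmp);                          // one transfer of the staged buffer
}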
@@ -203,6 +203,19 @@ public:
this->data_.mem->hostToDevice();
}
/*! \brief Copy the memory from host to device
*
* \tparam prp unused; all properties are copied in this case
*
* \param start start point
* \param stop stop point
*
*/
template<unsigned int ... prp> void hostToDevice(size_t start, size_t stop)
{
this->data_.mem->hostToDevice(start,stop);
}
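A hypothetical usage sketch of the ranged overload above (the object name is illustrative): only the elements between start and stop are transferred, and the property pack can be left empty since it is ignored here.

// Hypothetical usage: upload only the elements between 128 and 256
dev_grid.template hostToDevice<>(128,256);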
/*! \brief It return the properties arrays.
*
* In case of Cuda memory it return the device pointers to pass to the kernels
@@ -314,7 +327,7 @@ struct switch_copy_host_to_device
* \tparam encap dst
*
*/
template<typename T_type, unsigned int ... prp>
template<typename T_type, template<typename> class layout_base , typename Memory, unsigned int ... prp>
struct host_to_device_impl
{
//! encapsulated destination object
@@ -346,10 +359,21 @@
{
typedef decltype(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r) mem_r_type;
boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem->hostToDevice();
typedef typename toKernel_transform<layout_base,typename mem_r_type::value_type>::type kernel_type;
call_recursive_host_device_if_vector<typename mem_r_type::value_type,
kernel_type,
layout_base,
is_vector<typename mem_r_type::value_type>::value>
::template transform<Memory,mem_r_type>(static_cast<CudaMemory *>(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem),
boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,
start,
stop);
// here we have to recursively call hostToDevice for each nested vector
call_recursive_host_device_if_vector<typename mem_r_type::value_type,
kernel_type,
layout_base,
is_vector<typename mem_r_type::value_type>::value>
::call(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,start,stop);
}
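The repeated boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst) expression above resolves, at compile time, the T-th entry of the selected-property pack to a position in the fusion vector of property holders. A minimal self-contained sketch of that two-step lookup, with a hypothetical property pack:

#include <boost/mpl/vector_c.hpp>
#include <boost/mpl/at.hpp>
#include <boost/mpl/int.hpp>
#include <boost/fusion/include/vector.hpp>
#include <boost/fusion/include/at_c.hpp>

int main()
{
	// Hypothetical property pack <2,0>: the first processed property is index 2
	typedef boost::mpl::vector_c<int,2,0> v_prp;
	boost::fusion::vector<char,short,int> dst('a', short(1), 42);

	// Step 1: mpl::at maps position 0 of the pack to property index 2
	const int pos = boost::mpl::at<v_prp,boost::mpl::int_<0>>::type::value;

	// Step 2: fusion::at_c fetches the holder of property 2 (the int, 42)
	int & member = boost::fusion::at_c<pos>(dst);
	(void)member;
	return 0;
}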
@@ -441,52 +465,7 @@ struct device_to_host_start_stop_impl
}
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to copy one encap into another encap object
*
* \tparam encap source
* \tparam encap dst
*
*/
template<typename T_type, unsigned int ... prp>
struct host_to_device_start_stop_impl
{
//! encapsulated destination object
typename memory_traits_inte<T_type>::type & dst;
//! Convert the packed properties into an MPL vector
typedef typename to_boost_vmpl<prp...>::type v_prp;
//! start
size_t start;
//! stop
size_t stop;
/*! \brief constructor
*
* \param dst destination encapsulated object
* \param start start point
* \param stop stop point
*
*/
inline host_to_device_start_stop_impl(typename memory_traits_inte<T_type>::type & dst,size_t start,size_t stop)
:dst(dst),start(start),stop(stop)
{
};
//! It call the copy function for each property
template<typename T>
inline void operator()(T& t) const
{
typedef typename boost::mpl::at<typename T_type::type,T>::type p_type;
boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem->hostToDevice(start*sizeof(p_type),(stop+1)*sizeof(p_type));
}
};
struct dim3_
{
@@ -588,7 +567,7 @@ public:
*/
template<unsigned int ... prp> void hostToDevice()
{
host_to_device_impl<T,prp ...> htd(this->data_,0,this->getGrid().size());
host_to_device_impl<T,memory_traits_inte,S,prp ...> htd(this->data_,0,this->getGrid().size());
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(htd);
}
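boost::mpl::for_each_ref (apparently the project's by-reference variant of boost::mpl::for_each) invokes the functor once per index in the compile-time range, i.e. once per selected property. A minimal self-contained sketch of the same pattern with the standard boost::mpl::for_each:

#include <boost/mpl/range_c.hpp>
#include <boost/mpl/for_each.hpp>
#include <iostream>

// The functor receives one integral constant per index in the range
struct print_index
{
	template<typename T> void operator()(T) const { std::cout << T::value << std::endl; }
};

int main()
{
	boost::mpl::for_each< boost::mpl::range_c<int,0,3> >(print_index()); // prints 0 1 2
	return 0;
}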
@@ -641,7 +620,7 @@ public:
*/
template<unsigned int ... prp> void hostToDevice(size_t start, size_t stop)
{
host_to_device_start_stop_impl<T, prp ...> dth(this->data_,start,stop);
host_to_device_impl<T,memory_traits_inte,S, prp ...> dth(this->data_,start,stop);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(dth);
}
......
@@ -40,7 +40,6 @@
#include "data_type/aggregate.hpp"
#include "vector_map_iterator.hpp"
#include "util/cuda_util.hpp"
#include "util/tokernel_transformation.hpp"
#include "cuda/map_vector_cuda_ker.cuh"
namespace openfpm
@@ -1528,7 +1527,7 @@ namespace openfpm
*/
template<unsigned int ... prp> void hostToDevice()
{
base.template hostToDevice<prp ...>();
base.template hostToDevice<prp ...>(0,v_size);
}
/*! \brief Synchronize the memory buffer in the device with the memory in the host
@@ -1596,6 +1595,15 @@
#endif
void * internal_get_size_pointer() {return &v_size;}
void print_size()
{
std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
std::cout << " " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
std::cout << " " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
}
};
template <typename T> using vector_std = vector<T, HeapMemory, typename memory_traits_lin<T>::type, memory_traits_lin, openfpm::grow_policy_double, STD_VECTOR>;
......
@@ -22,11 +22,98 @@ __global__ void vv_test_size(vector_vector_type vvt, vector_out_type out)
if (p >= vvt.size()) return;
// out.template get<0>(p) = vvt.template get<0>(p).size();
out.template get<0>(p) = vvt.template get<0>(p).size();
}
template<typename vector_vector_type, typename vector_out_type>
__global__ void vv_test_pointer(vector_vector_type vvt, vector_out_type out)
{
int p = threadIdx.x + blockIdx.x * blockDim.x;
if (p >= vvt.size()) return;
out.template get<0>(p) = (size_t)vvt.template get<0>(p).template getPointer<0>();
out.template get<1>(p) = (size_t)vvt.template get<0>(p).template getPointer<1>();
}
template<typename vector_vector_type, typename vector_out_type>
__global__ void vv_test_data_get(vector_vector_type vvt, vector_out_type out, int i_sz)
{
int p = threadIdx.x + blockIdx.x * blockDim.x;
if (p >= out.size()) return;
int id1 = p/i_sz;
int id2 = p%i_sz;
out.template get<0>(p)[0] = (size_t)vvt.template get<0>(id1).template get<0>(id2)[0];
out.template get<0>(p)[1] = (size_t)vvt.template get<0>(id1).template get<0>(id2)[1];
out.template get<0>(p)[2] = (size_t)vvt.template get<0>(id1).template get<0>(id2)[2];
out.template get<1>(p)[0] = (size_t)vvt.template get<0>(id1).template get<1>(id2)[0];
out.template get<1>(p)[1] = (size_t)vvt.template get<0>(id1).template get<1>(id2)[1];
out.template get<1>(p)[2] = (size_t)vvt.template get<0>(id1).template get<1>(id2)[2];
}
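vv_test_data_get flattens the two-level structure: each thread index p addresses inner vector id1 = p/i_sz and element id2 = p%i_sz, where i_sz is the (uniform) inner size. A minimal self-contained sketch of this index packing:

#include <cassert>

int main()
{
	const int i_sz = 7;              // every inner vector has 7 elements (as in the test below)
	int p = 17;                      // flattened thread index
	int id1 = p / i_sz;              // 2 -> which inner vector
	int id2 = p % i_sz;              // 3 -> which element inside it
	assert(id1 * i_sz + id2 == p);   // the packing is invertible
	return 0;
}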
BOOST_AUTO_TEST_SUITE( vector_cuda_tests )
//BOOST_AUTO_TEST_CASE (test_size_of_vector_and_vector_gpu_ker)
//{
// typedef openfpm::vector<Box<3,float>,CudaMemory,typename memory_traits_inte<Box<3,float>>::type,memory_traits_inte> proc_boxes;
// typedef openfpm::vector_gpu_ker<Box<3,float>,memory_traits_inte> proc_boxes_ker;
//
///* BOOST_REQUIRE_EQUAL(sizeof(openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte>),
// sizeof(openfpm::vector<aggregate<int>,CudaMemory,typename memory_traits_inte<aggregate<int>>::type,memory_traits_inte>));
//
//
// BOOST_REQUIRE_EQUAL(sizeof(openfpm::vector_gpu_ker<aggregate<int,int>,memory_traits_inte>),
// sizeof(openfpm::vector<aggregate<int,int>,CudaMemory,typename memory_traits_inte<aggregate<int,int>>::type,memory_traits_inte>));
//
// BOOST_REQUIRE_EQUAL(sizeof(openfpm::vector_gpu_ker<aggregate<int,int,float>,memory_traits_inte>),
// sizeof(openfpm::vector<aggregate<int,int,float>,CudaMemory,typename memory_traits_inte<aggregate<int,int,float>>::type,memory_traits_inte>));
//
//
// BOOST_REQUIRE_EQUAL(sizeof(openfpm::vector_gpu_ker<aggregate<proc_boxes_ker>,memory_traits_inte>),
// sizeof(openfpm::vector<aggregate<proc_boxes>,CudaMemory,typename memory_traits_inte<aggregate<proc_boxes>>::type,memory_traits_inte>));*/
//
//
//
//// BOOST_REQUIRE_EQUAL(sizeof(proc_boxes),sizeof(proc_boxes_ker));
//
// openfpm::vector<aggregate<proc_boxes>,CudaMemory,typename memory_traits_inte<aggregate<proc_boxes>>::type,memory_traits_inte> v_test;
//
// v_test.print_size();
//
// auto v_test_ker = v_test.toKernel();
//
// std::cout << std::endl << std::endl << std::endl;
//
// v_test_ker.print_size();
//
///* v_test.resize_no_device(5);
//
// for (size_t i = 0 ; i< v_test.size() ; i++)
// {
// v_test.template get<0>(i).resize(7);
// }
//
// auto v_test_ker = v_test.toKernel();
//
// std::cout << "SIZE: " << sizeof(proc_boxes) << " " << sizeof(proc_boxes_ker) << std::endl;
//
// size_t base = reinterpret_cast<size_t>(v_test.template get<0>(1).internal_get_size_pointer()) - reinterpret_cast<size_t>(v_test.template get<0>(0).internal_get_size_pointer());
// std::cout << std::hex << "BASE: " << base << " " << reinterpret_cast<size_t>(v_test.template get<0>(0).internal_get_size_pointer()) << std::endl;
//
// base = reinterpret_cast<size_t>(v_test_ker.template get<0>(1).internal_get_size_pointer()) - reinterpret_cast<size_t>(v_test_ker.template get<0>(0).internal_get_size_pointer());
// std::cout << "BASE: " << base << std::endl;
//
// base = reinterpret_cast<size_t>(v_test.template get<0>(2).internal_get_size_pointer()) - reinterpret_cast<size_t>(v_test.template get<0>(0).internal_get_size_pointer());
// std::cout << "BASE: " << base << std::endl;
//
// base = reinterpret_cast<size_t>(v_test_ker.template get<0>(2).internal_get_size_pointer()) - reinterpret_cast<size_t>(v_test_ker.template get<0>(0).internal_get_size_pointer());
// std::cout << "BASE: " << base << std::endl;*/
//}
BOOST_AUTO_TEST_CASE ( test_vector_of_vector_gpu )
{
typedef openfpm::vector<Box<3,float>,CudaMemory,typename memory_traits_inte<Box<3,float>>::type,memory_traits_inte> proc_boxes;
@@ -35,20 +122,25 @@ BOOST_AUTO_TEST_CASE ( test_vector_of_vector_gpu )
vb_int_proc.resize_no_device(5);
openfpm::vector<std::pair<void *,void *>> ptr_dev;
ptr_dev.resize(5);
for (size_t i = 0 ; i< vb_int_proc.size() ; i++)
{
vb_int_proc.template get<0>(i).resize(5);
vb_int_proc.template get<0>(i).resize(7);
for (size_t j = 0 ; j < vb_int_proc.template get<0>(i).size() ; j++)
{
for (size_t k = 0 ; k < 3 ; k++)
{
vb_int_proc.template get<0>(i).template get<0>(j)[k] = i+j;
vb_int_proc.template get<0>(i).template get<1>(j)[k] = 100+i+j;
vb_int_proc.template get<0>(i).template get<0>(j)[k] = i+j+k;
vb_int_proc.template get<0>(i).template get<1>(j)[k] = 100+i+j+k;
}
}
vb_int_proc.template get<0>(i).template hostToDevice<0,1>();
ptr_dev.get(i).first = vb_int_proc.template get<0>(i).template getDeviceBuffer<0>();
ptr_dev.get(i).second = vb_int_proc.template get<0>(i).template getDeviceBuffer<1>();
}
vb_int_proc.template hostToDevice<0>();
@@ -58,11 +150,52 @@ BOOST_AUTO_TEST_CASE ( test_vector_of_vector_gpu )
auto ite = vb_int_proc.getGPUIterator();
auto test = vb_int_proc.toKernel();
vv_test_size<decltype(vb_int_proc.toKernel()),decltype(out.toKernel())><<<ite.wthr,ite.thr>>>(vb_int_proc.toKernel(),out.toKernel());
std::cout << std::string(demangle(typeid(decltype(test)).name())) << std::endl;
out.deviceToHost<0>();
// vv_test_size<decltype(vb_int_proc.toKernel()),decltype(out.toKernel())><<<ite.wthr,ite.thr>>>(vb_int_proc.toKernel(),out.toKernel());
for (size_t i = 0 ; i < out.size() ; i++)
{
BOOST_REQUIRE_EQUAL(out.template get<0>(i),7);
}
openfpm::vector_gpu<aggregate<size_t,size_t>> out_pointer;
out_pointer.resize(vb_int_proc.size());
vv_test_pointer<decltype(vb_int_proc.toKernel()),decltype(out_pointer.toKernel())><<<ite.wthr,ite.thr>>>(vb_int_proc.toKernel(),out_pointer.toKernel());
out_pointer.deviceToHost<0,1>();
for (size_t i = 0 ; i < out_pointer.size() ; i++)
{
BOOST_REQUIRE_EQUAL((size_t)out_pointer.template get<0>(i),(size_t)ptr_dev.get(i).first);
BOOST_REQUIRE_EQUAL((size_t)out_pointer.template get<1>(i),(size_t)ptr_dev.get(i).second);
}
openfpm::vector_gpu<aggregate<float[3],float[3]>> out_data;
out_data.resize(vb_int_proc.size()*7);
auto ite2 = out_data.getGPUIterator();
vv_test_data_get<decltype(vb_int_proc.toKernel()),decltype(out_data.toKernel())><<<ite2.wthr,ite2.thr>>>(vb_int_proc.toKernel(),out_data.toKernel(),7);
out_data.template deviceToHost<0,1>();
size_t i_sz = 7;
for (size_t p = 0 ; p < out_data.size() ; p++)
{
int id1 = p/i_sz;
int id2 = p%i_sz;
BOOST_REQUIRE_EQUAL(out_data.template get<0>(p)[0],vb_int_proc.template get<0>(id1).template get<0>(id2)[0] );
BOOST_REQUIRE_EQUAL(out_data.template get<0>(p)[1],vb_int_proc.template get<0>(id1).template get<0>(id2)[1] );
BOOST_REQUIRE_EQUAL(out_data.template get<0>(p)[2],vb_int_proc.template get<0>(id1).template get<0>(id2)[2] );
BOOST_REQUIRE_EQUAL(out_data.template get<1>(p)[0],vb_int_proc.template get<0>(id1).template get<1>(id2)[0] );
BOOST_REQUIRE_EQUAL(out_data.template get<1>(p)[1],vb_int_proc.template get<0>(id1).template get<1>(id2)[1] );
BOOST_REQUIRE_EQUAL(out_data.template get<1>(p)[2],vb_int_proc.template get<0>(id1).template get<1>(id2)[2] );
}
}
BOOST_AUTO_TEST_SUITE_END()