Commit bdcacd3a authored by incardon's avatar incardon
Browse files

working vector for cuda with modified boost fusion

parent c3bedcd4
......@@ -222,10 +222,10 @@ if test x"$debuger" = x"yes"; then
AC_DEFINE([DEBUG_MODE],[],[Debug])
AC_DEFINE([DEBUG],[],[Debug])
CXXFLAGS="$CXXFLAGS -g3 -Wall -O0 "
NVCCFLAGS+="$NVCCFLAGS -g -O0 "
NVCCFLAGS="$NVCCFLAGS --std=c++11 -g -O0 "
else
CXXFLAGS="$CXXFLAGS -Wall -O3 -g3 -funroll-loops "
NVCCFLAGS+="$NVCCFLAGS -O3 "
NVCCFLAGS="$NVCCFLAGS --std=c++11 -O3 "
fi
##########
......@@ -287,7 +287,6 @@ fi
#
NVCCFLAGS="--std=c++11"
if test x$gpu_support = x"no"; then
CUDA_LIBS=""
......
......@@ -46,7 +46,7 @@ struct copy_cpu_encap_encap
* \param dst source encapsulated object
*
*/
inline copy_cpu_encap_encap(const e_src & src, e_dst & dst)
__device__ __host__ inline copy_cpu_encap_encap(const e_src & src, e_dst & dst)
:src(src),dst(dst)
{
#ifdef SE_CLASS1
......@@ -72,7 +72,7 @@ struct copy_cpu_encap_encap
//! It call the copy function for each property
template<typename T>
inline void operator()(T& t) const
__device__ __host__ inline void operator()(T& t) const
{
// Remove the reference from the type to copy
typedef typename boost::remove_reference<decltype(dst.template get<T::value>())>::type copy_rtype;
......@@ -506,7 +506,7 @@ public:
* \return The reference of the data
*
*/
template <unsigned int p> auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
template <unsigned int p> __device__ __host__ auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
{
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
......@@ -518,7 +518,7 @@ public:
* \return The reference of the data
*
*/
template <unsigned int p> auto get() const -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
template <unsigned int p> __device__ __host__ auto get() const -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
{
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
......@@ -530,7 +530,7 @@ public:
* \return itself
*
*/
inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec)
__device__ __host__ inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec)
{
copy_cpu_encap_encap<encapc<dim,T,Mem>,encapc<dim,T,Mem>> cp(ec,*this);
......
......@@ -161,7 +161,7 @@ struct mem_setm<S,layout,data_type,g1_type,1>
template<unsigned int dim , typename T, typename layout, typename data_type, typename g1_type, typename key_type, unsigned int sel = 2*is_layout_mlin<layout>::value + is_layout_inte<layout>::value >
struct mem_geto
{
static inline encapc<dim,T,typename layout::type> get(data_type & data_, const g1_type & g1, const key_type & v1)
__device__ __host__ static inline encapc<dim,T,typename layout::type> get(data_type & data_, const g1_type & g1, const key_type & v1)
{
return encapc<dim,T,typename layout::type>(data_.mem_r.operator[](g1.LinId(v1)));
}
......@@ -171,7 +171,7 @@ struct mem_geto
template<unsigned int dim, typename T,typename layout, typename data_type, typename g1_type, typename key_type>
struct mem_geto<dim,T,layout,data_type,g1_type,key_type,1>
{
static inline encapc<dim,T,typename layout::type> get(data_type & data_, const g1_type & g1, const key_type & v1)
__device__ __host__ static inline encapc<dim,T,typename layout::type> get(data_type & data_, const g1_type & g1, const key_type & v1)
{
return encapc<dim,T,typename layout::type>(data_,g1.LinId(v1));
}
......
......@@ -100,9 +100,9 @@ public:
* \param t the other numbers
*
*/
template<typename ...T> inline grid_key_dx(const size_t v,const T...t)
template<typename ...T> __device__ __host__ inline grid_key_dx(const size_t v,const T...t)
{
#ifdef DEBUG
#ifdef SE_CLASS1
if (sizeof...(t) != dim -1)
std::cerr << "Error grid_key: " << __FILE__ << " " << __LINE__ << " creating a key of dimension " << dim << " require " << dim << " numbers " << sizeof...(t) + 1 << " provided" << "\n";
#endif
......
......@@ -266,103 +266,9 @@ struct copy_host_to_device
}
};
/*! \brief grid interface available when on gpu
*
* \tparam n_buf number of template buffers
*
*/
template<unsigned int dim, typename T>
struct grid_gpu_ker
{
//! grid information
grid_sm<dim,void> g1;
//! type of layout of the structure
typedef typename memory_traits_inte<T>::type layout;
//! layout data
layout data_;
#include "map_grid_cuda_ker.cuh"
grid_gpu_ker()
{}
grid_gpu_ker(const grid_sm<dim,void> & g1)
:g1(g1)
{}
grid_gpu_ker(const grid_gpu_ker & cpy)
:g1(cpy.g1)
{
copy_switch_memory_c_no_cpy<T> bp_mc(cpy.data_,this->data_);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(bp_mc);
}
/*! \brief Return the internal grid information
*
* Return the internal grid information
*
* \return the internal grid
*
*/
__device__ __host__ const grid_sm<dim,void> & getGrid() const
{
return g1;
}
/*! \brief Get the reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
*
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get(const grid_key_dx<dim> & v1)
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the const reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
*
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline const r_type get(const grid_key_dx<dim> & v1) const
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the reference of the selected element
*
* \param lin_id linearized element that identify the element in the grid
*
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline r_type get(const size_t lin_id)
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
/*! \brief Get the const reference of the selected element
*
* \param lin_id linearized element that identify the element in the grid
*
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline const r_type get(size_t lin_id) const
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
};
struct dim3_
{
......@@ -447,17 +353,37 @@ public:
{
}
/*! \brief Copy the memory from host to device
*
*/
template<unsigned int id> void hostToDevice()
{
boost::fusion::at_c<id>(this->data_).mem->getDevicePointer();
}
/*! \brief It return the properties arrays.
*
* In case of Cuda memory it return the device pointers to pass to the kernels
*
*
*/
template<unsigned int id> void * getDeviceBuffer()
template<unsigned int id> void * getDeviceBufferCopy()
{
return boost::fusion::at_c<id>(this->data_).mem->getDevicePointer();
}
/*! \brief It return the properties arrays.
*
* In case of Cuda memory it return the device pointers to pass to the kernels
*
* This variant does not copy the host memory to the device memory
*
*/
template<unsigned int id> void * getDeviceBuffer()
{
return boost::fusion::at_c<id>(this->data_).mem->getDevicePointerNoCopy();
}
/*! \brief Synchronize the memory buffer in the device with the memory in the host
*
*
......
/*
* map_grid_cuda_ker.hpp
*
* Created on: Jun 28, 2018
* Author: i-bird
*/
#ifndef MAP_GRID_CUDA_KER_HPP_
#define MAP_GRID_CUDA_KER_HPP_
/*! \brief grid interface available when on gpu
*
* \tparam n_buf number of template buffers
*
*/
template<unsigned int dim, typename T>
struct grid_gpu_ker
{
//! grid information
grid_sm<dim,void> g1;
//! type of layout of the structure
typedef typename memory_traits_inte<T>::type layout;
//! layout data
layout data_;
grid_gpu_ker()
{}
grid_gpu_ker(const grid_sm<dim,void> & g1)
:g1(g1)
{
}
grid_gpu_ker(const grid_gpu_ker & cpy)
:g1(cpy.g1)
{
copy_switch_memory_c_no_cpy<T> bp_mc(cpy.data_,this->data_);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(bp_mc);
}
/*! \brief Return the internal grid information
*
* Return the internal grid information
*
* \return the internal grid
*
*/
__device__ __host__ const grid_sm<dim,void> & getGrid() const
{
return g1;
}
/*! \brief Get the reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
*
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get(const grid_key_dx<dim> & v1)
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the const reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
*
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline const r_type get(const grid_key_dx<dim> & v1) const
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the reference of the selected element
*
* \param lin_id linearized element that identify the element in the grid
*
* \return the reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline r_type get(const size_t lin_id)
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
/*! \brief Get the const reference of the selected element
*
* \param lin_id linearized element that identify the element in the grid
*
* \return the const reference of the element
*
*/
template <unsigned int p, typename r_type=decltype(mem_get<p,memory_traits_inte<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))>
__device__ __host__ inline const r_type get(size_t lin_id) const
{
return mem_get<p,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),grid_key_dx<dim>>::get_lin(data_,g1,lin_id);
}
/*! \brief Get the of the selected element as a boost::fusion::vector
*
* Get the selected element as a boost::fusion::vector
*
* \param v1 grid_key that identify the element in the grid
*
* \see encap_c
*
* \return an encap_c that is the representation of the object (careful is not the object)
*
*/
__device__ inline encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1)
{
return mem_geto<dim,T,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the of the selected element as a boost::fusion::vector
*
* Get the selected element as a boost::fusion::vector
*
* \param v1 grid_key that identify the element in the grid
*
* \see encap_c
*
* \return an encap_c that is the representation of the object (careful is not the object)
*
*/
__device__ inline const encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1) const
{
return mem_geto<dim,T,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(const_cast<decltype(this->data_) &>(data_),g1,v1);
}
/*! \brief Get the of the selected element as a boost::fusion::vector
*
* Get the selected element as a boost::fusion::vector
*
* \param v1 grid_key that identify the element in the grid
*
* \see encap_c
*
* \return an encap_c that is the representation of the object (careful is not the object)
*
*/
// __device__ inline encapc<dim,T,layout> get_o(int v1)
// {
// return mem_geto<dim,T,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
// }
/*! \brief Get the of the selected element as a boost::fusion::vector
*
* Get the selected element as a boost::fusion::vector
*
* \param v1 grid_key that identify the element in the grid
*
* \see encap_c
*
* \return an encap_c that is the representation of the object (careful is not the object)
*
*/
// __device__ inline const encapc<dim,T,layout> get_o(int v1) const
// {
// return mem_geto<dim,T,memory_traits_inte<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(const_cast<decltype(this->data_) &>(data_),g1,v1);
// }
/*! \brief Set an element of the grid from another element of another grid
*
* \param key1 element of the grid to set
* \param g source grid
* \param key2 element of the source grid to copy
*
*/
__device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T> & g, const grid_key_dx<dim> & key2)
{
this->get_o(key1) = g.get_o(key2);
}
};
#endif /* MAP_GRID_CUDA_KER_HPP_ */
LINKLIBS = $(LIBHILBERT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(CUDA_LIBS) $(BOOST_IOSTREAMS_LIB) $(LOCAL_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(BOOST_CHRONO_LIB) $(BOOST_TIMER_LIB) $(BOOST_SYSTEM_LIB)
NVCCFLAGS:=$(NVCCFLAGS) -g --std=c++11
FLAGS_NVCC = $(NVCCFLAGS) -g
if BUILDCUDA
CUDA_SOURCES=../../openfpm_devices/src/memory/CudaMemory.cu
......@@ -10,7 +10,7 @@ else
endif
noinst_PROGRAMS = mem_map
mem_map_SOURCES = ../../openfpm_devices/src/Memleak_check.cpp main.cpp NN/CellList/CellList_gpu_test.cu Grid/gpu_test/cuda_gpu_func.cpp $(CUDA_SOURCES) ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_SOURCES = ../../openfpm_devices/src/Memleak_check.cpp main.cpp NN/CellList/CellList_gpu_test.cu util/cuda/scan_cuda_unit_tests.cu Grid/gpu_test/cuda_gpu_func.cpp $(CUDA_SOURCES) ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_CXXFLAGS = $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I/usr/local/include -I/usr/local/libhilbert/include
mem_map_CFLAGS = $(CUDA_CFLAGS)
mem_map_LDADD = $(LINKLIBS)
......@@ -32,7 +32,7 @@ util/copy_compare/compare_fusion_vector.hpp util/SimpleRNG.hpp util/copy_compare
Packer_Unpacker/Pack_selector.hpp Packer_Unpacker/Packer_nested_tests.hpp Packer_Unpacker/Packer_unit_tests.hpp Packer_Unpacker/Packer.hpp Packer_Unpacker/Unpacker.hpp Packer_Unpacker/Packer_util.hpp Packer_Unpacker/prp_all_zero.hpp Packer_Unpacker/has_pack_encap.hpp Packer_Unpacker/has_pack_agg.hpp Packer_Unpacker/has_max_prop.hpp
.cu.o :
$(NVCC) $(NVCCFLAGS) $(INCLUDES_PATH) -o $@ -c $<
$(NVCC) $(FLAGS_NVCC) $(INCLUDES_PATH) -o $@ -c $<
test: mem_map
source $(HOME)/openfpm_vars && cd .. && ./src/mem_map
......
......@@ -214,7 +214,7 @@ BOOST_AUTO_TEST_CASE( CellDecomposer_consistent_use )
// we create 2 grid_sm for the 2 CellDecomposer (used to check the consistency)
size_t div_ext[3] = {6+16,6+16,6+16};
// The old one has a padding 1 the new padding 3, result is padding 2
grid_key_dx<3> key_base(2,2,2);
grid_key_dx<3> key_base({2,2,2});
grid_sm<3,void> cd_gr(div);
grid_sm<3,void> cd2_gr(div_ext);
......
......@@ -13,6 +13,9 @@
#include "util/cuda_util.hpp"
#include "CellList_gpu.hpp"
#include "CellList.hpp"
#include "util/boost/boost_array_openfpm.hpp"
#include "Point_test.hpp"
BOOST_AUTO_TEST_SUITE( CellList_gpu_test )
......@@ -143,163 +146,252 @@ void test_sub_index()
BOOST_REQUIRE_EQUAL(cl_n.template get<0>(gr.LinId({0,0,0})),1);
}
template<typename cnt_type, typename ids_type>
void test_compress()
template<unsigned int dim, typename T>
void create_n_part(int n_part,
openfpm::vector<Point<dim,T>,CudaMemory,typename memory_traits_inte<Point<dim,T>>::type,memory_traits_inte> & pl,
CellList<dim,T, Mem_fast> & cl)
{
openfpm::vector<aggregate<cnt_type,cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type,cnt_type>>::type,memory_traits_inte> cl_n;
openfpm::vector<aggregate<ids_type>,CudaMemory,typename memory_traits_inte<aggregate<ids_type>>::type,memory_traits_inte> compressed;
pl.resize(n_part);
// fill some counting
auto it = pl.getIterator();
cl_n.resize(12);
while(it.isNext())
{
auto p = it.get();
cl_n.template get<0>(0) = 3;
cl_n.template get<0>(1) = 5;
cl_n.template get<0>(2) = 8;
cl_n.template get<0>(3) = 1;
cl_n.template get<0>(4) = 0;
cl_n.template get<0>(5) = 0;
cl_n.template get<0>(6) = 21;
cl_n.template get<0>(7) = 4;
cl_n.template get<0>(8) = 4;
cl_n.template get<0>(9) = 6;
cl_n.template get<0>(10) = 10;
pl.template get<0>(p)[0] = (double)rand()/RAND_MAX;
pl.template get<0>(p)[1] = (double)rand()/RAND_MAX;
pl.template get<0>(p)[2] = (double)rand()/RAND_MAX;
compressed.resize(cl_n.size());
Point<dim,T> xp;
xp.get(0) = pl.template get<0>(p)[0];
xp.get(1) = pl.template get<0>(p)[1];
xp.get(2) = pl.template get<0>(p)[2];
auto ite = cl_n.getGPUIterator();
ite.thr.x /= 4;
size_t c = cl.getCell(xp);
cl.addCell(c,p);
compress4<cnt_type,ids_type><<<ite.wthr,ite.thr>>>(cl_n.size(),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<ids_type *>(compressed.template getDeviceBuffer<0>()));
compressed.template deviceToHost<0>();
BOOST_REQUIRE_EQUAL(compressed.template get<0>(0),3);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(1),5);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(2),8);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(3),1);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(4),0);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(5),0);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(6),21);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(7),4);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(8),4);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(9),6);
BOOST_REQUIRE_EQUAL(compressed.template get<0>(10),10);
++it;
}
}
template<typename cnt_type, typename ids_type>
void test_breduce()
template<unsigned int dim, typename T, typename cnt_type, typename ids_type>
void create_starts_and_parts_ids(CellList<dim,T, Mem_fast> & cl,
grid_sm<dim,void> & gr,
size_t n_part,
size_t n_cell,
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & starts,
openfpm::vector<aggregate<ids_type[dim+1]>,CudaMemory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> & part_ids)
{
openfpm::vector<aggregate<ids_type>,CudaMemory,typename memory_traits_inte<aggregate<ids_type>>::type,memory_traits_inte> cl_n;
// Construct starts and part_ids
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> red;
part_ids.resize(n_part);
starts.resize(n_cell);
cl_n.resize(8192);
grid_key_dx_iterator<dim> itg(gr);
constexpr int THREADS = 128;
constexpr int ratio = 4*sizeof(cnt_type)/sizeof(ids_type);
size_t start = 0;
// fill with some data
while (itg.isNext())
{
auto cell = itg.get();
for (size_t i = 0 ; i < cl_n.size() ; i++)
{cl_n.template get<0>(i) = i%16;}
size_t clin = gr.LinId(cell);
int nblocks = ((cl_n.size() / (ratio) ) + THREADS - 1 ) / THREADS;
for (size_t j = 0 ; j < cl.getNelements(clin) ; j++)
{
size_t p_id = cl.get(clin,j);
red.resize(nblocks);
for (size_t k = 0 ; k < dim ; k++)
{part_ids.template get<0>(p_id)[k] = cell.get(k);}
breduce<THREADS/32,cnt_type,ids_type,ratio_reduction<cnt_type,ids_type>><<<nblocks,THREADS>>>(cl_n.size()/ratio,
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));