Commit db133299 authored by incardon's avatar incardon
Browse files

GPU Cell-list

parent 4a48a861
......@@ -10,7 +10,7 @@ else
endif
noinst_PROGRAMS = mem_map
mem_map_SOURCES = ../../openfpm_devices/src/Memleak_check.cpp main.cpp NN/CellList/CellList_gpu_test.cu util/cuda/scan_cuda_unit_tests.cu Grid/gpu_test/cuda_gpu_func.cpp $(CUDA_SOURCES) ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_SOURCES = ../../openfpm_devices/src/Memleak_check.cpp main.cpp NN/CellList/CellList_gpu_test.cu util/multi_array_openfpm/multi_array_ref_openfpm_unit_test.cpp util/cuda/scan_cuda_unit_tests.cu Grid/gpu_test/cuda_gpu_func.cpp $(CUDA_SOURCES) ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp Grid/gpu_test/cuda_grid_unit_tests.cu
mem_map_CXXFLAGS = $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I/usr/local/include -I/usr/local/libhilbert/include
mem_map_CFLAGS = $(CUDA_CFLAGS)
mem_map_LDADD = $(LINKLIBS)
......
......@@ -82,6 +82,8 @@ void test_sub_index()
auto ite = pl.getGPUIterator();
pl.template hostToDevice<0>();
subindex<dim,T,cnt_type,ids_type><<<ite.wthr,ite.thr>>>(*static_cast<ids_type (*)[dim]>(div.getDevicePointer()),
*static_cast<T (*)[dim]>(spacing.getDevicePointer()),
pl.capacity(),
......@@ -181,12 +183,14 @@ void create_starts_and_parts_ids(CellList<dim,T, Mem_fast> & cl,
size_t n_part,
size_t n_cell,
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & starts,
openfpm::vector<aggregate<ids_type[dim+1]>,CudaMemory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> & part_ids)
openfpm::vector<aggregate<ids_type[dim+1]>,CudaMemory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> & part_ids,
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & cells)
{
// Construct starts and part_ids
part_ids.resize(n_part);
starts.resize(n_cell);
cells.resize(n_part);
grid_key_dx_iterator<dim> itg(gr);
......@@ -206,6 +210,8 @@ void create_starts_and_parts_ids(CellList<dim,T, Mem_fast> & cl,
{part_ids.template get<0>(p_id)[k] = cell.get(k);}
part_ids.template get<0>(p_id)[dim] = j;
cells.template get<0>(start+j) = p_id;
}
starts.template get<0>(clin) = start;
......@@ -219,9 +225,11 @@ template<unsigned int dim, typename T, typename cnt_type, typename ids_type>
void test_fill_cell()
{
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> cells;
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> cells_out;
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> starts;
openfpm::vector<aggregate<ids_type[dim+1]>,CudaMemory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> part_ids;
// CellList to check the result
Box<dim,T> domain;
......@@ -249,7 +257,7 @@ void test_fill_cell()
grid_sm<dim,void> gr(div_host);
create_starts_and_parts_ids(cl,gr,pl.size(),tot,starts,part_ids);
create_starts_and_parts_ids(cl,gr,pl.size(),tot,starts,part_ids,cells_out);
bool check = true;
cells.resize(pl.size());
......@@ -338,6 +346,7 @@ void test_reorder_parts(size_t n_part)
{
// Create n_part
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> cells;
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> cells_out;
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> starts;
openfpm::vector<aggregate<ids_type[dim+1]>,CudaMemory,typename memory_traits_inte<aggregate<ids_type[dim+1]>>::type,memory_traits_inte> part_ids;
......@@ -396,25 +405,41 @@ void test_reorder_parts(size_t n_part)
++p_it;
}
parts_prp_out.set(0,parts_prp,0);
grid_sm<dim,void> gr(div_host);
create_starts_and_parts_ids(cl,gr,pl.size(),tot,starts,part_ids);
create_starts_and_parts_ids(cl,gr,pl.size(),tot,starts,part_ids,cells_out);
auto itgg = pl.getGPUIterator();
starts.template hostToDevice<0>();
cells_out.template hostToDevice<0>();
auto ite = pl.getGPUIterator();
// Here we test fill cell
reorder_parts<decltype(parts_prp.template toGPU<0,1,2,3>()),cnt_type,shift_ph<0,cnt_type>><<<1,1>>>(pl.size(),
reorder_parts<decltype(parts_prp.template toGPU<0,1,2,3>()),cnt_type,shift_ph<0,cnt_type>><<<ite.wthr,ite.thr>>>(pl.size(),
parts_prp.template toGPU<0,1,2,3>(),
parts_prp_out.template toGPU<>(),
static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()));
static_cast<cnt_type *>(cells_out.template getDeviceBuffer<0>()));
bool check = true;
parts_prp_out.template deviceToHost<0>();
size_t st = 0;
for (size_t i = 0 ; i < tot ; i++)
{
size_t n = cl.getNelements(i);
for (size_t j = 0 ; j < n ; j++)
{
size_t p = cl.get(i,j);
check &= parts_prp_out.template get<0>(st) == parts_prp.template get<0>(p);
st++;
}
}
BOOST_REQUIRE_EQUAL(check,true);
}
BOOST_AUTO_TEST_CASE ( test_reorder_particles )
......
......@@ -90,7 +90,7 @@ public:
* \return the element (encapsulated)
*
*/
inline auto get(size_t id) -> decltype(base.get_o(grid_key_dx<1>(id)))
inline __device__ auto get(size_t id) -> decltype(base.get_o(grid_key_dx<1>(id)))
{
grid_key_dx<1> key(id);
......@@ -106,7 +106,7 @@ public:
* \return the element (encapsulated)
*
*/
inline auto get(size_t id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
inline __device__ auto get(size_t id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
{
grid_key_dx<1> key(id);
......@@ -125,13 +125,31 @@ public:
*
*/
inline auto get_o(size_t id) const -> decltype(base.get_o(id))
inline __device__ auto get_o(size_t id) const -> decltype(base.get_o(id))
{
grid_key_dx<1> key(id);
return base.get_o(key);
}
/*! \brief Get an element of the vector
*
* \deprecated
*
* exactly as get, exist to keep the compatibility with grid
*
* \param id Element to get
*
* \return the element (encapsulated)
*
*/
inline __device__ auto get_o(size_t id) -> decltype(base.get_o(id))
{
grid_key_dx<1> key(id);
return base.get_o(key);
}
/*! \brief Get the last element of the vector
*
......
......@@ -10,8 +10,11 @@
#include <boost/fusion/include/mpl.hpp>
#include <boost/mpl/vector.hpp>
#include <array>
#include <boost/mpl/pop_front.hpp>
#include <boost/mpl/push_front.hpp>
#include "util/boost/boost_multi_array_openfpm.hpp"
#include "util/multi_array_openfpm/multi_array_ref_openfpm.hpp"
#include "util/ct_array.hpp"
#include "memory_array.hpp"
#include "memory/memory.hpp"
......@@ -397,6 +400,9 @@ class memory_c<multi_array<T>, MEMORY_C_STANDARD, D>
enum { value = true };
};
//! Remove the first element
typedef typename boost::mpl::push_front<typename boost::mpl::pop_front<T>::type,boost::mpl::int_<-1>>::type Tv;
//! define boost::mpl::int_ without boost::mpl
template<int S> using int_ = boost::mpl::int_<S>;
......@@ -409,7 +415,7 @@ class memory_c<multi_array<T>, MEMORY_C_STANDARD, D>
typedef typename at<T,0>::type base;
//! define size_type
typedef typename boost::multi_array_ref<base,size_p::value>::size_type size_type;
typedef typename openfpm::multi_array_ref_openfpm<base,size_p::value,Tv>::size_type size_type;
//#ifdef __NVCC__
......@@ -508,15 +514,9 @@ class memory_c<multi_array<T>, MEMORY_C_STANDARD, D>
// We create an array dims from the boost::mpl::vector
typedef typename generate_array_vector<size_type,T>::result dims;
//! buffer to store the dimensions of the full multi_array buffer
std::array<size_type ,size_p::value> dimensions;
// fill runtime, and the other dimensions
dimensions[0] = sz;
for (int i = 0 ; i < size_p::value-1 ; i++)
{dimensions[i+1] = dims::data[i];}
boost::multi_array_ref_openfpm<base,size_p::value> tmp(static_cast<base *>(mem->getPointer()),dimensions,boost::general_storage_order<size_p::value>(boost::ofp_storage_order()));
openfpm::multi_array_ref_openfpm<base,size_p::value,Tv> tmp(static_cast<base *>(mem->getPointer()),
sz,
openfpm::general_storage_order<size_p::value>(openfpm::ofp_storage_order()));
//! we create the representation for the memory buffer
mem_r.swap(tmp);
......@@ -532,13 +532,12 @@ class memory_c<multi_array<T>, MEMORY_C_STANDARD, D>
D * mem;
//! object that represent the memory as a multi-dimensional array of objects T
boost::multi_array_ref_openfpm<base,boost::mpl::size<T>::value> mem_r;
openfpm::multi_array_ref_openfpm<base,boost::mpl::size<T>::value,Tv> mem_r;
//! constructor
memory_c()
:mem(NULL),
mem_r(static_cast<base *>(NULL),zero_dims<size_type,size_p::value>(),
boost::ofp_storage_order())
mem_r(static_cast<base *>(NULL),0,openfpm::ofp_storage_order())
{}
//! destructor
......
......@@ -268,9 +268,7 @@ class const_multi_array_ref_openfpm : public detail::multi_array::multi_array_im
std::copy(extents.begin(),extents.end(),extent_list_.begin());
this->compute_strides(stride_list_,extent_list_,storage_);
origin_offset_ =
this->calculate_origin_offset(stride_list_,extent_list_,
storage_,index_base_list_);
origin_offset_ = this->calculate_origin_offset(stride_list_,extent_list_, storage_,index_base_list_);
}
size_type num_dimensions() const { return NumDims; }
......@@ -688,9 +686,6 @@ public:
__device__ __host__ reference operator[](index idx) {
printf("Strides ORIG: %d %d %d \n",this->strides()[0],this->strides()[1], this->num_dimensions());
return super_type::access(boost::type<reference>(),
idx,origin(),
this->shape(),this->strides(),
......
......@@ -162,7 +162,7 @@ public:
__host__ __device__ const index* strides() const { return strides_; }
__host__ __device__ const index* index_bases() const { return index_base_; }
size_type num_elements() const {
size_type num_elements() const {
return std::accumulate(shape(),shape() + num_dimensions(),
size_type(1), std::multiplies<size_type>());
}
......@@ -193,30 +193,6 @@ private:
const_sub_array_openfpm& operator=(const const_sub_array_openfpm&);
};
template<unsigned int NumDims>
struct print_debug
{
template<typename T, typename T2> static void print(const T & obj1, const T2 & obj2)
{
printf("HELLO 1 \n");
}
};
template<>
struct print_debug<2>
{
template<typename T, typename T2> static void print(T && obj1,const T2 & obj2)
{
float * ptr = obj1.origin();
printf("ORIGIN DST: %p \n",ptr);
const float * ptr2 = obj2.origin();
printf("ORIGIN SRC: %p \n",ptr2);
obj1 = obj2;
}
};
//
// sub_array
// multi_array's proxy class to allow multiple overloads of
......@@ -264,8 +240,6 @@ public:
#endif
// iterator-based copy
// std::copy(other.begin(),other.end(),begin());
printf("CHECK: %p \n",other.origin());
printf("CHECK SIZE: %p \n",other.size());
this->operator[](0) = other[0];
this->operator[](1) = other[1];
......@@ -277,24 +251,6 @@ public:
return *this;
}
__device__ __host__ sub_array_openfpm& copy_secondary(const sub_array_openfpm& other) {
if (&other != this) {
#ifdef SE_CLASS1
// make sure the dimensions agree
BOOST_ASSERT(other.num_dimensions() == this->num_dimensions());
// BOOST_ASSERT(std::equal(other.shape(),
// other.shape()+this->num_dimensions(),
// this->shape()));
#endif
// iterator-based copy
//std::copy(other.begin(),other.end(),begin());
for (int i = 0 ; i < (int)other.size() ; i++)
{this->operator[](i) = other[i];}
}
return *this;
}
__device__ __host__ sub_array_openfpm& operator=(const sub_array_openfpm& other) {
if (&other != this) {
......@@ -308,26 +264,8 @@ public:
// iterator-based copy
//std::copy(other.begin(),other.end(),begin());
if (this->num_dimensions() < 2)
{
for (int i = 0 ; i < (int)other.size() ; i++)
{this->operator[](i) = other[i];}
}
else
{
const T * test1 = other.origin();
const T * test2 = this->origin();
printf("ORIGIN: %p DESTINATION: %p \n",test1,test2);
printf("S0: %d S1: %d \n",other.strides()[0],other.strides()[1]);
printf("S_0: %d S_1: %d \n",this->strides()[0],this->strides()[1]);
printf("S0_p: %p S1_p: %p \n",&other.strides()[0],&other.strides()[1]);
printf("dims %d \n",other.num_dimensions());
auto a = other[0];
print_debug<NumDims>::print(this->operator[](1),other[1]);
}
}
return *this;
}
......
......@@ -53,22 +53,23 @@ public:
// template typedefs
template <std::size_t NDims>
struct const_array_view_openfpm {
struct const_array_view_openfpm
{
typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type;
};
template <std::size_t NDims>
struct array_view_openfpm {
struct array_view_openfpm
{
typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type;
};
template <typename OPtr>
const_multi_array_view_openfpm(const
const_multi_array_view_openfpm<T,NumDims,OPtr>& other) :
base_(other.base_), origin_offset_(other.origin_offset_),
num_elements_(other.num_elements_), extent_list_(other.extent_list_),
stride_list_(other.stride_list_), index_base_list_(other.index_base_list_)
{ }
const_multi_array_view_openfpm(const const_multi_array_view_openfpm<T,NumDims,OPtr>& other)
:base_(other.base_), origin_offset_(other.origin_offset_),
num_elements_(other.num_elements_), extent_list_(other.extent_list_),
stride_list_(other.stride_list_), index_base_list_(other.index_base_list_)
{}
template <class BaseList>
......
......@@ -3,6 +3,7 @@
#include "copy_general.hpp"
#include "util/cuda_util.hpp"
#include "util/multi_array_openfpm/multi_array_ref_openfpm.hpp"
/*! \brief This class copy general objects
*
......
......@@ -39,6 +39,8 @@ void test_compress()
auto ite = cl_n.getGPUIterator();
ite.thr.x /= 4;
cl_n.template hostToDevice<0>();
compress4<cnt_type,ids_type><<<ite.wthr,ite.thr>>>(cl_n.size(),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<ids_type *>(compressed.template getDeviceBuffer<0>()));
......@@ -79,6 +81,8 @@ void test_breduce()
red.resize(nblocks);
cl_n.template hostToDevice<0>();
breduce<THREADS/32,cnt_type,ids_type,ratio_reduction<cnt_type,ids_type>><<<nblocks,THREADS>>>(cl_n.size()/ratio,
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));
......@@ -107,6 +111,8 @@ void test_bexscan()
int nblocks = base.size();
base.template hostToDevice<0>();
bexscan<THREADS,cnt_type><<<1,THREADS,nblocks*sizeof(unsigned int)>>>(nblocks,
static_cast<cnt_type *>(base.template getDeviceBuffer<0>()));
......@@ -143,6 +149,9 @@ void test_gexscan()
int nblocks = cl_n.size() / 16;
cl_n.template hostToDevice<0>();
base.template hostToDevice<0>();
gexscan<THREADS/32,ratio_extend<unsigned int,unsigned char>> <<< cl_n.size() / 16 / THREADS, THREADS >>>(nblocks,
static_cast<ratio_extend<unsigned int,unsigned char>::cnt_type4 *>(cl_n.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(base.template getDeviceBuffer<0>()),
......
......@@ -32,6 +32,8 @@
#include <boost/type_traits/is_same.hpp>
#include <boost/utility/value_init.hpp>
#pragma hd_warning_disable
namespace boost { namespace mpl {
namespace aux {
......@@ -117,4 +119,6 @@ void for_each_ref(F & f, Sequence* = 0)
}}
#pragma hd_warning_enable
#endif // OPENFPM_FOR_EACH_HPP_INCLUDED
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment