Commit bd89b6d0 authored by incardon

Adding CellList_cpu_ker

parent e9617064
......@@ -166,7 +166,7 @@ struct mem_geto
static inline encapc<dim,T,typename layout::type> get_lin(data_type & data_, const size_t & v1)
{
return encapc<dim,T,typename layout::type>(data_.mem_r->operator[](v1));
return encapc<dim,T,typename layout::type>(data_.mem_r.operator[](v1));
}
};
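For context, the switch from -> to . reflects that mem_r is no longer reached through a pointer but is an embedded object that can be re-bound to another buffer (see the bind_ref calls later in this commit). A minimal sketch of such a rebindable view; the names are illustrative, this is not the openfpm multi-array wrapper:
// Rebindable, pointer-free array view in the spirit of the new mem_r:
// an object member (accessed with '.'), rebound via bind_ref instead of
// reassigning a raw pointer. Illustrative sketch only.
#include <cstddef>
#include <iostream>

template<typename T>
struct array_view
{
	T * ptr = nullptr;

	// adopt the buffer another view points at (no ownership transfer)
	void bind_ref(const array_view & other) { ptr = other.ptr; }

	T & operator[](size_t i) { return ptr[i]; }
};

int main()
{
	double buf[4] = {1,2,3,4};
	array_view<double> a; a.ptr = buf;
	array_view<double> b; b.bind_ref(a);   // b now aliases a's storage
	std::cout << b[2] << "\n";             // prints 3
}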
......
......@@ -11,6 +11,24 @@
#include "grid_base_impl_layout.hpp"
#include "util/cuda_util.hpp"
template<bool np,typename T>
struct skip_init
{
static bool skip_()
{
return true;
}
};
template<typename T>
struct skip_init<true,T>
{
static bool skip_()
{
return T::noPointers();
}
};
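skip_init dispatches on whether T exposes a static noPointers() member, which is what the has_noPointers trait used below detects. A self-contained sketch of the same detection-plus-dispatch idiom; has_no_pointers here is an assumption for illustration, not the openfpm trait from util/common.hpp:
// Detection idiom behind the skip_init dispatch (C++17, illustrative only).
#include <iostream>
#include <type_traits>

// true_type when T::noPointers() is well formed, false_type otherwise
template<typename T, typename = void>
struct has_no_pointers : std::false_type {};

template<typename T>
struct has_no_pointers<T, std::void_t<decltype(T::noPointers())>> : std::true_type {};

// Primary template: T cannot tell us anything, mirror skip_init's default
template<bool np, typename T>
struct skip_init_sketch
{
	static bool skip_() { return true; }
};

// Specialization: ask the type itself whether it is pointer-free
template<typename T>
struct skip_init_sketch<true,T>
{
	static bool skip_() { return T::noPointers(); }
};

struct pod_like  { static bool noPointers() { return true;  } };
struct with_ptrs { static bool noPointers() { return false; } };
struct unknown   {};

int main()
{
	std::cout << skip_init_sketch<has_no_pointers<pod_like>::value,pod_like>::skip_()    // 1
	          << skip_init_sketch<has_no_pointers<with_ptrs>::value,with_ptrs>::skip_()  // 0
	          << skip_init_sketch<has_no_pointers<unknown>::value,unknown>::skip_()      // 1
	          << "\n";
}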
#ifdef CUDA_GPU
#ifndef __NVCC__
......@@ -521,7 +539,9 @@ public:
//! Allocate the memory and create the representation
// if (g1.size() != 0) data_.allocate(g1.size());
mem_setmemory<decltype(data_),S,layout_base<T>>::template setMemory<p>(data_,m,g1.size(),T::noPointers);
bool skip_ini = skip_init<has_noPointers<T>::value,T>::skip_();
mem_setmemory<decltype(data_),S,layout_base<T>>::template setMemory<p>(data_,m,g1.size(),skip_ini);
is_mem_init = true;
}
......@@ -740,11 +760,17 @@ public:
* \param fl byte pattern to fill
*
*/
template<int prp>
void fill(unsigned char fl)
{
#ifdef SE_CLASS2
check_valid(this,8);
#endif
if (prp != 0 || is_layout_mlin<layout_base<T>>::type::value == false)
{
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " unsupported fill operation " << std::endl;
}
memset(getPointer(),fl,size() * sizeof(T));
}
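The guard reflects that a raw memset is only meaningful when the whole grid is one contiguous buffer: with the linear (array-of-structs) layout and prp == 0 the byte pattern covers every property of every element, while an interleaved (struct-of-arrays) layout keeps each property in its own buffer. A hedged sketch of the distinction, with simplified stand-ins rather than the openfpm layouts:
// Why memset-based fill only fits the linear (array-of-structs) layout.
#include <cstring>
#include <iostream>

struct cell { float density; int flag; };

int main()
{
	// Linear layout: one buffer holds all properties interleaved,
	// so a single memset touches every property of every element.
	cell aos[8];
	std::memset(aos, 0, sizeof(aos));

	// Struct-of-arrays layout: each property is its own buffer;
	// filling "everything" needs one memset per property.
	float density[8];
	int   flag[8];
	std::memset(density, 0, sizeof(density));
	std::memset(flag,    0, sizeof(flag));

	std::cout << aos[3].density << " " << flag[3] << "\n"; // 0 0
}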
......
......@@ -86,6 +86,9 @@ template<unsigned int dim,typename stencil=no_stencil,typename warn=print_warnin
template<unsigned int N, typename T>
class grid_sm
{
//! Box enclosing the grid
Box<N,size_t> box;
//! total number of the elements in the grid
size_t size_tot;
......@@ -110,11 +113,19 @@ class grid_sm
sz_s[0] = sz;
this->sz[0] = sz;
// set the box
box.setHigh(0,sz);
box.setLow(0,0);
for (size_t i = 1 ; i < N ; i++)
{
/* coverity[dead_error_begin] */
sz_s[i] = sz*sz_s[i-1];
this->sz[i] = sz;
// set the box
box.setHigh(i,sz);
box.setLow(i,0);
}
}
......@@ -133,11 +144,19 @@ class grid_sm
sz_s[0] = sz[0];
this->sz[0] = sz[0];
// set the box
box.setHigh(0,sz[0]);
box.setLow(0,0);
for (size_t i = 1 ; i < N ; i++)
{
/* coverity[dead_error_begin] */
sz_s[i] = sz[i]*sz_s[i-1];
this->sz[i] = sz[i];
// set the box
box.setHigh(i,sz[i]);
box.setLow(i,0);
}
}
......@@ -153,10 +172,18 @@ class grid_sm
sz_s[0] = 0;
this->sz[0] = 0;
// set the box
box.setHigh(0,0);
box.setLow(0,0);
for (size_t i = 1 ; i < N ; i++)
{
/* coverity[dead_error_begin] */
sz_s[i] = sz[i]*sz_s[i-1];
// set the box
box.setHigh(i,sz[i]);
box.setLow(i,0);
}
}
......@@ -169,20 +196,6 @@ public:
*/
inline Box<N,size_t> getBox() const
{
//! Box enclosing the grid
Box<N,size_t> box;
// set the box
box.setHigh(0,sz[0]);
box.setLow(0,0);
for (size_t i = 1 ; i < N ; i++)
{
// set the box
box.setHigh(i,sz[i]);
box.setLow(i,0);
}
return box;
}
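The effect of the constructor changes above is that getBox() shrinks from rebuilding the box on every call to returning the cached member. A generic sketch of this precompute-in-constructor pattern, with illustrative types rather than grid_sm/Box:
// Precompute-in-constructor: the enclosing extent is built once when the
// sizes are known, instead of on every accessor call. Illustrative only.
#include <array>
#include <cstddef>
#include <iostream>

template<unsigned int N>
struct extent_cache
{
	std::array<size_t,N> low{};   // all zeros
	std::array<size_t,N> high{};

	explicit extent_cache(const std::array<size_t,N> & sz)
	{
		for (size_t i = 0 ; i < N ; i++)
		{high[i] = sz[i];}        // done once, here
	}

	// now a trivial accessor returning the cached extent
	const std::array<size_t,N> & getHigh() const { return high; }
};

int main()
{
	extent_cache<3> e({10,10,10});
	std::cout << e.getHigh()[2] << "\n"; // 10
}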
......@@ -578,6 +591,8 @@ public:
sz_s[i] = g.sz_s[i];
}
box = g.box;
return *this;
}
......
......@@ -57,6 +57,7 @@
#include "Packer_Unpacker/Packer_util.hpp"
#include "Packer_Unpacker/has_pack_agg.hpp"
#include "grid_base_implementation.hpp"
#include "map_grid_cuda_ker.cuh"
#ifndef CUDA_GPU
typedef HeapMemory CudaMemory;
......@@ -189,55 +190,71 @@ public:
{
return false;
}
};
/*! \brief this class is a functor for the "for_each" algorithm
*
* This class is a functor for the "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to copy one encap into another encap object
*
* \tparam encap source
* \tparam encap dst
*
*/
#ifdef CUDA_GPU
template<typename T_type>
struct copy_switch_memory_c_no_cpy
{
//! encapsulated source object
const typename memory_traits_inte<T_type>::type & src;
//! encapsulated destination object
typename memory_traits_inte<T_type>::type & dst;
/*! \brief Copy the memory from host to device
*
* \tparam prp unused here: all properties are copied in any case
*
*/
template<unsigned int ... prp> void hostToDevice()
{
this->mem->getDevicePointer();
}
/*! \brief It returns the properties arrays.
*
* In case of Cuda memory it returns the device pointers to pass to the kernels
*
*
*/
template<unsigned int id> void * getDeviceBufferCopy()
{
return this->mem->getDevicePointer();
}
/*! \brief constructor
*
* \param src source encapsulated object
* \param dst destination encapsulated object
*
*/
inline copy_switch_memory_c_no_cpy(const typename memory_traits_inte<T_type>::type & src,
typename memory_traits_inte<T_type>::type & dst)
:src(src),dst(dst)
{
};
/*! \brief It returns the properties arrays.
*
* In case of Cuda memory it returns the device pointers to pass to the kernels
*
* This variant does not copy the host memory to the device memory
*
*/
template<unsigned int id> void * getDeviceBuffer()
{
return this->mem->getDevicePointerNoCopy();
}
/*! \brief Synchronize the device memory buffer with the memory on the host
*
*
*/
template<unsigned int ... prp> void deviceToHost()
{
this->mem->deviceToHost();
}
//! It calls the copy function for each property
template<typename T>
inline void operator()(T& t) const
{
boost::fusion::at_c<T::value>(dst).mem = boost::fusion::at_c<T::value>(src).mem;
// Increment the reference of mem
boost::fusion::at_c<T::value>(dst).mem->incRef();
boost::fusion::at_c<T::value>(dst).mem_r.bind_ref(boost::fusion::at_c<T::value>(src).mem_r);
boost::fusion::at_c<T::value>(dst).switchToDevicePtrNoCopy();
}
/*! \brief Convert the grid into a data-structure compatible with computing on the GPU
*
* The object created can be considered like a reference to the original grid
*
*/
grid_gpu_ker<dim,T> toKernel()
{
grid_gpu_ker<dim,T> g(this->g1);
// copy_switch_memory_c_no_cpy<T,memory_traits_lin> cp_mc(this->data_,g.data_);
// boost::mpl::for_each_ref< boost::mpl::range_c<int,0,1> >(cp_mc);
return g;
}
#endif
};
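Together, hostToDevice, toKernel and deviceToHost give the grid the usual offload round trip: fill on the host, push, hand a kernel-safe view to a __global__ function, pull the results back. A hedged usage sketch under the assumption that grid_gpu_ker can be passed to a kernel by value; the function and kernel names here are hypothetical:
// Hypothetical round trip using the members added above (sketch only).
#ifdef __NVCC__
template<typename grid_view>
__global__ void process_grid(grid_view g)
{
	// ... would read/write the grid through the kernel-side view ...
}

template<typename grid_type>
void offload_round_trip(grid_type & g)
{
	g.template hostToDevice<0>();   // push property 0 to the device
	auto g_ker = g.toKernel();      // lightweight, reference-like view
	process_grid<<<1,32>>>(g_ker);  // the view is passed by value
	g.template deviceToHost<0>();   // pull the results back to the host
}
#endif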
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
......@@ -356,10 +373,6 @@ struct device_to_host_impl
};
#include "map_grid_cuda_ker.cuh"
struct dim3_
{
//! size in x dimension
......
......@@ -9,6 +9,51 @@
#define MAP_GRID_CUDA_KER_HPP_
/*! \brief this class is a functor for the "for_each" algorithm
*
* This class is a functor for the "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to copy one encap into another encap object
*
* \tparam encap source
* \tparam encap dst
*
*/
template<typename T_type>
struct copy_switch_memory_c_no_cpy
{
//! encapsulated source object
const typename memory_traits_inte<T_type>::type & src;
//! encapsulated destination object
typename memory_traits_inte<T_type>::type & dst;
/*! \brief constructor
*
* \param src source encapsulated object
* \param dst destination encapsulated object
*
*/
inline copy_switch_memory_c_no_cpy(const typename memory_traits_inte<T_type>::type & src,
typename memory_traits_inte<T_type>::type & dst)
:src(src),dst(dst)
{
};
//! It calls the copy function for each property
template<typename T>
inline void operator()(T& t) const
{
boost::fusion::at_c<T::value>(dst).mem = boost::fusion::at_c<T::value>(src).mem;
// Increment the reference of mem
boost::fusion::at_c<T::value>(dst).mem->incRef();
boost::fusion::at_c<T::value>(dst).mem_r.bind_ref(boost::fusion::at_c<T::value>(src).mem_r);
boost::fusion::at_c<T::value>(dst).switchToDevicePtrNoCopy();
}
};
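The functor above is designed to be driven over the property indices by boost::mpl::for_each (the commented-out for_each_ref call in toKernel() shows the intended call site). A compact, self-contained sketch of that iteration pattern:
// How a per-property functor is driven over a compile-time index range.
#include <boost/mpl/for_each.hpp>
#include <boost/mpl/range_c.hpp>
#include <iostream>

struct print_property
{
	template<typename T>            // T is an integral constant wrapper
	void operator()(T) const
	{
		std::cout << "visiting property " << T::value << "\n";
	}
};

int main()
{
	// calls print_property::operator() once for each index in [0,3)
	boost::mpl::for_each< boost::mpl::range_c<int,0,3> >(print_property());
}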
/*! \brief grid interface available when on the GPU
*
* \tparam n_buf number of template buffers
......
......@@ -26,6 +26,7 @@
#include "NN/Mem_type/MemBalanced.hpp"
#include "NN/Mem_type/MemMemoryWise.hpp"
#include "NN/CellList/NNc_array.hpp"
#include "cuda/CellList_cpu_ker.cuh"
//! Wrapper of the unordered map
template<typename key,typename val>
......@@ -1059,31 +1060,43 @@ public:
this->g_m = g_m;
}
#ifdef CUDA_GPU
CellList_cpu_ker<dim,T,typename Mem_type::toKernel_type,transform> toKernel()
{
CellList_cpu_ker<dim,T,typename Mem_type::toKernel_type,transform> cl(Mem_type::toKernel());
return cl;
}
#endif
/////////////////////////////////////
/////////////////////////////////////
/////////////////////////////////////
/*! \brief Set the n_dec number
*
* \param n_dec
*
*/
void set_ndec(size_t n_dec)
{
this->n_dec = n_dec;
}
/*! \brief Get the n_dec number
*
* \return n_dec
*
*/
size_t get_ndec() const
{
return n_dec;
}
/////////////////////////////////////
/////////////////////////////////////
};
/*! \brief Calculate parameters for the cell list
......
......@@ -8,8 +8,8 @@
#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLISTM_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLISTM_HPP_
#include "CellNNIteratorM.hpp"
#include "CellList.hpp"
#include "CellNNIteratorM.hpp"
struct PV_cl
{
......
......@@ -293,7 +293,7 @@ void test_sub_index2()
template<unsigned int dim, typename T>
void create_n_part(int n_part,
openfpm::vector<Point<dim,T>,CudaMemory,typename memory_traits_inte<Point<dim,T>>::type,memory_traits_inte> & pl,
CellList<dim,T, Mem_fast> & cl)
CellList<dim,T, Mem_fast<>> & cl)
{
pl.resize(n_part);
......@@ -320,7 +320,7 @@ void create_n_part(int n_part,
}
template<unsigned int dim, typename T, typename cnt_type, typename ids_type>
void create_starts_and_parts_ids(CellList<dim,T, Mem_fast> & cl,
void create_starts_and_parts_ids(CellList<dim,T, Mem_fast<>> & cl,
grid_sm<dim,void> & gr,
size_t n_part,
size_t n_cell,
......@@ -394,7 +394,7 @@ void test_fill_cell()
off[i] = 0;
}
CellList<dim,T, Mem_fast> cl(domain,div_host,0);
CellList<dim,T, Mem_fast<>> cl(domain,div_host,0);
openfpm::vector<Point<dim,T>,CudaMemory,typename memory_traits_inte<Point<dim,T>>::type,memory_traits_inte> pl;
create_n_part(5000,pl,cl);
......@@ -520,7 +520,7 @@ void test_reorder_parts(size_t n_part)
domain.setHigh(i,1.0);
}
CellList<dim,T, Mem_fast> cl(domain,div_host,0);
CellList<dim,T, Mem_fast<>> cl(domain,div_host,0);
openfpm::vector<Point<dim,T>,CudaMemory,typename memory_traits_inte<Point<dim,T>>::type,memory_traits_inte> pl;
openfpm::vector<Point<dim,T>,CudaMemory,typename memory_traits_inte<Point<dim,T>>::type,memory_traits_inte> pl_out;
......@@ -870,7 +870,7 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu_force(
// Construct an equivalent CPU cell-list
CellList<dim,T,Mem_fast,shift<dim,T>> cl_cpu(box,div);
CellList<dim,T,Mem_fast<>,shift<dim,T>> cl_cpu(box,div);
// construct
......@@ -1038,5 +1038,49 @@ BOOST_AUTO_TEST_CASE( CellList_gpu_use_calc_force)
// Test the cell list
}
__global__ void cl_offload_gpu()
{
}
BOOST_AUTO_TEST_CASE( CellList_use_cpu_offload_test )
{
std::cout << "Test cell list offload gpu" << "\n";
// Subdivisions
size_t div[3] = {10,10,10};
// grid info
grid_sm<3,void> g_info(div);
Box<3,float> box({-1.0,-1.0,-1.0},{1.0,1.0,1.0});
// CellS = CellListM<dim,T,8>
CellList<3,float,Mem_fast<CudaMemory,int>> cl1(box,div);
openfpm::vector<Point<3,float>> v;
v.resize(10000);
for (size_t i = 0 ; i < v.size() ; i++)
{
v.template get<0>(i)[0] = 2.0 * (float)rand() / RAND_MAX - 1.0;
v.template get<0>(i)[1] = 2.0 * (float)rand() / RAND_MAX - 1.0;
v.template get<0>(i)[2] = 2.0 * (float)rand() / RAND_MAX - 1.0;
Point<3,float> xp = v.template get<0>(i);
size_t cl = cl1.getCell(xp);
cl1.add(cl,i);
}
auto test = cl1.toKernel();
std::cout << "End cell list offload gpu" << "\n";
// Test the cell list
}
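The test stops after building the kernel view; a possible continuation, sketched here under the assumption that the view is passed to a __global__ function by value (CellList_cpu_ker exposes no accessors yet, so the body stays empty):
// Hypothetical continuation of the test above (sketch only).
template<typename cl_ker_type>
__global__ void cl_offload_gpu_arg(cl_ker_type cl)
{
	// ... would query neighbor cells through cl once accessors exist ...
}

// inside CellList_use_cpu_offload_test, after toKernel():
//    cl_offload_gpu_arg<<<1,1>>>(test);
//    cudaDeviceSynchronize();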
BOOST_AUTO_TEST_SUITE_END()
/*
* CellList_cpu_ker.cuh
*
* Created on: Jul 30, 2018
* Author: i-bird
*/
#ifndef CELLLIST_CPU_KER_CUH_
#define CELLLIST_CPU_KER_CUH_
template<unsigned int dim, typename T, typename Mem_type, typename transform>
class CellList_cpu_ker: Mem_type
{
public:
CellList_cpu_ker(const Mem_type & mt)
:Mem_type(mt)
{}
};
#endif /* CELLLIST_CPU_KER_CUH_ */
......@@ -52,6 +52,8 @@ class Mem_bal
public:
typedef void toKernel_type;
//! expose the type of the local index
typedef local_index loc_index;
......
......@@ -8,6 +8,7 @@
#ifndef MEMFAST_HPP_
#define MEMFAST_HPP_
#include "config.h"
#include "Space/SpaceBox.hpp"
#include "util/mathutil.hpp"
#include "Space/Shape/HyperCube.hpp"
......@@ -16,6 +17,29 @@
#include "util/common.hpp"
#include "Vector/map_vector.hpp"
template <typename Memory, typename local_index>
class Mem_fast_ker
{
//! Number of slots for each cell
local_index slot;
//! number of particles in each cell
openfpm::vector_gpu_ker<aggregate<local_index>> cl_n;
//! base that stores the data
typedef openfpm::vector_gpu_ker<aggregate<local_index>> base;
//! elements that each cell stores (each cell can store a number
//! of elements == slot )
base cl_base;
public:
Mem_fast_ker(openfpm::vector_gpu_ker<aggregate<local_index>> cl_n, openfpm::vector_gpu_ker<aggregate<local_index>> cl_base)
:cl_n(cl_n),cl_base(cl_base)
{}
};
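Both Mem_fast_ker and Mem_fast below use the same addressing scheme: a fixed number of slots per cell, the fill level of cell c in cl_n[c], and its j-th element at cl_base[c*slot + j]. A self-contained sketch of that layout, including the slot-doubling growth used by the copy loop further down; std::vector is a stand-in for the openfpm vectors:
// Slot-per-cell addressing as used by Mem_fast / Mem_fast_ker.
#include <cstddef>
#include <iostream>
#include <vector>

struct slot_cells
{
	size_t slot = 2;                 // capacity of each cell
	std::vector<size_t> cl_n;        // fill level of each cell
	std::vector<size_t> cl_base;     // flat storage, cell c at [c*slot, c*slot+slot)

	explicit slot_cells(size_t n_cell)
	:cl_n(n_cell,0),cl_base(n_cell*slot) {}

	void add(size_t c, size_t ele)
	{
		if (cl_n[c] == slot) grow();          // all slots of c used: double them
		cl_base[c*slot + cl_n[c]] = ele;
		cl_n[c]++;
	}

	size_t get(size_t c, size_t j) const { return cl_base[c*slot + j]; }

private:
	void grow()
	{
		// Double the slots, copying cell i to its new offset 2*i*slot,
		// mirroring the swap loop in Mem_fast further down.
		std::vector<size_t> cl_base_(cl_base.size()*2);
		for (size_t i = 0 ; i < cl_n.size() ; i++)
		{
			for (size_t j = 0 ; j < cl_n[i] ; j++)
			{cl_base_[2*i*slot + j] = cl_base[slot*i + j];}
		}
		cl_base.swap(cl_base_);
		slot *= 2;
	}
};

int main()
{
	slot_cells cells(4);
	for (size_t k = 0 ; k < 5 ; k++) cells.add(1,k);  // forces one grow()
	std::cout << cells.get(1,4) << "\n";              // prints 4
}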
/*! \brief It is a class that works like a vector of vectors
*
* \tparam local_index type used for the local index
......@@ -26,17 +50,17 @@
* is the maximum number of elements across the vectors
*
*/
template <typename local_index = size_t>
template <typename Memory = HeapMemory, typename local_index = size_t>
class Mem_fast
{
//! Number of slots for each cell
local_index slot;
//! number of particles in each cell
openfpm::vector<local_index> cl_n;
openfpm::vector<aggregate<local_index>,Memory> cl_n;
//! base that stores the data
typedef typename openfpm::vector<local_index> base;
typedef typename openfpm::vector<aggregate<local_index>,Memory> base;
//! elements that each cell stores (each cell can store a number
//! of elements == slot )
......@@ -55,8 +79,8 @@ class Mem_fast
// copy cl_base
for (size_t i = 0 ; i < cl_n.size() ; i++)
{
for (size_t j = 0 ; j < cl_n.get(i) ; j++)
cl_base_.get(2*i*slot + j) = cl_base.get(slot * i + j);
for (local_index j = 0 ; j < cl_n.template get<0>(i) ; j++)
{cl_base_.template get<0>(2*i*slot + j) = cl_base.template get<0>(slot * i + j);}
}
// Double the number of slots
......@@ -69,6 +93,8 @@ class Mem_fast