Commit e7f888c1 authored by incardon's avatar incardon

Fixing general interface

parent e7010bdc
Pipeline #461 failed with stages
in 1 second
......@@ -501,7 +501,7 @@ public:
* \return key of the grid that id identify
*
*/
__device__ inline grid_key_dx<N> InvLinId(mem_id id) const
__device__ __host__ inline grid_key_dx<N> InvLinId(mem_id id) const
{
// Inversion of linearize
......
......@@ -798,7 +798,7 @@ __global__ void calc_force_number(vector_pos pos, vector_ns s_t_ns, CellList_typ
while (it.isNext())
{
auto q = it.get();
auto q = it.get_sort();
int s1 = s_t_ns.template get<0>(q);
......@@ -823,7 +823,7 @@ __global__ void calc_force_list(vector_pos pos, vector_ns s_t_ns, CellList_type
while (it.isNext())
{
auto q = it.get();
auto q = it.get_sort();
int s1 = s_t_ns.template get<0>(q);
......@@ -945,7 +945,8 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu_force(
openfpm::vector<aggregate<unsigned int>,CudaMemory,typename memory_traits_inte<aggregate<unsigned int>>::type,memory_traits_inte> n_out_scan;
openfpm::vector<aggregate<unsigned int>,CudaMemory,typename memory_traits_inte<aggregate<unsigned int>>::type,memory_traits_inte> nn_list;
scan<unsigned int,unsigned char>(n_out,n_out_scan);
scan<unsigned int,unsigned char> sc;
sc.scan_(n_out,n_out_scan);
n_out_scan.template deviceToHost<0>();
if (n_out_scan.template get<0>(pl.size()) == 0)
......@@ -1126,7 +1127,8 @@ BOOST_AUTO_TEST_CASE( CellList_use_cpu_offload_test )
openfpm::vector_gpu<aggregate<int>> os_scan;
os_scan.resize(v.size());
scan<int,int>(os,os_scan);
scan<int,int>sc;
sc.scan_(os,os_scan);
os_scan.deviceToHost<0>();
os.deviceToHost<0>(os.size()-1,os.size()-1);
......
......@@ -52,6 +52,9 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
//! \brief cell padding
openfpm::array<ids_type,dim,cnt_type> off;
//! scan object
scan<cnt_type,ids_type> sc;
//! Additional information in general (used to understand if the cell-list)
//! has been constructed from an old decomposition
size_t n_dec;
......@@ -191,8 +194,7 @@ public:
static_cast<ids_type *>(part_ids.template getDeviceBuffer<0>()));
// now we scan
scan<cnt_type,ids_type>(cl_n,starts);
sc.scan_(cl_n,starts);
// now we construct the cells
......
......@@ -83,12 +83,12 @@ public:
SelectValid();
}
__device__ cnt_type get()
__device__ cnt_type get_sort()
{
return p_id;
}
__device__ cnt_type get_orig()
__device__ cnt_type get()
{
return srt.template get<0>(p_id);
}
......
......@@ -1104,6 +1104,115 @@ public:
}
};
/*! \brief Implementation of 1-D std::vector like structure
*
* this implementation is just a wrapper for the std::vector. It work a little different from vector.
* In general for a normal vector of objects A vector<A> if you resize to zero, the destructor of
* the object A is called.This vector differ in this behaviour. the destructor is not called. This give the possibility
* to have a set of fully retained objects. This class is just a simple wrapper for the normal openfpm::vector where
* size and resize are redefined to change the behaviour. A destructive resize is callable with resize_base(), and the internal
* size of the base vactor can be querried with size_base()
*
* \param T base type
*
*/
template<typename T>
class vector_fr
:private vector<T,HeapMemory,typename memory_traits_lin<T>::type,memory_traits_lin,grow_policy_double,STD_VECTOR>
{
typedef vector<T,HeapMemory,typename memory_traits_lin<T>::type,memory_traits_lin,grow_policy_double,STD_VECTOR> base_type;
//! size of the vector
size_t v_size = 0;
public:
/*! \brief return the size of the vector
*
* \return the size
*
*/
size_t size()
{
return v_size;
}
/*! \brief return the base size of the vector
*
* \return the size
*
*/
size_t size_base()
{
return base_type::size();
}
/*! \brief resize the vector retaining the objects
*
* \param new size of the vector
*
*/
void resize(size_t sz)
{
if (sz <= base_type::size())
{
v_size = sz;
return;
}
base_type::resize(sz);
v_size = sz;
}
/*! \brief resize the base vector (this kill the objects)
*
* \param new size of the vector
*
*/
size_t resize_base(size_t sz)
{
base_type::resize(sz);
v_size = sz;
}
/*! \brief Get an element of the vector
*
* \param id element to get
*
* \return the element reference
*
*/
inline T & get(size_t id)
{
return base_type::get(id);
}
/*! \brief Get an element of the vector
*
* \param id element to get
*
* \return the element reference
*
*/
inline T & last()
{
return base_type::get(size()-1);
}
/*! swap the content of the vector
*
* \param v vector to be swapped with
*
*/
void swap(openfpm::vector_fr<T> & v)
{
size_t v_size_tmp = v.v_size;
v.v_size = v_size;
v_size = v_size_tmp;
base_type::swap(v);
}
};
#endif /* MAP_VECTOR_STD_HPP_ */
......@@ -464,49 +464,68 @@ __global__ void gexscan(int n,
{vout[i] = val[i];}
}
/*! \brief Scan is a class because it use internally temporary buffers that are heavy to reconstruct
*
*
*
*/
template<typename cnt_type, typename ids_type>
void scan(openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & cl_n,
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & cl_n_scan)
class scan
{
constexpr int THREADS = 128;
constexpr int ratio = 4*sizeof(cnt_type)/sizeof(ids_type);
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> red;
openfpm::vector<aggregate<ids_type>,CudaMemory,typename memory_traits_inte<aggregate<ids_type>>::type,memory_traits_inte> compressed;
int nblocks = (cl_n.size() + THREADS * ratio - 1 ) / (THREADS * ratio);
red.resize(nblocks);
public:
auto ite = cl_n.getGPUIterator();
void scan_(openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & cl_n,
openfpm::vector<aggregate<cnt_type>,CudaMemory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> & cl_n_scan)
{
constexpr int THREADS = 128;
constexpr int ratio = 4*sizeof(cnt_type)/sizeof(ids_type);
compressed.resize(cl_n.size());
compress4<cnt_type,ids_type><<<ite.wthr,ite.thr>>>((cnt_type)cl_n.size(),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<ids_type *>(compressed.template getDeviceBuffer<0>()));
int nblocks = (cl_n.size() + THREADS * ratio - 1 ) / (THREADS * ratio);
red.resize(nblocks);
breduce<THREADS/32,cnt_type,ids_type,ratio_reduction<cnt_type,ids_type>><<<nblocks, THREADS >>>(cl_n.size() / ratio * 4,
(cnt_type *)compressed.template getDeviceBuffer<0>(),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));
auto ite = cl_n.getGPUIterator();
compressed.resize(cl_n.size());
compress4<cnt_type,ids_type><<<ite.wthr,ite.thr>>>((cnt_type)cl_n.size(),
static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
static_cast<ids_type *>(compressed.template getDeviceBuffer<0>()));
bexscan<THREADS,cnt_type><<<1, THREADS, nblocks*sizeof(uint)>>>(nblocks,
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));
breduce<THREADS/32,cnt_type,ids_type,ratio_reduction<cnt_type,ids_type>><<<nblocks, THREADS >>>(cl_n.size() / ratio * 4,
(cnt_type *)compressed.template getDeviceBuffer<0>(),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));
size_t raw_size = cl_n.size();
// resize to be multiple of 16
bexscan<THREADS,cnt_type><<<1, THREADS, nblocks*sizeof(uint)>>>(nblocks,
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()));
size_t ml = ((raw_size + ratio - 1) / ratio) *ratio;
cl_n_scan.resize(ml);
size_t raw_size = cl_n.size();
gexscan<THREADS/32,ratio_extend<cnt_type,ids_type>> <<< nblocks, THREADS >>>((cl_n.size() + ratio - 1 ) / ratio,
static_cast<typename ratio_extend<cnt_type,ids_type>::cnt_type4 *>(compressed.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()),
static_cast<typename ratio_extend<cnt_type,ids_type>::cnt_type4 *>(cl_n_scan.template getDeviceBuffer<0>()));
// resize to be multiple of 16
cl_n_scan.resize(raw_size);
}
size_t ml = ((raw_size + ratio - 1) / ratio) *ratio;
cl_n_scan.resize(ml);
gexscan<THREADS/32,ratio_extend<cnt_type,ids_type>> <<< nblocks, THREADS >>>((cl_n.size() + ratio - 1 ) / ratio,
static_cast<typename ratio_extend<cnt_type,ids_type>::cnt_type4 *>(compressed.template getDeviceBuffer<0>()),
static_cast<cnt_type *>(red.template getDeviceBuffer<0>()),
static_cast<typename ratio_extend<cnt_type,ids_type>::cnt_type4 *>(cl_n_scan.template getDeviceBuffer<0>()));
cl_n_scan.resize(raw_size);
}
};
#else
// In case we do not have NVCC we create a stub
template<typename cnt_type, typename ids_type>
class scan
{
};
#endif
......
......@@ -313,7 +313,8 @@ void test_scan(size_t num)
cl_n.template hostToDevice<0>();
scan<cnt_type,ids_type>(cl_n,cl_n_scan);
scan<cnt_type,ids_type>sc;
sc.scan_(cl_n,cl_n_scan);
cl_n_scan.template deviceToHost<0>();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment