Commit 7ae9fb58 authored by incardon's avatar incardon

Fixing force cell-list on gpu

parent 7eb370cf
......@@ -34,7 +34,7 @@ template<typename e_src, typename e_dst, unsigned int ... prp>
struct copy_cpu_encap_encap_prp
{
//! encapsulated source object
const e_src & src;
e_src & src;
//! encapsulated destination object
e_dst & dst;
......@@ -46,7 +46,7 @@ struct copy_cpu_encap_encap_prp
* \param dst source encapsulated object
*
*/
__device__ __host__ inline copy_cpu_encap_encap_prp(const e_src & src, e_dst & dst)
__device__ __host__ inline copy_cpu_encap_encap_prp(e_src & src, e_dst & dst)
:src(src),dst(dst)
{
#ifdef SE_CLASS1
......
......@@ -218,7 +218,9 @@ struct grid_gpu_ker
template<unsigned int ... prp> __device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T_,layout_base> & g, const grid_key_dx<dim> & key2)
{
copy_cpu_encap_encap_prp<decltype(this->get_o(key1)),decltype(g.get_o(key2)),prp...> ec(this->get_o(key1),g.get_o(key2));
auto edest = this->get_o(key1);
copy_cpu_encap_encap_prp<decltype(g.get_o(key2)),decltype(this->get_o(key1)),prp...> ec(g.get_o(key2),edest);
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(ec);
}
......
......@@ -1054,6 +1054,22 @@ public:
swap(grid);
}
/*! \brief set only some properties
*
* \param key1 destination point
* \param g source
* \param key2 source point
*/
template<unsigned int ... prp>
__device__ inline void set(const grid_key_dx<dim> & key1,const grid_base_impl & g, const grid_key_dx<dim> & key2)
{
auto edest = this->get_o(key1);
copy_cpu_encap_encap_prp<decltype(g.get_o(key2)),decltype(this->get_o(key1)),prp...> ec(g.get_o(key2),edest);
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(prp)>>(ec);
}
/*! \brief set an element of the grid
*
* set an element of the grid
......@@ -1062,7 +1078,6 @@ public:
* \param obj value to set
*
*/
template<typename Memory> inline void set(grid_key_dx<dim> dx, const encapc<1,T,Memory> & obj)
{
#ifdef SE_CLASS2
......
......@@ -332,6 +332,8 @@ BOOST_AUTO_TEST_CASE( grid_safety_check )
grid_cpu<3,Point_test<float>> g(sz);
grid_cpu<3,Point_test<float>> g2(sz);
// try to access uninitialized grid
grid_key_dx<3> keyOut(23,1,1);
grid_key_dx<3> keyGood(15,1,1);
......@@ -447,6 +449,22 @@ BOOST_AUTO_TEST_CASE( grid_safety_check )
#endif
}
BOOST_AUTO_TEST_CASE( grid_set_prp_check )
{
size_t szz[2] = {8,8};
grid_cpu<2, Point_test<float> > c2(szz);
grid_cpu<2, Point_test<float> > c1(szz);
grid_key_dx<2> k1({0,0});
c2.template get<1>(k1) = 5.0;
c1.template set<1>(k1,c2,k1);
BOOST_REQUIRE_EQUAL(c1.template get<1>(k1),5.0);
}
BOOST_AUTO_TEST_CASE( grid_use)
{
/* tensor<int,3,3,3> c;
......
......@@ -589,6 +589,7 @@ void test_reorder_parts(size_t n_part)
bool check = true;
parts_prp_out.template deviceToHost<0>();
sort_to_not_sort.template deviceToHost<0>();
non_sort_to_sort.template deviceToHost<0>();
size_t st = 0;
for (size_t i = 0 ; i < tot ; i++)
......@@ -601,6 +602,7 @@ void test_reorder_parts(size_t n_part)
check &= parts_prp_out.template get<0>(st) == parts_prp.template get<0>(p);
check &= sort_to_not_sort.template get<0>(st) == p;
check &= non_sort_to_sort.template get<0>(p) == st;
st++;
}
......@@ -695,7 +697,9 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu(SpaceB
pl.template hostToDevice<0>();
pl_prp.template hostToDevice<0,1,2>();
cl2.template construct<decltype(pl),decltype(pl_prp)>(pl,pl_out,pl_prp,pl_prp_out);
// create an mgpu context
mgpu::standard_context_t context(false);
cl2.template construct<decltype(pl),decltype(pl_prp)>(pl,pl_out,pl_prp,pl_prp_out,context);
// Check
......@@ -736,7 +740,7 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu(SpaceB
// Check the sort to non sort buffer
auto & vsrt = cl2.private_get_sort_to_not_sorted();
auto & vsrt = cl2.getSortToNonSort();
vsrt.template deviceToHost<0>();
BOOST_REQUIRE_EQUAL(vsrt.size(),9);
......@@ -751,6 +755,19 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu(SpaceB
BOOST_REQUIRE_EQUAL(vsrt.template get<0>(7),6);
BOOST_REQUIRE_EQUAL(vsrt.template get<0>(8),7);
auto & vnsrt = cl2.getNonSortToSort();
BOOST_REQUIRE_EQUAL(vnsrt.size(),9);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(8),0);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(0),1);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(1),2);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(2),3);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(4),4);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(3),5);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(5),6);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(6),7);
BOOST_REQUIRE_EQUAL(vnsrt.template get<0>(7),8);
}
......@@ -864,7 +881,7 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu_force(
pl_prp_out.resize(pl.size());
pl_out.resize(pl.size());
pl_out.resize(pl.size()+1);
n_out.resize(pl.size()+1);
n_out.fill<0>(0);
pl_prp.resize(pl.size());
......@@ -892,7 +909,10 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu_force(
++it2;
}
cl2.template construct<decltype(pl),decltype(pl_prp)>(pl,pl_out,pl_prp,pl_prp_out);
size_t g_m = pl.size() / 2;
mgpu::standard_context_t context(false);
cl2.template construct<decltype(pl),decltype(pl_prp)>(pl,pl_out,pl_prp,pl_prp_out,context,g_m);
auto & s_t_ns = cl2.getSortToNonSort();
pl.template hostToDevice<0>();
......@@ -908,6 +928,21 @@ template<unsigned int dim, typename T, typename CellS> void Test_cell_gpu_force(
cl2.toKernel(),
n_out.toKernel());
// Domain particles
auto & gdsi = cl2.getDomainSortIds();
gdsi.template deviceToHost<0>();
bool match = true;
for (size_t i = 0 ; i < g_m ; i++)
{
unsigned int p = gdsi.template get<0>(i);
match &= (s_t_ns.template get<0>(p) < g_m);
}
BOOST_REQUIRE_EQUAL(match,true);
// Check
n_out.deviceToHost<0>();
......
......@@ -11,6 +11,11 @@
#define CL_SYMMETRIC 1
#define CL_NON_SYMMETRIC 2
#if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/context.hxx"
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#endif
/*! \brief Check this is a gpu or cpu type cell-list
*
*/
......@@ -52,6 +57,7 @@ struct populate_cell_list_no_sym_impl
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::standard_context_t & context,
size_t g_m)
{
cli.clear();
......@@ -72,12 +78,13 @@ struct populate_cell_list_no_sym_impl<true>
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::standard_context_t & context,
size_t g_m)
{
v_prp_out.resize(pos.size());
v_pos_out.resize(pos.size());
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out);
cli.template construct<decltype(pos),decltype(v_prp)>(pos,v_pos_out,v_prp,v_prp_out,context,g_m);
}
};
......@@ -133,9 +140,10 @@ void populate_cell_list_no_sym(openfpm::vector<Point<dim,T>,Memory,typename layo
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::standard_context_t & mgpu,
size_t g_m)
{
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,g_m);
populate_cell_list_no_sym_impl<is_gpu_celllist<CellList>::value>::populate(pos,v_pos_out,v_prp,v_prp_out,cli,mgpu,g_m);
}
/*! \brief populate the Cell-list with particles symmetric case
......@@ -175,11 +183,12 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base > & v_prp_out,
CellList & cli,
mgpu::standard_context_t & context,
size_t g_m,
size_t opt)
{
if (opt == CL_NON_SYMMETRIC)
{populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,g_m);}
{populate_cell_list_no_sym(pos,v_pos_out,v_prp,v_prp_out,cli,context,g_m);}
else
{populate_cell_list_sym(pos,cli,g_m);}
}
......@@ -201,6 +210,7 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename CellList>
void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> & pos,
CellList & cli,
mgpu::standard_context_t & context,
size_t g_m,
size_t opt)
{
......@@ -211,7 +221,7 @@ void populate_cell_list(openfpm::vector<Point<dim,T>,Memory,typename layout_base
openfpm::vector<Point<dim,T>,Memory,typename layout_base<Point<dim,T>>::type,layout_base> stub3;
populate_cell_list(pos,stub3,stub1,stub2,cli,g_m,opt);
populate_cell_list(pos,stub3,stub1,stub2,cli,context,g_m,opt);
}
/*! \brief Structure that contain a reference to a vector of particles
......
......@@ -18,6 +18,7 @@
#include "util/cuda/scan_cuda.cuh"
#include "NN/CellList/cuda/CellList_gpu_ker.cuh"
#include "util/cuda_util.hpp"
#include "NN/CellList/CellList_util.hpp"
constexpr int count = 0;
constexpr int start = 1;
......@@ -43,6 +44,12 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
//! \brief for each sorted index it show the index in the unordered
vector_cnt_type sorted_to_not_sorted;
//! Sorted domain particles domain or ghost
vector_cnt_type sorted_domain_particles_dg;
//! \brief the index of all the domain particles in the sorted vector
vector_cnt_type sorted_domain_particles_ids;
//! \brief for each non sorted index it show the index in the ordered vector
vector_cnt_type non_sorted_to_sorted;
......@@ -164,11 +171,16 @@ public:
return sorted_to_not_sorted;
}
vector_cnt_type & getNonSortedToSorted()
vector_cnt_type & getNonSortToSort()
{
return non_sorted_to_sorted;
}
vector_cnt_type & getDomainSortIds()
{
return sorted_domain_particles_ids;
}
/*! \brief construct from a list of particles
*
* \warning pl is assumed to be already be in device memory
......@@ -176,7 +188,7 @@ public:
* \param pl Particles list
*
*/
template<typename vector, typename vector_prp> void construct(vector & pl, vector & pl_out, vector_prp & pl_prp, vector_prp & pl_prp_out)
template<typename vector, typename vector_prp> void construct(vector & pl, vector & pl_out, vector_prp & pl_prp, vector_prp & pl_prp_out, mgpu::standard_context_t & mgpuContext, size_t g_m = 0)
{
#ifdef __NVCC__
......@@ -220,6 +232,10 @@ public:
sorted_to_not_sorted.resize(pl.size());
non_sorted_to_sorted.resize(pl.size());
sorted_domain_particles_ids.resize(pl.size());
sorted_domain_particles_dg.resize(pl.size());
auto ite = pl.getGPUIterator();
// Here we test fill cell
......@@ -236,6 +252,15 @@ public:
static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
ite = sorted_domain_particles_ids.getGPUIterator();
mark_domain_particles<<<ite.wthr,ite.thr>>>(sorted_to_not_sorted.toKernel(),sorted_domain_particles_ids.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
// now we sort the particles
mergesort((int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),(int *)sorted_domain_particles_ids.template getDeviceBuffer<0>(),
sorted_domain_particles_dg.size(), mgpu::template less_t<int>(), mgpuContext);
#else
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
......@@ -249,18 +274,13 @@ public:
return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform>
(starts.toKernel(),
sorted_to_not_sorted.toKernel(),
sorted_domain_particles_ids.toKernel(),
spacing_c,
div_c,
off,
this->getTransform());
}
vector_cnt_type & private_get_sort_to_not_sorted()
{
return sorted_to_not_sorted;
}
/*! \brief Clear the structure
*
*
......
......@@ -162,10 +162,15 @@ public:
template<unsigned int dim, typename T, typename cnt_type, typename ids_type, typename transform>
class CellList_gpu_ker
{
//! starting point for each cell
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts;
//! Sorted to non sorted ids conversion
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt;
//! Domain particles ids
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt;
//! Spacing
openfpm::array<T,dim,cnt_type> spacing_c;
......@@ -182,11 +187,12 @@ public:
CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
openfpm::array<T,dim,cnt_type> & spacing_c,
openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<ids_type,dim,cnt_type> & off,
const transform & t)
:starts(starts),srt(srt),spacing_c(spacing_c),div_c(div_c),off(off),t(t)
:starts(starts),srt(srt),dprt(dprt),spacing_c(spacing_c),div_c(div_c),off(off),t(t)
{}
inline __device__ grid_key_dx<dim,ids_type> getCell(const Point<dim,T> & xp) const
......@@ -194,12 +200,22 @@ public:
return cid_<dim,cnt_type,ids_type,transform>::get_cid_key(spacing_c,off,t,xp);
}
__device__ NN_gpu_it<dim,cnt_type,ids_type> getNNIterator(const grid_key_dx<dim,ids_type> & cid)
inline __device__ NN_gpu_it<dim,cnt_type,ids_type> getNNIterator(const grid_key_dx<dim,ids_type> & cid)
{
NN_gpu_it<dim,cnt_type,ids_type> ngi(cid,starts,srt,div_c,off);
return ngi;
}
inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getDomainSortIds()
{
return dprt;
}
inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getSortToNonSort()
{
return srt;
}
};
......
......@@ -289,6 +289,18 @@ __global__ void reorder_parts(int n,
non_sorted_to_sorted.template get<0>(code) = i;
}
template<typename vector_sort_index, typename vector_out_type>
__global__ void mark_domain_particles(vector_sort_index vsi, vector_out_type vout_ids, vector_out_type vout_dg, int g_m)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= vsi.size()) return;
vout_dg.template get<0>(i) = (vsi.template get<0>(i) < g_m)?0:1;
vout_ids.template get<0>(i) = i;
}
template<typename T>
struct to_type4
{
......
......@@ -306,10 +306,11 @@ private:
*/
void initCl(CellListImpl & cli, openfpm::vector<Point<dim,T>> & pos, size_t g_m, size_t opt)
{
mgpu::standard_context_t context(false);
if (opt & VL_SYMMETRIC || opt & VL_CRS_SYMMETRIC)
{populate_cell_list(pos,cli,g_m,CL_SYMMETRIC);}
{populate_cell_list(pos,cli,context,g_m,CL_SYMMETRIC);}
else
{populate_cell_list(pos,cli,g_m,CL_NON_SYMMETRIC);}
{populate_cell_list(pos,cli,context,g_m,CL_NON_SYMMETRIC);}
}
/*! \brief Create the Verlet list from a given cell-list
......
......@@ -32,6 +32,16 @@
#endif /* __cplusplus */
};
namespace mgpu
{
// Stub class for modern gpu
struct standard_context_t
{
standard_context_t(bool init)
{}
};
}
#else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment