Commit 8eedc3b4 authored by incardon

Some optimizations

parent c4b479f6
......@@ -756,7 +756,7 @@ public:
* \return itself
*
*/
inline encapc<dim,T,Mem> & operator=(const T & obj)
__device__ __host__ inline encapc<dim,T,Mem> & operator=(const T & obj)
{
copy_fusion_vector_encap<typename T::type,decltype(*this)> cp(obj.data,*this);
......
......@@ -213,7 +213,12 @@ struct grid_gpu_ker
/*! \brief Copy the element of g at key2 into this grid at key1
 *
 * The source element is first staged into a plain aggregate (tmp):
 * copy_encap_vector_fusion reads each property of the source encap
 * into tmp.data, then tmp is assigned to the destination encap in a
 * single step. Staging avoids reading and writing through two encaps
 * of (possibly the same) grid storage at once.
 *
 * \param key1 destination key in this grid
 * \param g    source grid (kernel view)
 * \param key2 source key in g
 *
 */
__device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T_,layout_base> & g, const grid_key_dx<dim> & key2)
{
	T_ tmp;

	copy_encap_vector_fusion<decltype(g.get_o(key2)),typename T_::type> cp(g.get_o(key2),tmp.data);

	// visit every property of the aggregate
	// (T_::max_prop — the template parameter is T_, not T, everywhere else here)
	boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T_::max_prop> >(cp);

	this->get_o(key1) = tmp;
}
template<unsigned int ... prp> __device__ inline void set(const grid_key_dx<dim> & key1,const grid_gpu_ker<dim,T_,layout_base> & g, const grid_key_dx<dim> & key2)
......
......@@ -725,6 +725,55 @@ BOOST_AUTO_TEST_CASE(grid_resize_less)
BOOST_REQUIRE_EQUAL(g1.size(),25ul);
}
BOOST_AUTO_TEST_CASE(copy_encap_vector_fusion_test)
{
	// Round-trip one grid element through a plain fusion vector:
	// fill element (0,0) with distinct values, copy its encap into tmp
	// via copy_encap_vector_fusion, write tmp into element (1,1) and
	// check every scalar/vector/tensor component survived.
	typedef aggregate<float,float[3],float[3][3]> aggr;

	size_t sz2[] = {5,5};

	grid_cpu<2,aggr> g(sz2);
	g.setMemory();

	aggr::type tmp;

	grid_key_dx<2> key({0,0});
	grid_key_dx<2> key1({1,1});

	// distinct values: scalar = 1, vector = 2..4, tensor = 5..13
	g.template get<0>(key) = 1.0;

	for (size_t i = 0 ; i < 3 ; i++)
	{g.template get<1>(key)[i] = 2.0 + i;}

	for (size_t i = 0 ; i < 3 ; i++)
	{
		for (size_t j = 0 ; j < 3 ; j++)
		{g.template get<2>(key)[i][j] = 5.0 + 3.0*i + j;}
	}

	// stage the source element into the fusion vector tmp
	copy_encap_vector_fusion<decltype(g.get_o(key)),typename aggr::type> cp(g.get_o(key),tmp);
	boost::mpl::for_each_ref< boost::mpl::range_c<int,0,aggr::max_prop> >(cp);

	// write the staged element into the second grid point
	g.get_o(key1) = tmp;

	BOOST_REQUIRE_EQUAL(g.template get<0>(key),g.template get<0>(key1));

	for (size_t i = 0 ; i < 3 ; i++)
	{BOOST_REQUIRE_EQUAL(g.template get<1>(key)[i],g.template get<1>(key1)[i]);}

	for (size_t i = 0 ; i < 3 ; i++)
	{
		for (size_t j = 0 ; j < 3 ; j++)
		{BOOST_REQUIRE_EQUAL(g.template get<2>(key)[i][j],g.template get<2>(key1)[i][j]);}
	}
}
BOOST_AUTO_TEST_SUITE_END()
#endif
......@@ -281,7 +281,7 @@ public:
sorted_domain_particles_ids.resize(pl.size());
sorted_domain_particles_dg.resize(pl.size());
auto ite = pl.getGPUIterator();
auto ite = pl.getGPUIterator(64);
// Here we reorder the particles to improve coalescing access
CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
......
......@@ -25,11 +25,12 @@ class NN_gpu_it
const openfpm::array<ids_type,dim,cnt_type> & off;
cnt_type p_id;
cnt_type p_id_end;
cnt_type c_id;
__device__ void SelectValid()
{
while (p_id >= starts.template get<0>(c_id+1) && isNext())
while (p_id >= p_id_end && isNext())
{
cnt_type id = cell_act.get(0);
cell_act.set_d(0,id+1);
......@@ -56,6 +57,7 @@ class NN_gpu_it
c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_act);
p_id = starts.template get<0>(c_id);
p_id_end = starts.template get<0>(c_id+1);
}
}
......@@ -80,6 +82,7 @@ public:
c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_start);
p_id = starts.template get<0>(c_id);
p_id_end = starts.template get<0>(c_id+1);
SelectValid();
}
......
......@@ -82,7 +82,7 @@ struct copy_fusion_vector_encap
* \param dst destination fusion vector
*
*/
inline copy_fusion_vector_encap(const bfv & src, enc & dst)
__device__ __host__ inline copy_fusion_vector_encap(const bfv & src, enc & dst)
:src(src),dst(dst){};
#ifdef SE_CLASS1
......@@ -109,4 +109,52 @@ struct copy_fusion_vector_encap
};
/*! \brief this class is a functor for "for_each" algorithm
 *
 * It copy an encap into a boost::fusion::vector, property by property
 * (the mirror operation of copy_fusion_vector_encap)
 *
 */
template<typename enc, typename bfv>
struct copy_encap_vector_fusion
{
	//! source encapsulated object
	const enc & src;

	//! destination fusion vector
	bfv & dst;

	/*! \brief constructor
	 *
	 * It define the copy parameters.
	 *
	 * \param src source encapsulated object
	 * \param dst destination fusion vector
	 *
	 */
	__device__ __host__ inline copy_encap_vector_fusion(const enc & src, bfv & dst)
	:src(src),dst(dst){};

#ifdef SE_CLASS1

	/*! \brief Constructor
	 *
	 * Calling this constructor produce an error. This class store the reference of the object,
	 * this mean that the object passed must not be a temporal object
	 *
	 */
	// NOTE: must carry the class name (the original said copy_fusion_vector_encap,
	// which is not a constructor of this class and breaks SE_CLASS1 builds)
	inline copy_encap_vector_fusion(const enc && src, bfv && dst)
	:src(src),dst(dst)
	{std::cerr << "Error: " <<__FILE__ << ":" << __LINE__ << " Passing a temporal object\n";};

#endif

	//! It call the copy function for each property
	template<typename T>
	__device__ __host__ inline void operator()(T& t)
	{
		// destination element type inside the fusion vector
		typedef typename boost::mpl::at<bfv,boost::mpl::int_<T::value> >::type copy_dst;
		// source element type as returned by the encap, reference stripped
		typedef typename std::remove_reference<decltype(src.template get<T::value>())>::type copy_src;

		meta_copy_d<copy_src,copy_dst>::meta_copy_d_(src.template get<T::value>(),boost::fusion::at_c<T::value>(dst));
	}
};
#endif /* OPENFPM_DATA_SRC_GRID_COPY_FUSION_VECTOR_HPP_ */
......@@ -67,7 +67,7 @@ struct meta_copy_d
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc & src, Tdst & dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc & src, Tdst & dst)
{
copy_general<Tsrc>(src,dst);
}
......@@ -78,7 +78,7 @@ struct meta_copy_d
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc & src, Tdst && dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc & src, Tdst && dst)
{
copy_general<Tsrc>(src,dst);
}
......@@ -145,7 +145,7 @@ struct meta_copy_d<Tsrc[N1],Tdst>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc src[N1], Tdst && dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1], Tdst && dst)
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......@@ -159,7 +159,7 @@ struct meta_copy_d<Tsrc[N1],Tdst>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc src[N1], Tdst & dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1], Tdst & dst)
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......@@ -178,7 +178,7 @@ struct meta_copy_d<Tsrc,Tdst[N1]>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc & src, Tdst dst[N1])
__device__ __host__ static inline void meta_copy_d_(const Tsrc & src, Tdst dst[N1])
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......@@ -197,7 +197,7 @@ struct meta_copy_d<Tsrc[N1],Tdst[N1]>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc src[N1], Tdst dst[N1])
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1], Tdst dst[N1])
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......@@ -270,7 +270,7 @@ struct meta_copy<T[N1][N2]>
template<typename Tsrc, typename Tdst,size_t N1,size_t N2>
struct meta_copy_d<Tsrc[N1][N2],Tdst>
{
static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst && dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst && dst)
{
/*! \brief copy and object from src to dst
*
......@@ -287,7 +287,7 @@ struct meta_copy_d<Tsrc[N1][N2],Tdst>
}
}
static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst & dst)
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst & dst)
{
/*! \brief copy and object from src to dst
*
......@@ -315,7 +315,7 @@ struct meta_copy_d<Tsrc,Tdst[N1][N2]>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc & src, Tdst dst[N1][N2])
__device__ __host__ static inline void meta_copy_d_(const Tsrc & src, Tdst dst[N1][N2])
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......@@ -339,7 +339,7 @@ struct meta_copy_d<Tsrc[N1][N2],Tdst[N1][N2]>
* \param dst destination object
*
*/
static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst dst[N1][N2])
__device__ __host__ static inline void meta_copy_d_(const Tsrc src[N1][N2], Tdst dst[N1][N2])
{
for (size_t i1 = 0 ; i1 < N1 ; i1++)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment