Commit 3c4362b5 authored by incardon's avatar incardon

Fixing Point operators

parent 8b4589e0
......@@ -359,11 +359,15 @@ struct host_to_device_impl
typedef typename toKernel_transform<layout_base,typename mem_r_type::value_type>::type kernel_type;
typedef boost::mpl::int_<(is_vector<typename mem_r_type::value_type>::value ||
is_vector_dist<typename mem_r_type::value_type>::value ||
is_gpu_celllist<typename mem_r_type::value_type>::value) + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value> crh_cond;
call_recursive_host_device_if_vector<typename mem_r_type::value_type,
kernel_type,
type_prp,
layout_base,
(is_vector<typename mem_r_type::value_type>::value || is_vector_dist<typename mem_r_type::value_type>::value ) + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value>
crh_cond::value>
::template transform<Memory,mem_r_type>(static_cast<Memory *>(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem),
boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,
start*sizeof(type_prp),
......@@ -374,7 +378,7 @@ struct host_to_device_impl
kernel_type,
type_prp,
layout_base,
is_vector<typename mem_r_type::value_type>::value + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value>
0>
::call(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,start,stop);
}
};
......
......@@ -22,18 +22,6 @@ enum cl_construct_opt
#endif
#include "util/cuda/ofp_context.hxx"
/*! \brief Check this is a gpu or cpu type cell-list
*
*/
template<typename T, typename Sfinae = void>
struct is_gpu_celllist: std::false_type {};
template<typename T>
struct is_gpu_celllist<T, typename Void<typename T::yes_is_gpu_celllist>::type> : std::true_type
{};
/*! \brief populate the Cell-list with particles non symmetric case on GPU
*
......
......@@ -27,7 +27,12 @@
constexpr int count = 0;
constexpr int start = 1;
template<unsigned int dim, typename T, typename Memory, typename transform = no_transform_only<dim,T>, typename cnt_type = unsigned int, typename ids_type = int>
template<unsigned int dim,
typename T,
typename Memory,
typename transform = no_transform_only<dim,T>,
typename cnt_type = unsigned int,
typename ids_type = int>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
typedef openfpm::vector<aggregate<cnt_type>,Memory,typename memory_traits_inte<aggregate<cnt_type>>::type,memory_traits_inte> vector_cnt_type;
......@@ -93,6 +98,21 @@ public:
//! Indicate that this cell list is a gpu type cell-list
typedef int yes_is_gpu_celllist;
//! the type of the space
typedef T stype;
//! dimensions of space
static const unsigned int dims = dim;
//! count type
typedef cnt_type cnt_type_;
//! id type
typedef ids_type ids_type_;
//! transform type
typedef transform transform_;
/*! \brief Copy constructor
*
*
......@@ -514,6 +534,12 @@ public:
}
};
// This is a tranformation node for vector_distributed for the algorithm toKernel_tranform
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,4>
{
typedef CellList_gpu_ker<T::dims,typename T::stype,typename T::cnt_type_,typename T::ids_type_,typename T::transform_> type;
};
#endif
......
......@@ -292,6 +292,44 @@ public:
return srt;
}
/*! \brief Get the number of cells this cell-list contain
*
* \return number of cells
*/
inline __device__ unsigned int getNCells() const
{
return starts.size() - 1;
}
/*! \brief Return the number of elements in the cell
*
* \param cell_id id of the cell
*
* \return number of elements in the cell
*
*/
inline __device__ cnt_type getNelements(const cnt_type cell_id) const
{
return starts.template get<0>(cell_id+1) - starts.template get<0>(cell_id);
}
/*! \brief Get an element in the cell
*
* \tparam i property to get
*
* \param cell cell id
* \param ele element id
*
* \return The element value
*
*/
inline __device__ cnt_type get(size_t cell, size_t ele)
{
cnt_type p_id = starts.template get<0>(cell) + ele;
return srt.template get<0>(p_id);
}
inline __device__ unsigned int get_g_m()
{
return g_m;
......
This diff is collapsed.
......@@ -521,6 +521,163 @@ BOOST_AUTO_TEST_CASE( Point_expression_usage_with_array )
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],5.0f/9.0f) ;}
}
BOOST_AUTO_TEST_CASE( Point_expression_usage_with_conversion )
{
float scal = 0.0;
Point<3,float> p1({0.1,0.1,0.1});
Point<3,float> p2({0.2,0.3,0.4});
Point<3,float> p3({0.6,0.7,0.9});
p3 = p1 + 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),p1.get(i) + 2);}
p3 = 2 + p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),2 + p2.get(i));}
p3 = p1 - 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),p1.get(i) - 2);}
p3 = 2 - p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),2 - p2.get(i));}
p3 = p1 * 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),p1.get(i) * 2);}
p3 = 2 * p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),2 * p2.get(i));}
p3 = p1 / 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),p1.get(i) / 2.0);}
p3 = 2 / p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),(float)2 / p2.get(i));}
// Point function test
double tmp = 5 + (p1 * p2);
double check = 5 + p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2);
BOOST_REQUIRE_EQUAL(tmp,check);
p3 = 5 + (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),check) ;}
tmp = 5 - (p1 * p2);
check = 5 - p1.get(0)*p2.get(0) - p1.get(1)*p2.get(1) - p1.get(2)*p2.get(2);
BOOST_REQUIRE_EQUAL(tmp,check);
p3 = 5 - (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),check) ;}
tmp = 5 * (p1 * p2);
check = 5*(p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2));
BOOST_REQUIRE_EQUAL(tmp,check);
p3 = 5 * (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),check) ;}
tmp = 5 / (p1 * p2);
check = 5/(p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2));
BOOST_REQUIRE_EQUAL(tmp,check);
p3 = 5 / (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3.get(i),check) ;}
p3 = 2*(p1*p2)*p1;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2))*p1.get(i);
BOOST_REQUIRE_EQUAL(p3[i],check) ;
}
p3 = (p1*p2)*2*p1;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2))*p1.get(i);
BOOST_REQUIRE_EQUAL(p3[i],check) ;
}
p3 = (p1*p2)*p1*2;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(p1.get(0)*p2.get(0) + p1.get(1)*p2.get(1) + p1.get(2)*p2.get(2))*p1.get(i);
BOOST_REQUIRE_EQUAL(p3[i],check) ;
}
}
BOOST_AUTO_TEST_CASE( Point_expression_usage_with_conversion_array )
{
float scal = 0.0;
float p1_p[] = {0.1,0.1,0.1};
float p2_p[] = {0.2,0.3,0.4};
float p3_p[] = {0.6,0.7,0.9};
Point<3,float> pp1({0.1,0.1,0.1});
Point<3,float> pp2({0.2,0.3,0.4});
Point<3,float> pp3({0.6,0.7,0.9});
auto p1 = getExprR(p1_p);
auto p2 = getExprR(p2_p);
auto p3L = getExprL(p3_p);
p3L = p1 + 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],p1_p[i] + 2);}
p3L = 2 + p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],2 + p2_p[i]);}
p3L = p1 - 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],p1_p[i] - 2);}
p3L = 2 - p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],2 - p2_p[i]);}
p3L = p1 * 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],p1_p[i] * 2);}
p3L = 2 * p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],2 * p2_p[i]);}
p3L = p1 / 2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],p1_p[i] / 2);}
p3L = 2 / p2;
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],2 / p2_p[i]);}
// Point function test
p3L = asin(p1/5 + p2/6);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_CLOSE(p3_p[i],std::asin(p1_p[i]/5 + p2_p[i]/6),0.1) ;}
p3L = acos(p1/5 + p2/6);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],std::acos(p1_p[i]/5 + p2_p[i]/6)) ;}
double tmp = 5 + (p1 * p2);
double check = 5 + pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2);
BOOST_REQUIRE_EQUAL(tmp,check);
p3L = 5 + (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],check) ;}
tmp = 5 - (p1 * p2);
check = 5 - pp1.get(0)*pp2.get(0) - pp1.get(1)*pp2.get(1) - pp1.get(2)*pp2.get(2);
BOOST_REQUIRE_EQUAL(tmp,check);
p3L = 5 - (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],check) ;}
tmp = 5 * (p1 * p2);
check = 5*(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2));
BOOST_REQUIRE_EQUAL(tmp,check);
p3L = 5 * (p1 * p2);
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],check) ;}
tmp = 5 / (p1 * p2);
check = 5/(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2));
BOOST_REQUIRE_EQUAL(tmp,check);
p3L = 2*(p1*p2)*p1;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2))*pp1.get(i);
BOOST_REQUIRE_EQUAL(p3_p[i],check) ;
}
p3L = (p1*p2)*2*p1;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2))*pp1.get(i);
BOOST_REQUIRE_EQUAL(p3_p[i],check) ;
}
p3L = (p1*p2)*p1*2;
for (size_t i = 0 ; i < 3 ; i++)
{
check = 2*(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2))*pp1.get(i);
BOOST_REQUIRE_EQUAL(p3_p[i],check) ;
}
p3L = 5 / (p1 * p2);
check = 5/(pp1.get(0)*pp2.get(0) + pp1.get(1)*pp2.get(1) + pp1.get(2)*pp2.get(2));
for (size_t i = 0 ; i < 3 ; i++) {BOOST_REQUIRE_EQUAL(p3_p[i],check) ;}
}
BOOST_AUTO_TEST_SUITE_END()
......
......@@ -320,5 +320,151 @@ BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_vector_and_point_tensor )
}
}
BOOST_AUTO_TEST_CASE( vector_cuda_copy )
{
openfpm::vector_gpu<aggregate<float,float[3],float[3][3]>> v1;
openfpm::vector_gpu<aggregate<float,float[3],float[3][3]>> v2;
v1.resize(100);
auto ite = v1.getIterator();
while (ite.isNext())
{
auto p = ite.get();
v1.template get<0>(p) = p + 100;
v1.template get<0>(p) = p + 2000;
v1.template get<0>(p) = p + 3000;
v1.template get<0>(p) = p + 4000;
v1.template get<1>(p)[0] = p + 5000;
v1.template get<1>(p)[1] = p + 6000;
v1.template get<1>(p)[2] = p + 7000;
v1.template get<2>(p)[0][0] = p + 8000;
v1.template get<2>(p)[0][1] = p + 9000;
v1.template get<2>(p)[0][2] = p + 10000;
v1.template get<2>(p)[1][0] = p + 11000;
v1.template get<2>(p)[1][1] = p + 12000;
v1.template get<2>(p)[2][2] = p + 13000;
v1.template get<2>(p)[2][0] = p + 14000;
v1.template get<2>(p)[2][1] = p + 15000;
v1.template get<2>(p)[2][2] = p + 16000;
++ite;
}
v1.hostToDevice<0,1,2>();
ite = v1.getIterator();
while (ite.isNext())
{
auto p = ite.get();
v1.template get<0>(p) = p + 6100;
v1.template get<0>(p) = p + 62000;
v1.template get<0>(p) = p + 63000;
v1.template get<0>(p) = p + 64000;
v1.template get<1>(p)[0] = p + 65000;
v1.template get<1>(p)[1] = p + 66000;
v1.template get<1>(p)[2] = p + 67000;
v1.template get<2>(p)[0][0] = p + 68000;
v1.template get<2>(p)[0][1] = p + 69000;
v1.template get<2>(p)[0][2] = p + 610000;
v1.template get<2>(p)[1][0] = p + 611000;
v1.template get<2>(p)[1][1] = p + 612000;
v1.template get<2>(p)[2][2] = p + 613000;
v1.template get<2>(p)[2][0] = p + 614000;
v1.template get<2>(p)[2][1] = p + 615000;
v1.template get<2>(p)[2][2] = p + 616000;
++ite;
}
v2 = v1;
// first check the CPU
bool match = true;
ite = v2.getIterator();
while (ite.isNext())
{
auto p = ite.get();
match = v2.template get<0>(p) == p + 6100;
match = v2.template get<0>(p) == p + 62000;
match = v2.template get<0>(p) == p + 63000;
match = v2.template get<0>(p) == p + 64000;
match = v2.template get<1>(p)[0] == p + 65000;
match = v2.template get<1>(p)[1] == p + 66000;
match = v2.template get<1>(p)[2] == p + 67000;
match = v2.template get<2>(p)[0][0] == p + 68000;
match = v2.template get<2>(p)[0][1] == p + 69000;
match = v2.template get<2>(p)[0][2] == p + 610000;
match = v2.template get<2>(p)[1][0] == p + 611000;
match = v2.template get<2>(p)[1][1] == p + 612000;
match = v2.template get<2>(p)[2][2] == p + 613000;
match = v2.template get<2>(p)[2][0] == p + 614000;
match = v2.template get<2>(p)[2][1] == p + 615000;
match = v2.template get<2>(p)[2][2] == p + 616000;
++ite;
}
BOOST_REQUIRE_EQUAL(match,true);
v2.deviceToHost<0,1,2>();
ite = v2.getIterator();
while (ite.isNext())
{
auto p = ite.get();
match = v2.template get<0>(p) == p + 100;
match = v2.template get<0>(p) == p + 2000;
match = v2.template get<0>(p) == p + 3000;
match = v2.template get<0>(p) == p + 4000;
match = v2.template get<1>(p)[0] == p + 5000;
match = v2.template get<1>(p)[1] == p + 6000;
match = v2.template get<1>(p)[2] == p + 7000;
match = v2.template get<2>(p)[0][0] == p + 8000;
match = v2.template get<2>(p)[0][1] == p + 9000;
match = v2.template get<2>(p)[0][2] == p + 10000;
match = v2.template get<2>(p)[1][0] == p + 11000;
match = v2.template get<2>(p)[1][1] == p + 12000;
match = v2.template get<2>(p)[2][2] == p + 13000;
match = v2.template get<2>(p)[2][0] == p + 14000;
match = v2.template get<2>(p)[2][1] == p + 15000;
match = v2.template get<2>(p)[2][2] == p + 16000;
++ite;
}
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_SUITE_END()
......@@ -22,6 +22,14 @@ __global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type
object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}
template<typename vector_src_type, typename vector_dst_type>
__global__ void copy_two_vectors(vector_src_type v_src, vector_dst_type v_dst)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
v_src.get(i) = v_dst.get(i);
}
#endif
namespace openfpm
......
......@@ -1304,13 +1304,26 @@ namespace openfpm
if (base.size() < v_size)
base.resize(rsz);
// copy the object
// copy the object on cpu
for (size_t i = 0 ; i < v_size ; i++ )
{
grid_key_dx<1> key(i);
base.set(key,mv.base,key);
}
// and device
if (Memory::isDeviceHostSame() == false)
{
#ifdef __NVCC__
if (mv.size() != 0)
{
auto it = mv.getGPUIterator();
copy_two_vectors<<<it.wthr,it.thr>>>(toKernel(),mv.toKernel());
}
#endif
}
return *this;
}
......
......@@ -65,4 +65,17 @@ template<typename T>
struct is_vector_dist<T, typename Void< typename T::yes_i_am_vector_dist>::type > : std::true_type
{};
///////////////////////////////////////////////////////////////////////////////////////////////////////////
/*! \brief Check this is a gpu or cpu type cell-list
*
*/
template<typename T, typename Sfinae = void>
struct is_gpu_celllist: std::false_type {};
template<typename T>
struct is_gpu_celllist<T, typename Void<typename T::yes_is_gpu_celllist>::type> : std::true_type
{};
#endif /* SRC_VECTOR_UTIL_HPP_ */
......@@ -70,7 +70,7 @@ namespace openfpm
// Definition of the box
template<unsigned int dim , typename T> class Box;
template<template <typename> class layout_base, typename T, int = is_vector_native<T>::value + 2*is_vector_dist<T>::value >
template<template <typename> class layout_base, typename T, int = is_vector_native<T>::value + 2*is_vector_dist<T>::value + 4*is_gpu_celllist<T>::value >
struct toKernel_transform;
template<template <typename> class layout_base, typename T, typename ... args>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment