Commit a07bf056 authored by incardon's avatar incardon

Vector of Cell-list to continue ...

parent f5368a09
...@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) ...@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
########################### Executables ########################### Executables
if(CUDA_FOUND) if(CUDA_FOUND)
set(CUDA_SOURCES Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu ../openfpm_devices/src/memory/CudaMemory.cu) set(CUDA_SOURCES Vector/cuda/vector_dist_gpu_MP_tests.cu Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu ../openfpm_devices/src/memory/CudaMemory.cu)
else() else()
set(CUDA_SOURCES) set(CUDA_SOURCES)
endif() endif()
...@@ -97,7 +97,7 @@ install(FILES Decomposition/CartDecomposition.hpp ...@@ -97,7 +97,7 @@ install(FILES Decomposition/CartDecomposition.hpp
Decomposition/common.hpp Decomposition/common.hpp
Decomposition/Decomposition.hpp Decomposition/Decomposition.hpp
Decomposition/ie_ghost.hpp Decomposition/ie_ghost.hpp
Decomposition/Domain_NN_calculator_cart.hpp Decomposition/Domain_NN_calculator_cart.hpp
Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp
Decomposition/ORB.hpp Decomposition/ORB.hpp
Decomposition/dec_optimizer.hpp Decomposition/dec_optimizer.hpp
......
...@@ -864,6 +864,10 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -864,6 +864,10 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
v_pos_out.template get<0>(i)[1] = i+10000; v_pos_out.template get<0>(i)[1] = i+10000;
v_pos_out.template get<0>(i)[2] = i+20000; v_pos_out.template get<0>(i)[2] = i+20000;
v_pos.template get<0>(i)[0] = 0;
v_pos.template get<0>(i)[1] = 0;
v_pos.template get<0>(i)[2] = 0;
v_prp_out.template get<0>(i)[0] = i+60123; v_prp_out.template get<0>(i)[0] = i+60123;
v_prp_out.template get<0>(i)[1] = i+73543; v_prp_out.template get<0>(i)[1] = i+73543;
v_prp_out.template get<0>(i)[2] = i+82432; v_prp_out.template get<0>(i)[2] = i+82432;
...@@ -944,7 +948,6 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort) ...@@ -944,7 +948,6 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1]; match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1];
match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2]; match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
match &= v_pos.template get<0>(10000-i-1)[0] == 0; match &= v_pos.template get<0>(10000-i-1)[0] == 0;
match &= v_pos.template get<0>(10000-i-1)[1] == 0; match &= v_pos.template get<0>(10000-i-1)[1] == 0;
match &= v_pos.template get<0>(10000-i-1)[2] == 0; match &= v_pos.template get<0>(10000-i-1)[2] == 0;
......
...@@ -1192,4 +1192,66 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda) ...@@ -1192,4 +1192,66 @@ BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda)
} }
} }
/*! Exercise vector_dist_gpu::compareHostAndDevicePos
 *
 * The positions and properties are filled with pseudo-random values on the host,
 * the particles are redistributed and uploaded, and then the host/device position
 * comparison is checked three times: right after the upload (must match), after a
 * host-only modification (must differ) and after an upload followed by writing the
 * same value again on the host (must match).
 */
BOOST_AUTO_TEST_CASE(vector_dist_compare_host_device)
{
	Box<3,double> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
	Ghost<3,double> g(0.1);
	size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};

	// the test is skipped when 16 or more processors are used
	if (create_vcluster().size() >= 16)
	{return;}

	vector_dist_gpu<3,double,aggregate<double,double[3],double[3][3]>> vdg(10000,domain,bc,g,DEC_GRAN(128));

	// one pseudo-random draw, same expression used for every component
	auto draw = []() -> double {return (double)rand() / RAND_MAX;};

	for (auto dom_it = vdg.getDomainIterator() ; dom_it.isNext() ; ++dom_it)
	{
		auto key = dom_it.get();

		// position
		for (int c = 0 ; c < 3 ; c++)
		{vdg.getPos(key)[c] = draw();}

		// scalar property
		vdg.template getProp<0>(key) = draw();

		// vector property
		for (int c = 0 ; c < 3 ; c++)
		{vdg.template getProp<1>(key)[c] = draw();}

		// tensor property, row by row
		for (int r = 0 ; r < 3 ; r++)
		{
			for (int c = 0 ; c < 3 ; c++)
			{vdg.template getProp<2>(key)[r][c] = draw();}
		}
	}

	vdg.map();

	vdg.hostToDeviceProp<0,1,2>();
	vdg.hostToDevicePos();

	// host and device have just been synchronized
	bool same = vdg.compareHostAndDevicePos(0.00001,0.00000001);
	BOOST_REQUIRE_EQUAL(same,true);

	// change the host copy only: the comparison has to fail
	vdg.getPos(100)[0] = 0.99999999;
	same = vdg.compareHostAndDevicePos(0.00001,0.00000001);
	BOOST_REQUIRE_EQUAL(same,false);

	// upload, write the same value on the host again: the comparison has to succeed
	vdg.hostToDevicePos();
	vdg.getPos(100)[0] = 0.99999999;
	same = vdg.compareHostAndDevicePos(0.00001,0.00000001);
	BOOST_REQUIRE_EQUAL(same,true);
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()
...@@ -131,4 +131,258 @@ __device__ inline void process_ghost_device_particle_prp(unsigned int i, unsigne ...@@ -131,4 +131,258 @@ __device__ inline void process_ghost_device_particle_prp(unsigned int i, unsigne
object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(i)); object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(i));
} }
/*! \brief Compare the host and the device copy of a scalar property
 *
 * The host values of property prp are saved in a temporary vector, the device
 * buffer is downloaded over them with deviceToHost<prp>(), the two sets of values
 * are compared element by element and finally the saved host values are written
 * back, so the host content of v_prp is the same before and after the call.
 *
 * \tparam base_type type of the property
 * \tparam prp index of the property inside v_prp
 *
 */
template<typename base_type, unsigned int prp>
struct compare_host_device
{
	/*! \brief Run the comparison
	 *
	 * \param v_prp vector containing the property (must provide getIterator(), get<prp>() and deviceToHost<prp>())
	 * \param tol absolute tolerance, an element differs when |host - device| >= tol
	 * \param silent if true the differences are not printed on std::cout
	 *
	 * \return true if no element differs, false otherwise
	 *
	 */
	template<typename St, typename vector_type>
	static bool compare(vector_type & v_prp,St & tol, bool silent = false)
	{
		bool ret = true;

		// Temporary storage for the host values
		openfpm::vector<aggregate<base_type>> tmp;
		tmp.resize(v_prp.size());

		// save the host values into tmp
		auto it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			tmp.template get<0>(p) = v_prp.template get<prp>(p);

			++it;
		}

		// from here the host side of v_prp contains the device values
		v_prp.template deviceToHost<prp>();

		// compare the saved host values against the downloaded device values
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			// the comparison has to be done on the property values (slot 0 of tmp,
			// slot prp of v_prp), not on the whole encapsulated elements
			if (fabs(tmp.template get<0>(p) - v_prp.template get<prp>(p)) >= tol)
			{
				if (silent == false)
				{
					// the key is streamed directly, as the Point specialization does
					std::cout << "Host and Device buffer differ over set tollerance: " << "Host[" << p << "]=" << tmp.template get<0>(p) <<
					" Device[" << p << "]="<< v_prp.template get<prp>(p) << " differ more than: " << tol << std::endl;
				}

				ret = false;
			}

			++it;
		}

		// restore the host values (tmp has the same size as v_prp)
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			// tmp has a dependent type, the template disambiguator is required
			v_prp.template get<prp>(p) = tmp.template get<0>(p);

			++it;
		}

		return ret;
	}
};
/*! \brief Compare the host and the device copy of a vector of points
 *
 * The host points are saved in a temporary vector, the device buffer is downloaded
 * over them with deviceToHost<prp>(), every component is compared and finally the
 * saved host points are written back into v_pos.
 *
 * \tparam base_type scalar type of the point components
 * \tparam N1 number of components of each point
 * \tparam prp index passed to deviceToHost
 *
 */
template<typename base_type,unsigned int N1, unsigned int prp>
struct compare_host_device<Point<N1,base_type>,prp>
{
	/*! \brief Run the comparison
	 *
	 * A component is reported as different when |host - device| >= tol and both
	 * the host and the device value have a magnitude greater than near.
	 *
	 * \param v_pos vector of points (must provide getIterator(), get(), get<0>() and deviceToHost<prp>())
	 * \param tol absolute tolerance
	 * \param near magnitude threshold, components with |value| <= near on either side are ignored
	 * \param silent if true the differences are not printed on std::cout
	 *
	 * \return true if no component is reported as different, false otherwise
	 *
	 */
	template<typename St, typename vector_type>
	static bool compare(vector_type & v_pos,St & tol, St & near, bool silent = false)
	{
		bool ret = true;

		// Temporary storage for the host points
		openfpm::vector<Point<N1,base_type>> tmp;
		tmp.resize(v_pos.size());

		// save the host points into tmp
		auto it = v_pos.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			tmp.get(p) = v_pos.get(p);

			++it;
		}

		// from here the host side of v_pos contains the device values
		v_pos.template deviceToHost<prp>();

		// compare the saved host points against the downloaded device points
		it = v_pos.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				const base_type host_v = tmp.template get<0>(p)[j];
				const base_type dev_v = v_pos.template get<0>(p)[j];

				// both magnitudes are checked against near: testing the device value only
				// for being non-zero would hide every mismatch where the device holds 0
				const bool above_near = fabs(host_v) > near && fabs(dev_v) > near;

				if (fabs(host_v - dev_v) >= tol && above_near)
				{
					if (silent == false)
					{
						std::cout << "Host and Device buffer differ over set tollerance: " << "Host[" << p << "]=" << host_v
						<< " Device[" << p << "]="<< dev_v <<
						" differ more than: " << tol << std::endl;
					}

					ret = false;
				}
			}

			++it;
		}

		// restore the host points
		it = tmp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			v_pos.get(p) = tmp.get(p);

			++it;
		}

		return ret;
	}
};
/*! \brief Compare the host and the device copy of a one-dimensional array property
 *
 * The host values of property prp are saved component by component in a temporary
 * vector, the device buffer is downloaded over them with deviceToHost<prp>(), every
 * component is compared and finally the saved host values are written back.
 *
 * \tparam base_type scalar type of the array elements
 * \tparam N1 size of the array
 * \tparam prp index of the property inside v_prp
 *
 */
template<typename base_type,unsigned int N1, unsigned int prp>
struct compare_host_device<base_type[N1],prp>
{
	/*! \brief Run the comparison
	 *
	 * \param v_prp vector containing the property (must provide getIterator(), get<prp>() and deviceToHost<prp>())
	 * \param tol absolute tolerance, a component differs when |host - device| >= tol
	 * \param silent if true the differences are not printed on std::cout
	 *
	 * \return true if no component differs, false otherwise (the result can be
	 *         ignored by callers written against the previous void signature)
	 *
	 */
	template<typename St, typename vector_type>
	static bool compare(vector_type & v_prp,St & tol, bool silent = false)
	{
		bool ret = true;

		// Temporary storage for the host values
		openfpm::vector<aggregate<base_type[N1]>> tmp;
		tmp.resize(v_prp.size());

		// save the host values into tmp, arrays are copied element by element
		auto it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{tmp.template get<0>(p)[j] = v_prp.template get<prp>(p)[j];}

			++it;
		}

		// from here the host side of v_prp contains the device values
		v_prp.template deviceToHost<prp>();

		// compare the saved host values against the downloaded device values
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				if (fabs(tmp.template get<0>(p)[j] - v_prp.template get<prp>(p)[j]) >= tol)
				{
					if (silent == false)
					{
						// the key is streamed directly, as the Point specialization does
						std::cout << "Host and Device buffer differ over set tollerance: " << "Host[" << p << "]=" << tmp.template get<0>(p)[j] << " Device[" << p << "]="<< v_prp.template get<prp>(p)[j] << " differ more than: " << tol << std::endl;
					}

					ret = false;
				}
			}

			++it;
		}

		// restore the host values
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				// tmp has a dependent type, the template disambiguator is required
				v_prp.template get<prp>(p)[j] = tmp.template get<0>(p)[j];
			}

			++it;
		}

		return ret;
	}
};
/*! \brief Compare the host and the device copy of a two-dimensional array property
 *
 * The host values of property prp are saved component by component in a temporary
 * vector, the device buffer is downloaded over them with deviceToHost<prp>(), every
 * component is compared and finally the saved host values are written back.
 *
 * \tparam base_type scalar type of the array elements
 * \tparam N1 first dimension of the array
 * \tparam N2 second dimension of the array
 * \tparam prp index of the property inside v_prp
 *
 */
template<typename base_type,unsigned int N1 , unsigned int N2, unsigned int prp>
struct compare_host_device<base_type[N1][N2],prp>
{
	/*! \brief Run the comparison
	 *
	 * The function is static like in the other specializations, so it can be
	 * called without an instance (calls through an instance remain valid).
	 *
	 * \param v_prp vector containing the property (must provide getIterator(), get<prp>() and deviceToHost<prp>())
	 * \param tol absolute tolerance, a component differs when |host - device| >= tol
	 * \param silent if true the differences are not printed on std::cout
	 *
	 * \return true if no component differs, false otherwise (the result can be
	 *         ignored by callers written against the previous void signature)
	 *
	 */
	template<typename St, typename vector_type>
	static bool compare(vector_type & v_prp,St & tol, bool silent = false)
	{
		bool ret = true;

		// Temporary storage for the host values
		openfpm::vector<aggregate<base_type[N1][N2]>> tmp;
		tmp.resize(v_prp.size());

		// save the host values into tmp; the iterator comes from v_prp (this struct
		// has no getIterator()) and arrays are copied element by element
		auto it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				for (size_t k = 0 ; k < N2 ; k++)
				{tmp.template get<0>(p)[j][k] = v_prp.template get<prp>(p)[j][k];}
			}

			++it;
		}

		// from here the host side of v_prp contains the device values
		v_prp.template deviceToHost<prp>();

		// compare the saved host values against the downloaded device values
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				for (size_t k = 0 ; k < N2 ; k++)
				{
					if (fabs(tmp.template get<0>(p)[j][k] - v_prp.template get<prp>(p)[j][k]) >= tol)
					{
						if (silent == false)
						{
							// the key is streamed directly, as the Point specialization does
							std::cout << "Host and Device buffer differ over set tollerance: " << "Host[" << p << "][" << j << "][" << k << "]=" << tmp.template get<0>(p)[j][k]
							<< " Device[" << p << "][" << j << "][" << k << "]=" << v_prp.template get<prp>(p)[j][k] << " differ more than: " << tol << std::endl;
						}

						ret = false;
					}
				}
			}

			++it;
		}

		// restore the host values
		it = v_prp.getIterator();

		while (it.isNext())
		{
			auto p = it.get();

			for (size_t j = 0 ; j < N1 ; j++)
			{
				for (size_t k = 0 ; k < N2 ; k++)
				{
					v_prp.template get<prp>(p)[j][k] = tmp.template get<0>(p)[j][k];
				}
			}

			++it;
		}

		return ret;
	}
};
#endif /* VECTOR_DIST_FUNCS_HPP_ */ #endif /* VECTOR_DIST_FUNCS_HPP_ */
...@@ -232,17 +232,6 @@ template<unsigned int dim, ...@@ -232,17 +232,6 @@ template<unsigned int dim,
template<typename> class layout_base = memory_traits_lin> template<typename> class layout_base = memory_traits_lin>
class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>
{ {
public:
//! Self type
typedef vector_dist<dim,St,prop,Decomposition,Memory,layout_base> self;
//! property object
typedef prop value_type;
typedef Decomposition Decomposition_type;
typedef openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> internal_position_vector_type;
private: private:
...@@ -251,7 +240,7 @@ private: ...@@ -251,7 +240,7 @@ private:
//! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor
//! the second element contain unassigned particles //! the second element contain unassigned particles
internal_position_vector_type v_pos; openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> v_pos;
//! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor
//! the second element contain unassigned particles //! the second element contain unassigned particles
...@@ -386,12 +375,27 @@ private: ...@@ -386,12 +375,27 @@ private:
public: public:
//! Self type
typedef vector_dist<dim,St,prop,Decomposition,Memory,layout_base> self;
//! property object (the prop template parameter)
typedef prop value_type;
//! Decomposition type (the Decomposition template parameter)
typedef Decomposition Decomposition_type;
//! Type of the internal position vector, taken from the v_pos member with decltype
//! (this is why the typedef is placed after the declaration of v_pos)
typedef decltype(v_pos) internal_position_vector_type;
//! Cell-list type instantiated with the internal position vector type
typedef CellList<dim, St, Mem_fast<>, shift<dim, St>, internal_position_vector_type > CellList_type;
//! space type //! space type
typedef St stype; typedef St stype;
//! dimensions of space //! dimensions of space
static const unsigned int dims = dim; static const unsigned int dims = dim;
//! Tag typedef whose name marks this class as a vector_dist
//! NOTE(review): presumably probed by type traits to recognize distributed vectors - confirm
typedef int yes_i_am_vector_dist;
/*! \brief Operator= for distributed vector /*! \brief Operator= for distributed vector
* *
* \param v vector to copy * \param v vector to copy
...@@ -442,6 +446,11 @@ public: ...@@ -442,6 +446,11 @@ public:
return *this; return *this;
} }
/*! \brief Default constructor
 *
 * Only the cluster handle and the option field are initialized here; the rest of
 * the structure contains garbage until it is properly set up.
 *
 * The option field is set to 0: there is no constructor argument to take it from,
 * and initializing the member from itself would read an indeterminate value.
 *
 */
vector_dist()
:v_cl(create_vcluster<Memory>()),opt(0)
{}
/*! \brief Copy Constructor /*! \brief Copy Constructor
* *
...@@ -2811,6 +2820,32 @@ public: ...@@ -2811,6 +2820,32 @@ public:
v_prp.swap(v_prp_out); v_prp.swap(v_prp_out);
} }
/*! \brief Check whether the host and device copies of the particle positions match up to a tolerance
 *
 * The check is forwarded to compare_host_device<Point<dim,St>,0>::compare applied
 * to the internal position vector v_pos.
 *
 * \param tol absolute tolerance
 * \param near magnitude threshold forwarded to the comparison, which uses it to
 *        ignore components whose magnitude does not exceed it (default -1.0)
 * \param silent forwarded to the comparison (default false)
 *
 * \return the value returned by the comparison: true when no difference is
 *         reported, false otherwise
 *
 */
bool compareHostAndDevicePos(St tol, St near = -1.0, bool silent = false)
{
return compare_host_device<Point<dim,St>,0>::compare(v_pos,tol,near,silent);
}
/*! \brief Placeholder for the host/device comparison of a property
 *
 * NOTE(review): the body is empty, so calling this function performs no
 * comparison and reports nothing. It looks like it is meant to be the property
 * counterpart of compareHostAndDevicePos - confirm and implement.
 *
 * \tparam prp property to check
 *
 * \param tol absolute tolerance (currently unused)
 *
 */
template<unsigned int prp>
void compareHostAndDeviceProp(St tol)
{
}
#endif #endif
......
...@@ -139,6 +139,15 @@ public: ...@@ -139,6 +139,15 @@ public:
}; };
/*! \brief Transformation node of the toKernel_transform algorithm for distributed vectors
 *
 * Specialization selected with index 2. T is expected to expose value_type,
 * dims and stype.
 *
 */
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,2>
{
	//! result of apply_transform on the value_type of T with the given layout
	using aggr = typename apply_transform<layout_base,typename T::value_type>::type;

	//! resulting type: vector_dist_ker with the dimensionality and space type of T and the transformed aggregate
	using type = vector_dist_ker<T::dims,typename T::stype,aggr>;
};
#endif #endif
#endif /* VECTOR_DIST_GPU_HPP_ */ #endif /* VECTOR_DIST_GPU_HPP_ */
...@@ -12,36 +12,39 @@ ...@@ -12,36 +12,39 @@
#include "NN/VerletList/VerletListM.hpp" #include "NN/VerletList/VerletListM.hpp"
template<typename Vector, typename CL, typename T> template<typename Vector, typename CL, typename T>
VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>,typename Vector::internal_position_vector_type,CL>
createVerlet(Vector & v, Vector & v1, CL & cl, T r_cut) createVerlet(Vector & v, Vector & v1, CL & cl, T r_cut)
{ {
VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>> ver; VerletList<Vector::dims,typename Vector::stype,Mem_fast<>,shift<Vector::dims,typename Vector::stype>,typename Vector::internal_position_vector_type,CL> ver;
ver.Initialize(cl,r_cut,v.getPosVector(),v1.getPosVector(),v.size_local()); ver.Initialize(cl,r_cut,v.getPosVector(),v1.getPosVector(),v.size_local());
return ver; return ver;
} }
template<unsigned int sh_byte, typename Vector , typename Vector1,typename CL, typename T> VerletListM<Vector::dims,typename Vector::stype,sh_byte,CL,shift<Vector::dims,typename Vector::stype>> createVerletM(size_t pp, Vector & v, Vector1 & phases, CL & cl, T r_cut) template<unsigned int sh_byte, typename Vector , typename Vector1,typename CL, typename T> VerletListM<Vector::dims,typename Vector::stype,sh_byte,CL,shift<Vector::dims,typename Vector::stype>,typename Vector::internal_position_vector_type>
createVerletM(size_t pp, Vector & v, Vector1 & phases, CL & cl, T r_cut)
{ {
VerletListM<Vector::dims,typename Vector::stype,sh_byte,CL,shift<Vector::dims,typename Vector::stype>> ver; VerletListM<Vector::dims,typename Vector::stype,sh_byte,CL,shift<Vector::dims,typename Vector::stype>,typename Vector::internal_position_vector_type> ver;
openfpm::vector<pos_v<Vector::dims,typename Vector::stype>> v_phases; openfpm::vector<pos_v<typename Vector::internal_position_vector_type>> v_phases;
for (size_t i = 0 ; i < phases.size() ; i++) for (size_t i = 0 ; i < phases.size() ; i++)
v_phases.add(pos_v<Vector::dims,typename Vector::stype>(phases.get(i).getPosVector())); {v_phases.add(pos_v<typename Vector::internal_position_vector_type>(phases.get(i).getPosVector()));}
ver.Initialize(cl,pp,r_cut,v.getPosVector(),v_phases,v.size_local()); ver.Initialize(cl,pp,r_cut,v.getPosVector(),v_phases,v.size_local());
return ver; return ver;
} }
template<unsigned int nbit, typename Vector, typename T> CellListM<Vector::dims,typename Vector::stype,nbit> createCellListM(openfpm::vector<Vector> & phases, T r_cut) template<unsigned int nbit, typename Vector, typename T>
CellListM<Vector::dims,typename Vector::stype,nbit,typename Vector::CellList_type>
createCellListM(openfpm::vector<Vector> & phases, T r_cut)
{ {
size_t div[3];