Commit af68dafb authored by incardon's avatar incardon

Fixing for tracking Operators + async ghost

parent 3675a944
...@@ -701,8 +701,6 @@ template<typename VerletList> inline double calc_forces(particles & vd, VerletLi ...@@ -701,8 +701,6 @@ template<typename VerletList> inline double calc_forces(particles & vd, VerletLi
/* if (sum1 != sum2) /* if (sum1 != sum2)
{ {
std::cout << "PORCA TROIA: " << std::endl;
break; break;
}*/ }*/
......
openfpm_numerics @ 1897a889
Subproject commit ed016fb3b90c7176afbc6ce998c1d19dd177128f Subproject commit 1897a889d91379f7023a7c771da59533c0dfa207
...@@ -77,7 +77,7 @@ target_include_directories (pdata PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) ...@@ -77,7 +77,7 @@ target_include_directories (pdata PUBLIC ${LIBHILBERT_INCLUDE_DIRS})
target_include_directories (pdata PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (pdata PUBLIC ${Boost_INCLUDE_DIRS})
target_link_libraries(pdata ${Boost_LIBRARIES}) target_link_libraries(pdata ${Boost_LIBRARIES})
target_link_libraries(pdata -L${PARMETIS_ROOT}/lib parmetis) target_link_libraries(pdata ${PARMETIS_LIBRARIES})
target_link_libraries(pdata -L${METIS_ROOT}/lib metis) target_link_libraries(pdata -L${METIS_ROOT}/lib metis)
target_link_libraries(pdata -L${HDF5_ROOT}/lib hdf5 hdf5_hl) target_link_libraries(pdata -L${HDF5_ROOT}/lib hdf5 hdf5_hl)
target_link_libraries(pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES}) target_link_libraries(pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES})
......
...@@ -175,26 +175,14 @@ class domain_icell_calculator ...@@ -175,26 +175,14 @@ class domain_icell_calculator
auto ite = g.getGPUIterator(p1,p2,256); auto ite = g.getGPUIterator(p1,p2,256);
if (ite.wthr.x == 0)
{continue;}
vsi.setGPUInsertBuffer(ite.nblocks(),256); vsi.setGPUInsertBuffer(ite.nblocks(),256);
insert_icell<dim><<<ite.wthr,ite.thr>>>(vsi.toKernel(),cld,ite.start,p2); CUDA_LAUNCH((insert_icell<dim>),ite,vsi.toKernel(),cld,ite.start,p2);
vsi.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); vsi.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE);
////////////////////////// DEBUG ////////////////////////
vsi.private_get_vct_index().template deviceToHost<0>();
auto & test = vsi.private_get_vct_index();
for (int k = 0 ; k < test.size() - 1 ; k++)
{
if (test.template get<0>(k) > test.template get<0>(k+1))
{
std::cout << "BBBBBBUUUUUUUUUUUUUUUGGGGGGG" << std::endl;
}
}
/////////////////////////////////////////////////////////
} }
// calculate the number of kernel launch // calculate the number of kernel launch
...@@ -211,17 +199,18 @@ class domain_icell_calculator ...@@ -211,17 +199,18 @@ class domain_icell_calculator
auto p1 = cld.getCell(bx.getP1()); auto p1 = cld.getCell(bx.getP1());
auto p2 = cld.getCell(pp2); auto p2 = cld.getCell(pp2);
auto ite = g.getGPUIterator(p1,p2,256); auto ite = g.getGPUIterator(p1,p2,256);
if (ite.wthr.x == 0)
{continue;}
vs.setGPUInsertBuffer(ite.nblocks(),256); vs.setGPUInsertBuffer(ite.nblocks(),256);
vsi.setGPURemoveBuffer(ite.nblocks(),256); vsi.setGPURemoveBuffer(ite.nblocks(),256);
insert_remove_icell<dim><<<ite.wthr,ite.thr>>>(vs.toKernel(),vsi.toKernel(),cld,ite.start,p2); CUDA_LAUNCH(insert_remove_icell<dim>,ite,vs.toKernel(),vsi.toKernel(),cld,ite.start,p2);
vs.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); vs.template flush<>(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE);
vsi.flush_remove(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE); vsi.flush_remove(v_cl.getmgpuContext(),flust_type::FLUSH_ON_DEVICE);
} }
vs.swapIndexVector(icells); vs.swapIndexVector(icells);
......
...@@ -1262,6 +1262,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu ) ...@@ -1262,6 +1262,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu )
// Distributed vector // Distributed vector
vector_dist_gpu<3,float,part_prop> vd(k,box,bc,ghost,BIND_DEC_TO_GHOST); vector_dist_gpu<3,float,part_prop> vd(k,box,bc,ghost,BIND_DEC_TO_GHOST);
size_t start = vd.init_size_accum(k); size_t start = vd.init_size_accum(k);
auto it = vd.getIterator(); auto it = vd.getIterator();
......
...@@ -37,6 +37,8 @@ ...@@ -37,6 +37,8 @@
#include "NN/CellList/ProcKeys.hpp" #include "NN/CellList/ProcKeys.hpp"
#include "Vector/vector_dist_kernel.hpp" #include "Vector/vector_dist_kernel.hpp"
#include "NN/CellList/cuda/CellList_gpu.hpp" #include "NN/CellList/cuda/CellList_gpu.hpp"
#include "lib/pdata.hpp"
#include "cuda/vector_dist_operators_list_ker.hpp"
#define DEC_GRAN(gr) ((size_t)gr << 32) #define DEC_GRAN(gr) ((size_t)gr << 32)
...@@ -193,6 +195,30 @@ enum reorder_opt ...@@ -193,6 +195,30 @@ enum reorder_opt
LINEAR = 2 LINEAR = 2
}; };
template<typename vector, unsigned int impl>
struct cell_list_selector
{
typedef decltype(std::declval<vector>().getCellListGPU(0.0).toKernel()) ctype;
static ctype get(vector & v,
typename vector::stype & r_cut)
{
return v.getCellListGPU(r_cut).toKernel();
}
};
template<typename vector>
struct cell_list_selector<vector,comp_host>
{
typedef decltype(std::declval<vector>().getCellList(0.0)) ctype;
static ctype get(vector & v,
typename vector::stype & r_cut)
{
return v.getCellList(r_cut);
}
};
/*! \brief Distributed vector /*! \brief Distributed vector
* *
* This class represent a distributed vector, the distribution of the structure * This class represent a distributed vector, the distribution of the structure
...@@ -227,14 +253,14 @@ enum reorder_opt ...@@ -227,14 +253,14 @@ enum reorder_opt
* \tparam Memory layout * \tparam Memory layout
* *
*/ */
template<unsigned int dim, template<unsigned int dim,
typename St, typename St,
typename prop, typename prop,
typename Decomposition = CartDecomposition<dim,St>, typename Decomposition = CartDecomposition<dim,St>,
typename Memory = HeapMemory, typename Memory = HeapMemory,
template<typename> class layout_base = memory_traits_lin> template<typename> class layout_base = memory_traits_lin>
class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>,
private vector_dist_ker_list<vector_dist_ker<dim,St,prop,layout_base>>
{ {
public: public:
...@@ -541,6 +567,7 @@ public: ...@@ -541,6 +567,7 @@ public:
this->init_decomposition(box,bc,g,opt,gdist); this->init_decomposition(box,bc,g,opt,gdist);
#ifdef SE_CLASS3 #ifdef SE_CLASS3
se3.Initialize(); se3.Initialize();
#endif #endif
...@@ -1125,6 +1152,22 @@ public: ...@@ -1125,6 +1152,22 @@ public:
return cell_list; return cell_list;
} }
/*! \brief Construct a cell list starting from the stored particles
*
* \tparam CellL CellList type to construct
*
* \param r_cut interation radius, or size of each cell
* \param no_se3 avoid SE_CLASS3 checking
*
* \return the Cell list
*
*/
template<unsigned int impl>
typename cell_list_selector<self,impl>::ctype getCellListDev(St r_cut)
{
return cell_list_selector<self,impl>::get(*this,r_cut);
}
/*! \brief Construct a cell list starting from the stored particles /*! \brief Construct a cell list starting from the stored particles
* *
* \tparam CellL CellList type to construct * \tparam CellL CellList type to construct
...@@ -2157,6 +2200,8 @@ public: ...@@ -2157,6 +2200,8 @@ public:
this->template map_list_<prp...>(v_pos,v_prp,g_m,opt); this->template map_list_<prp...>(v_pos,v_prp,g_m,opt);
this->update(this->toKernel());
#ifdef SE_CLASS3 #ifdef SE_CLASS3
se3.map_post(); se3.map_post();
#endif #endif
...@@ -2182,6 +2227,8 @@ public: ...@@ -2182,6 +2227,8 @@ public:
this->template map_<obp>(v_pos,v_prp,g_m,opt); this->template map_<obp>(v_pos,v_prp,g_m,opt);
this->update(this->toKernel());
#ifdef SE_CLASS3 #ifdef SE_CLASS3
se3.map_post(); se3.map_post();
#endif #endif
...@@ -2210,6 +2257,8 @@ public: ...@@ -2210,6 +2257,8 @@ public:
this->template ghost_get_<GHOST_SYNC,prp...>(v_pos,v_prp,g_m,opt); this->template ghost_get_<GHOST_SYNC,prp...>(v_pos,v_prp,g_m,opt);
this->update(this->toKernel());
#ifdef SE_CLASS3 #ifdef SE_CLASS3
this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES); this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES);
...@@ -2262,6 +2311,8 @@ public: ...@@ -2262,6 +2311,8 @@ public:
this->template ghost_wait_<prp...>(v_pos,v_prp,g_m,opt); this->template ghost_wait_<prp...>(v_pos,v_prp,g_m,opt);
this->update(this->toKernel());
#ifdef SE_CLASS3 #ifdef SE_CLASS3
this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES); this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES);
...@@ -2505,6 +2556,8 @@ public: ...@@ -2505,6 +2556,8 @@ public:
v_prp.resize(rs); v_prp.resize(rs);
g_m = rs; g_m = rs;
this->update(this->toKernel());
} }
/*! \brief Output particle position and properties /*! \brief Output particle position and properties
...@@ -2795,13 +2848,24 @@ public: ...@@ -2795,13 +2848,24 @@ public:
* \return an usable vector in the kernel * \return an usable vector in the kernel
* *
*/ */
template<unsigned int ... prp> vector_dist_ker<dim,St,prop> toKernel() template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel()
{ {
vector_dist_ker<dim,St,prop> v(g_m,v_pos.toKernel(), v_prp.toKernel()); vector_dist_ker<dim,St,prop,layout_base> v(g_m,v_pos.toKernel(), v_prp.toKernel());
return v; return v;
} }
/*! \brief Return the internal vector_dist_ker_list structure
*
*
*
* \return
*/
vector_dist_ker_list<vector_dist_ker<dim,St,prop,layout_base>> & private_get_vector_dist_ker_list()
{
return *this;
}
/*! \brief Convert the grid into a data-structure compatible for computing into GPU /*! \brief Convert the grid into a data-structure compatible for computing into GPU
* *
* In comparison with toGPU return a version sorted better for coalesced memory * In comparison with toGPU return a version sorted better for coalesced memory
...@@ -2809,9 +2873,9 @@ public: ...@@ -2809,9 +2873,9 @@ public:
* \return an usable vector in the kernel * \return an usable vector in the kernel
* *
*/ */
template<unsigned int ... prp> vector_dist_ker<dim,St,prop> toKernel_sorted() template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel_sorted()
{ {
vector_dist_ker<dim,St,prop> v(g_m,v_pos_out.toKernel(), v_prp_out.toKernel()); vector_dist_ker<dim,St,prop,layout_base> v(g_m,v_pos_out.toKernel(), v_prp_out.toKernel());
return v; return v;
} }
......
...@@ -16,9 +16,44 @@ ...@@ -16,9 +16,44 @@
#define GET_PARTICLE_SORT(p,NN) if (blockDim.x*blockIdx.x + threadIdx.x >= NN.get_g_m()) {return;}\ #define GET_PARTICLE_SORT(p,NN) if (blockDim.x*blockIdx.x + threadIdx.x >= NN.get_g_m()) {return;}\
else{p = NN.getDomainSortIds().template get<0>(blockDim.x*blockIdx.x + threadIdx.x);} else{p = NN.getDomainSortIds().template get<0>(blockDim.x*blockIdx.x + threadIdx.x);}
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
*
*/
template<typename vector_dist_ker>
struct check_vector_dist_kernels
{
//! op1
const vector_dist_ker & o1;
//! op2
const vector_dist_ker & o2;
bool check;
/*! \brief constructor
*
* \param src source encapsulated object
* \param dst source encapsulated object
*
*/
inline check_vector_dist_kernels(const vector_dist_ker & o1, const vector_dist_ker & o2)
:o1(o1),o2(o2),check(false)
{};
//! It call the copy function for each property
template<typename T>
__device__ __host__ inline void operator()(T& t)
{
check &= o1.template getPointer<T::value>() == o2.template getPointer<T::value>();
}
};
template<unsigned int dim, template<unsigned int dim,
typename St, typename St,
typename prop> typename prop,
template<typename> class layout_base = memory_traits_inte>
class vector_dist_ker class vector_dist_ker
{ {
//! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle //! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle
...@@ -26,11 +61,11 @@ class vector_dist_ker ...@@ -26,11 +61,11 @@ class vector_dist_ker
//! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor
//! the second element contain unassigned particles //! the second element contain unassigned particles
mutable openfpm::vector_gpu_ker<Point<dim,St>,memory_traits_inte> v_pos; mutable openfpm::vector_gpu_ker<Point<dim,St>,layout_base> v_pos;
//! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor
//! the second element contain unassigned particles //! the second element contain unassigned particles
mutable openfpm::vector_gpu_ker<prop,memory_traits_inte> v_prp; mutable openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> v_prp;
public: public:
...@@ -40,7 +75,11 @@ public: ...@@ -40,7 +75,11 @@ public:
//! dimensions of space //! dimensions of space
static const unsigned int dims = dim; static const unsigned int dims = dim;
vector_dist_ker(int g_m, const openfpm::vector_gpu_ker<Point<dim,St>,memory_traits_inte> & v_pos, const openfpm::vector_gpu_ker<prop,memory_traits_inte> & v_prp) //! tag the type as a vector that run on kernel
typedef int vector_kernel;
vector_dist_ker(int g_m, const openfpm::vector_gpu_ker<Point<dim,St>,layout_base> & v_pos,
const openfpm::vector_gpu_ker<typename apply_transform<layout_base,prop>::type,layout_base> & v_prp)
:g_m(g_m),v_pos(v_pos),v_prp(v_prp) :g_m(g_m),v_pos(v_pos),v_prp(v_prp)
{} {}
...@@ -49,14 +88,14 @@ public: ...@@ -49,14 +88,14 @@ public:
* \return the number of particles * \return the number of particles
* *
*/ */
__device__ int size_local() {return g_m;} __device__ __host__ int size_local() const {return g_m;}
/*! \brief return the number of particles /*! \brief return the number of particles
* *
* \return the number of particles * \return the number of particles
* *
*/ */
__device__ int size() {return v_pos.size();} __device__ __host__ int size() const {return v_pos.size();}
/*! \brief Get the position of an element /*! \brief Get the position of an element
* *
...@@ -72,6 +111,20 @@ public: ...@@ -72,6 +111,20 @@ public:
return v_pos.template get<0>(vec_key); return v_pos.template get<0>(vec_key);
} }
/*! \brief Get the position of an element
*
* see the vector_dist iterator usage to get an element key
*
* \param vec_key element
*
* \return the position of the element in space
*
*/
__device__ inline auto getPos(const vect_dist_key_dx & vec_key) -> decltype(v_pos.template get<0>(vec_key.getKey()))
{
return v_pos.template get<0>(vec_key.getKey());
}
/*! \brief Get the position of an element /*! \brief Get the position of an element
* *
* see the vector_dist iterator usage to get an element key * see the vector_dist iterator usage to get an element key
...@@ -86,6 +139,20 @@ public: ...@@ -86,6 +139,20 @@ public:
return v_pos.template get<0>(vec_key); return v_pos.template get<0>(vec_key);
} }
/*! \brief Get the position of an element
*
* see the vector_dist iterator usage to get an element key
*
* \param vec_key element
*
* \return the position of the element in space
*
*/
__device__ inline auto getPos(const vect_dist_key_dx & vec_key) const -> decltype(v_pos.template get<0>(vec_key.getKey()))
{
return v_pos.template get<0>(vec_key.getKey());
}
/*! \brief Get the property of an element /*! \brief Get the property of an element
* *
* see the vector_dist iterator usage to get an element key * see the vector_dist iterator usage to get an element key
...@@ -101,6 +168,21 @@ public: ...@@ -101,6 +168,21 @@ public:
return v_prp.template get<id>(vec_key); return v_prp.template get<id>(vec_key);
} }
/*! \brief Get the property of an element
*
* see the vector_dist iterator usage to get an element key
*
* \tparam id property id
* \param vec_key vector element
*
* \return return the selected property of the vector element
*
*/
template<unsigned int id> __device__ inline auto getProp(const vect_dist_key_dx & vec_key) -> decltype(v_prp.template get<id>(vec_key.getKey()))
{
return v_prp.template get<id>(vec_key.getKey());
}
/*! \brief Get the property of an element /*! \brief Get the property of an element
* *
* see the vector_dist iterator usage to get an element key * see the vector_dist iterator usage to get an element key
...@@ -116,6 +198,21 @@ public: ...@@ -116,6 +198,21 @@ public:
return v_prp.template get<id>(vec_key); return v_prp.template get<id>(vec_key);
} }
/*! \brief Get the property of an element
*
* see the vector_dist iterator usage to get an element key
*
* \tparam id property id
* \param vec_key vector element
*
* \return return the selected property of the vector element
*
*/
template<unsigned int id> __device__ inline auto getProp(const vect_dist_key_dx & vec_key) const -> decltype(v_prp.template get<id>(vec_key.getKey()))
{
return v_prp.template get<id>(vec_key.getKey());
}
/*! \brief Return the internal position vector /*! \brief Return the internal position vector
* *
* *
...@@ -137,6 +234,46 @@ public: ...@@ -137,6 +234,46 @@ public:
return v_prp; return v_prp;
} }
__host__ vector_dist_iterator getDomainIterator() const
{
std::cout << __FILE__ << ":" << __LINE__ << " error getDomainIterator used on a vector_dist_ker object is not allowed" << std::endl;
return vector_dist_iterator(0,0);
}
/*! \brief Get an iterator that traverse the particles in the domain
*
* \return an iterator
*
*/
__host__ ite_gpu<1> getDomainIteratorGPU(size_t n_thr = 1024) const
{
#ifdef SE_CLASS3
se3.getIterator();
#endif
return v_pos.getGPUIteratorTo(g_m,n_thr);
}
/*! \brief Check that the two structures are the same (at level of pointers)
*
*
* \return
*/
__host__ bool operator==(const vector_dist_ker & v)
{
if (v.v_pos.template getPointer<0>() != v_pos.template getPointer<0>())
{return false;}
check_vector_dist_kernels<openfpm::vector_gpu_ker<prop,memory_traits_inte>> cv(this->v_prp,v.v_prp);
cv.check = true;
// Do the same for the properties
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,prop::max_prop> >(cv);
return cv.check;
}
}; };
// This is a tranformation node for vector_distributed for the algorithm toKernel_tranform // This is a tranformation node for vector_distributed for the algorithm toKernel_tranform
...@@ -145,7 +282,7 @@ struct toKernel_transform<layout_base,T,2> ...@@ -145,7 +282,7 @@ struct toKernel_transform<layout_base,T,2>
{ {
typedef typename apply_transform<layout_base,typename T::value_type>::type aggr; typedef typename apply_transform<layout_base,typename T::value_type>::type aggr;
typedef vector_dist_ker<T::dims,typename T::stype,aggr> type; typedef vector_dist_ker<T::dims,typename T::stype,aggr,layout_base> type;
}; };
#endif #endif
......
...@@ -29,7 +29,7 @@ public: ...@@ -29,7 +29,7 @@ public:
* \param key the local key * \param key the local key
* *
*/ */
inline void setKey(size_t key) __device__ __host__ inline void setKey(size_t key)