Commit ad71d154 authored by incardon

OpenFPM First test of GPU direct

parent 9602917f
@@ -12,6 +12,8 @@
 #include <mpi.h>
 /*! \brief Set of wrapper classes for MPI_Ibcast
  *
  * The purpose of these classes is to correctly choose the right call based on the type we want to broadcast
@@ -46,7 +48,7 @@ public:
 template<typename T> class MPI_IBcastW
 {
 public:
-	static inline void bcast(size_t proc ,openfpm::vector<T> & v, MPI_Request & req)
+	template<typename Memory> static inline void bcast(size_t proc ,openfpm::vector<T,Memory> & v, MPI_Request & req)
 	{
 		MPI_SAFE_CALL(MPI_Ibcast(v.getPointer(), v.size() * sizeof(T),MPI_BYTE, proc , MPI_COMM_WORLD,&req));
 	}
@@ -174,4 +176,78 @@ public:
 };
+/*! \brief this class is a functor for "for_each" algorithm
+ *
+ * This class is a functor for "for_each" algorithm. For each
+ * element of the boost::vector the operator() is called.
+ * It is mainly used to process the broadcast request for each buffer
+ *
+ */
+template<typename vect>
+struct bcast_inte_impl
+{
+	//! vector to broadcast
+	vect & send;
+
+	//! vector of requests
+	openfpm::vector<MPI_Request> & req;
+
+	//! root processor
+	size_t root;
+
+	/*! \brief constructor
+	 *
+	 * \param send set of buffers to broadcast
+	 * \param req vector of requests
+	 * \param root root processor
+	 *
+	 */
+	inline bcast_inte_impl(vect & send,
+	                       openfpm::vector<MPI_Request> & req,
+	                       size_t root)
+	:send(send),req(req),root(root)
+	{};
+
+	//! It calls the broadcast function for each property
+	template<typename T>
+	inline void operator()(T& t)
+	{
+		typedef typename boost::mpl::at<typename vect::value_type::type,T>::type send_type;
+
+		// Create one request
+		req.add();
+
+		// broadcast
+		MPI_IBcastWB::bcast(root,&send.template get<T::value>(0),send.size()*sizeof(send_type),req.last());
+	}
+};
+template<bool is_lin_or_inte>
+struct b_cast_helper
+{
+	template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
+	static void bcast_(openfpm::vector<MPI_Request> & req,
+	                   openfpm::vector<T,Mem,lt_type,layout_base> & v,
+	                   size_t root)
+	{
+		// Create one request
+		req.add();
+
+		// broadcast the vector as a single contiguous buffer
+		MPI_IBcastW<T>::bcast(root,v,req.last());
+	}
+};
+
+template<>
+struct b_cast_helper<false>
+{
+	template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
+	static void bcast_(openfpm::vector<MPI_Request> & req,
+	                   openfpm::vector<T,Mem,lt_type,layout_base> & v,
+	                   size_t root)
+	{
+		bcast_inte_impl<openfpm::vector<T,Mem,lt_type,layout_base>> bc(v,req,root);
+
+		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(bc);
+	}
+};
 #endif /* OPENFPM_VCLUSTER_SRC_MPI_WRAPPER_MPI_IBCASTW_HPP_ */
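The two b_cast_helper specializations above select at compile time between two broadcast strategies: linear (and std-vector) layouts go out as one contiguous byte buffer, while interleaved layouts issue one MPI_Ibcast per property. A minimal, self-contained sketch of that dispatch pattern, using plain boost::mpl::for_each instead of OpenFPM's for_each_ref, with illustrative names throughout:

#include <boost/mpl/range_c.hpp>
#include <boost/mpl/for_each.hpp>
#include <iostream>

struct per_property_op
{
    // called once per property index at compile-time-driven iteration
    template<typename I>
    void operator()(I) const
    { std::cout << "issue one MPI_Ibcast for property " << I::value << "\n"; }
};

template<bool is_linear>
struct bcast_dispatch
{
    static void run()
    { std::cout << "single MPI_Ibcast over the whole contiguous buffer\n"; }
};

template<>
struct bcast_dispatch<false>
{
    static void run()
    {
        // three properties -> three broadcasts
        boost::mpl::for_each<boost::mpl::range_c<int,0,3>>(per_property_op());
    }
};

int main()
{
    bcast_dispatch<true>::run();   // linear layout (AoS): one buffer
    bcast_dispatch<false>::run();  // interleaved layout (SoA): one per property
}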
@@ -9,13 +9,13 @@ endif
 noinst_PROGRAMS = vcluster_test
 vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp $(CUDA_SOURCES)
-vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
+vcluster_test_CXXFLAGS = -Wunknown-pragmas $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
 vcluster_test_CFLAGS = $(CUDA_CFLAGS)
 vcluster_test_LDADD = $(LINKLIBS)
 lib_LIBRARIES = libvcluster.a
 libvcluster_a_SOURCES = VCluster/VCluster.cpp
-libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
+libvcluster_a_CXXFLAGS = -Wunknown-pragmas $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
 libvcluster_a_CFLAGS =
 nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \
...
@@ -121,13 +121,13 @@ class Vcluster: public Vcluster_base
 			Pack_stat sts;
-			pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf);
+			pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf,opt);
 		}
 		tags.clear();
 		// receive information
-		base_info bi(&recv_buf,prc_recv,sz_recv_byte,tags);
+		base_info bi(&recv_buf,prc_recv,sz_recv_byte,tags,opt);
 		// Send and recv multiple messages
 		if (opt & RECEIVE_KNOWN)
@@ -198,9 +198,12 @@ class Vcluster: public Vcluster_base
 		//! tags
 		openfpm::vector<size_t> &tags;
+		//! options
+		size_t opt;
 		//! constructor
-		base_info(openfpm::vector<BHeapMemory> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags)
-		:recv_buf(recv_buf),prc(prc),sz(sz),tags(tags)
+		base_info(openfpm::vector<BHeapMemory> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags,size_t opt)
+		:recv_buf(recv_buf),prc(prc),sz(sz),tags(tags),opt(opt)
 		{}
 	};
@@ -237,6 +240,17 @@ class Vcluster: public Vcluster_base
 		rinfo.tags.add(tag);
 		// return the pointer
+		// If we have GPU direct activated use directly the cuda buffer
+		if (rinfo.opt & MPI_GPU_DIRECT)
+		{
+#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
+			return rinfo.recv_buf->last().getDevicePointer();
+#else
+			return rinfo.recv_buf->last().getPointer();
+#endif
+		}
 		return rinfo.recv_buf->last().getPointer();
 	}
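MPIX_CUDA_AWARE_SUPPORT is a compile-time macro that CUDA-aware Open MPI builds export through <mpi-ext.h>. A hedged sketch of how an application could verify support before requesting MPI_GPU_DIRECT; the helper name is illustrative, and MPIX_Query_cuda_support() is an Open MPI extension rather than part of the MPI standard:

#include <mpi.h>
#if defined(OPEN_MPI) && OPEN_MPI
#include <mpi-ext.h>   // Open MPI extensions; defines MPIX_CUDA_AWARE_SUPPORT
#endif

// Illustrative helper (not OpenFPM API): true when MPI can read device memory
bool cuda_aware_mpi_available()
{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
    return MPIX_Query_cuda_support() == 1;  // runtime confirmation
#else
    return false;  // built against an MPI without CUDA-aware support
#endif
}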
@@ -337,12 +351,12 @@ class Vcluster: public Vcluster_base
 	 * \return true if the function completed successfully
 	 *
 	 */
-	template<typename T, typename S> bool SGather(T & send, S & recv,size_t root)
+	template<typename T, typename S, template <typename> class layout_base=memory_traits_lin> bool SGather(T & send, S & recv,size_t root)
 	{
 		openfpm::vector<size_t> prc;
 		openfpm::vector<size_t> sz;
-		return SGather(send,recv,prc,sz,root);
+		return SGather<T,S,layout_base>(send,recv,prc,sz,root);
 	}
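A usage sketch for the new layout-aware SGather overload, modeled on the test added at the end of this commit; memory_traits_inte (an interleaved layout) is assumed to be available from the OpenFPM data headers:

openfpm::vector<aggregate<int,float,size_t>,HeapMemory,
                typename memory_traits_inte<aggregate<int,float,size_t>>::type,
                memory_traits_inte> v1, v2;

// every processor contributes v1; the root receives the concatenation in v2
Vcluster & vcl = create_vcluster();
vcl.SGather<decltype(v1),decltype(v2),memory_traits_inte>(v1,v2,0);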
 	//! metafunction
@@ -406,17 +420,20 @@ class Vcluster: public Vcluster_base
 		tags.clear();
 		// receive information
-		base_info bi(&recv_buf,prc,sz,tags);
+		base_info bi(&recv_buf,prc,sz,tags,0);
 		// Send and recv multiple messages
 		sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi);
-		// we generate the list of the properties to pack
+		// we generate the list of the properties to unpack
 		typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
 		// operation object
 		op_ssend_recv_add<void> opa;
+		// Reorder the buffer
+		reorder_buffer(prc,tags,sz);
 		index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,&sz,NULL,opa);
 		recv.add(send);
@@ -428,6 +445,7 @@ class Vcluster: public Vcluster_base
 		// send buffer (master does not send anything) so send req and send_buf
 		// remain buffer with size 0
 		openfpm::vector<size_t> send_prc;
+		openfpm::vector<size_t> send_prc_;
 		send_prc.add(root);
 		openfpm::vector<size_t> sz;
@@ -451,13 +469,15 @@ class Vcluster: public Vcluster_base
 		pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S, layout_base>::packing(mem, send, sts, send_buf);
+		pack_unpack_cond_with_prp_inte_lin<T>::construct_prc(send_prc,send_prc_);
 		tags.clear();
 		// receive information
-		base_info bi(NULL,prc,sz,tags);
+		base_info bi(NULL,prc,sz,tags,0);
 		// Send and recv multiple messages
-		sendrecvMultipleMessagesNBX(send_prc.size(),(size_t *)sz.getPointer(),(size_t *)send_prc.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi,NONE);
+		sendrecvMultipleMessagesNBX(send_prc_.size(),(size_t *)sz.getPointer(),(size_t *)send_prc_.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi,NONE);
 		mem.decRef();
 		delete &mem;
@@ -519,7 +539,7 @@ class Vcluster: public Vcluster_base
 		tags.clear();
 		// receive information
-		base_info bi(&recv_buf,prc,sz,tags);
+		base_info bi(&recv_buf,prc,sz,tags,0);
 		// Send and recv multiple messages
 		sendrecvMultipleMessagesNBX(prc.size(),(size_t *)sz_byte.getPointer(),(size_t *)prc.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi);
@@ -540,7 +560,7 @@ class Vcluster: public Vcluster_base
 		tags.clear();
 		// receive information
-		base_info bi(&recv_buf,prc,sz,tags);
+		base_info bi(&recv_buf,prc,sz,tags,0);
 		// Send and recv multiple messages
 		sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi);
@@ -563,7 +583,7 @@ class Vcluster: public Vcluster_base
 	 * \param sz_recv list of size of the receiving messages (in byte)
 	 *
 	 */
-	void reorder_buffer(openfpm::vector<size_t> & prc, openfpm::vector<size_t> tags, openfpm::vector<size_t> & sz_recv)
+	void reorder_buffer(openfpm::vector<size_t> & prc, const openfpm::vector<size_t> & tags, openfpm::vector<size_t> & sz_recv)
 	{
 		struct recv_buff_reorder
...
@@ -20,6 +20,9 @@
 #include "memory/BHeapMemory.hpp"
 #include "Packer_Unpacker/has_max_prop.hpp"
 #include "data_type/aggregate.hpp"
+#if defined(CUDA_GPU) && defined(__NVCC__)
+#include "util/cuda/moderngpu/launch_box.hxx"
+#endif
 #ifdef HAVE_PETSC
 #include <petscvec.h>
@@ -37,6 +40,7 @@
 #define RECEIVE_KNOWN 4
 #define KNOWN_ELEMENT_OR_BYTE 8
+#define MPI_GPU_DIRECT 16
 // number of vcluster instances
 extern size_t n_vcluster;
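The communication options form a bitmask of power-of-two values, so they can be OR-ed together and tested individually; a short sketch of how the new flag is consumed:

size_t opt = RECEIVE_KNOWN | MPI_GPU_DIRECT;   // flags combine with bitwise OR

if (opt & MPI_GPU_DIRECT)
{
    // the communication routines will hand MPI device pointers
    // (or stage device data to the host when CUDA-aware MPI is missing)
}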
@@ -137,6 +141,17 @@ class Vcluster_base
 	//! vector of functions to execute after all the request has been performed
 	std::vector<int> post_exe;
+#if defined(CUDA_GPU) && defined(__NVCC__)
+
+	//! standard context for mgpu
+	mgpu::standard_context_t * context;
+
+#else
+
+	void * context = NULL;
+
+#endif
 	// Object array
@@ -211,6 +226,12 @@ public:
 			}
 		}
 	}
+
+#if defined(CUDA_GPU) && defined(__NVCC__)
+	delete context;
+#endif
 }
 /*! \brief Virtual cluster constructor
@@ -262,6 +283,12 @@ public:
 		// Initialize bar_req
 		bar_req = MPI_Request();
 		bar_stat = MPI_Status();
+
+#if defined(CUDA_GPU) && defined(__NVCC__)
+		context = new mgpu::standard_context_t();
+#endif
 	}
 #ifdef SE_CLASS1
@@ -315,6 +342,19 @@ public:
 		}
 	}
+#endif
+
+#if defined(CUDA_GPU) && defined(__NVCC__)
+
+	/*! \brief If NVIDIA CUDA is activated return an mgpu context
+	 *
+	 *
+	 */
+	mgpu::standard_context_t & getmgpuContext()
+	{
+		return *context;
+	}
+
 #endif
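A hedged usage sketch for the shared context, assuming CUDA compilation and the moderngpu primitives bundled with OpenFPM; the mergesort call is one example consumer, and keys_device_ptr / n_keys are illustrative names, not OpenFPM API:

#ifdef __NVCC__
	// reuse the shared context instead of constructing one per call
	mgpu::standard_context_t & ctx = create_vcluster().getmgpuContext();

	// e.g. sort a device array of ints with a moderngpu primitive
	mgpu::mergesort(keys_device_ptr, n_keys, mgpu::less_t<int>(), ctx);
#endif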
 /*! \brief Get the MPI_Communicator (or processor group) this VCluster is using
@@ -544,7 +584,7 @@ public:
 	template<typename T>
 	void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > & prc,
 	                                 openfpm::vector< T > & data,
-	                                 void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,void *),
+	                                 void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,size_t,void *),
 	                                 void * ptr_arg, long int opt=NONE)
 	{
 #ifdef SE_CLASS1
@@ -673,10 +713,10 @@ public:
 	 * \param opt options, NONE (ignored at the moment)
 	 *
 	 */
-	template<typename T>
-	void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > & prc, openfpm::vector< T > & data,
+	void sendrecvMultipleMessagesNBX(size_t n_send , size_t sz[], size_t prc[] ,
+			void * ptr[], size_t n_recv, size_t prc_recv[] ,
 			void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,size_t,void *),
 			void * ptr_arg, long int opt=NONE)
 	{
+		sz_recv_tmp.resize(n_recv);
@@ -700,7 +740,7 @@ public:
 		for (size_t i = 0 ; i < n_recv ; i++)
 		{
-			void * ptr_recv = msg_alloc(sz_recv_tmp.get(i),0,0,prc_recv[i],i,ptr_arg);
+			void * ptr_recv = msg_alloc(sz_recv_tmp.get(i),0,0,prc_recv[i],i,0,ptr_arg);
 			recv(prc_recv[i],SEND_SPARSE + NBX_cnt,ptr_recv,sz_recv_tmp.get(i));
 		}
@@ -1041,17 +1081,14 @@ public:
 	 * \return true if it succeeds, false otherwise
 	 *
 	 */
-	template<typename T, typename Mem, typename gr> bool Bcast(openfpm::vector<T,Mem,gr> & v, size_t root)
+	template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
+	bool Bcast(openfpm::vector<T,Mem,lt_type,layout_base> & v, size_t root)
 	{
 #ifdef SE_CLASS1
 		checkType<T>();
 #endif
-		// Create one request
-		req.add();
-
-		// gather
-		MPI_IBcastW<T>::bcast(root,v,req.last());
+		b_cast_helper<openfpm::vect_isel<T>::value == STD_VECTOR || is_layout_mlin<layout_base<T>>::value >::bcast_(req,v,root);
 		return true;
 	}
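A usage sketch for the layout-aware Bcast; like the other asynchronous wrappers it queues requests that execute() completes. That every rank must pre-size the destination vector is an assumption here (n_elements is illustrative), motivated by MPI_Ibcast transferring a fixed byte count:

Vcluster & vcl = create_vcluster();
openfpm::vector<aggregate<int,float>> v;

v.resize(n_elements);                 // assumption: sized on every rank
if (vcl.getProcessUnitID() == 0)
{ /* fill v on the root */ }

vcl.Bcast(v,0);    // queues one request (linear) or one per property (SoA)
vcl.execute();     // waits for the broadcast requests to complete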
...
@@ -15,7 +15,6 @@ template<bool result, typename T, typename S, template<typename> class layout_ba
 struct unpack_selector_with_prp
 {
 	template<typename op,
-	         template <typename> class layout_base,
 	         int ... prp>
 	static void call_unpack(S & recv,
 	                        openfpm::vector<BHeapMemory> & recv_buf,
@@ -306,7 +305,6 @@ struct call_serialize_variadic<index_tuple<prp...>>
 	}
 };
 /*! \brief this class is a functor for "for_each" algorithm
  *
  * This class is a functor for "for_each" algorithm. For each
@@ -317,7 +315,6 @@ struct call_serialize_variadic<index_tuple<prp...>>
  * \tparam encap dst
  *
  */
 template<typename sT>
 struct set_buf_pointer_for_each_prop
 {
@@ -326,20 +323,35 @@ struct set_buf_pointer_for_each_prop
 	openfpm::vector<const void *> & send_buf;
+	size_t opt;
 	/*! \brief constructor
 	 *
 	 * \param v vector of buffers
 	 * \param send_buf set of pointers to fill
 	 * \param opt communication options
 	 *
 	 */
-	inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf)
-	:v(v),send_buf(send_buf)
+	inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf, size_t opt)
+	:v(v),send_buf(send_buf),opt(opt)
 	{};
 	//! It calls the copy function for each property
 	template<typename T>
 	inline void operator()(T& t) const
 	{
-		send_buf.add(v.template getPointer<T::value>());
+		// If we have GPU direct activated use directly the cuda buffer
+		if (opt & MPI_GPU_DIRECT)
+		{
+#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
+			send_buf.add(v.template getDevicePointer<T::value>());
+#else
+			v.template deviceToHost<T::value>();
+			send_buf.add(v.template getPointer<T::value>());
+#endif
+		}
+		else
+		{
+			send_buf.add(v.template getPointer<T::value>());
+		}
 	}
 };
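The #else branch above is the portability fallback: without CUDA-aware MPI, device data must be staged to the host before MPI touches it. The same idea in generic CUDA terms (a sketch with an illustrative helper name, not the OpenFPM implementation):

#include <cuda_runtime.h>

// Sketch: make 'n' bytes at device pointer 'dev' visible to a non-CUDA-aware MPI
void stage_for_mpi(void * host, const void * dev, size_t n)
{
    cudaMemcpy(host, dev, n, cudaMemcpyDeviceToHost);  // explicit device-to-host copy
    // 'host' can now be handed to MPI_Isend / MPI_Ibcast
}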
@@ -384,7 +396,7 @@ struct set_buf_size_for_each_prop
 template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
 struct pack_unpack_cond_with_prp_inte_lin
 {
-	static void set_buffers(T & send, openfpm::vector<const void *> & send_buf)
+	static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
 	{
 		send_buf.add(send.getPointer());
 	}
@@ -407,9 +419,9 @@ struct pack_unpack_cond_with_prp_inte_lin
 template<typename T>
 struct pack_unpack_cond_with_prp_inte_lin<T,true>
 {
-	static void set_buffers(T & send, openfpm::vector<const void *> & send_buf)
+	static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
 	{
-		set_buf_pointer_for_each_prop<T> sbp(send,send_buf);
+		set_buf_pointer_for_each_prop<T> sbp(send,send_buf,opt);
 		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
 	}
@@ -428,6 +440,7 @@ struct pack_unpack_cond_with_prp_inte_lin<T,true>
 			for (size_t j = 0 ; j < T::value_type::max_prop ; j++)
 			{prc_send_.add(prc_send.get(i));}
 		}
+	}
 };
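construct_prc exists because the interleaved path sends one buffer per property: each destination processor id must appear max_prop times in the expanded send list. Illustrative values, assuming max_prop == 3:

openfpm::vector<size_t> prc_send;    // e.g. {1,2}
openfpm::vector<size_t> prc_send_;   // filled as {1,1,1,2,2,2}

pack_unpack_cond_with_prp_inte_lin<T>::construct_prc(prc_send,prc_send_);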
 //! There is max_prop inside
@@ -454,12 +467,12 @@ struct pack_unpack_cond_with_prp
 		}
 	}
-	static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf)
+	static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf, size_t opt = 0)
 	{
 		typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
 		if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true)
 		{
-			pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf);
+			pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf,opt);
 		}
 		else
 		{
@@ -518,7 +531,7 @@ struct op_ssend_recv_add_sr<true>
 			// Merge the information
 			recv.template add_prp<typename T::value_type,
 			                      HeapMemory,
-			                      openfpm::grow_policy_double,
+			                      typename T::grow_policy,
 			                      openfpm::vect_isel<typename T::value_type>::value,
 			                      layout_base,
 			                      prp...>(v2);
...
@@ -535,6 +535,59 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_gather)
 	}
 }
+template<typename Memory, template<typename> class layout_base>
+void test_different_layouts()
+{
+	for (size_t i = 0 ; i < 100 ; i++)
+	{
+		Vcluster & vcl = create_vcluster();
+
+		if (vcl.getProcessingUnits() >= 32)
+			return;
+
+		openfpm::vector<aggregate<int,float,size_t>,Memory,typename layout_base<aggregate<int,float,size_t>>::type,layout_base> v1;
+		v1.resize(vcl.getProcessUnitID());
+
+		for (size_t j = 0 ; j < vcl.getProcessUnitID() ; j++)
+		{
+			v1.template get<0>(j) = 5;
+			v1.template get<1>(j) = 10.0+1000.0;
+			v1.template get<2>(j) = 11.0+100000;
+		}
+
+		openfpm::vector<aggregate<int,float,size_t>,Memory,typename layout_base<aggregate<int,float,size_t>>::type,layout_base> v2;
+
+		vcl.SGather<decltype(v1),decltype(v2),layout_base>(v1,v2,(i%vcl.getProcessingUnits()));
+
+		if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
+		{
+			size_t n = vcl.getProcessingUnits();
+			BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);
+
+			bool is_correct = true;
+			for (size_t j = 0 ; j < v2.size() ; j++)
+			{
+				is_correct &= (v2.template get<0>(j) == 5);
+				is_correct &= (v2.template get<1>(j) == 10.0+1000.0);