Commit ad71d154 authored by incardon

OpenFPM: first test of GPU direct

parent 9602917f
......@@ -12,6 +12,8 @@
#include <mpi.h>
/*! \brief Set of wrapper classes for MPI_Ibcast
*
* The purpose of these classes is to choose the right MPI call based on the type we want to broadcast
......@@ -46,7 +48,7 @@ public:
template<typename T> class MPI_IBcastW
{
public:
static inline void bcast(size_t proc ,openfpm::vector<T> & v, MPI_Request & req)
template<typename Memory> static inline void bcast(size_t proc ,openfpm::vector<T,Memory> & v, MPI_Request & req)
{
MPI_SAFE_CALL(MPI_Ibcast(v.getPointer(), v.size() * sizeof(T),MPI_BYTE, proc , MPI_COMM_WORLD,&req));
}
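For orientation, a minimal hedged sketch of driving this wrapper directly for a plain host vector; in the library the request is normally queued by Vcluster and completed in execute(), and MPI is assumed to be initialized:

// Sketch: broadcast 100 doubles from rank 0. Every rank must have resized
// the vector to the same length, since exactly size()*sizeof(T) bytes move.
openfpm::vector<double> v;
v.resize(100);
MPI_Request req;
MPI_IBcastW<double>::bcast(0 /* root */, v, req);
MPI_Wait(&req, MPI_STATUS_IGNORE);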
......@@ -174,4 +176,78 @@ public:
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to process the broadcast request for each buffer
*
*/
template<typename vect>
struct bcast_inte_impl
{
//! vector to broadcast
vect & send;
//! vector of requests
openfpm::vector<MPI_Request> & req;
//! root processor
size_t root;
/*! \brief constructor
*
* \param send vector to broadcast
* \param req vector of requests to append to
* \param root root processor
*
*/
inline bcast_inte_impl(vect & send,
openfpm::vector<MPI_Request> & req,
size_t root)
:send(send),req(req),root(root)
{};
//! It calls the broadcast function for each property
template<typename T>
inline void operator()(T& t)
{
typedef typename boost::mpl::at<typename vect::value_type::type,T>::type send_type;
// Create one request
req.add();
// broadcast
MPI_IBcastWB::bcast(root,&send.template get<T::value>(0),send.size()*sizeof(send_type),req.last());
}
};
template<bool is_lin_or_inte>
struct b_cast_helper
{
template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
static void bcast_(openfpm::vector<MPI_Request> & req,
openfpm::vector<T,Mem,lt_type,layout_base> & v,
size_t root)
{
// Create one request
req.add();
// broadcast
MPI_IBcastW<T>::bcast(root,v,req.last());
}
};
template<>
struct b_cast_helper<false>
{
template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
static void bcast_(openfpm::vector<MPI_Request> & req,
openfpm::vector<T,Mem,lt_type,layout_base> & v,
size_t root)
{
bcast_inte_impl<openfpm::vector<T,Mem,lt_type,layout_base>> bc(v,req,root);
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(bc);
}
};
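Taken together the two specializations form a compile-time switch; a hedged sketch of the selection (the real condition, used by Bcast further below, is vect_isel<T>::value == STD_VECTOR || is_layout_mlin<layout_base<T>>::value; req, v and root are illustrative):

// true  -> std-vector or linear (AoS) layout: one MPI_Ibcast over the
//          whole contiguous buffer (primary template above)
// false -> interleaved (SoA) layout: bcast_inte_impl issues one
//          MPI_Ibcast per property (specialization above)
constexpr bool one_buffer = true;   // stand-in for the layout trait test
b_cast_helper<one_buffer>::bcast_(req, v, root);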
#endif /* OPENFPM_VCLUSTER_SRC_MPI_WRAPPER_MPI_IBCASTW_HPP_ */
......@@ -9,13 +9,13 @@ endif
noinst_PROGRAMS = vcluster_test
vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp $(CUDA_SOURCES)
vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
vcluster_test_CXXFLAGS = -Wunknown-pragmas $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
vcluster_test_CFLAGS = $(CUDA_CFLAGS)
vcluster_test_LDADD = $(LINKLIBS)
lib_LIBRARIES = libvcluster.a
libvcluster_a_SOURCES = VCluster/VCluster.cpp
libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
libvcluster_a_CXXFLAGS = -Wunknown-pragmas $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
libvcluster_a_CFLAGS =
nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \
......
......@@ -121,13 +121,13 @@ class Vcluster: public Vcluster_base
Pack_stat sts;
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf);
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf,opt);
}
tags.clear();
// receive information
base_info bi(&recv_buf,prc_recv,sz_recv_byte,tags);
base_info bi(&recv_buf,prc_recv,sz_recv_byte,tags,opt);
// Send and recv multiple messages
if (opt & RECEIVE_KNOWN)
......@@ -198,9 +198,12 @@ class Vcluster: public Vcluster_base
//! tags
openfpm::vector<size_t> &tags;
//! options
size_t opt;
//! constructor
base_info(openfpm::vector<BHeapMemory> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags)
:recv_buf(recv_buf),prc(prc),sz(sz),tags(tags)
base_info(openfpm::vector<BHeapMemory> * recv_buf, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, openfpm::vector<size_t> & tags,size_t opt)
:recv_buf(recv_buf),prc(prc),sz(sz),tags(tags),opt(opt)
{}
};
......@@ -237,6 +240,17 @@ class Vcluster: public Vcluster_base
rinfo.tags.add(tag);
// return the pointer
// If GPU direct is activated, use the CUDA buffer directly
if (rinfo.opt & MPI_GPU_DIRECT)
{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
return rinfo.recv_buf->last().getDevicePointer();
#else
return rinfo.recv_buf->last().getPointer();
#endif
}
return rinfo.recv_buf->last().getPointer();
}
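MPIX_CUDA_AWARE_SUPPORT is the compile-time macro exported by Open MPI's <mpi-ext.h>; a hedged sketch of a complementary runtime check (Open MPI specific, other implementations may expose nothing comparable):

#include <mpi.h>
#if defined(OPEN_MPI)
#include <mpi-ext.h>                // defines MPIX_CUDA_AWARE_SUPPORT
#endif

// Sketch: true when device pointers can be handed straight to MPI calls
static bool cuda_aware_mpi()
{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
	return MPIX_Query_cuda_support() == 1;   // Open MPI runtime query
#else
	return false;                            // stage through host memory
#endif
}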
......@@ -337,12 +351,12 @@ class Vcluster: public Vcluster_base
* \return true if the function completed successfully
*
*/
template<typename T, typename S> bool SGather(T & send, S & recv,size_t root)
template<typename T, typename S, template <typename> class layout_base=memory_traits_lin> bool SGather(T & send, S & recv,size_t root)
{
openfpm::vector<size_t> prc;
openfpm::vector<size_t> sz;
return SGather(send,recv,prc,sz,root);
return SGather<T,S,layout_base>(send,recv,prc,sz,root);
}
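A usage sketch of the new layout-aware overload, mirroring the gather test added further below (the aggregate content is illustrative):

// Gather every rank's local SoA vector on root 0, preserving the layout
openfpm::vector<aggregate<int,float>,HeapMemory,
                typename memory_traits_inte<aggregate<int,float>>::type,
                memory_traits_inte> local, global;
Vcluster & vcl = create_vcluster();
vcl.SGather<decltype(local),decltype(global),memory_traits_inte>(local,global,0);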
//! metafunction
......@@ -406,17 +420,20 @@ class Vcluster: public Vcluster_base
tags.clear();
// receive information
base_info bi(&recv_buf,prc,sz,tags);
base_info bi(&recv_buf,prc,sz,tags,0);
// Send and recv multiple messages
sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi);
// we generate the list of the properties to pack
// we generate the list of the properties to unpack
typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
// operation object
op_ssend_recv_add<void> opa;
// Reorder the buffer
reorder_buffer(prc,tags,sz);
index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,&sz,NULL,opa);
recv.add(send);
......@@ -428,6 +445,7 @@ class Vcluster: public Vcluster_base
// send buffer (the master does not send anything), so send_req and send_buf
// remain buffers of size 0
openfpm::vector<size_t> send_prc;
openfpm::vector<size_t> send_prc_;
send_prc.add(root);
openfpm::vector<size_t> sz;
......@@ -451,13 +469,15 @@ class Vcluster: public Vcluster_base
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S, layout_base>::packing(mem, send, sts, send_buf);
pack_unpack_cond_with_prp_inte_lin<T>::construct_prc(send_prc,send_prc_);
tags.clear();
// receive information
base_info bi(NULL,prc,sz,tags);
base_info bi(NULL,prc,sz,tags,0);
// Send and recv multiple messages
sendrecvMultipleMessagesNBX(send_prc.size(),(size_t *)sz.getPointer(),(size_t *)send_prc.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi,NONE);
sendrecvMultipleMessagesNBX(send_prc_.size(),(size_t *)sz.getPointer(),(size_t *)send_prc_.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi,NONE);
mem.decRef();
delete &mem;
......@@ -519,7 +539,7 @@ class Vcluster: public Vcluster_base
tags.clear();
// receive information
base_info bi(&recv_buf,prc,sz,tags);
base_info bi(&recv_buf,prc,sz,tags,0);
// Send and recv multiple messages
sendrecvMultipleMessagesNBX(prc.size(),(size_t *)sz_byte.getPointer(),(size_t *)prc.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi);
......@@ -540,7 +560,7 @@ class Vcluster: public Vcluster_base
tags.clear();
// receive information
base_info bi(&recv_buf,prc,sz,tags);
base_info bi(&recv_buf,prc,sz,tags,0);
// Send and recv multiple messages
sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi);
......@@ -563,7 +583,7 @@ class Vcluster: public Vcluster_base
* \param sz_recv list of sizes of the received messages (in bytes)
*
*/
void reorder_buffer(openfpm::vector<size_t> & prc, openfpm::vector<size_t> tags, openfpm::vector<size_t> & sz_recv)
void reorder_buffer(openfpm::vector<size_t> & prc, const openfpm::vector<size_t> & tags, openfpm::vector<size_t> & sz_recv)
{
struct recv_buff_reorder
......
......@@ -20,6 +20,9 @@
#include "memory/BHeapMemory.hpp"
#include "Packer_Unpacker/has_max_prop.hpp"
#include "data_type/aggregate.hpp"
#if defined(CUDA_GPU) && defined(__NVCC__)
#include "util/cuda/moderngpu/launch_box.hxx"
#endif
#ifdef HAVE_PETSC
#include <petscvec.h>
......@@ -37,6 +40,7 @@
#define RECEIVE_KNOWN 4
#define KNOWN_ELEMENT_OR_BYTE 8
#define MPI_GPU_DIRECT 16
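These options are bit flags, so they compose with bitwise OR; a small sketch:

// Sketch: request known-size receives over GPU-direct buffers
size_t opt = RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE | MPI_GPU_DIRECT;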
// number of vcluster instances
extern size_t n_vcluster;
......@@ -137,6 +141,17 @@ class Vcluster_base
//! vector of functions to execute after all the request has been performed
std::vector<int> post_exe;
#if defined(CUDA_GPU) && defined(__NVCC__)
//! standard context for mgpu
mgpu::standard_context_t * context;
#else
void * context = NULL;
#endif
// Object array
......@@ -211,6 +226,12 @@ public:
}
}
}
#if defined(CUDA_GPU) && defined(__NVCC__)
delete context;
#endif
}
/*! \brief Virtual cluster constructor
......@@ -262,6 +283,12 @@ public:
// Initialize bar_req
bar_req = MPI_Request();
bar_stat = MPI_Status();
#if defined(CUDA_GPU) && defined(__NVCC__)
context = new mgpu::standard_context_t();
#endif
}
#ifdef SE_CLASS1
......@@ -315,6 +342,19 @@ public:
}
}
#endif
#if defined(CUDA_GPU) && defined(__NVCC__)
/*! \brief If NVIDIA CUDA is activated return an mgpu context
*
* \return the mgpu standard context
*
*/
mgpu::standard_context_t & getmgpuContext()
{
return *context;
}
#endif
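A hedged usage sketch; the mergesort call is only illustrative of the moderngpu primitives this context can feed (requires compilation with nvcc):

// Sketch: reuse the cluster-wide moderngpu context for device primitives
// instead of constructing a new standard_context_t per call site
mgpu::standard_context_t & ctx = create_vcluster().getmgpuContext();
// e.g. mgpu::mergesort(keys_ptr, n_keys, mgpu::less_t<int>(), ctx);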
/*! \brief Get the MPI_Communicator (or processor group) this VCluster is using
......@@ -544,7 +584,7 @@ public:
template<typename T>
void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > & prc,
openfpm::vector< T > & data,
void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,void *),
void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,size_t,void *),
void * ptr_arg, long int opt=NONE)
{
#ifdef SE_CLASS1
......@@ -673,8 +713,8 @@ public:
* \param opt options, NONE (ignored at the moment)
*
*/
template<typename T>
void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > & prc, openfpm::vector< T > & data,
void sendrecvMultipleMessagesNBX(size_t n_send , size_t sz[], size_t prc[] ,
void * ptr[], size_t n_recv, size_t prc_recv[] ,
void * (* msg_alloc)(size_t,size_t,size_t,size_t,size_t,size_t,void *),
void * ptr_arg, long int opt=NONE)
{
......@@ -700,7 +740,7 @@ public:
for (size_t i = 0 ; i < n_recv ; i++)
{
void * ptr_recv = msg_alloc(sz_recv_tmp.get(i),0,0,prc_recv[i],i,ptr_arg);
void * ptr_recv = msg_alloc(sz_recv_tmp.get(i),0,0,prc_recv[i],i,0,ptr_arg);
recv(prc_recv[i],SEND_SPARSE + NBX_cnt,ptr_recv,sz_recv_tmp.get(i));
}
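The allocator callback now takes six size_t arguments before the user pointer; judging from the call sites they are (message size, total size, total message count, source processor, receive index, tag). A hedged sketch of a conforming allocator, with the receive-buffer handling simplified:

// Hedged sketch: parameter meanings inferred from the call sites above
static void * msg_alloc_sketch(size_t msg_sz, size_t total_sz, size_t total_msg,
                               size_t prc, size_t ri, size_t tag, void * ptr_arg)
{
	openfpm::vector<BHeapMemory> & rb =
	        *static_cast<openfpm::vector<BHeapMemory> *>(ptr_arg);
	rb.add();                     // one buffer per incoming message
	rb.last().resize(msg_sz);
	return rb.last().getPointer();
}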
......@@ -1041,17 +1081,14 @@ public:
* \return true if it succeeds, false otherwise
*
*/
template<typename T, typename Mem, typename gr> bool Bcast(openfpm::vector<T,Mem,gr> & v, size_t root)
template<typename T, typename Mem, typename lt_type, template<typename> class layout_base >
bool Bcast(openfpm::vector<T,Mem,lt_type,layout_base> & v, size_t root)
{
#ifdef SE_CLASS1
checkType<T>();
#endif
// Create one request
req.add();
// gather
MPI_IBcastW<T>::bcast(root,v,req.last());
b_cast_helper<openfpm::vect_isel<T>::value == STD_VECTOR || is_layout_mlin<layout_base<T>>::value >::bcast_(req,v,root);
return true;
}
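A hedged usage sketch of the new dispatch for an interleaved vector (mirrors the complex broadcast test added below):

// Sketch: an SoA vector now broadcasts as one MPI_Ibcast per property
Vcluster & vcl = create_vcluster();
openfpm::vector<aggregate<int,int>,HeapMemory,
                typename memory_traits_inte<aggregate<int,int>>::type,
                memory_traits_inte> bdata;
bdata.resize(7);        // same size on every rank
vcl.Bcast(bdata,0);     // queues the per-property broadcast requests
vcl.execute();          // completes them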
......
......@@ -15,7 +15,6 @@ template<bool result, typename T, typename S, template<typename> class layout_ba
struct unpack_selector_with_prp
{
template<typename op,
template <typename> class layout_base,
int ... prp>
static void call_unpack(S & recv,
openfpm::vector<BHeapMemory> & recv_buf,
......@@ -306,7 +305,6 @@ struct call_serialize_variadic<index_tuple<prp...>>
}
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
......@@ -317,7 +315,6 @@ struct call_serialize_variadic<index_tuple<prp...>>
* \tparam encap dst
*
*/
template<typename sT>
struct set_buf_pointer_for_each_prop
{
......@@ -326,21 +323,36 @@ struct set_buf_pointer_for_each_prop
openfpm::vector<const void *> & send_buf;
size_t opt;
/*! \brief constructor
*
* \param v vector with the data to send
* \param send_buf vector of buffer pointers to fill
* \param opt options (MPI_GPU_DIRECT selects the device buffers)
*
*/
inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf)
:v(v),send_buf(send_buf)
inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf, size_t opt)
:v(v),send_buf(send_buf),opt(opt)
{};
//! It sets the send buffer pointer for each property
template<typename T>
inline void operator()(T& t) const
{
// If GPU direct is activated, use the CUDA buffer directly
if (opt & MPI_GPU_DIRECT)
{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
send_buf.add(v.template getDevicePointer<T::value>());
#else
v.template deviceToHost<T::value>();
send_buf.add(v.template getPointer<T::value>());
#endif
}
else
{
send_buf.add(v.template getPointer<T::value>());
}
}
};
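In practice this functor is reached by passing MPI_GPU_DIRECT through the semantic calls; a sketch matching the test added further below (vd, collect, prc_send, prc_recv and sz_recv as in that test):

// Sketch: with CUDA-aware MPI the device buffers go to MPI directly,
// otherwise the functor above falls back to a deviceToHost copy first
v_cl.SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,
               decltype(collect),memory_traits_inte>
              (vd,collect,prc_send,prc_recv,sz_recv,MPI_GPU_DIRECT);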
/*! \brief this class is a functor for "for_each" algorithm
......@@ -384,7 +396,7 @@ struct set_buf_size_for_each_prop
template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
struct pack_unpack_cond_with_prp_inte_lin
{
static void set_buffers(T & send, openfpm::vector<const void *> & send_buf)
static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
{
send_buf.add(send.getPointer());
}
......@@ -407,9 +419,9 @@ struct pack_unpack_cond_with_prp_inte_lin
template<typename T>
struct pack_unpack_cond_with_prp_inte_lin<T,true>
{
static void set_buffers(T & send, openfpm::vector<const void *> & send_buf)
static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
{
set_buf_pointer_for_each_prop<T> sbp(send,send_buf);
set_buf_pointer_for_each_prop<T> sbp(send,send_buf,opt);
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
}
......@@ -428,6 +440,7 @@ struct pack_unpack_cond_with_prp_inte_lin<T,true>
for (size_t j = 0 ; j < T::value_type::max_prop ; j++)
{prc_send_.add(prc_send.get(i));}
}
}
};
//! There is max_prop inside
......@@ -454,12 +467,12 @@ struct pack_unpack_cond_with_prp
}
}
static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf)
static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf, size_t opt = 0)
{
typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true)
{
pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf);
pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf,opt);
}
else
{
......@@ -518,7 +531,7 @@ struct op_ssend_recv_add_sr<true>
// Merge the information
recv.template add_prp<typename T::value_type,
HeapMemory,
openfpm::grow_policy_double,
typename T::grow_policy,
openfpm::vect_isel<typename T::value_type>::value,
layout_base,
prp...>(v2);
......
......@@ -535,6 +535,59 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_gather)
}
}
template<typename Memory, template<typename> class layout_base>
void test_different_layouts()
{
for (size_t i = 0 ; i < 100 ; i++)
{
Vcluster & vcl = create_vcluster();
if (vcl.getProcessingUnits() >= 32)
return;
openfpm::vector<aggregate<int,float,size_t>,Memory,typename layout_base<aggregate<int,float,size_t>>::type,layout_base> v1;
v1.resize(vcl.getProcessUnitID());
for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
{
v1.template get<0>(i) = 5;
v1.template get<1>(i) = 10.0+1000.0;
v1.template get<2>(i) = 11.0+100000;
}
openfpm::vector<aggregate<int,float,size_t>,Memory,typename layout_base<aggregate<int,float,size_t>>::type,layout_base> v2;
vcl.SGather<decltype(v1),decltype(v2),layout_base>(v1,v2,(i%vcl.getProcessingUnits()));
if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
{
size_t n = vcl.getProcessingUnits();
BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);
bool is_correct = true;
for (size_t i = 0 ; i < v2.size() ; i++)
{
is_correct &= (v2.template get<0>(i) == 5);
is_correct &= (v2.template get<1>(i) == 10.0+1000.0);
is_correct &= (v2.template get<2>(i) == 11.0+100000.0);
}
BOOST_REQUIRE_EQUAL(is_correct,true);
}
if (vcl.getProcessUnitID() == 0 && i == 99)
std::cout << "Semantic gather test stop" << std::endl;
}
}
BOOST_AUTO_TEST_CASE (Vcluster_semantic_layout_inte_gather)
{
test_different_layouts<HeapMemory,memory_traits_inte>();
test_different_layouts<HeapMemory,memory_traits_lin>();
test_different_layouts<CudaMemory,memory_traits_inte>();
test_different_layouts<CudaMemory,memory_traits_lin>();
}
#define SSCATTER_MAX 7
BOOST_AUTO_TEST_CASE (Vcluster_semantic_scatter)
......@@ -1578,7 +1631,7 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6)
}
}
BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
void test_ssend_recv_layout_switch(size_t opt)
{
auto & v_cl = create_vcluster();
......@@ -1607,10 +1660,16 @@ BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
}
prc_send.add(i);
if (opt & MPI_GPU_DIRECT)
{vd.get(i).template hostToDevice<0,1>();}
}
v_cl.SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte>(vd,collect,prc_send, prc_recv,sz_recv);
v_cl.SSendRecvP<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte,0,1>(vd,collect2,prc_send, prc_recv,sz_recv);
v_cl.SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte>
(vd,collect,prc_send, prc_recv,sz_recv,opt);
v_cl.SSendRecvP<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte,0,1>
(vd,collect2,prc_send, prc_recv,sz_recv,opt);
// now we check what we received
......@@ -1625,6 +1684,9 @@ BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
bool match = true;
for (size_t i = 0 ; i < v_cl.size() ; i++)
{
if (opt & MPI_GPU_DIRECT)
{vd.get(i).template deviceToHost<0,1>();}
for (size_t j = 0 ; j < 100 ; j++)
{
match &= collect.template get<0>(i*100 +j) == v_cl.rank()*10000 + i*100 + j;
......@@ -1646,6 +1708,15 @@ BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
BOOST_REQUIRE_EQUAL(match,true);
}
BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
{
test_ssend_recv_layout_switch(0);
}
BOOST_AUTO_TEST_CASE( Vcluster_semantic_gpu_direct )
{
test_ssend_recv_layout_switch(MPI_GPU_DIRECT);
}
BOOST_AUTO_TEST_SUITE_END()
......
......@@ -861,11 +861,11 @@ template<typename T> void test_single_all_gather_primitives(Vcluster & vcl)
}
template<typename T> void test_single_all_broadcast_primitives(Vcluster & vcl)
template<typename T,typename Memory, template <typename> class layout_base> void test_single_all_broadcast_primitives(Vcluster & vcl)
{
//! [bcast numbers]
openfpm::vector<T> bdata;
openfpm::vector<T,Memory,typename layout_base<T>::type,layout_base> bdata;
if (vcl.getProcessUnitID() == 0)
{
......@@ -886,7 +886,55 @@ template<typename T> void test_single_all_broadcast_primitives(Vcluster & vcl)
vcl.execute();
for (size_t i = 0 ; i < bdata.size() ; i++)
BOOST_REQUIRE_EQUAL(i,(size_t)bdata.get(i));
{BOOST_REQUIRE_EQUAL(i,(size_t)bdata.get(i));}
//! [bcast numbers]
}
template<typename T,typename Memory, template <typename> class layout_base> void test_single_all_broadcast_complex(Vcluster & vcl)
{
//! [bcast complex]
openfpm::vector<T,Memory,typename layout_base<T>::type,layout_base> bdata;
if (vcl.getProcessUnitID() == 0)
{
// Fill seven elements: property 0 gets the index, property 1 gets 1000+index
for (size_t k = 0 ; k < 7 ; k++)
{
bdata.add();
bdata.template get<0>(bdata.size()-1) = k;
bdata.template get<1>(bdata.size()-1) = 1000+k;
}
}
else
{
bdata.resize(7);
}
vcl.Bcast(bdata,0);
vcl.execute();
for (size_t i = 0 ; i < bdata.size() ; i++)
{
BOOST_REQUIRE_EQUAL(i,(size_t)bdata.template get<0>(i));
BOOST_REQUIRE_EQUAL(i+1000,(size_t)bdata.template get<1>(i));
}
//! [bcast complex]
......
......@@ -164,16 +164,28 @@ BOOST_AUTO_TEST_CASE(VCluster_bcast_test)
std::cout << "Broadcast test " << std::endl;
test_single_all_broadcast_primitives<unsigned char>(vcl);
test_single_all_broadcast_primitives<char>(vcl);
test_single_all_broadcast_primitives<short>(vcl);
test_single_all_broadcast_primitives<unsigned short>(vcl);
test_single_all_broadcast_primitives<int>(vcl);
test_single_all_broadcast_primitives<unsigned int>(vcl);
test_single_all_broadcast_primitives<long int>(vcl);
test_single_all_broadcast_primitives<unsigned long int>(vcl);
test_single_all_broadcast_primitives<float>(vcl);
test_single_all_broadcast_primitives<double>(vcl);
test_single_all_broadcast_primitives<unsigned char,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<char,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<short,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<unsigned short,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<int,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<unsigned int,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<long int,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<unsigned long int,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<float,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_primitives<double,HeapMemory,memory_traits_lin>(vcl);
}
BOOST_AUTO_TEST_CASE(VCluster_bcast_complex_test)
{
Vcluster & vcl = create_vcluster();
std::cout << "Broadcast complex test " << std::endl;
test_single_all_broadcast_complex<aggregate<int,int>,HeapMemory,memory_traits_lin>(vcl);
test_single_all_broadcast_complex<aggregate<int,int>,HeapMemory,memory_traits_inte>(vcl);