Commit 3dabdf44 authored by incardon's avatar incardon

Changes for GPU

parent a9991812
...@@ -41,31 +41,43 @@ AC_DEFUN([AX_CUDA], ...@@ -41,31 +41,43 @@ AC_DEFUN([AX_CUDA],
AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"]) AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"])
AS_IF([test "x$NVCC_EXIST" = "xno"],[],[ AS_IF([test "x$NVCC_EXIST" = "xno"],[],[
NVCC=`which nvcc` NVCC=`which nvcc`
# Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" # Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc"
# is substituted by "include". # is substituted by "include".
CUDA_CFLAGS=" -I${NVCC%bin//nvcc}" CUDA_CFLAGS=" ${NVCC%bin//nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS%bin/nvcc}" CUDA_CFLAGS=" ${CUDA_CFLAGS%bin/nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS}include" CUDA_CFLAGS=" -I${CUDA_CFLAGS}include"
#Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" #Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc"
#is substituted by "lib". #is substituted by "lib".
CUDA_LIBS=" -L${NVCC%bin//nvcc}" CUDA_LIBS="${NVCC%bin//nvcc}"
CUDA_LIBS=" -L${CUDA_LIBS%bin/nvcc}" CUDA_LIBS="${CUDA_LIBS%bin/nvcc}"
CUDA_PATH=$CUDA_LIBS
CUDA_LIBS=" -L${CUDA_LIBS}lib" CUDA_LIBS=" -L${CUDA_LIBS}lib"
# If $build_cpu contains "_64", append "64" to CUDA_LIBS # If $build_cpu contains "_64", append "64" to CUDA_LIBS
AS_IF([echo $build_cpu | grep -q "_64"], AS_IF([echo $build_cpu | grep -q "_64"],
[CUDA_LIBS+="64"]) [
AS_IF([ test -d $CUDA_PATH/lib64 ], [ CUDA_LIBS+="64" ], [])
# Be carefull the return code 0 mean true return code 1 mean false
AS_IF([ command -v bumblebeed >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ " ],
[
echo "bumblebee, NVIDIA optimus, not found"
])
AS_IF([ test -d /usr/local/cuda/lib64 ], [ CUDA_LIBS+=" -L/usr/local/cuda/lib64 " ],
[
AS_IF([ test -d /usr/local/cuda/lib ],[ CUDA_LIBS+=" -L/usr/local/cuda/lib " ])
])
])
# Append " -lcuda -lcudart" to CUDA_LIBS # Append " -lcuda -lcudart" to CUDA_LIBS
CUDA_LIBS+=" -lcuda -lcudart" CUDA_LIBS+=" -lcuda -lcudart"
# Make variables available in Makefile.am # Make variables available in Makefile.am
AC_SUBST(CUDA_CFLAGS) AC_SUBST([CUDA_CFLAGS])
AC_SUBST(CUDA_LIBS) AC_SUBST([CUDA_LIBS])
echo $NVCC echo $NVCC
AC_SUBST(NVCC) AC_SUBST([NVCC])
AC_DEFINE([NVCC],[],[NVCC compiling]) AC_DEFINE([NVCC],[],[NVCC compiling])
])dnl ])dnl
......
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS) LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS) $(CUDA_LIBS)
if BUILDCUDA
CUDA_SOURCES=../../openfpm_devices/src/memory/CudaMemory.cu
else
CUDA_SOURCES=
endif
noinst_PROGRAMS = vcluster_test noinst_PROGRAMS = vcluster_test
vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp $(CUDA_SOURCES)
vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
vcluster_test_CFLAGS = $(CUDA_CFLAGS) vcluster_test_CFLAGS = $(CUDA_CFLAGS)
vcluster_test_LDADD = $(LINKLIBS) vcluster_test_LDADD = $(LINKLIBS)
lib_LIBRARIES = libvcluster.a lib_LIBRARIES = libvcluster.a
libvcluster_a_SOURCES = VCluster/VCluster.cpp libvcluster_a_SOURCES = VCluster/VCluster.cpp
libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
libvcluster_a_CFLAGS = libvcluster_a_CFLAGS =
nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \ nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \
...@@ -17,7 +23,7 @@ VCluster/VCluster_base.hpp VCluster/VCluster.hpp VCluster/VCluster_meta_function ...@@ -17,7 +23,7 @@ VCluster/VCluster_base.hpp VCluster/VCluster.hpp VCluster/VCluster_meta_function
util/Vcluster_log.hpp util/Vcluster_log.hpp
.cu.o : .cu.o :
$(NVCC) $(NVCCFLAGS) -o $@ -c $< $(NVCC) $(NVCCFLAGS) $(INCLUDES_PATH) -o $@ -c $<
test: vcluster_test test: vcluster_test
source $(HOME)/openfpm_vars && cd .. && mpirun -np 3 ./src/vcluster_test && mpirun -np 4 ./src/vcluster_test source $(HOME)/openfpm_vars && cd .. && mpirun -np 3 ./src/vcluster_test && mpirun -np 4 ./src/vcluster_test
......
...@@ -43,9 +43,9 @@ class Vcluster: public Vcluster_base ...@@ -43,9 +43,9 @@ class Vcluster: public Vcluster_base
struct index_gen<index_tuple<prp...>> struct index_gen<index_tuple<prp...>>
{ {
//! Process the receive buffer //! Process the receive buffer
template<typename op, typename T, typename S> inline static void process_recv(Vcluster & vcl, S & recv, openfpm::vector<size_t> * sz_recv, openfpm::vector<size_t> * sz_recv_byte, op & op_param) template<typename op, typename T, typename S,template <typename> class layout_base> inline static void process_recv(Vcluster & vcl, S & recv, openfpm::vector<size_t> * sz_recv, openfpm::vector<size_t> * sz_recv_byte, op & op_param)
{ {
vcl.process_receive_buffer_with_prp<op,T,S,prp...>(recv,sz_recv,sz_recv_byte,op_param); vcl.process_receive_buffer_with_prp<op,T,S,layout_base, prp...>(recv,sz_recv,sz_recv_byte,op_param);
} }
}; };
...@@ -67,7 +67,7 @@ class Vcluster: public Vcluster_base ...@@ -67,7 +67,7 @@ class Vcluster: public Vcluster_base
* \param opt Options using RECEIVE_KNOWN enable patters with less latencies, in case of RECEIVE_KNOWN * \param opt Options using RECEIVE_KNOWN enable patters with less latencies, in case of RECEIVE_KNOWN
* *
*/ */
template<typename op, typename T, typename S> void prepare_send_buffer(openfpm::vector<T> & send, template<typename op, typename T, typename S, template <typename> class layout_base> void prepare_send_buffer(openfpm::vector<T> & send,
S & recv, S & recv,
openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send,
openfpm::vector<size_t> & prc_recv, openfpm::vector<size_t> & prc_recv,
...@@ -97,7 +97,7 @@ class Vcluster: public Vcluster_base ...@@ -97,7 +97,7 @@ class Vcluster: public Vcluster_base
size_t req = 0; size_t req = 0;
//Pack requesting //Pack requesting
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op, T, S>::packingRequest(send.get(i), req, send_sz_byte); pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op, T, S, layout_base>::packingRequest(send.get(i), req, send_sz_byte);
tot_size += req; tot_size += req;
} }
...@@ -112,7 +112,7 @@ class Vcluster: public Vcluster_base ...@@ -112,7 +112,7 @@ class Vcluster: public Vcluster_base
Pack_stat sts; Pack_stat sts;
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S>::packing(mem, send.get(i), sts, send_buf); pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value, op, T, S, layout_base>::packing(mem, send.get(i), sts, send_buf);
} }
// receive information // receive information
...@@ -265,7 +265,7 @@ class Vcluster: public Vcluster_base ...@@ -265,7 +265,7 @@ class Vcluster: public Vcluster_base
* \param op_param operation to do in merging the received information with recv * \param op_param operation to do in merging the received information with recv
* *
*/ */
template<typename op, typename T, typename S, unsigned int ... prp > template<typename op, typename T, typename S, template<typename> class layout_base, unsigned int ... prp >
void process_receive_buffer_with_prp(S & recv, void process_receive_buffer_with_prp(S & recv,
openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz,
openfpm::vector<size_t> * sz_byte, openfpm::vector<size_t> * sz_byte,
...@@ -274,7 +274,7 @@ class Vcluster: public Vcluster_base ...@@ -274,7 +274,7 @@ class Vcluster: public Vcluster_base
if (sz != NULL) if (sz != NULL)
sz->resize(recv_buf.size()); sz->resize(recv_buf.size());
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op, T, S, prp... >::unpacking(recv, recv_buf, sz, sz_byte, op_param); pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op, T, S, layout_base, prp... >::unpacking(recv, recv_buf, sz, sz_byte, op_param);
} }
public: public:
...@@ -359,7 +359,7 @@ class Vcluster: public Vcluster_base ...@@ -359,7 +359,7 @@ class Vcluster: public Vcluster_base
* \return true if the function completed succefully * \return true if the function completed succefully
* *
*/ */
template<typename T, typename S> bool SGather(T & send, template<typename T, typename S, template <typename> class layout_base=memory_traits_lin> bool SGather(T & send,
S & recv, S & recv,
openfpm::vector<size_t> & prc, openfpm::vector<size_t> & prc,
openfpm::vector<size_t> & sz, openfpm::vector<size_t> & sz,
...@@ -392,7 +392,7 @@ class Vcluster: public Vcluster_base ...@@ -392,7 +392,7 @@ class Vcluster: public Vcluster_base
// operation object // operation object
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S>(*this,recv,&sz,NULL,opa); index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,&sz,NULL,opa);
recv.add(send); recv.add(send);
prc.add(root); prc.add(root);
...@@ -413,7 +413,7 @@ class Vcluster: public Vcluster_base ...@@ -413,7 +413,7 @@ class Vcluster: public Vcluster_base
size_t tot_size = 0; size_t tot_size = 0;
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S>::packingRequest(send, tot_size, sz); pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S, layout_base>::packingRequest(send, tot_size, sz);
HeapMemory pmem; HeapMemory pmem;
...@@ -424,7 +424,7 @@ class Vcluster: public Vcluster_base ...@@ -424,7 +424,7 @@ class Vcluster: public Vcluster_base
Pack_stat sts; Pack_stat sts;
pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S>::packing(mem, send, sts, send_buf); pack_unpack_cond_with_prp<has_max_prop<T, has_value_type<T>::value>::value,op_ssend_recv_add<void>, T, S, layout_base>::packing(mem, send, sts, send_buf);
// receive information // receive information
base_info bi(NULL,prc,sz); base_info bi(NULL,prc,sz);
...@@ -465,7 +465,7 @@ class Vcluster: public Vcluster_base ...@@ -465,7 +465,7 @@ class Vcluster: public Vcluster_base
* \return true if the function completed succefully * \return true if the function completed succefully
* *
*/ */
template<typename T, typename S> bool SScatter(T & send, S & recv, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, size_t root) template<typename T, typename S, template <typename> class layout_base=memory_traits_lin> bool SScatter(T & send, S & recv, openfpm::vector<size_t> & prc, openfpm::vector<size_t> & sz, size_t root)
{ {
// Reset the receive buffer // Reset the receive buffer
reset_recv_buf(); reset_recv_buf();
...@@ -501,7 +501,7 @@ class Vcluster: public Vcluster_base ...@@ -501,7 +501,7 @@ class Vcluster: public Vcluster_base
// operation object // operation object
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S>(*this,recv,NULL,NULL,opa); index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,NULL,NULL,opa);
} }
else else
{ {
...@@ -520,7 +520,7 @@ class Vcluster: public Vcluster_base ...@@ -520,7 +520,7 @@ class Vcluster: public Vcluster_base
// operation object // operation object
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S>(*this,recv,NULL,NULL,opa); index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,NULL,NULL,opa);
} }
return true; return true;
...@@ -620,21 +620,22 @@ class Vcluster: public Vcluster_base ...@@ -620,21 +620,22 @@ class Vcluster: public Vcluster_base
* \return true if the function completed succefully * \return true if the function completed succefully
* *
*/ */
template<typename T, typename S> bool SSendRecv(openfpm::vector<T> & send, template<typename T, typename S, template<typename> class layout_base = memory_traits_lin >
S & recv, bool SSendRecv(openfpm::vector<T> & send,
openfpm::vector<size_t> & prc_send, S & recv,
openfpm::vector<size_t> & prc_recv, openfpm::vector<size_t> & prc_send,
openfpm::vector<size_t> & sz_recv, openfpm::vector<size_t> & prc_recv,
size_t opt = NONE) openfpm::vector<size_t> & sz_recv,
size_t opt = NONE)
{ {
prepare_send_buffer<op_ssend_recv_add<void>,T,S>(send,recv,prc_send,prc_recv,sz_recv,opt); prepare_send_buffer<op_ssend_recv_add<void>,T,S,layout_base>(send,recv,prc_send,prc_recv,sz_recv,opt);
// we generate the list of the properties to pack // we generate the list of the properties to pack
typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack; typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S>(*this,recv,&sz_recv,NULL,opa); index_gen<ind_prop_to_pack>::template process_recv<op_ssend_recv_add<void>,T,S,layout_base>(*this,recv,&sz_recv,NULL,opa);
return true; return true;
} }
...@@ -668,20 +669,20 @@ class Vcluster: public Vcluster_base ...@@ -668,20 +669,20 @@ class Vcluster: public Vcluster_base
* \return true if the function completed successful * \return true if the function completed successful
* *
*/ */
template<typename T, typename S, int ... prp> bool SSendRecvP(openfpm::vector<T> & send, template<typename T, typename S, template<typename> class layout_base, int ... prp> bool SSendRecvP(openfpm::vector<T> & send,
S & recv, S & recv,
openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send,
openfpm::vector<size_t> & prc_recv, openfpm::vector<size_t> & prc_recv,
openfpm::vector<size_t> & sz_recv, openfpm::vector<size_t> & sz_recv,
openfpm::vector<size_t> & sz_recv_byte) openfpm::vector<size_t> & sz_recv_byte)
{ {
prepare_send_buffer<op_ssend_recv_add<void>,T,S>(send,recv,prc_send,prc_recv,sz_recv,NONE); prepare_send_buffer<op_ssend_recv_add<void>,T,S,layout_base>(send,recv,prc_send,prc_recv,sz_recv,NONE);
// operation object // operation object
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
// process the received information // process the received information
process_receive_buffer_with_prp<op_ssend_recv_add<void>,T,S,prp...>(recv,&sz_recv,&sz_recv_byte,opa); process_receive_buffer_with_prp<op_ssend_recv_add<void>,T,S,layout_base,prp...>(recv,&sz_recv,&sz_recv_byte,opa);
return true; return true;
} }
...@@ -714,19 +715,19 @@ class Vcluster: public Vcluster_base ...@@ -714,19 +715,19 @@ class Vcluster: public Vcluster_base
* \return true if the function completed succefully * \return true if the function completed succefully
* *
*/ */
template<typename T, typename S, int ... prp> bool SSendRecvP(openfpm::vector<T> & send, template<typename T, typename S, template<typename> class layout_base, int ... prp> bool SSendRecvP(openfpm::vector<T> & send,
S & recv, S & recv,
openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send,
openfpm::vector<size_t> & prc_recv, openfpm::vector<size_t> & prc_recv,
openfpm::vector<size_t> & sz_recv) openfpm::vector<size_t> & sz_recv)
{ {
prepare_send_buffer<op_ssend_recv_add<void>,T,S>(send,recv,prc_send,prc_recv,sz_recv,NONE); prepare_send_buffer<op_ssend_recv_add<void>,T,S,layout_base>(send,recv,prc_send,prc_recv,sz_recv,NONE);
// operation object // operation object
op_ssend_recv_add<void> opa; op_ssend_recv_add<void> opa;
// process the received information // process the received information
process_receive_buffer_with_prp<op_ssend_recv_add<void>,T,S,prp...>(recv,&sz_recv,NULL,opa); process_receive_buffer_with_prp<op_ssend_recv_add<void>,T,S,layout_base,prp...>(recv,&sz_recv,NULL,opa);
return true; return true;
} }
...@@ -767,7 +768,7 @@ class Vcluster: public Vcluster_base ...@@ -767,7 +768,7 @@ class Vcluster: public Vcluster_base
* \return true if the function completed successful * \return true if the function completed successful
* *
*/ */
template<typename op, typename T, typename S, int ... prp> bool SSendRecvP_op(openfpm::vector<T> & send, template<typename op, typename T, typename S, template<typename>class layout_base , int ... prp > bool SSendRecvP_op(openfpm::vector<T> & send,
S & recv, S & recv,
openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send,
op & op_param, op & op_param,
...@@ -775,10 +776,10 @@ class Vcluster: public Vcluster_base ...@@ -775,10 +776,10 @@ class Vcluster: public Vcluster_base
openfpm::vector<size_t> & recv_sz, openfpm::vector<size_t> & recv_sz,
size_t opt = NONE) size_t opt = NONE)
{ {
prepare_send_buffer<op,T,S>(send,recv,prc_send,prc_recv,recv_sz,opt); prepare_send_buffer<op,T,S,layout_base>(send,recv,prc_send,prc_recv,recv_sz,opt);
// process the received information // process the received information
process_receive_buffer_with_prp<op,T,S,prp...>(recv,NULL,NULL,op_param); process_receive_buffer_with_prp<op,T,S,layout_base,prp...>(recv,NULL,NULL,op_param);
return true; return true;
} }
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#include "memory/BHeapMemory.hpp" #include "memory/BHeapMemory.hpp"
#include "Packer_Unpacker/has_max_prop.hpp" #include "Packer_Unpacker/has_max_prop.hpp"
template<bool result, typename T, typename S> template<bool result, typename T, typename S, template<typename> class layout_base>
struct unpack_selector_with_prp struct unpack_selector_with_prp
{ {
template<typename op, int ... prp> static void call_unpack(S & recv, openfpm::vector<BHeapMemory> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param) template<typename op, int ... prp> static void call_unpack(S & recv, openfpm::vector<BHeapMemory> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param)
...@@ -33,7 +33,7 @@ struct unpack_selector_with_prp ...@@ -33,7 +33,7 @@ struct unpack_selector_with_prp
size_t recv_size_old = recv.size(); size_t recv_size_old = recv.size();
// Merge the information // Merge the information
op_param.template execute<true,T,decltype(recv),decltype(unp),prp...>(recv,unp,i); op_param.template execute<true,T,decltype(recv),decltype(unp),layout_base,prp...>(recv,unp,i);
size_t recv_size_new = recv.size(); size_t recv_size_new = recv.size();
...@@ -48,43 +48,174 @@ struct unpack_selector_with_prp ...@@ -48,43 +48,174 @@ struct unpack_selector_with_prp
} }
}; };
// template<typename op, typename Vt, typename S, template<typename> class layout_base, typename v_mpl>
template<typename T, typename S> struct unpack_each_prop_buffer
struct unpack_selector_with_prp<true,T,S>
{ {
template<typename op, unsigned int ... prp> static void call_unpack(S & recv, openfpm::vector<BHeapMemory> & recv_buf, openfpm::vector<size_t> * sz, openfpm::vector<size_t> * sz_byte, op & op_param) S & recv;
openfpm::vector<BHeapMemory> & recv_buf;
size_t i;
op & op_param;
openfpm::vector<size_t> * sz;
openfpm::vector<size_t> * sz_byte;
/*! \brief constructor
*
* \param v set of pointer buffers to set
*
*/
inline unpack_each_prop_buffer(S & recv,
openfpm::vector<BHeapMemory> & recv_buf,
op & op_param,
size_t i,
openfpm::vector<size_t> * sz,
openfpm::vector<size_t> * sz_byte)
:recv(recv),recv_buf(recv_buf),op_param(op_param),i(i),sz(sz),sz_byte(sz_byte)
{};
//! It call the copy function for each property
template<typename T>
inline void operator()(T& t) const
{ {
// here we get the the type of the property at position T::value
typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type;
// here we get the the type of the property at position T::value
typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num;
// calculate the number of received elements
size_t n_ele = recv_buf.get(i).size() / sizeof(prp_type);
// add the received particles to the vector
PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
// create vector representation to a piece of memory already allocated
openfpm::vector<typename Vt::value_type,PtrMemory,typename layout_base<typename Vt::value_type>::type,layout_base,openfpm::grow_policy_identity> v2;
v2.template setMemory<prp_num::value>(*ptr1);
// resize with the number of elements
v2.resize(n_ele);
// Merge the information
size_t recv_size_old = recv.size();
op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i);
size_t recv_size_new = recv.size();
if (sz_byte != NULL) if (sz_byte != NULL)
sz_byte->resize(recv_buf.size()); sz_byte->get(i) = recv_buf.get(i).size();
if (sz != NULL)
sz->get(i) = recv_size_new - recv_size_old;
}
};
for (size_t i = 0 ; i < recv_buf.size() ; i++) template<bool inte_or_lin,typename T, typename S, template<typename> class layout_base>
{ struct unpack_selector_with_prp_lin
// calculate the number of received elements {
size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type); template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv,
openfpm::vector<BHeapMemory> & recv_buf,
openfpm::vector<size_t> * sz,
openfpm::vector<size_t> * sz_byte,
op & op_param,
size_t i)
{
// calculate the number of received elements
size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type);
// add the received particles to the vector // add the received particles to the vector
PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
// create vector representation to a piece of memory already allocated // create vector representation to a piece of memory already allocated
openfpm::vector<typename T::value_type,PtrMemory,typename memory_traits_lin<typename T::value_type>::type, memory_traits_lin,openfpm::grow_policy_identity> v2; openfpm::vector<typename T::value_type,PtrMemory,typename layout_base<typename T::value_type>::type,layout_base,openfpm::grow_policy_identity> v2;
v2.setMemory(*ptr1); v2.setMemory(*ptr1);
// resize with the number of elements // resize with the number of elements
v2.resize(n_ele); v2.resize(n_ele);
// Merge the information // Merge the information
size_t recv_size_old = recv.size(); size_t recv_size_old = recv.size();
op_param.template execute<false,T,decltype(recv),decltype(v2),prp...>(recv,v2,i); op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i);
size_t recv_size_new = recv.size(); size_t recv_size_new = recv.size();
if (sz_byte != NULL) if (sz_byte != NULL)
sz_byte->get(i) = recv_buf.get(i).size(); sz_byte->get(i) = recv_buf.get(i).size();
if (sz != NULL) if (sz != NULL)
sz->get(i) = recv_size_new - recv_size_old; sz->get(i) = recv_size_new - recv_size_old;
return sizeof...(prp);
}
};
template<typename T, typename S, template<typename> class layout_base>
struct unpack_selector_with_prp_lin<true,T,S,layout_base>
{
template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv,
openfpm::vector<BHeapMemory> & recv_buf,
openfpm::vector<size_t> * sz,
openfpm::vector<size_t> * sz_byte,
op & op_param,
size_t i)
{
// calculate the number of received elements
size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type);
// add the received particles to the vector
PtrMemory * ptr1 = new PtrMemory(recv_buf