Commit c20c6a40 authored by Pietro Incardona

Adapting for PETSC

parent 88b8e992
......@@ -19,6 +19,7 @@ m4_ifdef([AX_BOOST_IOSTREAMS],,[m4_include([m4/ax_boost_iostreams.m4])])
m4_ifdef([AX_BOOST_PROGRAM_OPTIONS],,[m4_include([m4/ax_boost_program_options.m4])])
m4_ifdef([AX_BOOST_UNIT_TEST_FRAMEWORK],,[m4_include([m4/ax_boost_unit_test_framework.m4])])
case $host_os in
*cygwin*)
# Do something specific for cygwin
......@@ -101,6 +102,7 @@ else
NVCCFLAGS+="$NVCCFLAGS -O3 "
fi
####### include openfpm_devices include path
INCLUDES_PATH+=" -I/usr/local/include -I. -Iconfig -I../../openfpm_devices/src -I../../openfpm_data/src"
......
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS)
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS)
noinst_PROGRAMS = vcluster
vcluster_SOURCES = main.cpp VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp
vcluster_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS)
vcluster_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS)
vcluster_CFLAGS = $(CUDA_CFLAGS)
vcluster_LDADD = $(LINKLIBS)
......
......@@ -19,6 +19,10 @@
#include "util/Vcluster_log.hpp"
#include "memory/BHeapMemory.hpp"
#ifdef HAVE_PETSC
#include <petscvec.h>
#endif
#define MSG_LENGTH 1024
#define MSG_SEND_RECV 1025
#define SEND_SPARSE 4096
......@@ -1013,6 +1017,12 @@ static inline Vcluster & create_vcluster()
*/
static inline void openfpm_init(int *argc, char ***argv)
{
#if defined(HAVE_PETSC)
	// PETSc is initialized only in builds that define HAVE_PETSC,
	// and always before the global Vcluster initialization below
	PetscInitialize(argc, argv, NULL, NULL);
#endif

	// Forward the program arguments to init_global_v_cluster_private
	init_global_v_cluster_private(argc, argv);
}
......@@ -1023,6 +1033,12 @@ static inline void openfpm_init(int *argc, char ***argv)
*/
static inline void openfpm_finalize()
{
#if defined(HAVE_PETSC)
	// PETSc is finalized only in builds that define HAVE_PETSC,
	// and always before the global Vcluster is deleted below
	PetscFinalize();
#endif

	// Release the global Vcluster through delete_global_v_cluster_private
	delete_global_v_cluster_private();
}
......
......@@ -78,8 +78,11 @@ static void * msg_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i
* \param recv receive object
*
*/
template<typename T, typename S> void process_receive_buffer(S & recv)
template<typename T, typename S> void process_receive_buffer(S & recv, openfpm::vector<size_t> * sz = NULL)
{
if (sz != NULL)
sz->resize(recv_buf.size());
for (size_t i = 0 ; i < recv_buf.size() ; i++)
{
// for each received buffer create a memory representation
......@@ -99,6 +102,9 @@ template<typename T, typename S> void process_receive_buffer(S & recv)
// Merge the information
recv.add(v2);
if (sz != NULL)
sz->get(i) = v2.size();
}
}
......@@ -191,7 +197,7 @@ template<typename T, typename S> bool SGather(T & send, S & recv, openfpm::vecto
sz.get(i) /= sizeof(typename T::value_type);
// process the received information
process_receive_buffer<T,S>(recv);
process_receive_buffer<T,S>(recv,&sz);
recv.add(send);
prc.add(root);
......@@ -275,7 +281,7 @@ template<typename T, typename S> bool SScatter(T & send, S & recv, openfpm::vect
sendrecvMultipleMessagesNBX(prc.size(),(size_t *)sz_byte.getPointer(),(size_t *)prc.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi);
// process the received information
process_receive_buffer<T,S>(recv);
process_receive_buffer<T,S>(recv,NULL);
}
else
{
......@@ -288,10 +294,71 @@ template<typename T, typename S> bool SScatter(T & send, S & recv, openfpm::vect
// Send and recv multiple messages
sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi);
process_receive_buffer<T,S>(recv);
process_receive_buffer<T,S>(recv,NULL);
}
return true;
}
/*! \brief Semantic Send and receive, send the data to processors and receive from the other processors
 *
 * Semantic communications differ from the normal ones. In general they
 * follow this model.
 *
 * SSendRecv(T,S,...,op=add);
 *
 * "SendRecv" indicates the communication pattern, or how the information flows.
 * T is the object to send, S is the object that will receive the data.
 * In order to work S must implement the interface S.add(T).
 *
 * ### Example scatter a vector of structures, to other processors
 * \snippet VCluster_semantic_unit_tests.hpp Scatter the data from master
 *
 * \tparam T type of sending object
 * \tparam S type of receiving object
 *
 * \param send vector of objects to send, send.get(i) is sent to processor prc_send.get(i)
 * \param recv object that receives the data, every received buffer is merged into it with recv.add()
 * \param prc_send destination processor of each sending buffer, it must have the same size as send
 * \param prc_recv vector handed to the receive machinery together with the receive buffers
 *        (presumably filled with the processors the data came from, to be confirmed against base_info/msg_alloc)
 * \param sz_recv on return it contains, for each received buffer, the number of elements received
 *
 * \return true if the function completed successfully
 *
 */
template<typename T, typename S> bool SSendRecv(openfpm::vector<T> & send, S & recv, openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_recv, openfpm::vector<size_t> & sz_recv)
{
	// Reset the receive buffer
	reset_recv_buf();

#ifdef SE_CLASS1
	// Only a diagnostic: the communication below is still attempted, so that
	// the behavior of the call on this processor does not change
	if (send.size() != prc_send.size())
		std::cerr << __FILE__ << ":" << __LINE__ << " Error, the number of processor involved \"prc_send.size()\" must match the number of sending buffers \"send.size()\" " << std::endl;
#endif

	// Prepare the sending buffer: one pointer and one size in bytes for each object to send
	openfpm::vector<const void *> send_buf;
	openfpm::vector<size_t> sz_byte;
	sz_byte.resize(send.size());

	for (size_t i = 0; i < send.size() ; i++)
	{
		send_buf.add((char *)send.get(i).getPointer());
		// size() counts elements, the communication layer is given bytes
		sz_byte.get(i) = send.get(i).size() * sizeof(typename T::value_type);
	}

	// receive information, handed to msg_alloc as opaque pointer
	base_info bi(&recv_buf,prc_recv,sz_recv);

	// Send and recv multiple messages
	sendrecvMultipleMessagesNBX(prc_send.size(),(size_t *)sz_byte.getPointer(),(size_t *)prc_send.getPointer(),(void **)send_buf.getPointer(),msg_alloc,(void *)&bi);

	// process the received information, sz_recv is overwritten with the number of elements of each received buffer
	process_receive_buffer<T,S>(recv,&sz_recv);

	return true;
}
......@@ -190,6 +190,147 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_scatter)
}
BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv)
{
	// The whole exchange is repeated 100 times
	for (size_t rep = 0 ; rep < 100 ; ++rep)
	{
		Vcluster & vcl = create_vcluster();

		// The test returns immediately when 32 or more processing units are in use
		if (vcl.getProcessingUnits() >= 32)
			return;

		const auto n_proc = vcl.getProcessingUnits();
		const auto my_id = vcl.getProcessUnitID();

		openfpm::vector<size_t> prc_recv2;
		openfpm::vector<size_t> prc_recv3;
		openfpm::vector<size_t> prc_send;
		openfpm::vector<size_t> sz_recv2;
		openfpm::vector<size_t> sz_recv3;

		// v1: one chunk to send per processor
		// v2: flat receiver, v3: receiver that keeps one entry per received chunk
		openfpm::vector<openfpm::vector<size_t>> v1;
		openfpm::vector<size_t> v2;
		openfpm::vector<openfpm::vector<size_t>> v3;

		v1.resize(n_proc);

		// Expected number of elements in v2: chunk p carries p % SSCATTER_MAX elements
		size_t nc = n_proc / SSCATTER_MAX;
		const size_t rem = n_proc - nc * SSCATTER_MAX;
		const size_t nr = ((rem-1) * rem) / 2;
		size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;

		// Chunk p is filled with 0,1,...,(p % SSCATTER_MAX)-1 and addressed to (p + my_id) % n_proc
		for (size_t p = 0 ; p < v1.size() ; ++p)
		{
			const size_t len = p % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
				v1.get(p).add(k);

			prc_send.add((p + my_id) % n_proc);
		}

		vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2);
		vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3);

		BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
		BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc);

		bool match = true;

		// v2 is flat: walk it chunk by chunk, each chunk must count up from 0
		size_t offset = 0;
		for (size_t c = 0 ; c < sz_recv2.size() ; ++c)
		{
			const size_t len = sz_recv2.get(c) % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
				match &= (v2.get(offset + k) == k);

			offset += len;
		}

		BOOST_REQUIRE_EQUAL(match,true);

		// v3 keeps the chunks separated: entry c must count up from 0
		for (size_t c = 0 ; c < sz_recv3.size() ; ++c)
		{
			const size_t len = sz_recv3.get(c) % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
				match &= (v3.get(c).get(k) == k);
		}

		BOOST_REQUIRE_EQUAL(match,true);
	}
}
BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_sendrecv)
{
	// The whole exchange is repeated 100 times
	for (size_t rep = 0 ; rep < 100 ; ++rep)
	{
		Vcluster & vcl = create_vcluster();

		// The test returns immediately when 32 or more processing units are in use
		if (vcl.getProcessingUnits() >= 32)
			return;

		const auto n_proc = vcl.getProcessingUnits();
		const auto my_id = vcl.getProcessUnitID();

		openfpm::vector<size_t> prc_recv2;
		openfpm::vector<size_t> prc_recv3;
		openfpm::vector<size_t> prc_send;
		openfpm::vector<size_t> sz_recv2;
		openfpm::vector<size_t> sz_recv3;

		// v1: one chunk of boxes to send per processor
		// v2: flat receiver, v3: receiver that keeps one entry per received chunk
		openfpm::vector<openfpm::vector<Box<3,size_t>>> v1;
		openfpm::vector<Box<3,size_t>> v2;
		openfpm::vector<openfpm::vector<Box<3,size_t>>> v3;

		v1.resize(n_proc);

		// Expected number of elements in v2: chunk p carries p % SSCATTER_MAX boxes
		size_t nc = n_proc / SSCATTER_MAX;
		const size_t rem = n_proc - nc * SSCATTER_MAX;
		const size_t nr = ((rem-1) * rem) / 2;
		size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;

		// Chunk p holds the boxes ({k,k,k},{k,k,k}) for k < p % SSCATTER_MAX
		// and is addressed to (p + my_id) % n_proc
		for (size_t p = 0 ; p < v1.size() ; ++p)
		{
			const size_t len = p % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
			{
				Box<3,size_t> b({k,k,k},{k,k,k});
				v1.get(p).add(b);
			}

			prc_send.add((p + my_id) % n_proc);
		}

		vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2);
		vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3);

		BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
		BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc);

		bool match = true;

		// v2 is flat: walk it chunk by chunk, box k of each chunk must be ({k,k,k},{k,k,k})
		size_t offset = 0;
		for (size_t c = 0 ; c < sz_recv2.size() ; ++c)
		{
			const size_t len = sz_recv2.get(c) % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
			{
				Box<3,size_t> b({k,k,k},{k,k,k});
				Box<3,size_t> bt = v2.get(offset + k);

				match &= (bt == b);
			}

			offset += len;
		}

		BOOST_REQUIRE_EQUAL(match,true);

		// v3 keeps the chunks separated: box k of entry c must be ({k,k,k},{k,k,k})
		for (size_t c = 0 ; c < sz_recv3.size() ; ++c)
		{
			const size_t len = sz_recv3.get(c) % SSCATTER_MAX;

			for (size_t k = 0 ; k < len ; ++k)
			{
				Box<3,size_t> b({k,k,k},{k,k,k});
				Box<3,size_t> bt = v3.get(c).get(k);

				match &= (bt == b);
			}
		}

		BOOST_REQUIRE_EQUAL(match,true);
	}
}
BOOST_AUTO_TEST_SUITE_END()
#endif /* OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment