diff --git a/src/VCluster.hpp b/src/VCluster.hpp index a021e9b261dbbe7ee6ce74534a533e0ee086cb7c..bdad9bb258935f6ec12d5daf2d6d19dfc817ac29 100644 --- a/src/VCluster.hpp +++ b/src/VCluster.hpp @@ -18,6 +18,7 @@ #endif #include "util/Vcluster_log.hpp" #include "memory/BHeapMemory.hpp" +#include "Packer_Unpacker/has_max_prop.hpp" #ifdef HAVE_PETSC #include diff --git a/src/VCluster_semantic.ipp b/src/VCluster_semantic.ipp index a3d1ae39ae91b07b5cf18be65da5b48cf93215cf..c7ef471c9b8157971f860e427213f42d94da4979 100644 --- a/src/VCluster_semantic.ipp +++ b/src/VCluster_semantic.ipp @@ -9,6 +9,232 @@ private: + // Structures that do an unpack, depending on the existence of max_prop inside 'send' + + // + template + struct unpack_selector + { + template static void call_unpack(S & recv, openfpm::vector & recv_buf, openfpm::vector * sz = NULL) + { +#ifdef DEBUG + std::cout << "Sz.size(): " << sz->size() << std::endl; +#endif + for (size_t i = 0 ; i < recv_buf.size() ; i++) + { +#ifdef DEBUG + std::cout << "Recv_buf.get(i).size(): " << recv_buf.get(i).size() << std::endl; +#endif + T unp; + + ExtPreAlloc & mem = *(new ExtPreAlloc(recv_buf.get(i).size(),recv_buf.get(i))); + mem.incRef(); + + Unpack_stat ps; + + Unpacker::template unpack(mem, unp, ps); + + size_t recv_size_old = recv.size(); + // Merge the information + recv.add(unp); + size_t recv_size_new = recv.size(); + + if (sz != NULL) + sz->get(i) = recv_size_new - recv_size_old; + } + } + }; + + + // + template + struct unpack_selector + { + template static void call_unpack(S & recv, openfpm::vector & recv_buf, openfpm::vector * sz = NULL) + { + for (size_t i = 0 ; i < recv_buf.size() ; i++) + { + + /*ExtPreAlloc & mem = *(new ExtPreAlloc(recv_buf.get(i).size(),recv_buf.get(i))); + mem.incRef(); + + Unpack_stat ps; + + size_t n_ele = 0; + Unpacker::unpack(mem,n_ele,ps);*/ + + // calculate the number of received elements + size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type); + + // add the received particles to the vector + PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); + + // create vector representation to a piece of memory already allocated + openfpm::vector::type, memory_traits_lin,openfpm::grow_policy_identity> v2; + + v2.setMemory(*ptr1); + + // resize with the number of elements + v2.resize(n_ele); + + // Merge the information + + size_t recv_size_old = recv.size(); + // Merge the information + recv.add(v2); + size_t recv_size_new = recv.size(); + + if (sz != NULL) + sz->get(i) = recv_size_new - recv_size_old; + } + } + }; + + + + + + template + struct call_serialize_variadic {}; + + template + struct call_serialize_variadic> + { + template inline static void call_pr(T & send, size_t & tot_size) + { + Packer::template packRequest(send,tot_size); + } + + template inline static void call_pack(ExtPreAlloc & mem, T & send, Pack_stat & sts) + { + Packer::template pack(mem,send,sts); + } + + template inline static void call_unpack(S & recv, openfpm::vector & recv_buf, openfpm::vector * sz = NULL) + { + const bool result = has_pack_gen::value == false && is_vector::value == true; + //const bool result = has_pack::type::value == false && has_pack_agg::result::value == false && is_vector::value == true; + unpack_selector::template call_unpack(recv, recv_buf, sz); + } + }; + + // Structures that do a pack request, depending on the existence of max_prop inside 'send' + + //There is max_prop inside + template + struct pack_unpack_cond + { + static void packingRequest(T & send, size_t & tot_size, openfpm::vector & sz) + { + typedef typename ::generate_indexes::value>::number, MetaFuncOrd>::result ind_prop_to_pack; + if (has_pack_gen::value == false && is_vector::value == true) + //if (has_pack::type::value == false && has_pack_agg::result::value == false && is_vector::value == true) + { +#ifdef DEBUG + std::cout << "Inside SGather pack request (has prp) (vector case) " << std::endl; +#endif + sz.add(send.size()*sizeof(typename T::value_type)); + } + else + { + call_serialize_variadic::call_pr(send,tot_size); +#ifdef DEBUG + std::cout << "Inside SGather pack request (has prp) (general case) " << std::endl; +#endif + sz.add(tot_size); + } + } + + static void packing(ExtPreAlloc & mem, T & send, Pack_stat & sts, openfpm::vector & send_buf) + { + typedef typename ::generate_indexes::value>::number, MetaFuncOrd>::result ind_prop_to_pack; + if (has_pack_gen::value == false && is_vector::value == true) + //if (has_pack::type::value == false && has_pack_agg::result::value == false && is_vector::value == true) + { +#ifdef DEBUG + std::cout << "Inside SGather pack (has prp) (vector case) " << std::endl; +#endif + //std::cout << demangle(typeid(T).name()) << std::endl; + send_buf.add(send.getPointer()); + } + else + { +#ifdef DEBUG + std::cout << "Inside SGather pack (has prp) (general case) " << std::endl; +#endif + send_buf.add(mem.getPointerEnd()); + call_serialize_variadic::call_pack(mem,send,sts); + } + } + + static void unpacking(S & recv, openfpm::vector & recv_buf, openfpm::vector * sz = NULL) + { + typedef typename ::generate_indexes::value>::number, MetaFuncOrd>::result ind_prop_to_pack; + call_serialize_variadic::template call_unpack(recv, recv_buf, sz); + } + }; + + + //There is no max_prop inside + template + struct pack_unpack_cond + { + static void packingRequest(T & send, size_t & tot_size, openfpm::vector & sz) + { + + if (has_pack::type::value == false && is_vector::value == true) + { +#ifdef DEBUG + std::cout << "Inside SGather pack request (no prp) (vector case) " << std::endl; +#endif + sz.add(send.size()*sizeof(typename T::value_type)); + } + + else + { + Packer::packRequest(send,tot_size); + +#ifdef DEBUG + std::cout << "Tot_size: " << tot_size << std::endl; +#endif + sz.add(tot_size); + } + } + + static void packing(ExtPreAlloc & mem, T & send, Pack_stat & sts, openfpm::vector & send_buf) + { + + if (has_pack::type::value == false && is_vector::value == true) + { +#ifdef DEBUG + std::cout << "Inside SGather pack (no prp) (vector case)" << std::endl; +#endif + send_buf.add(send.getPointer()); + } + + else + { +#ifdef DEBUG + std::cout << "Inside SGather pack (no prp) (genaral case) " << std::endl; +#endif + send_buf.add(mem.getPointerEnd()); + Packer::pack(mem,send,sts); + } + } + + static void unpacking(S & recv, openfpm::vector & recv_buf, openfpm::vector * sz = NULL) + { +#ifdef DEBUG + std::cout << "Inside SGather unpack (no prp) " << std::endl; +#endif + + const bool result = has_pack::type::value == false && is_vector::value == true; + + unpack_selector::template call_unpack<>(recv, recv_buf, sz); + } + }; + + /*! \brief Reset the receive buffer * * @@ -88,29 +314,7 @@ template void process_receive_buffer(S & recv, openfpm:: if (sz != NULL) sz->resize(recv_buf.size()); - for (size_t i = 0 ; i < recv_buf.size() ; i++) - { - // for each received buffer create a memory reppresentation - // calculate the number of received elements - size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type); - - // add the received particles to the vector - PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); - - // create vector representation to a piece of memory already allocated - openfpm::vector::type, memory_traits_lin,openfpm::grow_policy_identity> v2; - - v2.setMemory(*ptr1); - - // resize with the number of elements - v2.resize(n_ele); - - // Merge the information - recv.add(v2); - - if (sz != NULL) - sz->get(i) = v2.size(); - } + pack_unpack_cond::value>::value, T, S>::unpacking(recv, recv_buf, sz); } public: @@ -150,6 +354,10 @@ template bool SGather(T & send, S & recv,size_t root) return SGather(send,recv,prc,sz,root); } +template struct MetaFuncOrd { + enum { value = index }; +}; + /*! \brief Semantic Gather, gather the data from all processors into one node * * Semantic communication differ from the normal one. They in general @@ -187,6 +395,9 @@ template bool SGather(T & send, S & recv, openfpm::vecto // If we are on master collect the information if (getProcessUnitID() == root) { +#ifdef DEBUG + std::cout << "Inside root " << root << std::endl; +#endif // send buffer (master does not send anything) so send req and send_buf // remain buffer with size 0 openfpm::vector send_req; @@ -197,10 +408,6 @@ template bool SGather(T & send, S & recv, openfpm::vecto // Send and recv multiple messages sendrecvMultipleMessagesNBX(send_req.size(),NULL,NULL,NULL,msg_alloc,&bi); - // Convert the received byte into number of elements - for (size_t i = 0 ; i < sz.size() ; i++) - sz.get(i) /= sizeof(typename T::value_type); - // process the received information process_receive_buffer(recv,&sz); @@ -210,14 +417,34 @@ template bool SGather(T & send, S & recv, openfpm::vecto } else { +#ifdef DEBUG + std::cout << "Inside slave " << getProcessUnitID() << std::endl; +#endif // send buffer (master does not send anything) so send req and send_buf // remain buffer with size 0 openfpm::vector send_prc; send_prc.add(root); - openfpm::vector send_buf; - send_buf.add(send.getPointer()); + openfpm::vector sz; - sz.add(send.size()*sizeof(typename T::value_type)); + + openfpm::vector send_buf; + + //Pack requesting + + size_t tot_size = 0; + + pack_unpack_cond::value>::value, T, S>::packingRequest(send, tot_size, sz); + + HeapMemory pmem; + + ExtPreAlloc & mem = *(new ExtPreAlloc(tot_size,pmem)); + mem.incRef(); + + //Packing + + Pack_stat sts; + + pack_unpack_cond::value>::value, T, S>::packing(mem, send, sts, send_buf); // receive information base_info bi(NULL,prc,sz); @@ -348,12 +575,31 @@ template bool SSendRecv(openfpm::vector & send, S & r openfpm::vector send_buf; openfpm::vector sz_byte; - sz_byte.resize(send.size()); - + + size_t tot_size = 0; + + for (size_t i = 0; i < send.size() ; i++) + { + size_t req = 0; + + //Pack requesting + pack_unpack_cond::value>::value, T, S>::packingRequest(send.get(i), req, sz_byte); + tot_size += req; + } + + HeapMemory pmem; + + ExtPreAlloc & mem = *(new ExtPreAlloc(tot_size,pmem)); + mem.incRef(); + for (size_t i = 0; i < send.size() ; i++) { - send_buf.add((char *)send.get(i).getPointer()); - sz_byte.get(i) = send.get(i).size() * sizeof(typename T::value_type); + //Packing + + Pack_stat sts; + + pack_unpack_cond::value>::value, T, S>::packing(mem, send.get(i), sts, send_buf); + } // receive information diff --git a/src/VCluster_semantic_unit_tests.hpp b/src/VCluster_semantic_unit_tests.hpp index 6ecd750b202b9947aad765daa079acc8615ded02..87fc8309f1e7c28f8b36b2f8236509f7124dcf43 100644 --- a/src/VCluster_semantic_unit_tests.hpp +++ b/src/VCluster_semantic_unit_tests.hpp @@ -1,364 +1,1378 @@ -/* - * VCluster_semantic_unit_test.hpp - * - * Created on: Feb 8, 2016 - * Author: i-bird - */ - -#ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ -#define OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ - -struct Aexample -{ - size_t a; - float b; - double c; -}; - - -BOOST_AUTO_TEST_SUITE( VCluster_semantic_test ) - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - openfpm::vector v1; - v1.resize(vcl.getProcessUnitID()); - - for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) - v1.get(i) = 5; - - openfpm::vector v2; - - vcl.SGather(v1,v2,(i%vcl.getProcessingUnits())); - - if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) - { - size_t n = vcl.getProcessingUnits(); - BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2); - - bool is_five = true; - for (size_t i = 0 ; i < v2.size() ; i++) - is_five &= (v2.get(i) == 5); - - BOOST_REQUIRE_EQUAL(is_five,true); - } - } -} - - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_gather) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - openfpm::vector v1; - v1.resize(vcl.getProcessUnitID()); - - for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) - { - v1.get(i).a = 5; - v1.get(i).b = 10.0; - v1.get(i).c = 11.0; - } - - openfpm::vector v2; - - vcl.SGather(v1,v2,(i%vcl.getProcessingUnits())); - - if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) - { - size_t n = vcl.getProcessingUnits(); - BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2); - - bool is_correct = true; - for (size_t i = 0 ; i < v2.size() ; i++) - { - is_correct &= (v2.get(i).a == 5); - is_correct &= (v2.get(i).b == 10.0); - is_correct &= (v2.get(i).c == 11.0); - } - - BOOST_REQUIRE_EQUAL(is_correct,true); - } - } -} - -#define SSCATTER_MAX 7 - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_scatter) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; - size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; - nr = ((nr-1) * nr) / 2; - - size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; - - openfpm::vector v1; - v1.resize(n_elements); - - for(size_t i = 0 ; i < n_elements ; i++) - v1.get(i) = 5; - - openfpm::vector v2; - - openfpm::vector prc; - openfpm::vector sz; - - // Scatter pattern - for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++) - { - sz.add(i % SSCATTER_MAX); - prc.add(i); - } - - vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits())); - - BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX); - - bool is_five = true; - for (size_t i = 0 ; i < v2.size() ; i++) - is_five &= (v2.get(i) == 5); - - BOOST_REQUIRE_EQUAL(is_five,true); - } -} - - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_scatter) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; - size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; - nr = ((nr-1) * nr) / 2; - - size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; - - openfpm::vector v1; - v1.resize(n_elements); - - for(size_t i = 0 ; i < n_elements ; i++) - v1.get(i) = 5; - - openfpm::vector v2; - - openfpm::vector prc; - openfpm::vector sz; - - // Scatter pattern - for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++) - { - sz.add(i % SSCATTER_MAX); - prc.add(i); - } - - vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits())); - - if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) - { - BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX); - - bool is_five = true; - for (size_t i = 0 ; i < v2.size() ; i++) - is_five &= (v2.get(i) == 5); - - BOOST_REQUIRE_EQUAL(is_five,true); - } - } -} - - - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - openfpm::vector prc_recv2; - openfpm::vector prc_recv3; - openfpm::vector prc_send; - openfpm::vector sz_recv2; - openfpm::vector sz_recv3; - openfpm::vector> v1; - openfpm::vector v2; - openfpm::vector> v3; - - v1.resize(vcl.getProcessingUnits()); - - size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; - size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; - nr = ((nr-1) * nr) / 2; - - size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; - - for(size_t i = 0 ; i < v1.size() ; i++) - { - for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) - v1.get(i).add(j); - - prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); - } - - vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); - vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); - - BOOST_REQUIRE_EQUAL(v2.size(),n_ele); - size_t nc_check = (vcl.getProcessingUnits() - 1) / SSCATTER_MAX; - BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check); - - bool match = true; - size_t s = 0; - - for (size_t i = 0 ; i < sz_recv2.size() ; i++) - { - for (size_t j = 0 ; j < sz_recv2.get(i) % SSCATTER_MAX ; j++) - { - match &= v2.get(s+j) == j; - } - s += sz_recv2.get(i) % SSCATTER_MAX; - } - - BOOST_REQUIRE_EQUAL(match,true); - - for (size_t i = 0 ; i < sz_recv3.size() ; i++) - { - for (size_t j = 0 ; j < sz_recv3.get(i) % SSCATTER_MAX ; j++) - { - match &= v3.get(i).get(j) == j; - } - } - - BOOST_REQUIRE_EQUAL(match,true); - } -} - - -BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_sendrecv) -{ - for (size_t i = 0 ; i < 100 ; i++) - { - Vcluster & vcl = create_vcluster(); - - if (vcl.getProcessingUnits() >= 32) - return; - - openfpm::vector prc_recv2; - openfpm::vector prc_recv3; - openfpm::vector prc_send; - openfpm::vector sz_recv2; - openfpm::vector sz_recv3; - openfpm::vector>> v1; - openfpm::vector> v2; - openfpm::vector>> v3; - - v1.resize(vcl.getProcessingUnits()); - - size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; - size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; - nr = ((nr-1) * nr) / 2; - - size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; - - for(size_t i = 0 ; i < v1.size() ; i++) - { - for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) - { - Box<3,size_t> b({j,j,j},{j,j,j}); - v1.get(i).add(b); - } - - prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); - } - - vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); - vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); - - BOOST_REQUIRE_EQUAL(v2.size(),n_ele); - size_t nc_check = (vcl.getProcessingUnits() - 1) / SSCATTER_MAX; - BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check); - - bool match = true; - size_t s = 0; - - for (size_t i = 0 ; i < sz_recv2.size() ; i++) - { - for (size_t j = 0 ; j < sz_recv2.get(i) % SSCATTER_MAX ; j++) - { - Box<3,size_t> b({j,j,j},{j,j,j}); - Box<3,size_t> bt = v2.get(s+j); - match &= bt == b; - } - s += sz_recv2.get(i) % SSCATTER_MAX; - } - - BOOST_REQUIRE_EQUAL(match,true); - - for (size_t i = 0 ; i < sz_recv3.size() ; i++) - { - for (size_t j = 0 ; j < sz_recv3.get(i) % SSCATTER_MAX ; j++) - { - Box<3,size_t> b({j,j,j},{j,j,j}); - Box<3,size_t> bt = v3.get(i).get(j); - match &= bt == b; - } - } - - BOOST_REQUIRE_EQUAL(match,true); - } - - // Send and receive 0 and check - - { - Vcluster & vcl = create_vcluster(); - - openfpm::vector prc_recv2; - openfpm::vector prc_send; - openfpm::vector sz_recv2; - openfpm::vector>> v1; - openfpm::vector> v2; - - v1.resize(vcl.getProcessingUnits()); - - - for(size_t i = 0 ; i < v1.size() ; i++) - { - prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); - } - - vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); - - BOOST_REQUIRE_EQUAL(v2.size(),0ul); - BOOST_REQUIRE_EQUAL(prc_recv2.size(),0ul); - BOOST_REQUIRE_EQUAL(sz_recv2.size(),0ul); - } -} - -BOOST_AUTO_TEST_SUITE_END() - -#endif /* OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ */ +/* + * VCluster_semantic_unit_test.hpp + * + * Created on: Feb 8, 2016 + * Author: i-bird + */ + +#ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ +#define OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ + +#include "Grid/grid_util_test.hpp" +#include "data_type/aggregate.hpp" + +struct Aexample +{ + size_t a; + float b; + double c; +}; + + +BOOST_AUTO_TEST_SUITE( VCluster_semantic_test ) + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessUnitID() == 0 && i == 0) + std::cout << "Semantic gather test start" << std::endl; + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector v1; + v1.resize(vcl.getProcessUnitID()); + + for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) + v1.get(i) = 5; + + openfpm::vector v2; + + vcl.SGather(v1,v2,(i%vcl.getProcessingUnits())); + + if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2); + + bool is_five = true; + for (size_t i = 0 ; i < v2.size() ; i++) + is_five &= (v2.get(i) == 5); + + BOOST_REQUIRE_EQUAL(is_five,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_2) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector v1; + v1.resize(vcl.getProcessUnitID()); + + for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) + v1.get(i) = 5; + + openfpm::vector> v2; + + vcl.SGather(v1,v2,0); + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n); + + bool is_five = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + for (size_t j = 0 ; j < v2.get(i).size() ; j++) + is_five &= (v2.get(i).get(j) == 5); + } + BOOST_REQUIRE_EQUAL(is_five,true); + + } + + openfpm::vector> v3; + + vcl.SGather(v1,v3,1); + + if (vcl.getProcessUnitID() == 1) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v3.size(),n-1); + + bool is_five = true; + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size() ; j++) + is_five &= (v3.get(i).get(j) == 5); + } + BOOST_REQUIRE_EQUAL(is_five,true); + + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_3) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector, Point_test>> > v1; + + openfpm::vector, Point_test>> v1_int; + aggregate, Point_test> aggr; + openfpm::vector v1_int2; + + v1_int2.add((size_t)7); + v1_int2.add((size_t)7); + + aggr.template get<0>() = 7; + aggr.template get<1>() = v1_int2; + Point_test p; + p.fill(); + aggr.template get<2>() = p; + + v1_int.add(aggr); + v1_int.add(aggr); + v1_int.add(aggr); + + v1.add(v1_int); + v1.add(v1_int); + v1.add(v1_int); + v1.add(v1_int); + + openfpm::vector, Point_test>> > v2; + + vcl.SGather(v1,v2,0); + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + + BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n); + + bool is_seven = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + for (size_t j = 0 ; j < v2.get(i).size() ; j++) + { + is_seven &= (v2.get(i).template get<0>(j) == 7); + + for (size_t k = 0; k < v2.get(i).template get<1>(j).size(); k++) + is_seven &= (v2.get(i).template get<1>(j).get(k) == 7); + + Point_test p = v2.get(i).template get<2>(j); + + BOOST_REQUIRE(p.template get<0>() == 1); + BOOST_REQUIRE(p.template get<1>() == 2); + BOOST_REQUIRE(p.template get<2>() == 3); + BOOST_REQUIRE(p.template get<3>() == 4); + + for (size_t l = 0 ; l < 3 ; l++) + p.template get<4>()[l] = 5; + + for (size_t m = 0 ; m < 3 ; m++) + { + for (size_t n = 0 ; n < 3 ; n++) + { + p.template get<5>()[m][n] = 6; + } + } + } + } + BOOST_REQUIRE_EQUAL(is_seven,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_4) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + size_t sz[] = {16,16}; + + grid_cpu<2,Point_test> g1(sz); + g1.setMemory(); + fill_grid<2>(g1); + + openfpm::vector>> v2; + + vcl.SGather(g1,v2,0); + + typedef Point_test p; + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n); + + bool match = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + auto it = v2.get(i).getIterator(); + + while (it.isNext()) + { + grid_key_dx<2> key = it.get(); + + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + + match &= (v2.get(i).template get(key)[0] == g1.template get(key)[0]); + match &= (v2.get(i).template get(key)[1] == g1.template get(key)[1]); + match &= (v2.get(i).template get(key)[2] == g1.template get(key)[2]); + + match &= (v2.get(i).template get(key)[0][0] == g1.template get(key)[0][0]); + match &= (v2.get(i).template get(key)[0][1] == g1.template get(key)[0][1]); + match &= (v2.get(i).template get(key)[0][2] == g1.template get(key)[0][2]); + match &= (v2.get(i).template get(key)[1][0] == g1.template get(key)[1][0]); + match &= (v2.get(i).template get(key)[1][1] == g1.template get(key)[1][1]); + match &= (v2.get(i).template get(key)[1][2] == g1.template get(key)[1][2]); + match &= (v2.get(i).template get(key)[2][0] == g1.template get(key)[2][0]); + match &= (v2.get(i).template get(key)[2][1] == g1.template get(key)[2][1]); + match &= (v2.get(i).template get(key)[2][2] == g1.template get(key)[2][2]); + + ++it; + } + + } + BOOST_REQUIRE_EQUAL(match,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_5) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + size_t sz[] = {16,16}; + grid_cpu<2,Point_test> g1(sz); + g1.setMemory(); + fill_grid<2>(g1); + openfpm::vector>> v1; + + v1.add(g1); + v1.add(g1); + v1.add(g1); + + openfpm::vector>> v2; + + vcl.SGather(v1,v2,1); + + typedef Point_test p; + + if (vcl.getProcessUnitID() == 1) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n); + + bool match = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + auto it = v2.get(i).getIterator(); + + while (it.isNext()) + { + grid_key_dx<2> key = it.get(); + + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + match &= (v2.get(i).template get(key) == g1.template get(key)); + + match &= (v2.get(i).template get(key)[0] == g1.template get(key)[0]); + match &= (v2.get(i).template get(key)[1] == g1.template get(key)[1]); + match &= (v2.get(i).template get(key)[2] == g1.template get(key)[2]); + + match &= (v2.get(i).template get(key)[0][0] == g1.template get(key)[0][0]); + match &= (v2.get(i).template get(key)[0][1] == g1.template get(key)[0][1]); + match &= (v2.get(i).template get(key)[0][2] == g1.template get(key)[0][2]); + match &= (v2.get(i).template get(key)[1][0] == g1.template get(key)[1][0]); + match &= (v2.get(i).template get(key)[1][1] == g1.template get(key)[1][1]); + match &= (v2.get(i).template get(key)[1][2] == g1.template get(key)[1][2]); + match &= (v2.get(i).template get(key)[2][0] == g1.template get(key)[2][0]); + match &= (v2.get(i).template get(key)[2][1] == g1.template get(key)[2][1]); + match &= (v2.get(i).template get(key)[2][2] == g1.template get(key)[2][2]); + + ++it; + } + + } + BOOST_REQUIRE_EQUAL(match,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_6) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector>> v1; + openfpm::vector> v1_int; + openfpm::vector v1_int2; + + v1_int2.add((size_t)7); + v1_int2.add((size_t)7); + + v1_int.add(v1_int2); + v1_int.add(v1_int2); + v1_int.add(v1_int2); + + v1.add(v1_int); + v1.add(v1_int); + v1.add(v1_int); + v1.add(v1_int); + + openfpm::vector>> v2; + + vcl.SGather(v1,v2,0); + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + + BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n); + + bool is_seven = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + for (size_t j = 0 ; j < v2.get(i).size() ; j++) + { + for (size_t k = 0 ; k < v2.get(i).get(j).size() ; k++) + is_seven &= (v2.get(i).get(j).get(k) == 7); + } + } + BOOST_REQUIRE_EQUAL(is_seven,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_7) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector> v1; + + Point_test p1; + p1.fill(); + + v1.resize(vcl.getProcessUnitID()); + + for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) + v1.get(i) = p1; + + openfpm::vector>> v2; + + vcl.SGather(v1,v2,0); + + typedef Point_test p; + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n); + + bool match = true; + + for (size_t i = 0 ; i < v2.size() ; i++) + { + for (size_t j = 0 ; j < v2.get(i).size() ; j++) + { + Point_test p2 = v2.get(i).get(j); + //BOOST_REQUIRE(p2 == p1); + + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + + match &= (p2.template get()[0] == p1.template get()[0]); + match &= (p2.template get()[1] == p1.template get()[1]); + match &= (p2.template get()[2] == p1.template get()[2]); + + match &= (p2.template get()[0][0] == p1.template get()[0][0]); + match &= (p2.template get()[0][1] == p1.template get()[0][1]); + match &= (p2.template get()[0][2] == p1.template get()[0][2]); + match &= (p2.template get()[1][0] == p1.template get()[1][0]); + match &= (p2.template get()[1][1] == p1.template get()[1][1]); + match &= (p2.template get()[1][2] == p1.template get()[1][2]); + match &= (p2.template get()[2][0] == p1.template get()[2][0]); + match &= (p2.template get()[2][1] == p1.template get()[2][1]); + match &= (p2.template get()[2][2] == p1.template get()[2][2]); + } + } + BOOST_REQUIRE_EQUAL(match,true); + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_8) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector> v1; + + Box<3,size_t> bx; + bx.setLow(0, 1); + bx.setLow(1, 2); + bx.setLow(2, 3); + bx.setHigh(0, 4); + bx.setHigh(1, 5); + bx.setHigh(2, 6); + + + v1.resize(vcl.getProcessUnitID()); + + for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) + v1.get(i) = bx; + + openfpm::vector>> v2; + + vcl.SGather(v1,v2,0); + + if (vcl.getProcessUnitID() == 0) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n); + + for (size_t i = 0 ; i < v2.size() ; i++) + { + for (size_t j = 0 ; j < v2.get(i).size() ; j++) + { + Box<3,size_t> b2 = v2.get(i).get(j); + BOOST_REQUIRE(bx == b2); + } + } + } + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_gather) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector v1; + v1.resize(vcl.getProcessUnitID()); + + for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++) + { + v1.get(i).a = 5; + v1.get(i).b = 10.0; + v1.get(i).c = 11.0; + } + + openfpm::vector v2; + + vcl.SGather(v1,v2,(i%vcl.getProcessingUnits())); + + if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) + { + size_t n = vcl.getProcessingUnits(); + BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2); + + bool is_correct = true; + for (size_t i = 0 ; i < v2.size() ; i++) + { + is_correct &= (v2.get(i).a == 5); + is_correct &= (v2.get(i).b == 10.0); + is_correct &= (v2.get(i).c == 11.0); + } + + BOOST_REQUIRE_EQUAL(is_correct,true); + } + if (vcl.getProcessUnitID() == 0 && i == 99) + std::cout << "Semantic gather test stop" << std::endl; + } +} + +#define SSCATTER_MAX 7 + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_scatter) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + openfpm::vector v1; + v1.resize(n_elements); + + for(size_t i = 0 ; i < n_elements ; i++) + v1.get(i) = 5; + + openfpm::vector v2; + + openfpm::vector prc; + openfpm::vector sz; + + // Scatter pattern + for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++) + { + sz.add(i % SSCATTER_MAX); + prc.add(i); + } + + vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits())); + + BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX); + + bool is_five = true; + for (size_t i = 0 ; i < v2.size() ; i++) + is_five &= (v2.get(i) == 5); + + BOOST_REQUIRE_EQUAL(is_five,true); + } +} + + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_scatter) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + openfpm::vector v1; + v1.resize(n_elements); + + for(size_t i = 0 ; i < n_elements ; i++) + v1.get(i) = 5; + + openfpm::vector v2; + + openfpm::vector prc; + openfpm::vector sz; + + // Scatter pattern + for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++) + { + sz.add(i % SSCATTER_MAX); + prc.add(i); + } + + vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits())); + + if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) + { + BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX); + + bool is_five = true; + for (size_t i = 0 ; i < v2.size() ; i++) + is_five &= (v2.get(i) == 5); + + BOOST_REQUIRE_EQUAL(is_five,true); + } + } +} + + + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessUnitID() == 0 && i == 0) + std::cout << "Semantic sendrecv test start" << std::endl; + + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + openfpm::vector> v1; + openfpm::vector v2; + openfpm::vector> v3; + + v1.resize(vcl.getProcessingUnits()); + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + v1.get(i).add(j); + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX; + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check); + + bool match = true; + size_t s = 0; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + match &= v2.get(s+j) == j; + } + s += sz_recv2.get(i); + } + + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size() ; j++) + { + match &= v3.get(i).get(j) == j; + } + } + + BOOST_REQUIRE_EQUAL(match,true); + } +} + + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_sendrecv) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + openfpm::vector>> v1; + openfpm::vector> v2; + openfpm::vector>> v3; + + v1.resize(vcl.getProcessingUnits()); + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + Box<3,size_t> b({j,j,j},{j,j,j}); + v1.get(i).add(b); + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX; + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check); + + bool match = true; + size_t s = 0; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + Box<3,size_t> b({j,j,j},{j,j,j}); + Box<3,size_t> bt = v2.get(s+j); + match &= bt == b; + } + s += sz_recv2.get(i); + } + + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size() ; j++) + { + Box<3,size_t> b({j,j,j},{j,j,j}); + Box<3,size_t> bt = v3.get(i).get(j); + match &= bt == b; + } + } + + BOOST_REQUIRE_EQUAL(match,true); + } + + // Send and receive 0 and check + + { + Vcluster & vcl = create_vcluster(); + + openfpm::vector prc_recv2; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector>> v1; + openfpm::vector> v2; + + v1.resize(vcl.getProcessingUnits()); + + + for(size_t i = 0 ; i < v1.size() ; i++) + { + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + + BOOST_REQUIRE_EQUAL(v2.size(),0ul); + BOOST_REQUIRE_EQUAL(prc_recv2.size(),0ul); + BOOST_REQUIRE_EQUAL(sz_recv2.size(),0ul); + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_2) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + + openfpm::vector>> > v1; + openfpm::vector>> v2; + openfpm::vector>> > v3; + + openfpm::vector>> v1_int; + aggregate> aggr; + openfpm::vector v1_int2; + + v1_int2.add(7); + v1_int2.add(7); + v1_int2.add(7); + + aggr.template get<0>() = v1_int2; + + v1_int.add(aggr); + v1_int.add(aggr); + v1_int.add(aggr); + + v1.resize(vcl.getProcessingUnits()); + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + v1.get(i).add(aggr); + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()); + + bool match = true; + bool is_seven = true; + size_t s = 0; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + for (size_t k = 0; k < v2.get(s+j).template get<0>().size(); k++) + is_seven &= (v2.get(s+j).template get<0>().get(k) == 7); + } + s += sz_recv2.get(i); + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size(); j++) + { + for (size_t k = 0; k < v3.get(i).template get<0>(j).size(); k++) + is_seven &= (v3.get(i).template get<0>(j).get(k) == 7); + } + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_3) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + + openfpm::vector, Point_test>> > v1; + openfpm::vector, Point_test>> v2; + openfpm::vector, Point_test>> > v3; + + openfpm::vector, Point_test>> v1_int; + aggregate, Point_test> aggr; + openfpm::vector v1_int2; + + v1_int2.add((size_t)7); + v1_int2.add((size_t)7); + + aggr.template get<0>() = 7; + aggr.template get<1>() = v1_int2; + + typedef Point_test p; + p p1; + p1.fill(); + aggr.template get<2>() = p1; + + v1_int.add(aggr); + v1_int.add(aggr); + v1_int.add(aggr); + + v1.resize(vcl.getProcessingUnits()); + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + v1.get(i).add(aggr); + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()); + + bool match = true; + bool is_seven = true; + size_t s = 0; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + is_seven &= (v2.get(s+j).template get<0>() == 7); + + for (size_t k = 0; k < v2.get(s+j).template get<1>().size(); k++) + is_seven &= (v2.get(s+j).template get<1>().get(k) == 7); + + Point_test p2 = v2.get(s+j).template get<2>(); + + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + + match &= (p2.template get()[0] == p1.template get()[0]); + match &= (p2.template get()[1] == p1.template get()[1]); + match &= (p2.template get()[2] == p1.template get()[2]); + + match &= (p2.template get()[0][0] == p1.template get()[0][0]); + match &= (p2.template get()[0][1] == p1.template get()[0][1]); + match &= (p2.template get()[0][2] == p1.template get()[0][2]); + match &= (p2.template get()[1][0] == p1.template get()[1][0]); + match &= (p2.template get()[1][1] == p1.template get()[1][1]); + match &= (p2.template get()[1][2] == p1.template get()[1][2]); + match &= (p2.template get()[2][0] == p1.template get()[2][0]); + match &= (p2.template get()[2][1] == p1.template get()[2][1]); + match &= (p2.template get()[2][2] == p1.template get()[2][2]); + } + s += sz_recv2.get(i); + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size(); j++) + { + is_seven &= (v3.get(i).get(j).template get<0>() == 7); + + for (size_t k = 0; k < v3.get(i).get(j).template get<1>().size(); k++) + is_seven &= (v3.get(i).get(j).template get<1>().get(k) == 7); + + Point_test p2 = v3.get(i).get(j).template get<2>(); + + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + + match &= (p2.template get()[0] == p1.template get()[0]); + match &= (p2.template get()[1] == p1.template get()[1]); + match &= (p2.template get()[2] == p1.template get()[2]); + + match &= (p2.template get()[0][0] == p1.template get()[0][0]); + match &= (p2.template get()[0][1] == p1.template get()[0][1]); + match &= (p2.template get()[0][2] == p1.template get()[0][2]); + match &= (p2.template get()[1][0] == p1.template get()[1][0]); + match &= (p2.template get()[1][1] == p1.template get()[1][1]); + match &= (p2.template get()[1][2] == p1.template get()[1][2]); + match &= (p2.template get()[2][0] == p1.template get()[2][0]); + match &= (p2.template get()[2][1] == p1.template get()[2][1]); + match &= (p2.template get()[2][2] == p1.template get()[2][2]); + } + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_4) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + openfpm::vector>> > v1; + openfpm::vector> > v2; + openfpm::vector>> > v3; + + v1.resize(vcl.getProcessingUnits()); + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + //Prepare an aggregate + aggregate > aggr; + + typedef Point_test p; + + p p1; + p1.fill(); + + aggr.template get<0>() = 7; + aggr.template get<1>() = p1; + + //Fill v1 with aggregates + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + v1.get(i).add(aggr); + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX; + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check); + bool match = true; + bool is_seven = true; + size_t s = 0; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + is_seven &= (v2.get(s+j).template get<0>() == 7); + + Point_test p2 = v2.get(s+j).template get<1>(); + + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + + match &= (p2.template get()[0] == p1.template get()[0]); + match &= (p2.template get()[1] == p1.template get()[1]); + match &= (p2.template get()[2] == p1.template get()[2]); + + match &= (p2.template get()[0][0] == p1.template get()[0][0]); + match &= (p2.template get()[0][1] == p1.template get()[0][1]); + match &= (p2.template get()[0][2] == p1.template get()[0][2]); + match &= (p2.template get()[1][0] == p1.template get()[1][0]); + match &= (p2.template get()[1][1] == p1.template get()[1][1]); + match &= (p2.template get()[1][2] == p1.template get()[1][2]); + match &= (p2.template get()[2][0] == p1.template get()[2][0]); + match &= (p2.template get()[2][1] == p1.template get()[2][1]); + match &= (p2.template get()[2][2] == p1.template get()[2][2]); + } + s += sz_recv2.get(i); + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size() ; j++) + { + is_seven &= (v3.get(i).get(j).template get<0>() == 7); + + Point_test p2 = v3.get(i).get(j).template get<1>(); + + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + match &= (p2.template get() == p1.template get()); + + match &= (p2.template get()[0] == p1.template get()[0]); + match &= (p2.template get()[1] == p1.template get()[1]); + match &= (p2.template get()[2] == p1.template get()[2]); + + match &= (p2.template get()[0][0] == p1.template get()[0][0]); + match &= (p2.template get()[0][1] == p1.template get()[0][1]); + match &= (p2.template get()[0][2] == p1.template get()[0][2]); + match &= (p2.template get()[1][0] == p1.template get()[1][0]); + match &= (p2.template get()[1][1] == p1.template get()[1][1]); + match &= (p2.template get()[1][2] == p1.template get()[1][2]); + match &= (p2.template get()[2][0] == p1.template get()[2][0]); + match &= (p2.template get()[2][1] == p1.template get()[2][1]); + match &= (p2.template get()[2][2] == p1.template get()[2][2]); + } + } + + BOOST_REQUIRE_EQUAL(is_seven,true); + BOOST_REQUIRE_EQUAL(match,true); + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_5) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + + size_t sz[] = {16,16}; + + grid_cpu<2,Point_test> g1(sz); + g1.setMemory(); + fill_grid<2>(g1); + + aggregate>> aggr; + aggr.template get<0>() = g1; + + + openfpm::vector>>> > v1; + openfpm::vector>> > v2; + openfpm::vector>>> > v3; + + v1.resize(vcl.getProcessingUnits()); + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + v1.get(i).add(aggr); + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr; + + vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2); + + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v2.size(),n_ele); + + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()); + + bool match = true; + size_t s = 0; + typedef Point_test p; + + for (size_t i = 0 ; i < sz_recv2.size() ; i++) + { + for (size_t j = 0 ; j < sz_recv2.get(i); j++) + { + grid_cpu<2,Point_test> g2 = v2.get(s+j).template get<0>(); + + auto it = g2.getIterator(); + + while (it.isNext()) + { + grid_key_dx<2> key = it.get(); + + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + + match &= (g2.template get(key)[0] == g1.template get(key)[0]); + match &= (g2.template get(key)[1] == g1.template get(key)[1]); + match &= (g2.template get(key)[2] == g1.template get(key)[2]); + + match &= (g2.template get(key)[0][0] == g1.template get(key)[0][0]); + match &= (g2.template get(key)[0][1] == g1.template get(key)[0][1]); + match &= (g2.template get(key)[0][2] == g1.template get(key)[0][2]); + match &= (g2.template get(key)[1][0] == g1.template get(key)[1][0]); + match &= (g2.template get(key)[1][1] == g1.template get(key)[1][1]); + match &= (g2.template get(key)[1][2] == g1.template get(key)[1][2]); + match &= (g2.template get(key)[2][0] == g1.template get(key)[2][0]); + match &= (g2.template get(key)[2][1] == g1.template get(key)[2][1]); + match &= (g2.template get(key)[2][2] == g1.template get(key)[2][2]); + + ++it; + } + } + s += sz_recv2.get(i); + } + BOOST_REQUIRE_EQUAL(match,true); + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size(); j++) + { + grid_cpu<2,Point_test> g2 = v3.get(i).get(j).template get<0>(); + + auto it = g2.getIterator(); + + while (it.isNext()) + { + grid_key_dx<2> key = it.get(); + + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + + match &= (g2.template get(key)[0] == g1.template get(key)[0]); + match &= (g2.template get(key)[1] == g1.template get(key)[1]); + match &= (g2.template get(key)[2] == g1.template get(key)[2]); + + match &= (g2.template get(key)[0][0] == g1.template get(key)[0][0]); + match &= (g2.template get(key)[0][1] == g1.template get(key)[0][1]); + match &= (g2.template get(key)[0][2] == g1.template get(key)[0][2]); + match &= (g2.template get(key)[1][0] == g1.template get(key)[1][0]); + match &= (g2.template get(key)[1][1] == g1.template get(key)[1][1]); + match &= (g2.template get(key)[1][2] == g1.template get(key)[1][2]); + match &= (g2.template get(key)[2][0] == g1.template get(key)[2][0]); + match &= (g2.template get(key)[2][1] == g1.template get(key)[2][1]); + match &= (g2.template get(key)[2][2] == g1.template get(key)[2][2]); + + ++it; + } + } + } + BOOST_REQUIRE_EQUAL(match,true); + } +} + +BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6) +{ + for (size_t i = 0 ; i < 100 ; i++) + { + Vcluster & vcl = create_vcluster(); + + if (vcl.getProcessingUnits() >= 32) + return; + + openfpm::vector prc_recv2; + openfpm::vector prc_recv3; + openfpm::vector prc_send; + openfpm::vector sz_recv2; + openfpm::vector sz_recv3; + + size_t sz[] = {8,10}; + + grid_cpu<2,Point_test> g1(sz); + g1.setMemory(); + fill_grid<2>(g1); + + openfpm::vector>> v1; + openfpm::vector>> v3; + + v1.resize(vcl.getProcessingUnits()); + + for(size_t i = 0 ; i < v1.size() ; i++) + { + for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++) + { + v1.get(i) = g1; + } + + prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits()); + } + + size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX; + size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX; + nr = ((nr-1) * nr) / 2; + + vcl.SSendRecv(v1,v3,prc_send,prc_recv3,sz_recv3); + + BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()); + + bool match = true; + typedef Point_test p; + + for (size_t i = 0 ; i < v3.size() ; i++) + { + for (size_t j = 0 ; j < v3.get(i).size(); j++) + { + grid_cpu<2,Point_test> g2 = v3.get(i); + + auto it = g2.getIterator(); + + while (it.isNext()) + { + grid_key_dx<2> key = it.get(); + + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + match &= (g2.template get(key) == g1.template get(key)); + + match &= (g2.template get(key)[0] == g1.template get(key)[0]); + match &= (g2.template get(key)[1] == g1.template get(key)[1]); + match &= (g2.template get(key)[2] == g1.template get(key)[2]); + + match &= (g2.template get(key)[0][0] == g1.template get(key)[0][0]); + match &= (g2.template get(key)[0][1] == g1.template get(key)[0][1]); + match &= (g2.template get(key)[0][2] == g1.template get(key)[0][2]); + match &= (g2.template get(key)[1][0] == g1.template get(key)[1][0]); + match &= (g2.template get(key)[1][1] == g1.template get(key)[1][1]); + match &= (g2.template get(key)[1][2] == g1.template get(key)[1][2]); + match &= (g2.template get(key)[2][0] == g1.template get(key)[2][0]); + match &= (g2.template get(key)[2][1] == g1.template get(key)[2][1]); + match &= (g2.template get(key)[2][2] == g1.template get(key)[2][2]); + + ++it; + } + } + } + BOOST_REQUIRE_EQUAL(match,true); + + if (vcl.getProcessUnitID() == 0 && i == 99) + std::cout << "Semantic sendrecv test start" << std::endl; + } +} + +BOOST_AUTO_TEST_SUITE_END() + +#endif /* OPENFPM_VCLUSTER_SRC_VCLUSTER_SEMANTIC_UNIT_TESTS_HPP_ */ +>>>>>>> origin/gather_with_packer