diff --git a/openfpm_data b/openfpm_data
index dde50897665d36fc2723aabb0917291c9573c065..a677ffb0def932b598829ea77f86b2d10588f784 160000
--- a/openfpm_data
+++ b/openfpm_data
@@ -1 +1 @@
-Subproject commit dde50897665d36fc2723aabb0917291c9573c065
+Subproject commit a677ffb0def932b598829ea77f86b2d10588f784
diff --git a/src/Decomposition/CartDecomposition.hpp b/src/Decomposition/CartDecomposition.hpp
index f1c0ffba37597121cf22bcd1737ad7088d172eb6..56416ebcb90440dc56e53624d3bc896bc19f2d29 100755
--- a/src/Decomposition/CartDecomposition.hpp
+++ b/src/Decomposition/CartDecomposition.hpp
@@ -119,6 +119,9 @@ protected:
 	//! the set of all local sub-domain as vector
 	openfpm::vector<SpaceBox<dim, T>> sub_domains;
 
+	//! the global set of all sub-domains as vector of 'sub_domains' vectors
+	mutable openfpm::vector<openfpm::vector<SpaceBox<dim, T>>> sub_domains_global;
+
 	//! for each sub-domain, contain the list of the neighborhood processors
 	openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
@@ -1223,6 +1226,16 @@ public:
 		return domain;
 	}
 
+	openfpm::vector<SpaceBox<dim, T>> getSubDomains() const
+	{
+		return sub_domains;
+	}
+
+	openfpm::vector<openfpm::vector<SpaceBox<dim, T>>> & getSubDomainsGlobal()
+	{
+		return sub_domains_global;
+	}
+
 	/*! \brief Check if the particle is local
 	 *
 	 * \warning if the particle id outside the domain the result is unreliable
diff --git a/src/Decomposition/ie_ghost.hpp b/src/Decomposition/ie_ghost.hpp
index 2a60db9f96aff36ce4346d78d5cc2b9ea88a5274..cf74851475b13662fd810a65ae82c7625b20dbcb 100755
--- a/src/Decomposition/ie_ghost.hpp
+++ b/src/Decomposition/ie_ghost.hpp
@@ -38,7 +38,7 @@ class ie_ghost
 	openfpm::vector<p_box<dim,T> > vb_int;
 
 	//! Cell-list that store the geometrical information of the internal ghost boxes
-	CellList<dim,T,FAST,shift<dim,T>> geo_cell;
+	CellList<dim,T,Mem_fast<dim,T>,shift<dim,T>> geo_cell;
 
 	//! shift vectors
 	openfpm::vector<Point<dim,T>> shifts;
@@ -750,9 +750,9 @@ public:
 	 * \return An iterator with the id's of the internal boxes in which the point fall
 	 *
 	 */
-	auto getInternalIDBoxes(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
+	auto getInternalIDBoxes(Point<dim,T> & p) -> decltype(geo_cell.getCellIterator(geo_cell.getCell(p)))
 	{
-		return geo_cell.getIterator(geo_cell.getCell(p));
+		return geo_cell.getCellIterator(geo_cell.getCell(p));
 	}
 
 	/*! \brief if the point fall into the ghost of some near processor it return the processors id's in which
 	 * it fall
 	 *
 	 * \return iterator of the processors id's
 	 *
 	 */
-	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getIterator(geo_cell.getCell(p)))
+	inline auto labelPoint(Point<dim,T> & p) -> decltype(geo_cell.getCellIterator(geo_cell.getCell(p)))
 	{
-		return geo_cell.getIterator(geo_cell.getCell(p));
+		return geo_cell.getCellIterator(geo_cell.getCell(p));
 	}
 
 	/*!
\brief Given a position it return if the position belong to any neighborhood processor ghost @@ -790,7 +790,7 @@ public: // Check with geo-cell if a particle is inside one Cell containing boxes - auto cell_it = geo_cell.getIterator(geo_cell.getCell(p)); + auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p)); // For each element in the cell, check if the point is inside the box // if it is, store the processor id @@ -839,7 +839,7 @@ public: // Check with geo-cell if a particle is inside one Cell containing boxes - auto cell_it = geo_cell.getIterator(geo_cell.getCell(p)); + auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p)); // For each element in the cell, check if the point is inside the box // if it is, store the processor id @@ -883,7 +883,7 @@ public: // Check with geo-cell if a particle is inside one Cell containing boxes - auto cell_it = geo_cell.getIterator(geo_cell.getCell(p)); + auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p)); // For each element in the cell, check if the point is inside the box // if it is, store the processor id @@ -926,7 +926,7 @@ public: // Check with geo-cell if a particle is inside one Cell containing boxes - auto cell_it = geo_cell.getIterator(geo_cell.getCell(p)); + auto cell_it = geo_cell.getCellIterator(geo_cell.getCell(p)); // For each element in the cell, check if the point is inside the box // if it is, store the processor id diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index cfd6791edfb3a9942bf345ba9e33759fba09c3c6..ae3ee199bd6d16dc603c88a1c71c7dea807739c8 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -19,6 +19,8 @@ #include "Packer_Unpacker/Unpacker.hpp" #include "Decomposition/CartDecomposition.hpp" #include "data_type/aggregate.hpp" +#include "hdf5.h" +#include "grid_dist_id_comm.hpp" //! Internal ghost box sent to construct external ghost box into the other processors template<unsigned int dim> @@ -63,7 +65,7 @@ struct Box_fix * */ template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> > -class grid_dist_id +class grid_dist_id : public grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> { //! Domain Box<dim,St> domain; @@ -72,7 +74,10 @@ class grid_dist_id Ghost<dim,St> ghost; //! Local grids - openfpm::vector<device_grid> loc_grid; + mutable openfpm::vector<device_grid> loc_grid; + + //! Old local grids + mutable openfpm::vector<device_grid> loc_grid_old; //! Space Decomposition Decomposition dec; @@ -80,6 +85,12 @@ class grid_dist_id //! Extension of each grid: Domain and ghost + domain openfpm::vector<GBoxes<device_grid::dims>> gdb_ext; + //! Global gdb_ext + mutable openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_global; + + //! Extension of each old grid (old): Domain and ghost + domain + openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_old; + //! Size of the grid on each dimension size_t g_sz[dim]; @@ -1012,6 +1023,121 @@ public: return gdb_ext; } + /*! 
\brief It gathers the information about local grids for all of the processors + * + * + * + */ + void getGlobalGridsInfo(openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global) const + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + v_cl.SGather(gdb_ext,gdb_ext_global,0); + v_cl.execute(); + + size_t size_r; + + if (v_cl.getProcessUnitID() == 0) + { + size_t size = gdb_ext_global.size(); + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + v_cl.send(i,0,&size,sizeof(size_t)); + } + } + else + { + v_cl.recv(0,0,&size_r,sizeof(size_t)); + } + v_cl.execute(); + + gdb_ext_global.resize(size_r); + + + if (v_cl.getProcessUnitID() == 0) + { + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + v_cl.send(i,0,gdb_ext_global); + } + } + else + { + v_cl.recv(0,0,gdb_ext_global); + } + + v_cl.execute(); + } + + /*! \brief It gathers the local grids for all of the processors + * + * + * + */ + void getGlobalGrids(openfpm::vector<openfpm::vector<device_grid>> & loc_grid_global) const + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + v_cl.SGather(loc_grid,loc_grid_global,0); + v_cl.execute(); + + size_t size_r; + + if (v_cl.getProcessUnitID() == 0) + { + size_t size = loc_grid_global.size(); + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + v_cl.send(i,0,&size,sizeof(size_t)); + } + } + else + { + v_cl.recv(0,0,&size_r,sizeof(size_t)); + } + v_cl.execute(); + + loc_grid_global.resize(size_r); + + + if (v_cl.getProcessUnitID() == 0) + { + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + v_cl.send(i,0,loc_grid_global); + } + } + else + { + v_cl.recv(0,0,loc_grid_global); + } + + v_cl.execute(); + } + + /*! \brief It return an iterator that span the full grid domain (each processor span its local domain) + * + * \return the iterator + * + */ + grid_dist_iterator<dim,device_grid,FREE> getOldDomainIterator() const + { +#ifdef SE_CLASS2 + check_valid(this,8); +#endif + + grid_key_dx<dim> stop(ginfo_v.getSize()); + grid_key_dx<dim> one; + one.one(); + stop = stop - one; + + grid_dist_iterator<dim,device_grid,FREE> it(loc_grid_old,gdb_ext_old,stop); + + return it; + } + /*! \brief It return an iterator that span the full grid domain (each processor span its local domain) * * \return the iterator @@ -1556,6 +1682,563 @@ public: } } + /*! 
\brief It move all the grid parts that do not belong to the local processor to the respective processor + * + * + * + * + */ + void map() + { + getGlobalGridsInfo(gdb_ext_global); +/* + std::cout << "Global size: " << gdb_ext_global.size() << std::endl; + + for (size_t i = 0; i < gdb_ext_global.size(); i++) + { + std::cout << "(" << gdb_ext_global.get(i).Dbox.getLow(0) << "; " << gdb_ext_global.get(i).Dbox.getLow(1) << "); (" << gdb_ext_global.get(i).Dbox.getHigh(0) << "; " << gdb_ext_global.get(i).Dbox.getHigh(1) << ")" << std::endl; + std::cout << "I = " << i << ", Origin is (" << gdb_ext_global.get(i).origin.get(0) << "; " << gdb_ext_global.get(i).origin.get(1) << ")" << std::endl; + } + + if (v_cl.getProcessUnitID() == 0) + { + for (size_t i = 0; i < gdb_ext.size(); i++) + { + Box<dim,long int> box = gdb_ext.get(i).Dbox; + box += gdb_ext.get(i).origin; + std::cout << "(" << box.getLow(0) << "; " << box.getLow(1) << "); (" << box.getHigh(0) << "; " << box.getHigh(1) << ")" << std::endl; + } + } + + if (v_cl.getProcessUnitID() == 0) + { + for (size_t i = 0; i < loc_grid_old.size(); i++) + { + Point<dim,St> p1; + Point<dim,St> p2; + for (size_t n = 0; n < dim; n++) + { + p1.get(n) = loc_grid_old.get(i).getGrid().getBox().getLow(n); + p2.get(n) = loc_grid_old.get(i).getGrid().getBox().getHigh(n); + } + + std::cout << "Loc_grid_old: (" << p1.get(0) << "; " << p1.get(1) << "); (" << p2.get(0) << "; " << p2.get(1) << "); " << "Gdb_ext_old: (" << gdb_ext_old.get(i).Dbox.getLow(0) << "; " << gdb_ext_old.get(i).Dbox.getLow(1) << "); (" << gdb_ext_old.get(i).Dbox.getHigh(0) << "; " << gdb_ext_old.get(i).Dbox.getHigh(1) << ")" << std::endl; + } + } +*/ + this->template map_(dec,cd_sm,loc_grid,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global); + } + + inline void save(const std::string & filename) const + { + //std::cout << "Loc_grid.size() before save: " << loc_grid.size() << std::endl; + //std::cout << "Gdb_ext.size() before save: " << gdb_ext.size() << std::endl; + + //Pack_request vector + size_t req = 0; + + //Pack request + Packer<decltype(loc_grid),HeapMemory>::packRequest(loc_grid,req); + Packer<decltype(gdb_ext),HeapMemory>::packRequest(gdb_ext,req); + + //std::cout << "Req: " << req << std::endl; + + // allocate the memory + HeapMemory pmem; + //pmem.allocate(req); + ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(req,pmem)); + mem.incRef(); + + //Packing + + Pack_stat sts; + + Packer<decltype(loc_grid),HeapMemory>::pack(mem,loc_grid,sts); + Packer<decltype(gdb_ext),HeapMemory>::pack(mem,gdb_ext,sts); + + /***************************************************************** + * Create a new file with default creation and access properties.* + * Then create a dataset and write data to it and close the file * + * and dataset. * + *****************************************************************/ + + int mpi_rank = v_cl.getProcessUnitID(); + int mpi_size = v_cl.getProcessingUnits(); + + MPI_Comm comm = v_cl.getMPIComm(); + MPI_Info info = MPI_INFO_NULL; + + // Set up file access property list with parallel I/O access + + hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, comm, info); + + // Create a new file collectively and release property list identifier. 
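	/* Editor's sketch (illustrative only, not part of the patch): with P ranks the
	 * file created below ends up holding two datasets,
	 *
	 *   "metadata"  : int[P]                       metadata[r] = packed-buffer size of rank r
	 *   "grid_dist" : char[sz_0 + ... + sz_{P-1}]  rank r's Packer buffer (loc_grid + gdb_ext)
	 *                                              written at hyperslab offset sz_0 + ... + sz_{r-1}
	 *
	 * load() later reads "metadata", builds the exclusive prefix sums
	 * (metadata_accum) and re-slices "grid_dist" into per-block reads, which is
	 * what allows a restart on a different number of processors.
	 */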
+ hid_t file = H5Fcreate (filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); + H5Pclose(plist_id); + + size_t sz = pmem.size(); + //std::cout << "Pmem.size: " << pmem.size() << std::endl; + openfpm::vector<size_t> sz_others; + v_cl.allGather(sz,sz_others); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < sz_others.size(); i++) + sum += sz_others.get(i); + + //Size for data space in file + hsize_t fdim[1] = {sum}; + + //Size for data space in file + hsize_t fdim2[1] = {(size_t)mpi_size}; + + //Create data space in file + hid_t file_dataspace_id = H5Screate_simple(1, fdim, NULL); + + //Create data space in file + hid_t file_dataspace_id_2 = H5Screate_simple(1, fdim2, NULL); + + //Size for data space in memory + hsize_t mdim[1] = {pmem.size()}; + + //Create data space in memory + hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL); + + //if (mpi_rank == 0) + //std::cout << "Total object size: " << sum << std::endl; + + //Create data set in file + hid_t file_dataset = H5Dcreate (file, "grid_dist", H5T_NATIVE_CHAR, file_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + //Create data set 2 in file + hid_t file_dataset_2 = H5Dcreate (file, "metadata", H5T_NATIVE_INT, file_dataspace_id_2, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + //H5Pclose(plist_id); + H5Sclose(file_dataspace_id); + H5Sclose(file_dataspace_id_2); + + hsize_t block[1] = {pmem.size()}; + + //hsize_t stride[1] = {1}; + + hsize_t count[1] = {1}; + + hsize_t offset[1] = {0}; + + for (int i = 0; i < mpi_rank; i++) + { + if (mpi_rank == 0) + offset[0] = 0; + else + offset[0] += sz_others.get(i); + } + + // std::cout << "MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Block: " << block[0] << std::endl; + + int metadata[mpi_size]; + + for (int i = 0; i < mpi_size; i++) + metadata[i] = sz_others.get(i); + + //Select hyperslab in the file. + file_dataspace_id = H5Dget_space(file_dataset); + H5Sselect_hyperslab(file_dataspace_id, H5S_SELECT_SET, offset, NULL, count, block); + + file_dataspace_id_2 = H5Dget_space(file_dataset_2); + + + //Create property list for collective dataset write. + plist_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + + //Write a data set to a file + H5Dwrite(file_dataset, H5T_NATIVE_CHAR, mem_dataspace_id, file_dataspace_id, plist_id, (const char *)pmem.getPointer()); + + //Write a data set 2 to a file + H5Dwrite(file_dataset_2, H5T_NATIVE_INT, H5S_ALL, file_dataspace_id_2, plist_id, metadata); +/* + for (size_t i = 0; i < gdb_ext.size(); i++) + { + Box<dim,long int> box = gdb_ext.get(i).Dbox; + std::cout << "Dboxes saved: (" << box.getLow(0) << "; " << box.getLow(1) << "); (" << box.getHigh(0) << "; " << box.getHigh(1) << ")" << std::endl; + } + + for (size_t i = 0; i < loc_grid.size(); i++) + { + std::cout << "loc_grids saved: (" << loc_grid.get(i).getGrid().getBox().getLow(0) << "; " << loc_grid.get(i).getGrid().getBox().getLow(1) << "); (" << loc_grid.get(i).getGrid().getBox().getHigh(0) << "; " << loc_grid.get(i).getGrid().getBox().getHigh(1) << ")" << std::endl; + } +*/ + //Close/release resources. 
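	/* Editor's note (illustrative only, not part of the patch): on restart, load()
	 * rebuilds the block boundaries from the "metadata" dataset written above,
	 * roughly as
	 *
	 *   metadata_accum.get(0) = 0;
	 *   for (i = 1; i < mpi_size_old; i++)
	 *       metadata_accum.get(i) = metadata_accum.get(i-1) + metadata_out[i-1];
	 *
	 * i.e. metadata_accum[r] is the byte offset of old-rank r's packed block in
	 * "grid_dist". All HDF5 identifiers opened above are then released below.
	 */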
+ H5Dclose(file_dataset); + H5Sclose(file_dataspace_id); + H5Dclose(file_dataset_2); + H5Sclose(file_dataspace_id_2); + H5Sclose(mem_dataspace_id); + H5Pclose(plist_id); + H5Fclose(file); + } + + void load_block(long int bid, hssize_t mpi_size_old, int * metadata_out, openfpm::vector<size_t> metadata_accum, hid_t plist_id, hid_t dataset_2) + { +/* if (mpi_size >= mpi_size_old) + { + if (mpi_rank >= mpi_size_old) + block[0] = 0; + else + block[0] = {(size_t)metadata_out[mpi_rank]}; + } + else + { + int x = mpi_size_old/mpi_size; + int shift = mpi_rank*x; + for (int i = 0; i < x; i++) + { + //block0.get(mpi_rank).add(metadata_out[shift]); + block[0] += metadata_out[shift]; + shift++; + } + int y = mpi_size_old%mpi_size; + if (mpi_rank < y) + { + block_add[0] += metadata_out[mpi_size*x+mpi_rank]; + //block_add0.get(mpi_rank).add(metadata_out[mpi_size*x+mpi_rank]); + } + }*/ + +// std::cout << "BID: " << bid << std::endl; + + hsize_t offset[1]; + hsize_t block[1]; + + if (bid < mpi_size_old && bid != -1) + { + offset[0] = metadata_accum.get(bid); + block[0] = metadata_out[bid]; + } + else + { + offset[0] = 0; + block[0] = 0; + } + +// std::cout << "Offset: " << offset[0] << "; Block: " << block[0]<< std::endl; +// hsize_t offset_add[1] = {0}; + +/* if (mpi_size >= mpi_size_old) + { + if (mpi_rank >= mpi_size_old) + offset[0] = 0; + else + { + for (int i = 0; i < mpi_rank; i++) + offset[0] += metadata_out[i]; + } + } + else + { + int x = mpi_size_old/mpi_size; + int shift = mpi_rank*x; + + for (int i = 0; i < shift; i++) + { + offset[0] += metadata_out[i]; + //offset0.get(mpi_rank).add(metadata_out[i]); + } + + int y = mpi_size_old%mpi_size; + if (mpi_rank < y) + { + for (int i = 0; i < mpi_size*x + mpi_rank; i++) + { + offset_add[0] += metadata_out[i]; + //offset_add0.get(mpi_rank).add(metadata_out[i]); + } + } + }*/ + + //hsize_t stride[1] = {1}; + hsize_t count[1] = {1}; + + //std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Offset_add: " << offset_add[0] << ", Block: " << block[0] << ", Block_add: " << block_add[0] << std::endl; +/* + std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << std::endl; + for (size_t i = 0; i < offset0.get(mpi_rank).size(); i++) + std::cout << ", Offset: " << offset0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < offset_add0.get(mpi_rank).size(); i++) + std::cout << ", Offset_add: " << offset_add0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < block0.get(mpi_rank).size(); i++) + std::cout << ", Block: " << block0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < block_add0.get(mpi_rank).size(); i++) + std::cout << ", Block_add: " << block_add0.get(mpi_rank).get(i) << std::endl; +*/ + + //Select file dataspace + hid_t file_dataspace_id_2 = H5Dget_space(dataset_2); + + H5Sselect_hyperslab(file_dataspace_id_2, H5S_SELECT_SET, offset, NULL, count, block); + + //Select file dataspace +/* hid_t file_dataspace_id_3 = H5Dget_space(dataset_2); + + H5Sselect_hyperslab(file_dataspace_id_3, H5S_SELECT_SET, offset_add, NULL, count, block_add);*/ + + hsize_t mdim_2[1] = {block[0]}; +// hsize_t mdim_3[1] = {block_add[0]}; + + + //Size for data space in memory + + /*if (mpi_rank >= mpi_size_old) + mdim_2[0] = 0; + else + mdim_2[0] = metadata_out[mpi_rank];*/ + + //Create data space in memory + hid_t mem_dataspace_id_2 = H5Screate_simple(1, mdim_2, NULL); +// hid_t mem_dataspace_id_3 = H5Screate_simple(1, mdim_3, NULL); + + //if (mpi_rank == 0) + /* { + hssize_t 
size2; + + size2 = H5Sget_select_npoints (mem_dataspace_id_2); + printf ("\nLOAD: memspace_id_2 size: %llu\n", size2); + size2 = H5Sget_select_npoints (file_dataspace_id_2); + printf ("LOAD: dataspace_id_2 size: %llu\n", size2); + }*/ +/* + if (mpi_rank == 0) + { + hssize_t size2; + + size2 = H5Sget_select_npoints (mem_dataspace_id_3); + printf ("\nLOAD: memspace_id_3 size: %llu\n", size2); + size2 = H5Sget_select_npoints (file_dataspace_id_3); + printf ("LOAD: dataspace_id_3 size: %llu\n", size2); + } +*/ + size_t sum = 0; + + for (int i = 0; i < mpi_size_old; i++) + { + sum += metadata_out[i]; + } + + // std::cout << "LOAD: sum: " << sum << std::endl; + + // allocate the memory + HeapMemory pmem; +// HeapMemory pmem2; + //pmem.allocate(req); + ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(block[0],pmem)); + mem.incRef(); +// ExtPreAlloc<HeapMemory> & mem2 = *(new ExtPreAlloc<HeapMemory>(block_add[0],pmem2)); +// mem2.incRef(); + + // Read the dataset. + H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_2, file_dataspace_id_2, plist_id, (char *)mem.getPointer()); + + // Read the dataset. +// H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_3, file_dataspace_id_3, plist_id, (char *)mem2.getPointer()); + + mem.allocate(pmem.size()); +// mem2.allocate(pmem2.size()); + // std::cout << "Mem.size(): " << mem.size() << " = " << block[0] << std::endl; + + Unpack_stat ps; + + openfpm::vector<device_grid> loc_grid_old_unp; + openfpm::vector<GBoxes<device_grid::dims>> gdb_ext_old_unp; + + Unpacker<decltype(loc_grid_old),HeapMemory>::unpack(mem,loc_grid_old_unp,ps,1); + Unpacker<decltype(gdb_ext_old),HeapMemory>::unpack(mem,gdb_ext_old_unp,ps,1); +/* + std::cout << "Loc_grid_old.size() before merge: " << loc_grid_old.size() << std::endl; + std::cout << "Gdb_ext_old.size() before merge: " << gdb_ext_old.size() << std::endl; + + std::cout << "Loc_grid_old_unp.size() before merge: " << loc_grid_old_unp.size() << std::endl; + std::cout << "Gdb_ext_old_unp.size() before merge: " << gdb_ext_old_unp.size() << std::endl; +*/ + for (size_t i = 0; i < loc_grid_old_unp.size(); i++) + loc_grid_old.add(loc_grid_old_unp.get(i)); + + for (size_t i = 0; i < gdb_ext_old_unp.size(); i++) + gdb_ext_old.add(gdb_ext_old_unp.get(i)); + +// std::cout << "Loc_grid_old.size() after merge: " << loc_grid_old.size() << std::endl; +// std::cout << "Gdb_ext_old.size() after merge: " << gdb_ext_old.size() << std::endl; +// std::cout << "*********************************" << std::endl; + + mem.decRef(); + delete &mem; + + } + + inline void load(const std::string & filename) + { + MPI_Comm comm = v_cl.getMPIComm(); + MPI_Info info = MPI_INFO_NULL; + + int mpi_rank = v_cl.getProcessUnitID(); + //int mpi_size = v_cl.getProcessingUnits(); + + // Set up file access property list with parallel I/O access + hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, comm, info); + + //Open a file + hid_t file = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, plist_id); + H5Pclose(plist_id); + + //Open dataset + hid_t dataset = H5Dopen (file, "metadata", H5P_DEFAULT); + + //Create property list for collective dataset read + plist_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + + //Select file dataspace + hid_t file_dataspace_id = H5Dget_space(dataset); + + hssize_t mpi_size_old = H5Sget_select_npoints (file_dataspace_id); + + //if (mpi_rank == 0) + //printf ("\nOld MPI size: %llu\n", mpi_size_old); + + //Where to read metadata + int metadata_out[mpi_size_old]; + + for 
(int i = 0; i < mpi_size_old; i++) + { + metadata_out[i] = 0; + } + + //Size for data space in memory + hsize_t mdim[1] = {(size_t)mpi_size_old}; + + //Create data space in memory + hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL); + +/* + if (mpi_rank == 0) + { + hssize_t size; + + size = H5Sget_select_npoints (mem_dataspace_id); + printf ("\nmemspace_id size: %llu\n", size); + size = H5Sget_select_npoints (file_dataspace_id); + printf ("dataspace_id size: %llu\n", size); + } +*/ + // Read the dataset. + H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace_id, file_dataspace_id, plist_id, metadata_out); +/* + if (mpi_rank == 0) + { + std::cout << "Metadata_out[]: "; + for (int i = 0; i < mpi_size_old; i++) + { + std::cout << metadata_out[i] << " "; + } + std::cout << " " << std::endl; + } +*/ + + openfpm::vector<size_t> metadata_accum; + metadata_accum.resize(mpi_size_old); + + metadata_accum.get(0) = 0; + for (int i = 1 ; i < mpi_size_old ; i++) + metadata_accum.get(i) = metadata_accum.get(i-1) + metadata_out[i-1]; + + //Open dataset + hid_t dataset_2 = H5Dopen (file, "grid_dist", H5P_DEFAULT); + + //Create property list for collective dataset read + plist_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + + ///////////////////////////////////// + + openfpm::vector<size_t> n_block; + n_block.resize(v_cl.getProcessingUnits()); + + + for(size_t i = 0 ; i < n_block.size() ; i++) + n_block.get(i) = mpi_size_old / v_cl.getProcessingUnits(); + + size_t rest_block = mpi_size_old % v_cl.getProcessingUnits(); + + // std::cout << "MPI size old: " << mpi_size_old << std::endl; + //std::cout << "MPI size: " << v_cl.getProcessingUnits() << std::endl; + + + // std::cout << "Rest block: " << rest_block << std::endl; + + size_t max_block; + + if (rest_block != 0) + max_block = n_block.get(0) + 1; + else + max_block = n_block.get(0); + + //for(size_t i = 0 ; i < n_block.size() ; i++) + for(size_t i = 0 ; i < rest_block ; i++) + n_block.get(i) += 1; + + + //for(size_t i = 0 ; i < n_block.size() ; i++) + //std::cout << "n_block.get(i): " << n_block.get(i) << std::endl; + + size_t start_block = 0; + size_t stop_block = 0; + + + if (v_cl.getProcessUnitID() != 0) + { + for(size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++) + start_block += n_block.get(i); + } + + stop_block = start_block + n_block.get(v_cl.getProcessUnitID()); + +// std::cout << "ID: " << v_cl.getProcessUnitID() << "; Start block: " << start_block << "; " << "Stop block: " << stop_block << std::endl; + + if (mpi_rank >= mpi_size_old) + load_block(start_block,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2); + else + { + size_t n_bl = 0; + size_t lb = start_block; + for ( ; lb < stop_block ; lb++, n_bl++) + load_block(lb,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2); + + if (n_bl < max_block) + load_block(-1,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2); + } + + //////////////////////////////////// + + //std::cout << "LOAD: sum: " << sum << std::endl; + + // Close the dataset. + H5Dclose(dataset); + H5Dclose(dataset_2); + // Close the file. + H5Fclose(file); + H5Pclose(plist_id); + + // Map the distributed grid + map(); +/* + for (size_t i = 0; i < loc_grid.size(); i++) + { + std::cout << "loc_grids loaded: (" << loc_grid.get(i).getGrid().getBox().getLow(0) << "; " << loc_grid.get(i).getGrid().getBox().getLow(1) << "); (" << loc_grid.get(i).getGrid().getBox().getHigh(0) << "; " << loc_grid.get(i).getGrid().getBox().getHigh(1) << ")" << std::endl; + }*/ + } + //! 
Define friend classes //\cond friend grid_dist_id<dim,St,T,typename Decomposition::extended_type,Memory,device_grid>; diff --git a/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cfb08eee597377cd3b7f9fd82ed8f9ac7f17b3ea --- /dev/null +++ b/src/Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp @@ -0,0 +1,174 @@ +/* + * grid_dist_id_HDF5_chckpnt_restart_test.hpp + * + * Created on: Nov 9, 2016 + * Author: Yaroslav Zaluzhnyi + */ + +#ifndef SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_ +#define SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_ + +#include "Grid/grid_dist_id.hpp" + +BOOST_AUTO_TEST_SUITE( gd_hdf5_chckpnt_rstrt_test ) + +BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_save_test ) +{ + + // Input data + size_t k = 2400; + + float ghost_part = 0.0; + + // Domain + Box<2,float> domain({0.0,0.0},{1.0,1.0}); + + Vcluster & v_cl = create_vcluster(); + + // Skip this test on big scale + if (v_cl.getProcessingUnits() >= 32) + return; + + if (v_cl.getProcessUnitID() == 0) + std::cout << "Saving Distributed 2D Grid..." << std::endl; + + // grid size + size_t sz[2]; + sz[0] = k; + sz[1] = k; + + // Ghost + Ghost<2,float> g(ghost_part); + + // Distributed grid with id decomposition + grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); + + // get the decomposition + auto & dec = g_dist.getDecomposition(); + + // check the consistency of the decomposition + bool val = dec.check_consistency(); + BOOST_REQUIRE_EQUAL(val,true); + + size_t count = 0; + + auto it = g_dist.getDomainIterator(); + + while (it.isNext()) + { + //key + auto key = it.get(); + + auto keyg = g_dist.getGKey(key); + + g_dist.template get<0>(key) = keyg.get(0); + + ++it; + count++; + } + + std::cout << "Count: " << count << std::endl; + + openfpm::vector<size_t> count_total; + v_cl.allGather(count,count_total); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < count_total.size(); i++) + sum += count_total.get(i); + + std::cout << "Sum: " << sum << std::endl; + + timer t; + t.start(); + // Save the grid + g_dist.save("grid_dist_id.h5"); + t.stop(); + + std::cout << "Saving time: " << t.getwct() << std::endl; +} + +BOOST_AUTO_TEST_CASE( grid_dist_id_hdf5_load_test ) +{ + + // Input data + size_t k = 2400; + + float ghost_part = 0.0; + + // Domain + Box<2,float> domain({0.0,0.0},{1.0,1.0}); + + Vcluster & v_cl = create_vcluster(); + + // Skip this test on big scale + if (v_cl.getProcessingUnits() >= 32) + return; + + if (v_cl.getProcessUnitID() == 0) + std::cout << "Loading Distributed 2D Grid..." 
<< std::endl; + + // grid size + size_t sz[2]; + sz[0] = k; + sz[1] = k; + + // Ghost + Ghost<2,float> g(ghost_part); + + // Distributed grid with id decomposition + grid_dist_id<2, float, scalar<float>, CartDecomposition<2,float>> g_dist(sz,domain,g); + + g_dist.getDecomposition().write("Before_load_grid_decomposition"); + g_dist.write("Before_Loaded_grid"); + + timer t; + t.start(); + // Save the grid + g_dist.load("grid_dist_id.h5"); + t.stop(); + + g_dist.write("Loaded_grid"); + g_dist.getDecomposition().write("Loaded_grid_decomposition"); + + std::cout << "Loading time: " << t.getwct() << std::endl; + + auto it = g_dist.getDomainIterator(); + + size_t count = 0; + + while (it.isNext()) + { + //key + auto key = it.get(); + + //BOOST_CHECK_CLOSE(g_dist.template get<0>(key),1,0.0001); + //std::cout << "Element: " << g_dist.template get<0>(key) << std::endl; + + auto keyg = g_dist.getGKey(key); + + BOOST_REQUIRE_EQUAL(g_dist.template get<0>(key), keyg.get(0)); + + ++it; + count++; + } + + std::cout << "COOOOOOUNT: " << count << std::endl; + + openfpm::vector<size_t> count_total; + v_cl.allGather(count,count_total); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < count_total.size(); i++) + sum += count_total.get(i); + + BOOST_REQUIRE_EQUAL(sum, (size_t)k*k); +} + + +BOOST_AUTO_TEST_SUITE_END() + +#endif /* SRC_GRID_GRID_DIST_ID_HDF5_CHCKPNT_RESTART_TEST_HPP_ */ diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d4309776938fc06faaa406ae46dd8a6e148b22b4 --- /dev/null +++ b/src/Grid/grid_dist_id_comm.hpp @@ -0,0 +1,476 @@ +/* + * grid_dist_id_comm.hpp + * + * Created on: Nov 13, 2016 + * Author: yaroslav + */ + +#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_ +#define SRC_GRID_GRID_DIST_ID_COMM_HPP_ + +#include "Vector/vector_dist_ofb.hpp" +#include "data_type/scalar.hpp" + + +/*! \brief This class is an helper for the communication of grid_dist_id + * + * \tparam dim Dimensionality of the grid + * \tparam St Type of space where the grid is living + * \tparam T object the grid is storing + * \tparam Decomposition Class that decompose the grid for example CartDecomposition + * \tparam Memory Is the allocator + * \tparam device_grid of base structure is going to store the data + * + * \see grid_dist_id + * + */ + +template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> > +class grid_dist_id_comm +{ + //! VCluster + Vcluster & v_cl; + + //! Maps the processor id with the communication request into map procedure + openfpm::vector<size_t> p_map_req; + + //! Stores the list of processors that communicate with us (local processor) + openfpm::vector<size_t> prc_recv_map; + + //! Stores the size of the elements added for each processor that communicate with us (local processor) + openfpm::vector<size_t> recv_sz_map; + + //! For each near processor, outgoing intersection grid + //! \warning m_oGrid is assumed to be an ordered list + //! first id is grid + //! second id is the processor id + openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid; + +public: + + /*! 
\brief Reconstruct the local grids + * + * \param m_oGrid_recv Vector of labeled grids to combine into a local grid + */ + inline void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv, openfpm::vector<device_grid> & loc_grid, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm) + { + size_t count2 = 0; + for (size_t a = 0; a < m_oGrid_recv.size(); a++) + { + for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++) + { + device_grid g = m_oGrid_recv.get(a).template get<0>(k); + + size_t count = 0; + + + auto it = g.getIterator(); + + while (it.isNext()) + { + //auto key = it.get(); + + //if (g.template get<0>(key) != 1) + //std::cout << "WRONG???????" << std::endl; + + ++it; + count++; + } + + SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k); + + //device_grid gr_send(sz); + //gr_send.setMemory(); + + //std::cout << "B: (" << b.getLow(0) << "; " << b.getLow(1) << "); (" << b.getHigh(0) << "; " << b.getHigh(1) << "); " << "G: (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl; + + // Set the dimensions of the local grid + //g.resize(l_res); + + Point<dim,St> p; + for (size_t n = 0; n < dim; n++) + p.get(n) = g.getGrid().getBox().getHigh(n); + + //std::cout << "G after resize: (" << g.getGrid().getBox().getLow(0) << "; " << g.getGrid().getBox().getLow(1) << "); (" << g.getGrid().getBox().getHigh(0) << "; " << g.getGrid().getBox().getHigh(1) << ")" << std::endl; + + Point<dim,St> point; + for (size_t n = 0; n < dim; n++) + point.get(n) = (b.getHigh(n) + b.getLow(n))/2; + + for (size_t j = 0; j < gdb_ext.size(); j++) + { + // Local sub-domain + SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox; + sub += gdb_ext.get(j).origin; + + if (sub.isInside(point) == true) + { + grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray()); + grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray()); + + std::string start2 = start.to_string(); + std::string stop2 = stop.to_string(); + + auto it = loc_grid.get(j).getSubIterator(start,stop); + + // Copy selected elements into a local grid + while (it.isNext()) + { + auto key = it.get(); + std::string str = key.to_string(); + grid_key_dx<dim> key2 = key - start; + + //std::cout << "Key: " << str << std::endl; + loc_grid.get(j).get_o(key) = g.get_o(key2); + count2++; + + ++it; + } + } + } + } + } + //std::cout << "Count after: " << count2 << std::endl; + } + + + /*! \brief Reconstruct the local grids + * + * \param m_oGrid_recv Vector of labeled grids to combine into a local grid + */ + inline void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv, openfpm::vector<device_grid> & loc_grid, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<size_t> & prc_r) + { + + } + + /*! 
\brief Label intersection grids for mappings + * + * \param prc_sz For each processor the number of grids to send to + */ + inline void labelIntersectionGridsProcessor(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global, openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & lbl_b, openfpm::vector<size_t> & prc_sz) + { + // resize the label buffer + lbl_b.resize(v_cl.getProcessingUnits()); + + size_t count2 = 0; + + // Label all the intersection grids with the processor id where they should go + + for (size_t i = 0; i < gdb_ext_old.size(); i++) + { + // Local old sub-domain in global coordinates + SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox; + sub_dom += gdb_ext_old.get(i).origin; + + for (size_t j = 0; j < gdb_ext_global.size(); j++) + { + size_t p_id = 0; + + // Intersection box + SpaceBox<dim,long int> inte_box; + + // Global new sub-domain in global coordinates + SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox; + sub_dom_new += gdb_ext_global.get(j).origin; + + bool intersect = false; + + if (sub_dom.isValid() == true && sub_dom_new.isValid() == true) + intersect = sub_dom.Intersect(sub_dom_new, inte_box); + + if (intersect == true) + { + //// DEBUG///// + count2++; + ////////////// + + //std::cout << "Inte_box: (" << inte_box.getLow(0) << "; " << inte_box.getLow(1) << "); (" << inte_box.getHigh(0) << "; " << inte_box.getHigh(1) << ")" << std::endl; + + auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box); + + // Get processor ID that store intersection box + Point<dim,St> p; + for (size_t n = 0; n < dim; n++) + p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2; + + //std::cout << "Point: (" << p.get(0) << "; " << p.get(1) << ")" << std::endl; + + p_id = dec.processorID(p); + prc_sz.get(p_id)++; + + //std::cout << "P_id: " << p_id << std::endl; + + // Transform coordinates to local + auto inte_box_local = inte_box; + + inte_box_local -= gdb_ext_old.get(i).origin; + + //std::cout << "gdb_ext_old.get(i): (" << sub_dom.getLow(0) << "; " << sub_dom.getLow(1) << "); (" << sub_dom.getHigh(0) << "; " << sub_dom.getHigh(1) << ")" << std::endl; + + //std::cout << "gdb_ext_global.get(j): (" << sub_dom_new.getLow(0) << "; " << sub_dom_new.getLow(1) << "); (" << sub_dom_new.getHigh(0) << "; " << sub_dom_new.getHigh(1) << ")" << std::endl; + + //std::cout << "Inte_box_local: (" << inte_box_local.getLow(0) << "; " << inte_box_local.getLow(1) << "); (" << inte_box_local.getHigh(0) << "; " << inte_box_local.getHigh(1) << ")" << std::endl; + + // Grid corresponding for gdb_ext_old.get(i) box + device_grid & gr = loc_grid_old.get(i); + + //std::cout << "loc_grid_old.get(i): (" << gr.getGrid().getBox().getLow(0) << "; " << gr.getGrid().getBox().getLow(1) << "); (" << gr.getGrid().getBox().getHigh(0) << "; " << gr.getGrid().getBox().getHigh(1) << ")" << std::endl; + + //for (size_t l = 0; l < dim; l++) + //std::cout << "loc_grid_old.get(i).size on " << l << " dimension: " << gr.getGrid().size(l) << std::endl; + // Size of the grid to send + size_t sz[dim]; + for (size_t l = 0; l < dim; l++) + { + sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1; + //std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl; + } + + // Grid to send + device_grid 
gr_send(sz); + gr_send.setMemory(); + + // Sub iterator across intersection box inside local grid + grid_key_dx<dim> start = inte_box_local.getKP1(); + grid_key_dx<dim> stop = inte_box_local.getKP2(); + + Point<dim,St> p1; + for (size_t n = 0; n < dim; n++) + p1.get(n) = gr_send.getGrid().getBox().getLow(n); + + //std::cout << "Grid send P1: (" << p1.get(0) << "; " << p1.get(1) << ")" << std::endl; + + Point<dim,St> p2; + for (size_t n = 0; n < dim; n++) + p2.get(n) = gr_send.getGrid().getBox().getHigh(n); + + //std::cout << "Grid send P2: (" << p2.get(0) << "; " << p2.get(1) << ")" << std::endl; +/* + Point<dim,St> p3; + for (size_t n = 0; n < dim; n++) + p3.get(n) = gr.getGrid().getBox().getLow(n); + + std::cout << "Grid local P1: (" << p3.get(0) << "; " << p3.get(1) << ")" << std::endl; + + Point<dim,St> p4; + for (size_t n = 0; n < dim; n++) + p4.get(n) = gr.getGrid().getBox().getHigh(n); + + std::cout << "Grid local P2: (" << p4.get(0) << "; " << p4.get(1) << ")" << std::endl; + +*/ + std::string start2 = start.to_string(); + std::string stop2 = stop.to_string(); + + //std::cout << "Start: " << start2 << "; Stop: " << stop2 << std::endl; + + auto it = gr.getSubIterator(start,stop); + + // Copy selected elements into a new sub-grid + while (it.isNext()) + { + auto key = it.get(); + grid_key_dx<dim> key2 = key - start; + std::string str = key.to_string(); + + //std::cout << "Key: " << str << std::endl; + gr_send.get_o(key2) = gr.get_o(key); +/* + ////////// DEBUG /////////////// + if (gr.template get<0>(key) == 1) + { + count++; + } + else if (gr_send.template get<0>(key2) != 1) + { + std::cout << "AHHHHHHHHHH????????" << std::endl; + } +*/ + //////////////// + + //gr_send.set(key,gr,key); + + ++it; + } + + aggregate<device_grid,SpaceBox<dim,long int>> aggr; + + aggr.template get<0>() = gr_send; + aggr.template get<1>() = inte_box; + + // Add to the labeling vector + lbl_b.get(p_id).add(aggr); + } + } + } + //std::cout << "Count for points: " << count << std::endl; + //std::cout << "Count for inte_boxes: " << count2 << std::endl; + } + + /*! \brief Moves all the grids that does not belong to the local processor to the respective processor + * + * \tparam out of bound policy it specify what to do when the particles are detected out of bound + * + * In general this function is called after moving the particles to move the + * elements out the local processor. 
Or just after initialization if each processor + * contain non local particles + * + * \param v_pos vector of particle positions + * \param v_prp vector of particle properties + * \param g_m ghost marker + * + */ + void map_(Decomposition & dec, CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm, openfpm::vector<device_grid> & loc_grid, openfpm::vector<device_grid> & loc_grid_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global) + { + // Processor communication size + openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits()); + + // Contains the processor id of each box (basically where they have to go) + labelIntersectionGridsProcessor(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,m_oGrid,prc_sz); +/* + for (size_t i = 0; i < m_oGrid.size(); i++) + { + for (size_t k = 0; k < m_oGrid.get(i).size(); k++) + { + device_grid g = m_oGrid.get(i).template get<0>(k); + + auto it = g.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + if (g.template get<0>(key) != 1) + std::cout << "WROOOOOOONG" << std::endl; + + ++it; + } + } + } +*/ + + // Calculate the sending buffer size for each processor, put this information in + // a contiguous buffer + p_map_req.resize(v_cl.getProcessingUnits()); + + // Vector of number of sending grids for each involved processor + openfpm::vector<size_t> prc_sz_r; + // Vector of ranks of involved processors + openfpm::vector<size_t> prc_r; + + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + if (m_oGrid.get(i).size() != 0) + { + p_map_req.get(i) = prc_r.size(); + prc_r.add(i); + prc_sz_r.add(m_oGrid.get(i).size()); + } + } +/* + for (size_t i = 0; i < m_oGrid.size(); i++) + { + if(m_oGrid.get(i).size() == 0) + m_oGrid.remove(i); + } +*/ + + decltype(m_oGrid) m_oGrid_new; + for (size_t i = 0; i < v_cl.getProcessingUnits(); i++) + { + if (m_oGrid.get(i).size() != 0) + m_oGrid_new.add(m_oGrid.get(i)); + } + + // Vector for receiving of intersection grids + openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid_recv; + + //std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_r.size(): " << prc_r.size() << std::endl; + + //std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; m_oGrid_new.size(): " << m_oGrid_new.size() << std::endl; +/* + for (size_t i = 0; i < m_oGrid.size(); i++) + { + std::cout << "Processor ID:" << v_cl.getProcessUnitID() << "; I: " << i << ", Size: " << m_oGrid.get(i).size() << std::endl; + } +*/ +/* + for (size_t i = 0; i < m_oGrid_new.size(); i++) + { + for (size_t k = 0; k < m_oGrid_new.get(i).size(); k++) + { + device_grid g = m_oGrid_new.get(i).template get<0>(k); + + auto it = g.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + if (g.template get<0>(key) != 1) + std::cout << "WRONG BEFORE SENDRCV" << std::endl; + + ++it; + } + } + } +*/ + + // Send and recieve intersection grids + v_cl.SSendRecv(m_oGrid_new,m_oGrid_recv,prc_r,prc_recv_map,recv_sz_map); +/* + for (size_t i = 0; i < m_oGrid_recv.size(); i++) + { + for (size_t k = 0; k < m_oGrid_recv.get(i).size(); k++) + { + device_grid g = m_oGrid_recv.get(i).template get<0>(k); + + auto it = g.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + if (g.template get<0>(key) != 1) + std::cout << "WRONG AFTER SENDRCV" << std::endl; + + ++it; + } + } + } +*/ +/* + std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << 
"; m_oGrid_recv.size(): " << m_oGrid_recv.size() << std::endl; + + for (size_t i = 0; i < m_oGrid_recv.size(); i++) + { + std::cout << "Processor ID:" << v_cl.getProcessUnitID() << "; I_recv: " << i << ", Size: " << m_oGrid_recv.get(i).size() << std::endl; + } + + for (size_t i = 0; i < prc_r.size(); i++) + std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_r: " << prc_r.get(i) << std::endl; + + for (size_t i = 0; i < prc_recv_map.size(); i++) + std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; prc_recv_map: " << prc_recv_map.get(i) << std::endl; + + for (size_t i = 0; i < recv_sz_map.size(); i++) + std::cout << "vcl.getProcessUnitID(): " << v_cl.getProcessUnitID() << "; recv_sz_map: " << recv_sz_map.get(i) << std::endl; +*/ + // Reconstruct the new local grids + grids_reconstruct(m_oGrid_recv,loc_grid,gdb_ext,cd_sm); + } + + /*! \brief Constructor + * + * \param dec Domain decompositon + * + */ + grid_dist_id_comm() + :v_cl(create_vcluster()) + { + + } +}; + + +#endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */ diff --git a/src/Vector/performance/vector_dist_performance_util.hpp b/src/Vector/performance/vector_dist_performance_util.hpp index 7cf20686b7012fd88746fe85d2e0173bf3087247..216af97e812a8d20b5bc62d8c6708924fc23976b 100644 --- a/src/Vector/performance/vector_dist_performance_util.hpp +++ b/src/Vector/performance/vector_dist_performance_util.hpp @@ -15,6 +15,73 @@ #define N_VERLET_TEST 3 #define N_STAT_TEST 30 +static inline void warning_set(int & warning_level, double mean, double mean_ref, double sigma) +{ + int warning_level_candidate; + + if (mean - mean_ref < -2.0*sigma ) + warning_level_candidate = -1; + else if (mean - mean_ref < 2.0*sigma) + warning_level_candidate = 0; + else if (mean - mean_ref < 3.0*sigma) + warning_level_candidate = 1; + else + warning_level_candidate = 2; + + if (warning_level_candidate > warning_level) + warning_level = warning_level_candidate; +} + +static inline void addchartarea(std::string & chart_area, int lvl) +{ + std::string color; + + if (lvl == -1) + { + chart_area = std::string(",chartArea: {\ + backgroundColor: {\ + stroke: '#00FF00',\ + strokeWidth: 6\ + }\ + }"); + } + else if (lvl == 0) + { + // NOTHING TO DO + } + else if (lvl == 1) + { + chart_area = std::string(",chartArea: {\ + backgroundColor: {\ + stroke: '#FFFF00',\ + strokeWidth: 6\ + }\ + }"); + } + else if (lvl == 2) + { + chart_area = std::string(",chartArea: {\ + backgroundColor: {\ + stroke: '#FF0000',\ + strokeWidth: 6\ + }\ + }"); + } + +} + +void addUpdtateTime(GoogleChart & cg) +{ + time_t t = time(0); // get time now + struct tm * now = localtime( & t ); + + std::stringstream str; + + str << "<h3>Updated: " << now->tm_mday << "/" << now->tm_mon + 1 << "/" << now->tm_year+1900 << " " << now->tm_hour << ":" << now->tm_min << ":" << now->tm_sec << std::endl; + + cg.addHTML(str.str()); +} + /*! 
\brief Standard deviation * * \param measures set of measures diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 057070eb816d6c2aa554d5356ff07d5874022925..87c6eaf87cb5c24dcda0ca1f16a397c9211c6b39 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -8,6 +8,8 @@ #ifndef VECTOR_HPP_ #define VECTOR_HPP_ +#include "../../openfpm_data/src/NN/CellList/CellListFast_gen.hpp" +#include "../../openfpm_data/src/NN/CellList/CellListFast_gen.hpp" #include "HDF5_XdmfWriter/HDF5_XdmfWriter.hpp" #include "VCluster/VCluster.hpp" #include "Space/Shape/Point.hpp" @@ -16,7 +18,6 @@ #include "Vector/vector_dist_key.hpp" #include "memory/PtrMemory.hpp" #include "NN/CellList/CellList.hpp" -#include "NN/CellList/CellListFast_hilb.hpp" #include "util/common.hpp" #include "util/object_util.hpp" #include "memory/ExtPreAlloc.hpp" @@ -31,6 +32,8 @@ #include "NN/VerletList/VerletList.hpp" #include "vector_dist_comm.hpp" #include "DLB/LB_Model.hpp" +#include "NN/CellList/ParticleIt_Cells.hpp" +#include "NN/CellList/ProcKeys.hpp" #define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error"); @@ -78,8 +81,8 @@ struct gcl }; //! General function t get a cell-list -template<unsigned int dim, typename St, typename Vector> -struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector> +template<unsigned int dim, typename St, typename Vector, typename Mem_type> +struct gcl<dim,St,CellList_gen<dim, St, Process_keys_hilb,Mem_type, shift<dim, St> >,Vector> { /*! \brief Get the Cell list based on the type * @@ -90,7 +93,7 @@ struct gcl<dim,St,CellList_hilb<dim, St, FAST, shift<dim, St> >,Vector> * \return the constructed cell-list * */ - static inline CellList_hilb<dim, St, FAST, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) + static inline CellList_gen<dim, St, Process_keys_hilb, Mem_type, shift<dim, St> > get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g) { return vd.getCellList_hilb(r_cut,g); } @@ -672,7 +675,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellListSym(St r_cut) + template<typename CellL = CellList<dim, St, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellListSym(St r_cut) { #ifdef SE_CLASS1 if ((opt & BIND_DEC_TO_GHOST)) @@ -702,33 +705,6 @@ public: return cell_list; } - /*! \brief return the neighborhood cells of a cells to do symmetric interactions - * - * \warning Used in in combination of getNNIteratorSym in a Cell-list - * - * \return the neighborhood cells of a cell - * - * - */ -/* const openfpm::vector<subsub_lin<dim>> & getNNCells(size_t cell) const - { - return getDecomposition().getDomainCellNNSym(); - }*/ - - /*! \brief Construct a cell list symmetric based on a cut of radius - * - * \tparam CellL CellList type to construct - * - * \param r_cut interation radius, or size of each cell - * - * \return the Cell list - * - */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellListSymNoBind(St r_cut) - { - return getCellList(r_cut); - } - /*! 
\brief Construct a cell list starting from the stored particles * @@ -739,7 +715,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut) + template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList(St r_cut) { #ifdef SE_CLASS3 se3.getNN(); @@ -764,7 +740,7 @@ public: * \return the Cell list * */ - template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut) + template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList_hilb(St r_cut) { #ifdef SE_CLASS3 se3.getNN(); @@ -787,7 +763,7 @@ public: * \param cell_list Cell list to update * */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellList(CellL & cell_list) + template<typename CellL> void updateCellList(CellL & cell_list) { #ifdef SE_CLASS3 se3.getNN(); @@ -824,7 +800,7 @@ public: * \param cell_list Cell list to update * */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > void updateCellListSym(CellL & cell_list) + template<typename CellL = CellList<dim, St, Mem_fast<dim,St>, shift<dim, St> > > void updateCellListSym(CellL & cell_list) { #ifdef SE_CLASS3 se3.getNN(); @@ -869,7 +845,7 @@ public: * \return the CellList * */ - template<typename CellL = CellList<dim, St, FAST, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge) + template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge) { #ifdef SE_CLASS3 se3.getNN(); @@ -886,7 +862,7 @@ public: // Processor bounding box cl_param_calculate(pbox, div, r_cut, enlarge); - cell_list.Initialize(pbox, div); + cell_list.Initialize(pbox, div, g_m); cell_list.set_ndec(getDecomposition().get_ndec()); updateCellList(cell_list); @@ -909,7 +885,7 @@ public: * \return The Cell-list * */ - template<typename CellL = CellList_hilb<dim, St, FAST, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge) + template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<dim,St>, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge) { #ifdef SE_CLASS3 se3.getNN(); @@ -1128,7 +1104,7 @@ public: * \param m an order of a hilbert curve * */ - template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder (int32_t m) + template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<dim,St>,shift<dim,St> > > void reorder (int32_t m) { reorder(m,getDecomposition().getGhost()); } @@ -1146,7 +1122,7 @@ public: * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged * */ - template<typename CellL=CellList<dim,St,FAST,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge) + template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_fast<dim,St>,shift<dim,St> > > void reorder(int32_t m, const Ghost<dim,St> & enlarge) { // reset the ghost part v_pos.resize(g_m); @@ -1170,7 +1146,7 @@ public: div[i] = 1 << m; } - cell_list.Initialize(pbox,div); + cell_list.Initialize(pbox,div,g_m); // for each particle add the particle to the cell list @@ -1572,6 +1548,517 @@ public: dec.getDistribution().setDistTol(md.distributionTol()); } + inline void save(const std::string & filename) const + { + //Pack_request 
vector + size_t req = 0; + + //std::cout << "V_pos.size() before save: " << v_pos.size() << std::endl; + + //Pack request + Packer<decltype(v_pos),HeapMemory>::packRequest(v_pos,req); + Packer<decltype(v_prp),HeapMemory>::packRequest(v_prp,req); + + std::cout << "Req: " << req << std::endl; + + // allocate the memory + HeapMemory pmem; + //pmem.allocate(req); + ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(req,pmem)); + mem.incRef(); + + //Packing + + Pack_stat sts; + + Packer<decltype(v_pos),HeapMemory>::pack(mem,v_pos,sts); + Packer<decltype(v_prp),HeapMemory>::pack(mem,v_prp,sts); + + /***************************************************************** + * Create a new file with default creation and access properties.* + * Then create a dataset and write data to it and close the file * + * and dataset. * + *****************************************************************/ + + int mpi_rank = v_cl.getProcessUnitID(); + int mpi_size = v_cl.getProcessingUnits(); + + MPI_Comm comm = v_cl.getMPIComm(); + MPI_Info info = MPI_INFO_NULL; +/* + //Initialize MPI + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); +*/ + // Set up file access property list with parallel I/O access + + hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, comm, info); + + // Create a new file collectively and release property list identifier. + hid_t file = H5Fcreate (filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); + H5Pclose(plist_id); + + size_t sz = pmem.size(); + //std::cout << "Pmem.size: " << pmem.size() << std::endl; + openfpm::vector<size_t> sz_others; + v_cl.allGather(sz,sz_others); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < sz_others.size(); i++) + sum += sz_others.get(i); + + //Size for data space in file + hsize_t fdim[1] = {sum}; + + //Size for data space in file + hsize_t fdim2[1] = {(size_t)mpi_size}; + + //Create data space in file + hid_t file_dataspace_id = H5Screate_simple(1, fdim, NULL); + + //Create data space in file + hid_t file_dataspace_id_2 = H5Screate_simple(1, fdim2, NULL); + + //Size for data space in memory + hsize_t mdim[1] = {pmem.size()}; + + //Create data space in memory + hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL); + + if (mpi_rank == 0) + std::cout << "Total object size: " << sum << std::endl; + + //Create data set in file + hid_t file_dataset = H5Dcreate (file, "vector_dist", H5T_NATIVE_CHAR, file_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + //Create data set 2 in file + hid_t file_dataset_2 = H5Dcreate (file, "metadata", H5T_NATIVE_INT, file_dataspace_id_2, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + //H5Pclose(plist_id); + H5Sclose(file_dataspace_id); + H5Sclose(file_dataspace_id_2); + + hsize_t block[1] = {pmem.size()}; + + //hsize_t stride[1] = {1}; + + hsize_t count[1] = {1}; + + hsize_t offset[1] = {0}; + + for (int i = 0; i < mpi_rank; i++) + { + if (mpi_rank == 0) + offset[0] = 0; + else + offset[0] += sz_others.get(i); + } + + std::cout << "MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Block: " << block[0] << std::endl; + + int metadata[mpi_size]; + + for (int i = 0; i < mpi_size; i++) + metadata[i] = sz_others.get(i); + + //Select hyperslab in the file. + file_dataspace_id = H5Dget_space(file_dataset); + H5Sselect_hyperslab(file_dataspace_id, H5S_SELECT_SET, offset, NULL, count, block); + + file_dataspace_id_2 = H5Dget_space(file_dataset_2); + + + //Create property list for collective dataset write. 
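	/* Editor's note (illustrative only, not part of the patch): the offset[0]
	 * computed above is simply an exclusive prefix sum of the buffer sizes
	 * gathered in sz_others, equivalent to
	 *
	 *   hsize_t off = 0;
	 *   for (int r = 0; r < mpi_rank; r++)
	 *       off += sz_others.get(r);        // all lower ranks write before us
	 *
	 * and the collective H5Dwrite below places this rank's packed (v_pos, v_prp)
	 * buffer at that offset inside the "vector_dist" dataset, next to a small
	 * "metadata" dataset holding one packed-buffer size per rank.
	 */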
+ plist_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + + //Write a data set to a file + H5Dwrite(file_dataset, H5T_NATIVE_CHAR, mem_dataspace_id, file_dataspace_id, plist_id, (const char *)pmem.getPointer()); + + //Write a data set 2 to a file + H5Dwrite(file_dataset_2, H5T_NATIVE_INT, H5S_ALL, file_dataspace_id_2, plist_id, metadata); + + + //Close/release resources. + H5Dclose(file_dataset); + H5Sclose(file_dataspace_id); + H5Dclose(file_dataset_2); + H5Sclose(file_dataspace_id_2); + H5Sclose(mem_dataspace_id); + H5Pclose(plist_id); + H5Fclose(file); + } + + void load_block(long int bid, hssize_t mpi_size_old, int * metadata_out, openfpm::vector<size_t> metadata_accum, hid_t plist_id, hid_t dataset_2) + { +/* if (mpi_size >= mpi_size_old) + { + if (mpi_rank >= mpi_size_old) + block[0] = 0; + else + block[0] = {(size_t)metadata_out[mpi_rank]}; + } + else + { + int x = mpi_size_old/mpi_size; + int shift = mpi_rank*x; + for (int i = 0; i < x; i++) + { + //block0.get(mpi_rank).add(metadata_out[shift]); + block[0] += metadata_out[shift]; + shift++; + } + int y = mpi_size_old%mpi_size; + if (mpi_rank < y) + { + block_add[0] += metadata_out[mpi_size*x+mpi_rank]; + //block_add0.get(mpi_rank).add(metadata_out[mpi_size*x+mpi_rank]); + } + }*/ + +// std::cout << "BID: " << bid << std::endl; + + hsize_t offset[1]; + hsize_t block[1]; + + if (bid < mpi_size_old && bid != -1) + { + offset[0] = metadata_accum.get(bid); + block[0] = metadata_out[bid]; + } + else + { + offset[0] = 0; + block[0] = 0; + } + +// std::cout << "Offset: " << offset[0] << "; Block: " << block[0]<< std::endl; +// hsize_t offset_add[1] = {0}; + +/* if (mpi_size >= mpi_size_old) + { + if (mpi_rank >= mpi_size_old) + offset[0] = 0; + else + { + for (int i = 0; i < mpi_rank; i++) + offset[0] += metadata_out[i]; + } + } + else + { + int x = mpi_size_old/mpi_size; + int shift = mpi_rank*x; + + for (int i = 0; i < shift; i++) + { + offset[0] += metadata_out[i]; + //offset0.get(mpi_rank).add(metadata_out[i]); + } + + int y = mpi_size_old%mpi_size; + if (mpi_rank < y) + { + for (int i = 0; i < mpi_size*x + mpi_rank; i++) + { + offset_add[0] += metadata_out[i]; + //offset_add0.get(mpi_rank).add(metadata_out[i]); + } + } + }*/ + + //hsize_t stride[1] = {1}; + hsize_t count[1] = {1}; + + //std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << ", Offset: " << offset[0] << ", Offset_add: " << offset_add[0] << ", Block: " << block[0] << ", Block_add: " << block_add[0] << std::endl; +/* + std::cout << "LOAD: MPI rank: " << mpi_rank << ", MPI size: " << mpi_size << std::endl; + for (size_t i = 0; i < offset0.get(mpi_rank).size(); i++) + std::cout << ", Offset: " << offset0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < offset_add0.get(mpi_rank).size(); i++) + std::cout << ", Offset_add: " << offset_add0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < block0.get(mpi_rank).size(); i++) + std::cout << ", Block: " << block0.get(mpi_rank).get(i) << std::endl; + for (size_t i = 0; i < block_add0.get(mpi_rank).size(); i++) + std::cout << ", Block_add: " << block_add0.get(mpi_rank).get(i) << std::endl; +*/ + + //Select file dataspace + hid_t file_dataspace_id_2 = H5Dget_space(dataset_2); + + H5Sselect_hyperslab(file_dataspace_id_2, H5S_SELECT_SET, offset, NULL, count, block); + + //Select file dataspace +/* hid_t file_dataspace_id_3 = H5Dget_space(dataset_2); + + H5Sselect_hyperslab(file_dataspace_id_3, H5S_SELECT_SET, offset_add, NULL, count, block_add);*/ + + 
+		hsize_t mdim_2[1] = {block[0]};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id_2 = H5Screate_simple(1, mdim_2, NULL);
+
+		// allocate the memory for this block
+		HeapMemory pmem;
+		ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(block[0],pmem));
+		mem.incRef();
+
+		// Read the block
+		H5Dread(dataset_2, H5T_NATIVE_CHAR, mem_dataspace_id_2, file_dataspace_id_2, plist_id, (char *)mem.getPointer());
+
+		mem.allocate(pmem.size());
+
+		// Unpack the block into temporary position / property vectors
+		Unpack_stat ps;
+
+		openfpm::vector<Point<dim, St>> v_pos_unp;
+
+		openfpm::vector<prop> v_prp_unp;
+
+		Unpacker<decltype(v_pos_unp),HeapMemory>::unpack(mem,v_pos_unp,ps,1);
+		Unpacker<decltype(v_prp_unp),HeapMemory>::unpack(mem,v_prp_unp,ps,1);
+
+		mem.decRef();
+		delete &mem;
+
+		// Append the unpacked particles to the local vectors
+		for (size_t i = 0; i < v_pos_unp.size(); i++)
+			v_pos.add(v_pos_unp.get(i));
+
+		for (size_t i = 0; i < v_prp_unp.size(); i++)
+			v_prp.add(v_prp_unp.get(i));
+
+		g_m = v_pos.size();
+	}
+
+	inline void load(const std::string & filename)
+	{
+		v_pos.clear();
+		v_prp.clear();
+
+		g_m = 0;
+
+		MPI_Comm comm = v_cl.getMPIComm();
+		MPI_Info info = MPI_INFO_NULL;
+
+		int mpi_rank = v_cl.getProcessUnitID();
+
+		// Set up file access property list with parallel I/O access
+		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
+		H5Pset_fapl_mpio(plist_id, comm, info);
+
+		//Open the file
+		hid_t file = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, plist_id);
+		H5Pclose(plist_id);
+
+		//Open the metadata dataset
+		hid_t dataset = H5Dopen (file, "metadata", H5P_DEFAULT);
+
+		//Create property list for collective dataset read
+		plist_id = H5Pcreate(H5P_DATASET_XFER);
+		H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+
+		//Select file dataspace
+		hid_t file_dataspace_id = H5Dget_space(dataset);
+
+		// The number of entries in "metadata" is the number of processors that wrote the file
+		hssize_t mpi_size_old = H5Sget_select_npoints (file_dataspace_id);
+
+		//Where to read the metadata
+		int metadata_out[mpi_size_old];
+
+		for (int i = 0; i < mpi_size_old; i++)
+		{
+			metadata_out[i] = 0;
+		}
+
+		//Size for data space in memory
+		hsize_t mdim[1] = {(size_t)mpi_size_old};
+
+		//Create data space in memory
+		hid_t mem_dataspace_id = H5Screate_simple(1, mdim, NULL);
+
+		// Read the size (in byte) of each block
+		H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace_id, file_dataspace_id, plist_id, metadata_out);
+
+		// Accumulate the sizes to get the byte offset of each block
+		openfpm::vector<size_t> metadata_accum;
+		metadata_accum.resize(mpi_size_old);
+
+		metadata_accum.get(0) = 0;
+		for (int i = 1 ; i < mpi_size_old ; i++)
+			metadata_accum.get(i) = metadata_accum.get(i-1) + metadata_out[i-1];
+
+		//Open the particle data dataset, reusing the collective transfer property list
+		hid_t dataset_2 = H5Dopen (file, "vector_dist", H5P_DEFAULT);
+
+		// Distribute the mpi_size_old blocks across the current processors: every processor
+		// takes mpi_size_old / P blocks, the first mpi_size_old % P processors take one extra
+		openfpm::vector<size_t> n_block;
+		n_block.resize(v_cl.getProcessingUnits());
+
+		for(size_t i = 0 ; i < n_block.size() ; i++)
+			n_block.get(i) = mpi_size_old / v_cl.getProcessingUnits();
+
+		size_t rest_block = mpi_size_old % v_cl.getProcessingUnits();
+
+		size_t max_block;
+
+		if (rest_block != 0)
+			max_block = n_block.get(0) + 1;
+		else
+			max_block = n_block.get(0);
+
+		for(size_t i = 0 ; i < rest_block ; i++)
+			n_block.get(i) += 1;
+
+		size_t start_block = 0;
+		size_t stop_block = 0;
+
+		if (v_cl.getProcessUnitID() != 0)
+		{
+			for(size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
+				start_block += n_block.get(i);
+		}
+
+		stop_block = start_block + n_block.get(v_cl.getProcessUnitID());
+
+		// Every processor must issue the same number of collective reads, so processors
+		// with fewer than max_block blocks call load_block once more with an empty selection
+		if (mpi_rank >= mpi_size_old)
+			load_block(start_block,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+		else
+		{
+			size_t n_bl = 0;
+			size_t lb = start_block;
+			for ( ; lb < stop_block ; lb++, n_bl++)
+				load_block(lb,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+
+			if (n_bl < max_block)
+				load_block(-1,mpi_size_old,metadata_out,metadata_accum,plist_id,dataset_2);
+		}
+
+		// Close the datasets
+		H5Dclose(dataset);
+		H5Dclose(dataset_2);
+		// Close the file
+ H5Fclose(file); + H5Pclose(plist_id); + + //std::cout << "V_pos.size() after merge: " << v_pos.size() << std::endl; + + // Map particles + //map(); + + //std::cout << "V_pos.size() after merge and map: " << v_pos.size() << std::endl; + } + /*! \brief Output particle position and properties * * \param out output diff --git a/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp b/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c5da9f9b057b66e4833294420ddac61f1540a28 --- /dev/null +++ b/src/Vector/vector_dist_HDF5_chckpnt_restart_test.hpp @@ -0,0 +1,204 @@ +/* + * vector_dist_HDF5_save.hpp + * + * Created on: Jun 12, 2016 + * Author: Yaroslav Zaluzhnyi + */ + +#ifndef SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ +#define SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ + +#include "vector_dist.hpp" +#include "Packer_Unpacker/Pack_selector.hpp" +#include "Packer_Unpacker/Packer.hpp" +#include "Packer_Unpacker/Unpacker.hpp" +#include "Vector/performance/vector_dist_performance_util.hpp" + + +#include "hdf5.h" + +BOOST_AUTO_TEST_SUITE( vd_hdf5_chckpnt_rstrt_test ) + +// Input data + +// Number of particles +size_t k = 1000000; + +// Dimensionality +const size_t dim = 3; + +BOOST_AUTO_TEST_CASE( vector_dist_hdf5_save_test ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessUnitID() == 0) + std::cout << "Saving distributed vector" << std::endl; + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + const size_t Ng = cbrt(k); + + // we create a Grid iterator + size_t sz[dim] = {Ng,Ng,Ng}; + + for (size_t i = 0; i < dim; i++) + bc[i] = NON_PERIODIC; + + // ghost + Ghost<dim,float> ghost(1.0/(Ng-2)); + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost); + + // Put particles + + auto it = vd.getGridIterator(sz); + + while (it.isNext()) + { + vd.add(); + + auto key = it.get(); + + vd.getLastPos()[0] = key.get(0) * it.getSpacing(0); + vd.getLastPos()[1] = key.get(1) * it.getSpacing(1); + vd.getLastPos()[2] = key.get(2) * it.getSpacing(2); + + ++it; + } + + //BOOST_REQUIRE_EQUAL(it.getSpacing(0),1.0f/(Ng-1)); + //BOOST_REQUIRE_EQUAL(it.getSpacing(1),1.0f/(Ng-1)); + //BOOST_REQUIRE_EQUAL(it.getSpacing(2),1.0f/(Ng-1)); + + vd.map(); + + // Put forces + + auto it2 = vd.getDomainIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + //Put the forces + for (size_t i = 0; i < dim; i++) + vd.template getProp<0>(key)[i] = 0.51234 + vd.getPos(key)[0] + vd.getPos(key)[1]+ vd.getPos(key)[2]; + + ++it2; + } + + timer t; + t.start(); + // Save the vector + vd.save("vector_dist.h5"); + t.stop(); + + std::cout << "Saving time: " << t.getwct() << std::endl; +} + + + +BOOST_AUTO_TEST_CASE( vector_dist_hdf5_load_test ) +{ + Vcluster & v_cl = create_vcluster(); + + if (v_cl.getProcessUnitID() == 0) + std::cout << "Loading distributed vector" << std::endl; + + Box<dim,float> box; + + for (size_t i = 0; i < dim; i++) + { + box.setLow(i,0.0); + box.setHigh(i,1.0); + } + + // Boundary conditions + size_t bc[dim]; + + for (size_t i = 0; i < dim; i++) + bc[i] = NON_PERIODIC; + + + const size_t Ng = cbrt(k); + + // we create a Grid iterator + size_t sz[3] = {Ng,Ng,Ng}; + + // ghost + Ghost<dim,float> ghost(1.0/(Ng-2)); + + vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(0,box,bc,ghost); + + vd.load("vector_dist.h5"); + + timer t; 
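+
+	// load() clears the vector before reading, so this second, timed load measures a
+	// full restart without changing the data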
+ t.start(); + // Save the vector + vd.load("vector_dist.h5"); + t.stop(); + + std::cout << "Loading time: " << t.getwct() << std::endl; + + /////////////////// Checking data /////////////////////// + + // Check total number of particles + size_t n_part = vd.size_local(); + openfpm::vector<size_t> tot_n_part; + v_cl.allGather(n_part,tot_n_part); + v_cl.execute(); + + size_t sum = 0; + + for (size_t i = 0; i < tot_n_part.size(); i++) + sum += tot_n_part.get(i); + + BOOST_REQUIRE_EQUAL(sum,k); + + //std::cout << "Sum: " << sum << std::endl; + + // Check spacing (positions) + + auto it = vd.getGridIterator(sz); + + while (it.isNext()) + { + //auto key = it.get(); + + ++it; + } + + BOOST_REQUIRE_EQUAL(it.getSpacing(0),1.0f/(Ng-1)); + BOOST_REQUIRE_EQUAL(it.getSpacing(1),1.0f/(Ng-1)); + BOOST_REQUIRE_EQUAL(it.getSpacing(2),1.0f/(Ng-1)); + + + // Check properties + + auto it2 = vd.getDomainIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + //Put the forces + for (size_t i = 0; i < dim; i++) + BOOST_CHECK_CLOSE(vd.template getProp<0>(key)[i],0.51234 + vd.getPos(key)[0] + vd.getPos(key)[1]+ vd.getPos(key)[2],0.0001); + + ++it2; + } +} + +BOOST_AUTO_TEST_SUITE_END() + + +#endif /* SRC_VECTOR_VECTOR_DIST_HDF5_CHCKPNT_RESTART_TEST_HPP_ */ diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp index cba0f756a41f0a2918ec3803de866308ad9a386c..5acc8093cd51101b70ac03d1f3eb27a479c6aac4 100644 --- a/src/Vector/vector_dist_unit_test.hpp +++ b/src/Vector/vector_dist_unit_test.hpp @@ -1064,7 +1064,7 @@ void Test_interacting(Box<3,float> & box) Point<3,float> xp = vd.getPos(p); - auto Np = NN.getIterator(NN.getCell(xp)); + auto Np = NN.getCellIterator(NN.getCell(xp)); while (Np.isNext()) { diff --git a/src/main.cpp b/src/main.cpp index 38e281838cccc35b3937f7b02a370ba7f7a2051d..6bdee796eaec9c4315bb4e5f10342068a0e1b7ea 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -46,6 +46,11 @@ int main(int argc, char* argv[]) #include "Decomposition/Distribution/metis_util_unit_test.hpp" #include "dec_optimizer_unit_test.hpp" #include "Vector/vector_dist_unit_test.hpp" +#include "Vector/vector_dist_HDF5_chckpnt_restart_test.hpp" +#include "Grid/grid_dist_id_HDF5_chckpnt_restart_test.hpp" +#ifdef PERFORMANCE_TEST +#include "pdata_performance.hpp" +#endif #include "Decomposition/Distribution/Distribution_unit_tests.hpp" #include "Grid/Iterators/grid_dist_id_iterators_unit_tests.hpp" //#include "DLB/DLB_unit_test.hpp" diff --git a/src/pdata_performance.cpp b/src/pdata_performance.cpp index ec7027f26451dd9530cf52c5e758a0bee8e8d5da..4137197a0214c242b1c1247ed35dd96186ceb1d6 100644 --- a/src/pdata_performance.cpp +++ b/src/pdata_performance.cpp @@ -32,73 +32,6 @@ boost::property_tree::ptree pt; BOOST_AUTO_TEST_SUITE( performance ) -static inline void warning_set(int & warning_level, double mean, double mean_ref, double sigma) -{ - int warning_level_candidate; - - if (mean - mean_ref < -2.0*sigma ) - warning_level_candidate = -1; - else if (mean - mean_ref < 2.0*sigma) - warning_level_candidate = 0; - else if (mean - mean_ref < 3.0*sigma) - warning_level_candidate = 1; - else - warning_level_candidate = 2; - - if (warning_level_candidate > warning_level) - warning_level = warning_level_candidate; -} - -static inline void addchartarea(std::string & chart_area, int lvl) -{ - std::string color; - - if (lvl == -1) - { - chart_area = std::string(",chartArea: {\ - backgroundColor: {\ - stroke: '#00FF00',\ - strokeWidth: 6\ - }\ - }"); - } - else if (lvl == 0) - { - // NOTHING TO DO - } - 
else if (lvl == 1) - { - chart_area = std::string(",chartArea: {\ - backgroundColor: {\ - stroke: '#FFFF00',\ - strokeWidth: 6\ - }\ - }"); - } - else if (lvl == 2) - { - chart_area = std::string(",chartArea: {\ - backgroundColor: {\ - stroke: '#FF0000',\ - strokeWidth: 6\ - }\ - }"); - } - -} - -void addUpdtateTime(GoogleChart & cg) -{ - time_t t = time(0); // get time now - struct tm * now = localtime( & t ); - - std::stringstream str; - - str << "<h3>Updated: " << now->tm_mday << "/" << now->tm_mon + 1 << "/" << now->tm_year+1900 << " " << now->tm_hour << ":" << now->tm_min << ":" << now->tm_sec << std::endl; - - cg.addHTML(str.str()); -} - #include "Vector/performance/vector_dist_performance_util.hpp" #include "Vector/performance/verlet_performance_tests.hpp" #include "Vector/performance/cell_list_part_reorder.hpp"