/* * Domain_icells_cart.hpp * * Created on: Apr 27, 2019 * Author: i-bird */ #ifndef DOMAIN_ICELLS_CART_HPP_ #define DOMAIN_ICELLS_CART_HPP_ #include "Vector/map_vector.hpp" #include "Space/Ghost.hpp" #include "NN/CellList/CellList.hpp" #include "NN/CellList/cuda/CellDecomposer_gpu_ker.cuh" #include "Vector/map_vector_sparse.hpp" #include #ifdef __NVCC__ template __global__ void insert_icell(vector_sparse_type vs, CellDecomposer_type cld, grid_key_dx start,grid_key_dx stop) { vs.init(); auto gk = grid_p::get_grid_point(cld.get_div_c()); unsigned int b = blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y; for (unsigned int i = 0 ; i < dim ; i++) { gk.set_d(i,gk.get(i) + start.get(i)); if (gk.get(i) > stop.get(i)) {return;} } auto id = cld.LinId(gk); vs.insert_b(id,b); vs.flush_block_insert(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0 ); } template __global__ void insert_remove_icell(vector_sparse_type vs, vector_sparse_type vsi, CellDecomposer_type cld, grid_key_dx start,grid_key_dx stop) { vs.init(); vsi.init(); auto gk = grid_p::get_grid_point(cld.get_div_c()); unsigned int b = blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y; for (unsigned int i = 0 ; i < dim ; i++) { gk.set_d(i,gk.get(i) + start.get(i)); if (gk.get(i) > stop.get(i)) {return;} } auto id = cld.LinId(gk); vs.insert_b(id,b); vsi.remove_b(id,b); vs.flush_block_insert(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0 ); vsi.flush_block_remove(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0); } #endif template class layout_base , typename Memory> class domain_icell_calculator { typedef unsigned int cnt_type; typedef int ids_type; openfpm::vector,Memory,typename layout_base>::type,layout_base> icells; openfpm::vector,Memory,typename layout_base>::type,layout_base> dcells; CellDecomposer_sm> cd; public: /*! \brief Calculate the subdomain that are in the skin part of the domain * \verbatim +---+---+---+---+---+---+ | 1 | 2 | 3 | 4 | 5 | 6 | +---+---+---+---+---+---+ |28 | | 7 | +---+ +---+ |27 | | 8 | +---+ +---+ |26 | | 9 | +---+ DOM1 +---+ |25 | |10 | +---+ +---+ |24 | |11 | +---+ +---+---+ |23 | |13 |12 | +---+-----------+---+---+ |22 | |14 | +---+ +---+ |21 | DOM2 |15 | +---+---+---+---+---+ |20 |19 |18 | 17|16 | +---+---+---+---+---+ <----- Domain end here | ^ | |_____________| \endverbatim * * It does it on GPU or CPU * */ template void CalculateInternalCells(VCluster_type & v_cl, openfpm::vector,Memory,typename layout_base>::type,layout_base> & ig_box, openfpm::vector,Memory,typename layout_base>::type,layout_base> & domain, Box & pbox, T r_cut, const Ghost & enlarge) { #ifdef __NVCC__ // Division array size_t div[dim]; // Calculate the parameters of the cell-list cl_param_calculate(pbox, div, r_cut, enlarge); openfpm::array spacing_c; openfpm::array div_c; openfpm::array off; for (size_t i = 0 ; i < dim ; i++) { spacing_c[i] = (pbox.getHigh(i) - pbox.getLow(i)) / div[i]; off[i] = 1; // div_c must include offset div_c[i] = div[i] + 2*off[i]; } shift_only t(Matrix::identity(),pbox.getP1()); CellDecomposer_gpu_ker> cld(spacing_c,div_c,off,t); grid_sm g = cld.getGrid(); cd.setDimensions(pbox,div,off[0]); openfpm::vector_sparse_gpu> vs; openfpm::vector_sparse_gpu> vsi; vs.template setBackground<0>(0); // insert Domain cells for (size_t i = 0 ; i < domain.size() ; i++) { Box bx = SpaceBox(domain.get(i)); auto pp2 = bx.getP2(); for (size_t j = 0 ; j < dim ; j++) {pp2.get(j) = std::nextafter(pp2.get(j),pp2.get(j) - static_cast(1.0));} auto p1 = cld.getCell(bx.getP1()); auto p2 = cld.getCell(pp2); auto ite = g.getGPUIterator(p1,p2,256); if (ite.wthr.x == 0) {continue;} vsi.setGPUInsertBuffer(ite.nblocks(),256); CUDA_LAUNCH((insert_icell),ite,vsi.toKernel(),cld,ite.start,p2); vsi.template flush<>(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); } // calculate the number of kernel launch for (size_t i = 0 ; i < ig_box.size() ; i++) { Box bx = ig_box.get(i); auto pp2 = bx.getP2(); for (size_t j = 0 ; j < dim ; j++) {pp2.get(j) = std::nextafter(pp2.get(j),pp2.get(j) - static_cast(1.0));} auto p1 = cld.getCell(bx.getP1()); auto p2 = cld.getCell(pp2); auto ite = g.getGPUIterator(p1,p2,256); if (ite.wthr.x == 0) {continue;} vs.setGPUInsertBuffer(ite.nblocks(),256); vsi.setGPURemoveBuffer(ite.nblocks(),256); CUDA_LAUNCH(insert_remove_icell,ite,vs.toKernel(),vsi.toKernel(),cld,ite.start,p2); vs.template flush<>(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); vsi.flush_remove(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); } vs.swapIndexVector(icells); vsi.swapIndexVector(dcells); #endif } /*! \brief Return the list of the internal cells * * \return the list of the internal cells * */ openfpm::vector,Memory,typename layout_base>::type,layout_base> & getIcells() { return icells; } /*! \brief Return the list of the internal cells * * \return the list of the internal cells * */ openfpm::vector,Memory,typename layout_base>::type,layout_base> & getDcells() { return dcells; } /*! \brief Given a cell index return the cell box * * \param ci cell index * * \return The box reppresenting the cell */ Box getBoxCell(unsigned int ci) { Box b; for (size_t i = 0 ; i < dim ; i++) { auto key = cd.getGrid().InvLinId(ci); Point p1 = cd.getOrig().get(i) - cd.getPadding(i)*cd.getCellBox().getHigh(i) ; b.setLow(i,p1.get(i) + key.get(i)*cd.getCellBox().getHigh(i)); b.setHigh(i,p1.get(i) + ((key.get(i) + 1)*cd.getCellBox().getHigh(i))); } return b; } /*! \brief Get the grid base information about this cell decomposition * * * \return the grid * */ const grid_sm & getGrid() { return cd.getGrid(); } }; #endif /* DOMAIN_ICELLS_CART_HPP_ */