diff --git a/openfpm_data b/openfpm_data index 1e67b751bf63de73c2d23da4793d71d1f662eaa6..9234d279fdd0735a93b01389e3a47a298e320f52 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 1e67b751bf63de73c2d23da4793d71d1f662eaa6 +Subproject commit 9234d279fdd0735a93b01389e3a47a298e320f52 diff --git a/openfpm_vcluster b/openfpm_vcluster index 96b8c774751903afedeabdd4d79dae43fce85510..2a56c313040d0fd816d396c3b75c651a894b5577 160000 --- a/openfpm_vcluster +++ b/openfpm_vcluster @@ -1 +1 @@ -Subproject commit 96b8c774751903afedeabdd4d79dae43fce85510 +Subproject commit 2a56c313040d0fd816d396c3b75c651a894b5577 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b44ec11f979368b3dd41e1dbbcdd37b387b9ef5..837aaf317eea18ab8339fb170b12a2c78863ac65 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,8 @@ if(CUDA_FOUND OR CUDA_ON_CPU OR HIP_FOUND) Decomposition/cuda/decomposition_cuda_tests.cu Vector/cuda/vector_dist_gpu_unit_tests.cu Decomposition/cuda/Domain_icells_cart_unit_test.cu - Amr/tests/amr_base_gpu_unit_tests.cu) + Amr/tests/amr_base_gpu_unit_tests.cu + Grid/tests/grid_dist_id_unit_test.cu) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") diff --git a/src/Grid/Iterators/grid_dist_id_iterator.hpp b/src/Grid/Iterators/grid_dist_id_iterator.hpp index 308567b31fff370d39ac7627b29548b65b03c940..fd8dbfa2fa0346b1305cd20d1ebeba5f0b0008b3 100644 --- a/src/Grid/Iterators/grid_dist_id_iterator.hpp +++ b/src/Grid/Iterators/grid_dist_id_iterator.hpp @@ -20,6 +20,8 @@ #include "SparseGridGpu/encap_num.hpp" #endif +#include "Grid/cuda/grid_dist_id_kernels.cuh" + template<unsigned int dim> struct launch_insert_sparse_lambda_call { @@ -183,6 +185,56 @@ struct launch_insert_sparse } }; +template<unsigned int dim> +struct launch_set_dense +{ + template<typename grid_type, typename ite_type, typename lambda_f2> + __device__ void operator()(grid_type & grid, ite_type itg, lambda_f2 f2) + { +#ifdef __NVCC__ + + printf("grid on GPU Dimension %d not implemented, yet\n",(int)dim); + +#endif + } +}; + +template<> +struct launch_set_dense<2> +{ + template<typename grid_type, typename ite_type, typename lambda_f2> + __device__ void operator()(grid_type & grid, ite_type itg, lambda_f2 f2) + { +#ifdef __NVCC__ + + GRID_ID_2_GLOBAL(itg); + + auto obj = grid.get_o(key); + + f2(obj,keyg.get(0),keyg.get(1)); + +#endif + } +}; + +template<> +struct launch_set_dense<3> +{ + template<typename grid_type, typename ite_type, typename lambda_f2> + __device__ void operator()(grid_type & grid, ite_type itg, lambda_f2 f2) + { +#ifdef __NVCC__ + + GRID_ID_3_GLOBAL(itg); + + auto obj = grid.get_o(key); + + f2(obj,keyg.get(0),keyg.get(1),keyg.get(2)); + +#endif + } +}; + template<bool is_free> struct selvg { diff --git a/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh b/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh index 66d1cbbdad2e316c22a311009a110cd122907de5..b3a322a393767148197f6def300c7254dcaa417d 100644 --- a/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh +++ b/src/Grid/cuda/grid_dist_id_iterator_gpu.cuh @@ -77,28 +77,28 @@ class grid_dist_id_iterator_gpu * \return itself * */ - grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & operator=(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp) - { - g_c = tmp.g_c; - gdb_ext = tmp.gdb_ext; +// grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & operator=(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp) +// { +// g_c = tmp.g_c; +// gdb_ext = tmp.gdb_ext; - start = tmp.start; - stop = tmp.stop; - loc_grids = tmp.loc_grids; +// start = tmp.start; +// stop = tmp.stop; +// loc_grids = tmp.loc_grids; - return *this; - } +// return *this; +// } /*! \brief Copy constructor * * \param tmp iterator to copy * */ - grid_dist_id_iterator_gpu(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp) - :loc_grids(tmp.loc_grids) - { - this->operator=(tmp); - } +// grid_dist_id_iterator_gpu(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp) +// :loc_grids(tmp.loc_grids) +// { +// this->operator=(tmp); +// } /*! \brief Constructor of the distributed grid iterator * @@ -269,6 +269,7 @@ class grid_dist_id_iterator_gpu } } + /*! \brief Get the starting point of the sub-grid we are iterating * * \return the starting point diff --git a/src/Grid/cuda/grid_dist_id_kernels.cuh b/src/Grid/cuda/grid_dist_id_kernels.cuh index e01fd5199584642bf9a5fa2d4c98a83647985efc..1095325a0cc6cd35151eb5d2d5aeea439bfb2646 100644 --- a/src/Grid/cuda/grid_dist_id_kernels.cuh +++ b/src/Grid/cuda/grid_dist_id_kernels.cuh @@ -8,6 +8,8 @@ #ifndef GRID_DIST_ID_KERNELS_CUH_ #define GRID_DIST_ID_KERNELS_CUH_ +#include "config.h" + #ifdef CUDA_GPU template<unsigned int dim> diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index fd509d9fc8f41efa9e2aafb719af7db18c220642..2414d570f37cbec7f820fbeb7a5744e3e284a47a 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -1768,6 +1768,32 @@ public: #ifdef __NVCC__ + /*! \brief set existing points in the grid + * + * \param f2 lambda function to set points + */ + template<typename lambda_t2> + void setPoints(lambda_t2 f2) + { + auto it = getGridIteratorGPU(); + + it.template launch<1>(launch_set_dense<dim>(),f2); + } + + /*! \brief set point existing in the grid between start and stop + * + * \param start point + * \param stop point + * \param f2 lambda function to set points + */ + template<typename lambda_t2> + void setPoints(grid_key_dx<dim> k1, grid_key_dx<dim> k2, lambda_t2 f2) + { + auto it = getGridIteratorGPU(k1,k2); + + it.template launch<0>(launch_set_dense<dim>(),f2); + } + /*! \brief Insert point in the grid * * \param f1 lambda function to insert point @@ -1779,7 +1805,7 @@ public: auto it = getGridIteratorGPU(); it.setGPUInsertBuffer(1); - it.template launch<1>(launch_insert_sparse(),f1,f2); + it.template launch<0>(launch_insert_sparse(),f1,f2); } /*! \brief Insert point in the grid between start and stop @@ -3306,6 +3332,9 @@ using grid_dist_id_devg = grid_dist_id<dim,St,T,Decomposition,Memory,devg>; template<unsigned int dim, typename St, typename T, typename Memory = CudaMemory, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte> > using sgrid_dist_id_gpu = grid_dist_id<dim,St,T,Decomposition,Memory,SparseGridGpu<dim,T>>; +template<unsigned int dim, typename St, typename T, typename Memory = CudaMemory, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte> > +using grid_dist_id_gpu = grid_dist_id<dim,St,T,Decomposition,Memory,grid_gpu<dim,T>>; + template<unsigned int dim, typename St, typename T, typename Memory = CudaMemory, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte> > using sgrid_dist_sid_gpu = grid_dist_id<dim,St,T,Decomposition,Memory,SparseGridGpu<dim,T,default_edge<dim>::type::value,default_edge<dim>::tb::value,int>>; #endif diff --git a/src/Grid/tests/grid_dist_id_unit_test.cu b/src/Grid/tests/grid_dist_id_unit_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..01770490120f6cd8986b253533fb357a3898c35a --- /dev/null +++ b/src/Grid/tests/grid_dist_id_unit_test.cu @@ -0,0 +1,57 @@ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include "Point_test.hpp" +#include "Grid/grid_dist_id.hpp" +#include "data_type/aggregate.hpp" + +extern void print_test_v(std::string test, size_t sz); + +BOOST_AUTO_TEST_SUITE( grid_dist_id_test ) + + +BOOST_AUTO_TEST_CASE( grid_dist_id_gpu_test ) +{ + // Test grid periodic + +/* Box<3,float> domain({-1.0,-1.0,-1.0},{1.0,1.0,1.0}); + + Vcluster<> & v_cl = create_vcluster(); + + if ( v_cl.getProcessingUnits() > 32 ) + {return;} + + // grid size + size_t sz[3]; + sz[0] = 32; + sz[1] = 32; + sz[2] = 32; + + // Ghost + Ghost<3,long int> g(1); + + // periodicity + periodicity<3> pr = {{PERIODIC,PERIODIC,PERIODIC}}; + + // Distributed grid with id decomposition + grid_dist_id_gpu<3, float, aggregate<float, float>> g_dist(sz,domain,g,pr); + + Box<3,size_t> box({1,1,1},{30,30,30}); + auto it = g_dist.getGridIterator(box.getKP1(),box.getKP2()); + + float c = 5.0; + + typedef typename GetSetBlockType<decltype(g_dist)>::type BlockT; + + g_dist.setPoints(box.getKP1(),box.getKP2(), + [c] __device__ (BlockT & data, int i, int j, int k) + { + data.template get<0>() = c + i*i + j*j + k*k; + } + ); + */ + +} + + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Vector/vector_dist.hpp b/src/Vector/vector_dist.hpp index 23677ace8892ddd08466767aecce40d39b9311ca..b795d9b043a870b759052ff7490334be66914f89 100644 --- a/src/Vector/vector_dist.hpp +++ b/src/Vector/vector_dist.hpp @@ -259,7 +259,9 @@ template<unsigned int dim, typename prop, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory, - template<typename> class layout_base = memory_traits_lin> + template<typename> class layout_base = memory_traits_lin, + typename vector_dist_pos = openfpm::vector<Point<dim, St>,Memory,layout_base>, + typename vector_dist_prop = openfpm::vector<prop,Memory,layout_base> > class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>, #ifdef CUDA_GPU private vector_dist_ker_list<vector_dist_ker<dim,St,prop,layout_base>> @@ -280,17 +282,17 @@ private: //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos; + vector_dist_pos v_pos; //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor //! the second element contain unassigned particles - openfpm::vector<prop,Memory,layout_base> v_prp; + vector_dist_prop v_prp; //! reordered v_pos buffer - openfpm::vector<prop,Memory,layout_base> v_prp_out; + vector_dist_prop v_prp_out; //! reordered v_prp buffer - openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos_out; + vector_dist_pos v_pos_out; //! option used to create this vector size_t opt = 0; @@ -2606,8 +2608,8 @@ public: if ((opt & 0x0FFF0000) == CSV_WRITER) { // CSVWriter test - CSVWriter<openfpm::vector<Point<dim, St>,Memory,layout_base>, - openfpm::vector<prop,Memory,layout_base> > csv_writer; + CSVWriter<vector_dist_pos, + vector_dist_prop > csv_writer; std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + std::to_string(".csv")); @@ -2622,8 +2624,8 @@ public: ft = file_type::BINARY; // VTKWriter for a set of points - VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim, St>,Memory,layout_base>, - openfpm::vector<prop,Memory,layout_base>>, + VTKWriter<boost::mpl::pair<vector_dist_pos, + vector_dist_prop>, VECTOR_POINTS> vtk_writer; vtk_writer.add(v_pos,v_prp,g_m); @@ -2699,8 +2701,8 @@ public: if ((opt & 0x0FFF0000) == CSV_WRITER) { // CSVWriter test - CSVWriter<openfpm::vector<Point<dim, St>,Memory,layout_base>, - openfpm::vector<prop,Memory,layout_base> > csv_writer; + CSVWriter<vector_dist_pos, + vector_dist_prop > csv_writer; std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".csv")); @@ -2715,8 +2717,8 @@ public: ft = file_type::BINARY; // VTKWriter for a set of points - VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim, St>,Memory,layout_base>, - openfpm::vector<prop,Memory,layout_base>>, VECTOR_POINTS> vtk_writer; + VTKWriter<boost::mpl::pair<vector_dist_pos, + vector_dist_prop>, VECTOR_POINTS> vtk_writer; vtk_writer.add(v_pos,v_prp,g_m); std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".vtk")); @@ -2785,7 +2787,7 @@ public: * \return the particle position vector * */ - const openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVector() const + const vector_dist_pos & getPosVector() const { return v_pos; } @@ -2795,7 +2797,7 @@ public: * \return the particle position vector * */ - openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVector() + vector_dist_pos & getPosVector() { return v_pos; } @@ -2805,7 +2807,7 @@ public: * \return the particle property vector * */ - const openfpm::vector<prop,Memory,layout_base> & getPropVector() const + const vector_dist_prop & getPropVector() const { return v_prp; } @@ -2815,7 +2817,7 @@ public: * \return the particle property vector * */ - openfpm::vector<prop,Memory,layout_base> & getPropVector() + vector_dist_prop & getPropVector() { return v_prp; } @@ -2825,7 +2827,7 @@ public: * \return the particle position vector * */ - const openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVectorSort() const + const vector_dist_pos & getPosVectorSort() const { return v_pos_out; } @@ -2835,7 +2837,7 @@ public: * \return the particle position vector * */ - openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVectorSort() + vector_dist_pos & getPosVectorSort() { return v_pos_out; } @@ -2845,7 +2847,7 @@ public: * \return the particle property vector * */ - const openfpm::vector<prop,Memory,layout_base> & getPropVectorSort() const + const vector_dist_prop & getPropVectorSort() const { return v_prp_out; } @@ -2855,7 +2857,7 @@ public: * \return the particle property vector * */ - openfpm::vector<prop,Memory,layout_base> & getPropVectorSort() + vector_dist_prop & getPropVectorSort() { return v_prp_out; }