// Patch overview (review annotations). Everything from the "diff --git" marker
// onward is the original patch text, unchanged.
//
// Files touched:
//   src/Grid/grid_dist_id.hpp
//   src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
//
// src/Grid/grid_dist_id.hpp
//   hunk -69,6 +69,11 (struct device_grid_copy):
//     adds a static template member pre_load(gdb_ext_old, loc_grid_old,
//     gdb_ext, loc_grid) whose body is empty.
//   hunk -98,6 +103,21:
//     adds struct ids_pl: a single size_t field `id`, plus operator< and
//     operator== that compare only `id`.
//   hunk -112,6 +132,143 (struct device_grid_copy<true>):
//     adds (a) a commented-out block of copy_to / removeCopyToFinalize calls
//     with std::cout traces, and (b) a non-empty pre_load that
//       1. binds dg = loc_grid.get(0) and dlin = dg.getGrid();
//       2. fills `gg` by iterating a grid_sm built with size 2 in every
//          dimension and storing each key scaled by dlin.getBlockEgdeSize();
//       3. for every entry of gdb_ext_old walks the index array of
//          loc_grid_old.get(i); for each stored index and each offset in gg
//          it builds key_dst = slin.InvLinId(index,0) + origin + kp1 + offset
//          (origin = gdb_ext_old.get(i).origin,
//           kp1 = gdb_ext.get(0).Dbox.getKP1()),
//          calls dlin.LinId(key_dst,bid,lid) and appends bid to `ids`;
//       4. calls ids.sort() and ids.unique(), reserves ids.size()+8 on the
//          index and data arrays of dg, then calls dg.insertBlockFlush(id)
//          once per remaining id.
//   hunk -3361,6 +3518,7:
//     inserts a call to
//     device_grid_copy<device_grid::isCompressed()>::pre_load(...) in the else
//     branch, immediately before the existing loop over gdb_ext_old.
//   hunk -3376,15 +3534,15:
//     removes and re-adds the statements of the while (it_src.isNext()) body.
//     In this view the removed and added statements read the same, so this is
//     presumably an indentation-only change -- confirm against the raw patch.
//
// src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu
//   hunk -187,6 +187,81:
//     adds template function check_sgrid(gdist2, box, c). It walks
//     gdist2.getDomainIterator(); for each point it derives four neighbours
//     with move(0,1), move(0,-1), move(1,1), move(1,-1), both of the iterator
//     key p and of it2.getGKey(p). For every neighbour whose global key is
//     inside `box` it compares gdist2.template get<0>(neighbour) with
//     c + key.get(0) + key.get(1). The first mismatch prints diagnostics and
//     leaves the loop; the function ends with BOOST_REQUIRE_EQUAL(match,true).
//   hunk -232,19 +307,23 (sgrid_gpu_test_save_and_load):
//     calls check_sgrid(gdist,box,c) after gdist.save(...), and
//     check_sgrid(gdist2,box,c) after gdist2.load(...) and again after the
//     hostToDevice / ghost_get / deviceToHost sequence; it also opens a block
//     comment in front of the previous inline `bool match = true;` check.
//   hunks -272, -283, -294, -305:
//     each adds one std::cout line printing key_*.to_string() in the old
//     inline check (which, as far as the visible hunks show, now sits inside
//     the block comment opened above).
//   hunk -313,7 +396,7:
//     closes that block comment right after BOOST_REQUIRE_EQUAL(match,true);
//
// NOTE(review):
//   - In device_grid_copy<true>::pre_load the inner `for (int i ...)` over
//     indexBuffer re-uses the name `i` of the enclosing loop over gdb_ext_old.
//   - `key` is recomputed through slin.InvLinId(...) on every iteration of
//     the `ex` loop although none of its arguments depend on `ex`.
//   - `auto & dg = loc_grid.get(0);` is declared again inside both loops even
//     though dg is already bound at the top of the function.
//   - check_sgrid compares the stored value with the expected one using ==;
//     `c` is a float, so this is an exact floating-point comparison.
//   - Only the first mismatch branch of check_sgrid prints the address of the
//     element; the other three print key.to_string().
//   - The hunk headers describe a multi-line patch while the text below has
//     its line breaks collapsed; presumably it cannot be fed to a patch tool
//     as is -- TODO confirm against the original artifact.
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 0c471f526fe5ce13edd63052fb3d67adb6e241da..0d24f5184e49cb8048a4a2cdfd727235ef30ae03 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -69,6 +69,11 @@ struct device_grid_copy { dg.insert_o(key_dst) = lg.get_o(key); } + + template<typename gdb_ext_type, typename loc_grid_type> + static void pre_load(gdb_ext_type & gdb_ext_old, loc_grid_type & loc_grid_old, gdb_ext_type & gdb_ext, loc_grid_type & loc_grid) + { + } }; template<typename e_src, typename e_dst, typename indexT> @@ -98,6 +103,21 @@ struct copy_all_prop_sparse } }; +struct ids_pl +{ + size_t id; + + bool operator<(const ids_pl & i) const + { + return id < i.id; + } + + bool operator==(const ids_pl & i) const + { + return id == i.id; + } +}; + template<> struct device_grid_copy<true> { @@ -112,6 +132,143 @@ struct device_grid_copy<true> boost::mpl::for_each_ref< boost::mpl::range_c<int,0,decltype(block_data_dst)::max_prop> >(cp); } + +/* + * + * + auto & v_cl = create_vcluster(); + auto & dg = loc_grid.get(0); + + typedef typename std::remove_reference<decltype(dg)>::type sparse_grid_type; + + grid_key_dx<sparse_grid_type::dims> kp1 = gdb_ext.get(0).Dbox.getKP1(); + + for (int j = 0 ; j < std::ceil(gdb_ext_old.size() / 32) + 1 ; j++) + { + int jbase = j*32; + int sz = gdb_ext_old.size() - jbase; + if (sz < 0) {break;} + else if (sz > 32) {sz = 32;} + dg.copyRemoveReset(); + + std::cout << "JBASE: " << jbase << " SZ: " << sz << std::endl; + + for (int i = 0 ; i < sz ; i++) + { + Box<sparse_grid_type::dims,long int> bx_dst; + Box<sparse_grid_type::dims,long int> bx_src; + for (int k = 0 ; k < sparse_grid_type::dims ; k++) + { + bx_dst.setLow(k,kp1.get(k) + gdb_ext_old.get(i+jbase).origin.get(k) + gdb_ext_old.get(i+jbase).Dbox.getKP1().get(k)); + bx_dst.setHigh(k,kp1.get(k) + gdb_ext_old.get(i+jbase).origin.get(k) + gdb_ext_old.get(i+jbase).Dbox.getKP2().get(k)); + 
bx_src.setLow(k,gdb_ext_old.get(i+jbase).Dbox.getKP1().get(k)); + bx_src.setHigh(k,gdb_ext_old.get(i+jbase).Dbox.getKP2().get(k)); + } + + std::cout << jbase << " " << i << " SRC:" << gdb_ext_old.get(i+jbase).Dbox.toString() << " DST: " << bx_dst.toString() << " INDEX BUFFER: " << loc_grid_old.get(jbase + i).private_get_index_array().size() << std::endl; + loc_grid_old.get(jbase + i).template hostToDevice<0>(); + dg.copy_to(loc_grid_old.get(jbase + i),gdb_ext_old.get(jbase +i).Dbox,bx_dst); + } + + for (int i = 0 ; i < sz ; i++) + { + std::cout << "FINALIZE1" << std::endl; + loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1); + std::cout << "FINALIZE2" << std::endl; + loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2); + std::cout << "FINALIZE3" << std::endl; + loc_grid_old.get(jbase + i).template removeCopyToFinalize<0>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3); + } + } + + * + * + */ + + template<typename gdb_ext_type, typename loc_grid_type> + static void pre_load(gdb_ext_type & gdb_ext_old, loc_grid_type & loc_grid_old, gdb_ext_type & gdb_ext, loc_grid_type & loc_grid) + { + auto & dg = loc_grid.get(0); + auto dlin = dg.getGrid(); + typedef typename std::remove_reference<decltype(dg)>::type sparse_grid_type; + + openfpm::vector<ids_pl> ids; + openfpm::vector<grid_key_dx<sparse_grid_type::dims>> gg; + + size_t sz[sparse_grid_type::dims]; + + for (int i = 0 ; i < sparse_grid_type::dims ; i++) + {sz[i] = 2;} + + grid_sm<sparse_grid_type::dims,void> gvoid(sz); + + grid_key_dx_iterator<sparse_grid_type::dims> it(gvoid); + while(it.isNext()) + { + auto key = it.get(); + + grid_key_dx<sparse_grid_type::dims> k; + for (int i = 0 ; i < sparse_grid_type::dims ; i++) + { + k.set_d(i,key.get(i)*dlin.getBlockEgdeSize()); + } + + gg.add(k); + ++it; + } + + + for (int i = 0 ; i < gdb_ext_old.size() ; i++) + { + + auto & lg = loc_grid_old.get(i); + auto & 
indexBuffer = lg.private_get_index_array(); + auto & dg = loc_grid.get(0); + + auto slin = loc_grid_old.get(i).getGrid(); + + grid_key_dx<sparse_grid_type::dims> kp1 = gdb_ext.get(0).Dbox.getKP1(); + + grid_key_dx<sparse_grid_type::dims> orig; + for (int j = 0 ; j < sparse_grid_type::dims ; j++) + { + orig.set_d(j,gdb_ext_old.get(i).origin.get(j)); + } + + for (int i = 0; i < indexBuffer.size() ; i++) + { + for (int ex = 0; ex < gg.size() ; ex++) { + auto key = slin.InvLinId(indexBuffer.template get<0>(i),0); + grid_key_dx<sparse_grid_type::dims> key_dst; + + for (int j = 0 ; j < sparse_grid_type::dims ; j++) + {key_dst.set_d(j,key.get(j) + orig.get(j) + kp1.get(j) + gg.get(ex).get(j));} + + typename sparse_grid_type::indexT_ bid; + int lid; + + dlin.LinId(key_dst,bid,lid); + + ids.add(); + ids.last().id = bid; + } + } + } + + ids.sort(); + ids.unique(); + + auto & indexBuffer = dg.private_get_index_array(); + auto & dataBuffer = dg.private_get_data_array(); + indexBuffer.reserve(ids.size()+8); + dataBuffer.reserve(ids.size()+8); + for (int i = 0 ; i < ids.size() ; i++) + { + auto & dg = loc_grid.get(0); + dg.insertBlockFlush(ids.get(i).id); + } + } }; /*! 
\brief This is a distributed grid @@ -3361,6 +3518,7 @@ public: } else { + device_grid_copy<device_grid::isCompressed()>::pre_load(gdb_ext_old,loc_grid_old,gdb_ext,loc_grid); for (int i = 0 ; i < gdb_ext_old.size() ; i++) { auto & lg = loc_grid_old.get(i); @@ -3376,15 +3534,15 @@ public: while (it_src.isNext()) { - auto key = it_src.get(); - grid_key_dx<dim> key_dst; + auto key = it_src.get(); + grid_key_dx<dim> key_dst; - for (int j = 0 ; j < dim ; j++) - {key_dst.set_d(j,key.get(j) + orig.get(j) + kp1.get(j));} + for (int j = 0 ; j < dim ; j++) + {key_dst.set_d(j,key.get(j) + orig.get(j) + kp1.get(j));} - device_grid_copy<device_grid::isCompressed()>::assign(key,key_dst,dg,lg); + device_grid_copy<device_grid::isCompressed()>::assign(key,key_dst,dg,lg); - ++it_src; + ++it_src; } } } diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index a0582fd4315f550d4dfc033511cf810af1d61636..830ca9bdbf9eca38f80a1304c9cc32493a996b07 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -187,6 +187,81 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output ) #endif } +template<typename grid, typename box_type> +void check_sgrid(grid & gdist2, box_type & box, float c) +{ + bool match = true; + auto it2 = gdist2.getDomainIterator(); + + while (it2.isNext()) + { + auto p = it2.get(); + + auto key = it2.getGKey(p); + + auto p_xp1 = p.move(0,1); + auto p_xm1 = p.move(0,-1); + auto p_yp1 = p.move(1,1); + auto p_ym1 = p.move(1,-1); + + auto key_xp1 = key.move(0,1); + auto key_xm1 = key.move(0,-1); + auto key_yp1 = key.move(1,1); + auto key_ym1 = key.move(1,-1); + + if (box.isInside(key_xp1.toPoint())) + { + match &= gdist2.template get<0>(p_xp1) == c + key_xp1.get(0) + key_xp1.get(1); + + if (match == false) + { + std::cout << gdist2.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl; + std::cout << "1 " << p_xp1.getKey().toPoint().to_string() 
<< " " << &gdist2.template get<0>(p_xp1) << std::endl; + break; + } + } + + if (box.isInside(key_xm1.toPoint())) + { + match &= gdist2.template get<0>(p_xm1) == c + key_xm1.get(0) + key_xm1.get(1); + + if (match == false) + { + std::cout << gdist2.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl; + std::cout << "2 " << key_xm1.to_string() << std::endl; + break; + } + } + + if (box.isInside(key_yp1.toPoint())) + { + match &= gdist2.template get<0>(p_yp1) == c + key_yp1.get(0) + key_yp1.get(1); + + if (match == false) + { + std::cout << gdist2.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl; + std::cout << "3 " << key_yp1.to_string() << std::endl; + break; + } + } + + if (box.isInside(key_ym1.toPoint())) + { + match &= gdist2.template get<0>(p_ym1) == c + key_ym1.get(0) + key_ym1.get(1); + + if (match == false) + { + std::cout << gdist2.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl; + std::cout << "4 " << key_ym1.to_string() << std::endl; + break; + } + } + + ++it2; + } + + BOOST_REQUIRE_EQUAL(match,true); +} BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) { @@ -232,19 +307,23 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) gdist.template deviceToHost<0>(); gdist.save("sgrid_gpu_output_hdf5"); + check_sgrid(gdist,box,c); // Now load sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist2(sz,domain,g,bc); gdist2.load("sgrid_gpu_output_hdf5"); + check_sgrid(gdist2,box,c); + gdist2.template hostToDevice<0>(); gdist2.template ghost_get<0,1>(RUN_ON_DEVICE); gdist2.deviceToHost<0,1>(); gdist.deviceToHost<0,1>(); + check_sgrid(gdist2,box,c); - bool match = true; +/* bool match = true; auto it2 = gdist2.getDomainIterator(); @@ -272,6 +351,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) if (match == false) { std::cout << gdist.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl; + std::cout << key_xp1.to_string() << 
std::endl; break; } } @@ -283,6 +363,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) if (match == false) { std::cout << gdist.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl; + std::cout << key_xm1.to_string() << std::endl; break; } } @@ -294,6 +375,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) if (match == false) { std::cout << gdist.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl; + std::cout << key_yp1.to_string() << std::endl; break; } } @@ -305,6 +387,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) if (match == false) { std::cout << gdist.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl; + std::cout << key_ym1.to_string() << std::endl; break; } } @@ -313,7 +396,7 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load ) } - BOOST_REQUIRE_EQUAL(match,true); + BOOST_REQUIRE_EQUAL(match,true);*/ } void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2])