diff --git a/openfpm_data b/openfpm_data index 8c459e2608db17c0ad8fd2a27d29e4de9b01cc2c..75b93fcc133165ba640eb0d1ab9c282d563f9c12 160000 --- a/openfpm_data +++ b/openfpm_data @@ -1 +1 @@ -Subproject commit 8c459e2608db17c0ad8fd2a27d29e4de9b01cc2c +Subproject commit 75b93fcc133165ba640eb0d1ab9c282d563f9c12 diff --git a/src/Grid/grid_dist_id_comm.hpp b/src/Grid/grid_dist_id_comm.hpp index ec045d96547d48608f483d5bb9fd266a526eeac7..27d23fb3122b9558ffefba7529e504f5d01b35f5 100644 --- a/src/Grid/grid_dist_id_comm.hpp +++ b/src/Grid/grid_dist_id_comm.hpp @@ -1017,106 +1017,135 @@ public: size_t req = 0; - // first we initialize the pack buffer on all internal grids + // Pack information + Pack_stat sts; - for (size_t i = 0 ; i < loc_grid.size() ; i++) - {loc_grid.get(i).packReset();} + // Keep SKIP_LABELLING in opt only if every local grid reports it is possible, otherwise clear the bit + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;} - // Calculating the size to pack all the data to send - for ( size_t i = 0 ; i < ig_box.size() ; i++ ) + if (!(opt & SKIP_LABELLING)) { - // for each ghost box - for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + // first we initialize the pack buffer on all internal grids + + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).packReset();} + + // Calculating the size to pack all the data to send + for ( size_t i = 0 ; i < ig_box.size() ; i++ ) { - // And linked sub-domain - size_t sub_id = ig_box.get(i).bid.get(j).sub; - // Internal ghost box - Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box; + // for each ghost box + for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + { + // And linked sub-domain + size_t sub_id = ig_box.get(i).bid.get(j).sub; + // Internal ghost box + Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box; - if (g_ig_box.isValid() == false) - {continue;} + if (g_ig_box.isValid() == false) + {continue;} - g_ig_box -= 
gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); + g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); - // Pack a size_t for the internal ghost id - Packer<size_t,Memory>::packRequest(req); - // Create a sub grid iterator spanning the internal ghost layer - auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2()); + // Pack a size_t for the internal ghost id + Packer<size_t,Memory>::packRequest(req); + // Create a sub grid iterator spanning the internal ghost layer + auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2()); - // get the size to pack - Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req); + // get the size to pack + Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req); + } } - } - // Finalize calculation - for (size_t i = 0 ; i < loc_grid.size() ; i++) - {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());} + // Finalize calculation + for (size_t i = 0 ; i < loc_grid.size() ; i++) + {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());} - // resize the property buffer memory - g_send_prp_mem.resize(req); + // resize the property buffer memory + g_send_prp_mem.resize(req); - // Create an object of preallocated memory for properties - ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem)); + // Create an object of preallocated memory for properties + ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem)); - prAlloc_prp.incRef(); + prAlloc_prp.incRef(); - // Pack information - Pack_stat sts; + pointers.clear(); + pointers2.clear(); - pointers.clear(); - pointers2.clear(); + // Pack the information for each processor and send it + for ( size_t i = 0 ; i < ig_box.size() ; i++ ) + { - // Pack the information for each processor and send it - for ( size_t i = 0 ; i 
< ig_box.size() ; i++ ) - { + sts.mark(); - sts.mark(); + void * pointer; - void * pointer; + if (opt & RUN_ON_DEVICE) + {pointer = prAlloc_prp.getDevicePointerEnd();} + else + {pointer = prAlloc_prp.getPointerEnd();} - if (opt & RUN_ON_DEVICE) - {pointer = prAlloc_prp.getDevicePointerEnd();} - else - {pointer = prAlloc_prp.getPointerEnd();} + // for each ghost box + for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) + { + // we pack only if it is valid + if (ig_box.get(i).bid.get(j).box.isValid() == false) + continue; - // for each ghost box - for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++) - { - // we pack only if it is valid - if (ig_box.get(i).bid.get(j).box.isValid() == false) - continue; + // And linked sub-domain + size_t sub_id = ig_box.get(i).bid.get(j).sub; + // Internal ghost box + Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box; + g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); + // Ghost box global id + size_t g_id = ig_box.get(i).bid.get(j).g_id; + + // Pack a size_t for the internal ghost id + Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts); + prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd()); + // Create a sub grid iterator spanning the internal ghost layer + auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2()); + // and pack the internal ghost grid + Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts); + } + // send the request - // And linked sub-domain - size_t sub_id = ig_box.get(i).bid.get(j).sub; - // Internal ghost box - Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box; - g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>(); - // Ghost box global id - size_t g_id = ig_box.get(i).bid.get(j).g_id; + void * pointer2; - // Pack a size_t for the internal ghost id - Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts); - 
prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd()); - // Create a sub grid iterator spanning the internal ghost layer - auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2()); - // and pack the internal ghost grid - Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts); - } - // send the request + if (opt & RUN_ON_DEVICE) + {pointer2 = prAlloc_prp.getDevicePointerEnd();} + else + {pointer2 = prAlloc_prp.getPointerEnd();} - void * pointer2; + pointers.add(pointer); + pointers2.add(pointer2); + } - if (opt & RUN_ON_DEVICE) - {pointer2 = prAlloc_prp.getDevicePointerEnd();} - else - {pointer2 = prAlloc_prp.getPointerEnd();} + for (size_t i = 0 ; i < loc_grid.size() ; i++) + { + rem_copy_opt opt_ = rem_copy_opt::NONE_OPT; + if (opt & SKIP_LABELLING) + {opt_ = rem_copy_opt::KEEP_GEOMETRY;} - pointers.add(pointer); - pointers2.add(pointer2); + loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true); + } } + else + { + req = g_send_prp_mem.size(); - for (size_t i = 0 ; i < loc_grid.size() ; i++) - {loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts);} + // Skip-labelling path: wrap the send buffer at its current size (req) instead of recomputing the pack request + ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem)); + + for (size_t i = 0 ; i < loc_grid.size() ; i++) + { + rem_copy_opt opt_ = rem_copy_opt::NONE_OPT; + if (opt & SKIP_LABELLING) + {opt_ = rem_copy_opt::KEEP_GEOMETRY;} + + loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true); + } + } for ( size_t i = 0 ; i < ig_box.size() ; i++ ) { diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index 0851a3e50776fc4587bbfac1884b1efabc856bae..b3db221c8dfa24ce3c1fe66b16baff1147ecca85 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -662,6 +662,13 @@ 
BOOST_AUTO_TEST_CASE( sgrid_gpu_test_skip_labelling ) typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType; + gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){ + u_out = 1*u(i,j,k); + v_out = 1*v(i,j,k); + }); + + gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING); + gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){ u_out = 5*u(i,j,k); v_out = 5*v(i,j,k);