// Patch overview (unified diff touching two files).
//
// NOTE(review): the newlines of this patch have been collapsed, so hunk
// headers, context lines and +/- lines all run together below. The text is
// kept verbatim; restore the line structure from the original commit before
// trying to apply it.
//
// 1) src/Grid/grid_dist_id.hpp, hunk @@ -1805,7 +1805,7 @@
//    In the code that obtains an iterator with getGridIteratorGPU() and calls
//    setGPUInsertBuffer(1), the kernel launch changes from
//    `it.template launch<0>(launch_insert_sparse(),f1,f2)` to
//    `it.template launch<1>(launch_insert_sparse(),f1,f2)`.
//    NOTE(review): the enclosing function signature and the meaning of the
//    launch<> template argument are not visible in this hunk; presumably this
//    is the two-functor addPoints overload exercised by the new test - confirm.
//
// 2) src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu, hunk @@ -300,6 +300,69 @@
//    Adds BOOST_AUTO_TEST_CASE(sgrid_gpu_app_point_test_no_box). The test:
//      - builds a 75x75x75 sgrid_dist_id_gpu with four float properties,
//        periodic boundaries in all three dimensions and Ghost g(1), and sets
//        the background value of properties 0..3 to 666;
//      - allocates one int in a CudaMemory buffer, zeroes it on the host and
//        copies it to the device to serve as a counter;
//      - calls gdist.addPoints with two __device__ lambdas and no box
//        arguments: the first does atomicAdd(cnt,1) and returns true for every
//        (i,j,k); the second writes property 0 = c + i + j and
//        property 1 = c + 1000 + i + j;
//      - flushes with smax_<0>, smax_<1> on the device and runs
//        ghost_get<0,1>(RUN_ON_DEVICE);
//      - copies the counter back, sums it across the cluster with
//        v_cl.sum()/v_cl.execute(), and requires the total to equal 75*75*75.
//    NOTE(review): `Box<3,size_t> box` is declared but never used in this test
//    as shown; it looks like a leftover - confirm and drop.
//    NOTE(review): the int counter is initialised with the literal `0.0`;
//    `0` would state the intent without a floating-to-integer conversion.
//    NOTE(review): the second lambda ignores `k`, properties 2 and 3 are never
//    written, and only the number of insertions is asserted, not the values.
//
// 3) src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu, hunk @@ -352,7 +415,6 @@
//    In sgrid_gpu_app_point_test, removes the device-side
//    `printf("%d %d %d \n",i,j,k);` from the else branch that increments
//    cnt_out; the atomicAdd(cnt_out,1) is kept.
diff --git a/src/Grid/grid_dist_id.hpp b/src/Grid/grid_dist_id.hpp index 12d4407012ed96281033b48cf678af42c3821797..4b5954dbb05438fcddecef3d7766325306c30e70 100644 --- a/src/Grid/grid_dist_id.hpp +++ b/src/Grid/grid_dist_id.hpp @@ -1805,7 +1805,7 @@ public: auto it = getGridIteratorGPU(); it.setGPUInsertBuffer(1); - it.template launch<0>(launch_insert_sparse(),f1,f2); + it.template launch<1>(launch_insert_sparse(),f1,f2); } /*! \brief Insert point in the grid between start and stop diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index e26743dfb8c54d8b0e91ae85fc3c4fe67d5fa636..432cc219110021621867a3e9d9d69f120f3bbce4 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -300,6 +300,69 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get ) sgrid_ghost_get(sz2,sz4); } +BOOST_AUTO_TEST_CASE( sgrid_gpu_app_point_test_no_box ) +{ + size_t sz[3] = {75,75,75}; + periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC}; + + Ghost<3,long int> g(1); + + Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); + + sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc); + + gdist.template setBackgroundValue<0>(666); + gdist.template setBackgroundValue<1>(666); + gdist.template setBackgroundValue<2>(666); + gdist.template setBackgroundValue<3>(666); + + /////// GPU insert + flush + + Box<3,size_t> box({1,1,1},{sz[0],sz[1],sz[2]}); + + /////// GPU Run kernel + + float c = 5.0; + + typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT; + + CudaMemory cmem; + cmem.allocate(sizeof(int)); + + *(int *)cmem.getPointer() = 0.0; + + cmem.hostToDevice(); + + int * cnt = (int *)cmem.getDevicePointer(); + + gdist.addPoints([cnt] __device__ (int i, int j, int k) + { + atomicAdd(cnt,1); + + return true; + }, + [c] __device__ (InsertBlockT & data, int i, int j, int k) + { + data.template get<0>() = c + i + j; + data.template get<1>() = c + 1000 + i 
+ j; + } + ); + + gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE); + gdist.template ghost_get<0,1>(RUN_ON_DEVICE); + + cmem.deviceToHost(); + + int cnt_host = *(int *)cmem.getPointer(); + + auto & v_cl = create_vcluster(); + + v_cl.sum(cnt_host); + v_cl.execute(); + + BOOST_REQUIRE_EQUAL(cnt_host,75*75*75); +} + BOOST_AUTO_TEST_CASE( sgrid_gpu_app_point_test ) { @@ -352,7 +415,6 @@ BOOST_AUTO_TEST_CASE( sgrid_gpu_app_point_test ) {atomicAdd(cnt,1);} else { - printf("%d %d %d \n",i,j,k); atomicAdd(cnt_out,1); }