From ef8e58f243a09e4feb56e6196b69f8eab94a9a48 Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Wed, 22 Dec 2021 18:09:49 +0100 Subject: [PATCH] Fixing CUDIFY --- example/Performance/memBW/Makefile | 2 +- example/Performance/memBW/main.cu | 59 ++++++++++++++++--- openfpm_devices | 2 +- .../tests/sgrid_dist_id_gpu_unit_tests.cu | 1 + 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/example/Performance/memBW/Makefile b/example/Performance/memBW/Makefile index 329fcab0e..68543ae85 100644 --- a/example/Performance/memBW/Makefile +++ b/example/Performance/memBW/Makefile @@ -42,7 +42,7 @@ OBJ = main.o memBW: %.o: %.cu - $(CUDA_CC) -g -O3 $(CUDA_OPTIONS) $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + $(CUDA_CC) -O3 $(CUDA_OPTIONS) $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp $(CC) -g -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH) diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index 87550d984..d7a1be566 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -53,18 +53,18 @@ __global__ void translate_fill_prop_write_array(float * vd_out_scal, { auto p = blockIdx.x * blockDim.x + threadIdx.x; - float a = vd_in_vec[p* + 0*stride]; - float b = vd_in_vec[p* + 1*stride]; + float a = vd_in_vec[p + 0*stride]; - vd_out_scal[p] = a + b; + vd_out_scal[p] = a; vd_out_vec[p + 0*stride] = a; - vd_out_vec[p + 1*stride] = b; + vd_out_vec[p + 1*stride] = a; vd_out_mat[p + 0*2*stride + 0*stride ] = a; - vd_out_mat[p + 0*2*stride + 1*stride ] = b; - vd_out_mat[p + 1*2*stride + 0*stride ] = a + b; - vd_out_mat[p + 1*2*stride + 1*stride ] = b - a; + vd_out_mat[p + 0*2*stride + 1*stride ] = a; + vd_out_mat[p + 1*2*stride + 0*stride ] = a; + vd_out_mat[p + 1*2*stride + 1*stride ] = a; + vd_in_vec[p + 1*stride] = a; } @@ -184,6 +184,18 @@ int main(int argc, char *argv[]) initialize_buf(in,out); + +for (int j = 0 ; j < 100 ; j++) +{ + + for (int i = 0 ; i < 16777216; i++) + { + out.get<2>(i)[1][0] = in.get<0>(i)[1]; + } +} + + return 0; + // Read write test with TLS auto ite = out.getGPUIterator(256); @@ -344,6 +356,38 @@ int main(int argc, char *argv[]) double dev_read_lamb = 0.0; standard_deviation(res,mean_read_lamb,dev_read_lamb); + // Array benchmark + + for (int i = 0 ; i < 110 ; i++) + { + cudaDeviceSynchronize(); + timer t; + t.start(); + + float * out_s = (float *)out.getDeviceBuffer<0>(); + float * out_v = (float *)out.getDeviceBuffer<1>(); + float * out_m = (float *)out.getDeviceBuffer<2>(); + float * in_v = (float *)in.getDeviceBuffer<0>(); + + CUDA_LAUNCH(translate_fill_prop_write_array,ite,out_s,out_v,out_m,in_v,out.capacity()); + + cudaDeviceSynchronize(); + + t.stop(); + + if (i >=10) + {res.get(i-10) = nele*4*9 / t.getwct() * 1e-9;} + + std::cout << "Time ARR: " << t.getwct() << std::endl; + std::cout << "BW ARR: " << nele*4*9 / t.getwct() * 1e-9 << " GB/s" << std::endl; + } + + double mean_write_arr = 0.0; + double dev_write_arr = 0.0; + standard_deviation(res,mean_write_arr,dev_write_arr); + + /////////////////// + #ifdef CUDIFY_USE_CUDA for (int i = 0 ; i < 110 ; i++) @@ -382,6 +426,7 @@ int main(int argc, char *argv[]) std::cout << "Average READ with lamb: " << mean_read_lamb << " deviation: " << dev_read_lamb << std::endl; std::cout << "Average WRITE with lamb: " << mean_write_lamb << " deviation: " << dev_write_lamb << std::endl; + std::cout << "Average WRITE with array: " << mean_write_arr << " deviation: " << dev_write_arr << std::endl; } #else diff --git a/openfpm_devices b/openfpm_devices index f95b31b6d..ccdd64f9d 160000 --- a/openfpm_devices +++ b/openfpm_devices @@ -1 +1 @@ -Subproject commit f95b31b6d0af0a3c60a2643840854feba90abb1e +Subproject commit ccdd64f9dee09116426e685ec44debfab255aa0a diff --git a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu index cb5761cf1..e2485f245 100644 --- a/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu +++ b/src/Grid/tests/sgrid_dist_id_gpu_unit_tests.cu @@ -5,6 +5,7 @@ #include <boost/test/unit_test.hpp> #include "Grid/grid_dist_id.hpp" + BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite ) template<unsigned int p> -- GitLab