diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index b38b643bf1d40e75e154568935c5325311068953..2cc424229cb2c1be4981ed8a23de9474ae837e5b 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -143,7 +143,7 @@ void check_read(in_type & in, out_type & out) if (success == false) { - std::cout << "FAIL READ " << i << in.template get<0>(i)[1] << " != " << a+b+c+d+e+f+g+h << std::endl; + std::cout << "FAIL READ " << i << " " << in.template get<0>(i)[1] << " != " << a+b+c+d+e+f+g+h << std::endl; exit(1); } } @@ -202,6 +202,12 @@ int main(int argc, char *argv[]) check_write(in,out); + for (int i = 0 ; i < 16777216 ; i++) + { + in.template get<0>(i)[0] = i; + in.template get<0>(i)[1] = i+100.0; + } + for (int i = 0 ; i < 110 ; i++) { cudaDeviceSynchronize(); @@ -277,6 +283,12 @@ int main(int argc, char *argv[]) double dev_write_lamb = 0.0; standard_deviation(res,mean_write_lamb,dev_write_lamb); + for (int i = 0 ; i < 16777216 ; i++) + { + in.template get<0>(i)[0] = i; + in.template get<0>(i)[1] = i+100.0; + } + for (int i = 0 ; i < 110 ; i++) { cudaDeviceSynchronize();