diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index 0738bcf4f463785ef505655633888a8156b6a5d2..1b77af499c3855c609c45be3ddb4f90791bd7943 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -10,17 +10,17 @@ __global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in auto p = blockIdx.x * blockDim.x + threadIdx.x; float a = vd_in.template get<0>(p)[0]; - float b = vd_in.template get<0>(p)[1]; - vd_out.template get<0>(p) = a + b; + vd_out.template get<0>(p) = a; vd_out.template get<1>(p)[0] = a; - vd_out.template get<1>(p)[1] = b; + vd_out.template get<1>(p)[1] = a; vd_out.template get<2>(p)[0][0] = a; - vd_out.template get<2>(p)[0][1] = b; - vd_out.template get<2>(p)[1][0] = a + b; - vd_out.template get<2>(p)[1][1] = b - a; + vd_out.template get<2>(p)[0][1] = a; + vd_out.template get<2>(p)[1][0] = a; + vd_out.template get<2>(p)[1][1] = a; + vd_in.template get<0>(p)[1] = a; } @@ -39,8 +39,8 @@ __global__ void translate_fill_prop_read(vector_type vd_out, vector_type2 vd_in) float f = vd_out.template get<2>(p)[1][0]; float g = vd_out.template get<2>(p)[1][1]; - vd_in.template get<0>(p)[0] = a+b+c+d; - vd_in.template get<0>(p)[1] = e+f+g; + float h = vd_in.template get<0>(p)[0]; + vd_in.template get<0>(p)[1] = a+b+c+d+e+f+g+h; } // Arrays