diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu
index 124fcae6dece83c199a3d12e1c9b12a8a1aa8d6c..dfa89210dba5cf670c09769914928bfc8ec4d860 100644
--- a/example/Performance/memBW/main.cu
+++ b/example/Performance/memBW/main.cu
@@ -22,8 +22,8 @@ inline __global__ void translate_fill_prop_write(vector_type vd_out, vector_type
 	vd_out.template get<2>(p)[1][0] = a + b;
 	vd_out.template get<2>(p)[1][1] = b - a;
 
-	vd_in.template get<0>(p)[0] += 0.01f;
-	vd_in.template get<0>(p)[1] += 0.01f;
+	vd_in.template get<0>(p)[0] += a;
+	vd_in.template get<0>(p)[1] += b;
 }
 
 
@@ -45,8 +45,8 @@ inline __global__ void translate_fill_prop_read(vector_type vd_out, vector_type2
 	float h = vd_in.template get<0>(p)[0];
     float i = vd_in.template get<0>(p)[1];
     
-	vd_in.template get<0>(p)[0] += a+b+c+d;
-	vd_in.template get<0>(p)[1] += e+f+g+h+i;
+	vd_in.template get<0>(p)[0] = a+b+c+d;
+	vd_in.template get<0>(p)[1] = e+f+g+h+i;
 }
 
 int main(int argc, char *argv[])
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
 
     for (int i = 0 ; i < 101 ; i++)
     {
-        cudaDeviceSynchronize();
+            cudaDeviceSynchronize();
             timer t;
             t.start();
 
@@ -110,10 +110,10 @@ int main(int argc, char *argv[])
             t.stop();
 
         if (i >=1)
-        {res.get(i-1) = nele*4*13 / t.getwct() * 1e-9;}
+        {res.get(i-1) = nele*4*11 / t.getwct() * 1e-9;}
 
             std::cout << "Time: " << t.getwct() << std::endl;
-            std::cout << "BW: " << nele*4*13 / t.getwct() * 1e-9 << " GB/s"  << std::endl;
+            std::cout << "BW: " << nele*4*11 / t.getwct() * 1e-9 << " GB/s"  << std::endl;
     }
 
     double mean_read = 0.0;