diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index 21dcbd5d05b7c01c943ed5c8c5acc943a02ab148..c1f1b92be1dad52315ee3aaf247ca37fc144c666 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -3,7 +3,7 @@ #include "Vector/map_vector.hpp" #include "util/stat/common_statistics.hpp" -#define NELEMENTS 67108864 +#define NELEMENTS 16777216 //! Memory bandwidth with small calculations template<typename vector_type, typename vector_type2> @@ -154,7 +154,7 @@ int main(int argc, char *argv[]) openfpm::vector<double> res; res.resize(100); -/* for (int i = 0 ; i < 110 ; i++) + for (int i = 0 ; i < 110 ; i++) { cudaDeviceSynchronize(); timer t; @@ -206,7 +206,7 @@ int main(int argc, char *argv[]) double dev_read_tls = 0.0; standard_deviation(res,mean_read_tls,dev_read_tls); - check_read(in,out);*/ + check_read(in,out); ////////////// @@ -407,7 +407,9 @@ int main(int argc, char *argv[]) check_read(in,out); - /////// BASE 1 core + /////// BASE 1 core (This is only valid on CPUs) + + #ifdef CUDIFY_USE_OPENMP for (int i = 0 ; i < 110 ; i++) { @@ -451,6 +453,8 @@ int main(int argc, char *argv[]) std::cout << "BW 1-CORE ARR: " << (double)nele*4*9 / t.getwct() * 1e-9 << " GB/s" << std::endl; } + #endif + /////////////////// #ifdef CUDIFY_USE_CUDA