diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index 2ea20b24ddd9bfba136bf3fb286b37aa9bd9a22e..0738bcf4f463785ef505655633888a8156b6a5d2 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -5,7 +5,7 @@ //! Memory bandwidth with small calculations template<typename vector_type, typename vector_type2> -inline __global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in) +__global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in) { auto p = blockIdx.x * blockDim.x + threadIdx.x; @@ -25,7 +25,7 @@ inline __global__ void translate_fill_prop_write(vector_type vd_out, vector_type template<typename vector_type, typename vector_type2> -inline __global__ void translate_fill_prop_read(vector_type vd_out, vector_type2 vd_in) +__global__ void translate_fill_prop_read(vector_type vd_out, vector_type2 vd_in) { auto p = blockIdx.x * blockDim.x + threadIdx.x;