diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu index 0738bcf4f463785ef505655633888a8156b6a5d2..2ea20b24ddd9bfba136bf3fb286b37aa9bd9a22e 100644 --- a/example/Performance/memBW/main.cu +++ b/example/Performance/memBW/main.cu @@ -5,7 +5,7 @@ //! Memory bandwidth with small calculations template<typename vector_type, typename vector_type2> -__global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in) +inline __global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in) { auto p = blockIdx.x * blockDim.x + threadIdx.x; @@ -25,7 +25,7 @@ __global__ void translate_fill_prop_write(vector_type vd_out, vector_type2 vd_in template<typename vector_type, typename vector_type2> -__global__ void translate_fill_prop_read(vector_type vd_out, vector_type2 vd_in) +inline __global__ void translate_fill_prop_read(vector_type vd_out, vector_type2 vd_in) { auto p = blockIdx.x * blockDim.x + threadIdx.x;