Rename the memBW example's build target from miniBUDE to memBW and pass the
translate_fill_prop kernel arguments by value.

The link rule, `all`, and `run` are all switched to `memBW`; the `run` recipe
is updated as well so that it launches the binary the link rule now produces
(previously it was left pointing at ./miniBUDE).

NOTE(review): blank context lines and recipe/statement indentation were
reconstructed from the hunk line counts -- confirm against the working tree
before applying. The Makefile post-image blob id on the `index` line predates
the `run` recipe fix and should be regenerated.

diff --git a/example/Performance/memBW/Makefile b/example/Performance/memBW/Makefile
index a5198f1860831028b6b5edcf6332dd03f03a0e51..c44bd35ded8d5919b5fbb70d5064a79054a71872 100644
--- a/example/Performance/memBW/Makefile
+++ b/example/Performance/memBW/Makefile
@@ -47,12 +47,12 @@ miniBUDE:
 %.o: %.cpp
 	$(CC) -g -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH)
 
-miniBUDE: $(OBJ)
+memBW: $(OBJ)
 	$(CUDA_CC_LINK) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
 
-all: miniBUDE
+all: memBW
 
-run: miniBUDE
-	mpirun --oversubscribe -np 2 ./miniBUDE
+run: memBW
+	mpirun --oversubscribe -np 2 ./memBW
 
 .PHONY: clean all run
diff --git a/example/Performance/memBW/main.cu b/example/Performance/memBW/main.cu
index dd08ed890ae6ba734af24dc549f6697ed19728a8..07c516f7e2b08eaa5035ffae4c4d640a13cf1699 100644
--- a/example/Performance/memBW/main.cu
+++ b/example/Performance/memBW/main.cu
@@ -3,7 +3,7 @@
 
 
 template<typename vector_type, typename vector_type2>
-__attribute__((always_inline)) inline __global__ void translate_fill_prop(vector_type & vd_out, vector_type2 & vd_in)
+inline __global__ void translate_fill_prop(vector_type vd_out, vector_type2 vd_in)
 {
 	auto p = blockIdx.x * blockDim.x + threadIdx.x;
 