Skip to content
Snippets Groups Projects
Commit 7bee2cbf authored by Pietro Incardona
Browse files

Fixing example packing

parent 66b9b575
No related branches found
No related tags found
No related merge requests found
Pipeline #3970 failed
......@@ -4,68 +4,62 @@ include ../../example.mk
### Internally, the example uses the preprocessor to disable its code when it is not compiled with nvcc
CUDA_CC=
CUDA_CC_LINK=
CC=mpic++
ifdef HIP
CUDA_CC=hipcc
CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
CUDA_OPTIONS= -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
LIBS_SELECT=$(LIBS)
CC=hipcc
CUDA_CC_LINK=hipcc
CUDA_CC_LINK=hipcc
else
ifdef CUDA_ON_CPU
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=-D__NVCC__ -DCUDART_VERSION=11000 -fopenmp -O3 -ffast-math -march=native -mavx
LIBS_SELECT=$(LIBS)
CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
LIBS_SELECT=$(LIBS)
else
ifeq (, $(shell which nvcc))
CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
INCLUDE_PATH_NVCC=
CUDA_CC_LINK=mpic++
CUDA_OPTIONS=
LIBS_SELECT=$(LIBS)
else
CUDA_CC=nvcc -ccbin=mpic++
CUDA_CC_LINK=nvcc -ccbin=mpic++
CUDA_OPTIONS=-use_fast_math -arch=sm_61 -lineinfo
LIBS_SELECT=$(LIBS_NVCC)
endif
endif
endif
ifeq ($(PROFILE),ON)
CUDA_CC=scorep --nocompiler --cuda --mpp=mpi nvcc -ccbin=mpic++
CUDA_CC_LINK=scorep --nocompiler --cuda --mpp=mpi nvcc -ccbin=mpic++
else
CUDA_CC:=$(CUDA_CC)
CUDA_CC_LINK:=$(CUDA_CC_LINK)
endif
CC=mpic++
LDIR =
OPT=
OBJ = main.o
miniBUDE:
sph_dlb:
sph_dlb_test: OPT += -DTEST_RUN
sph_dlb_test: sph_dlb
%.o: %.cu
$(CUDA_CC) $(OPT) $(CUDA_OPTIONS) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC)
$(CUDA_CC) $(CUDA_OPTIONS) $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC)
%.o: %.cpp
$(CC) $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH)
miniBUDE: $(OBJ)
$(CUDA_CC_LINK) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
$(CC) -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH)
sph_dlb: $(OBJ)
$(CUDA_CC_LINK) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
all: miniBUDE
all: sph_dlb
run: miniBUDE
mpirun --oversubscribe -np 2 ./miniBUDE
run: sph_dlb_test
mpirun --oversubscribe -np 2 ./sph_dlb
.PHONY: clean all run
clean:
rm -f *.o *~ core miniBUDE
rm -f *.o *~ core sph_dlb
[pack]
files = main.cu Makefile
......@@ -75,7 +75,7 @@ double getTimestamp()
return tv.tv_usec + tv.tv_sec*1e6;
}
void printTimings(double start, double end, double poses_per_wi)
void printTimings(double start, double end, double poses_per_wi, openfpm::vector<double> & gflops_data)
{
double ms = ((end-start)/params.iterations)*1e-3;
......@@ -88,6 +88,8 @@ void printTimings(double start, double end, double poses_per_wi)
double flops = total_ops / runtime;
double gflops = flops / 1e9;
gflops_data.add(gflops);
double interactions =
(double)params.nposes
* (double)params.natlig
......@@ -608,41 +610,45 @@ int main(int argc, char *argv[])
printf("Deck : %s\n", params.deckDir);
float *resultsRef = (float *)malloc(params.nposes*sizeof(float));
runCUDA(_openfpm);
// We run the benchmark 30 times to get the mean and variance
for (int i = 0 ; i < 30 ; i++)
{
runCUDA(_openfpm);
// Load reference results from file
FILE* ref_energies = openFile(params.deckDir, FILE_REF_ENERGIES, "r", NULL);
size_t n_ref_poses = params.nposes;
if (params.nposes > REF_NPOSES) {
printf("Only validating the first %d poses.\n", REF_NPOSES);
n_ref_poses = REF_NPOSES;
}
for (size_t i = 0; i < n_ref_poses; i++)
fscanf(ref_energies, "%f", &resultsRef[i]);
// Load reference results from file
FILE* ref_energies = openFile(params.deckDir, FILE_REF_ENERGIES, "r", NULL);
size_t n_ref_poses = params.nposes;
if (params.nposes > REF_NPOSES) {
printf("Only validating the first %d poses.\n", REF_NPOSES);
n_ref_poses = REF_NPOSES;
}
fclose(ref_energies);
for (size_t i = 0; i < n_ref_poses; i++)
fscanf(ref_energies, "%f", &resultsRef[i]);
float maxdiff = -100.0f;
printf("\n Reference CUDA (diff)\n");
for (int i = 0; i < n_ref_poses; i++)
{
if (fabs(resultsRef[i]) < 1.f && fabs(_openfpm.d_results.template get<0>(i)) < 1.f) continue;
fclose(ref_energies);
float diff = fabs(resultsRef[i] - _openfpm.d_results.template get<0>(i)) / _openfpm.d_results.template get<0>(i);
if (diff > maxdiff) {
maxdiff = diff;
// printf ("Maxdiff: %.2f (%.3f vs %.3f)\n", maxdiff, resultsRef[i], resultsCUDA[i]);
}
float maxdiff = -100.0f;
printf("\n Reference CUDA (diff)\n");
for (int i = 0; i < n_ref_poses; i++)
{
if (fabs(resultsRef[i]) < 1.f && fabs(_openfpm.d_results.template get<0>(i)) < 1.f) continue;
if (i < 8)
printf("%7.2f vs %7.2f (%5.2f%%)\n", resultsRef[i], _openfpm.d_results.template get<0>(i), 100*diff);
}
printf("\nLargest difference was %.3f%%\n\n", maxdiff*100);
float diff = fabs(resultsRef[i] - _openfpm.d_results.template get<0>(i)) / _openfpm.d_results.template get<0>(i);
if (diff > maxdiff) {
maxdiff = diff;
// printf ("Maxdiff: %.2f (%.3f vs %.3f)\n", maxdiff, resultsRef[i], resultsCUDA[i]);
}
free(resultsRef);
if (i < 8)
printf("%7.2f vs %7.2f (%5.2f%%)\n", resultsRef[i], _openfpm.d_results.template get<0>(i), 100*diff);
}
printf("\nLargest difference was %.3f%%\n\n", maxdiff*100);
free(resultsRef);
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment