From 34449ccdbb6bd4862725240d83b7e861a7892b09 Mon Sep 17 00:00:00 2001
From: Pietro Incardona <incardon@mpi-cbg.de>
Date: Thu, 16 May 2019 17:03:05 +0200
Subject: [PATCH] Adding some comment to the example

---
 example/Vector/1_gpu_first_step/main.cu  | 17 +++++++++++++++++
 example/Vector/7_SPH_dlb_gpu_opt/main.cu | 18 +++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/example/Vector/1_gpu_first_step/main.cu b/example/Vector/1_gpu_first_step/main.cu
index a648b975f..cedab9c86 100644
--- a/example/Vector/1_gpu_first_step/main.cu
+++ b/example/Vector/1_gpu_first_step/main.cu
@@ -112,6 +112,9 @@
 #define OPENMPI
 //! \cond [using_openmpi] \endcond
 
+//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX cards; use the CUB library for the scan instead
+//#define EXTERNAL_SET_GPU <----- Define this in case you want to distribute the GPUs differently from the default
+
 #include "Vector/vector_dist.hpp"
 
 //! \cond [kernel_translate_fill_prop] \endcond
@@ -139,6 +142,20 @@ __global__ void translate_fill_prop(vector_type vd)
 
 int main(int argc, char* argv[])
 {
+    // OpenFPM GPU distribution
+
+    // By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. Multi-node runs work the same way:
+    // each node hosts a group of processes, and that group is distributed across the GPUs available on that node.
+
+    // If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
+    // cudaSetDevice to select the GPU for that particular process before openfpm_init.
+    // Note: to get the process number call MPI_Init and then MPI_Comm_rank; VCluster is not available before openfpm_init.
+    // A code snippet in case we want to skip GPU 0:
+    // MPI_Init(&argc,&argv);
+    // int rank;
+    // MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+    // cudaSetDevice(1+rank);
+
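+    // For reference, a minimal sketch of how the EXTERNAL_SET_GPU flow fits together. It is not part of this
+    // example and assumes a single-node run with more GPUs than MPI processes, so that GPU 0 can be left free
+    // (across several nodes you would derive a node-local rank instead):
+    //
+    // #define EXTERNAL_SET_GPU            // at the very beginning, before the OpenFPM includes
+    // #include "Vector/vector_dist.hpp"
+    //
+    // int main(int argc, char* argv[])
+    // {
+    //     MPI_Init(&argc,&argv);          // only needed to query the rank before openfpm_init
+    //     int rank;
+    //     MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+    //     cudaSetDevice(1+rank);          // skip GPU 0: rank 0 -> GPU 1, rank 1 -> GPU 2, ...
+    //     openfpm_init(&argc,&argv);
+    //
+    //     // ... rest of the example ...
+    //
+    //     openfpm_finalize();
+    //     return 0;
+    // }
+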
 	//! \cond [cpu_like_gpu] \endcond
 
     // initialize the library
diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
index 9f43f97fb..3da62650f 100644
--- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
@@ -40,8 +40,10 @@
 #ifdef __NVCC__
 
 #define PRINT_STACKTRACE
-#define STOP_ON_ERROR
+//#define STOP_ON_ERROR
 #define OPENMPI
+//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX cards; use the CUB library for the scan instead
+//#define EXTERNAL_SET_GPU <----- Define this in case you want to distribute the GPUs differently from the default
 
 #include "Vector/vector_dist.hpp"
 #include <math.h>
@@ -698,6 +700,20 @@ inline void sensor_pressure(Vector & vd,
 
 int main(int argc, char* argv[])
 {
+    // OpenFPM GPU distribution
+
+    // By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. Multi-node runs work the same way:
+    // each node hosts a group of processes, and that group is distributed across the GPUs available on that node.
+
+    // If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
+    // cudaSetDevice to select the GPU for that particular process before openfpm_init.
+    // Note: to get the process number call MPI_Init and then MPI_Comm_rank; VCluster is not available before openfpm_init.
+    // A code snippet in case we want to skip GPU 0:
+    // MPI_Init(&argc,&argv);
+    // int rank;
+    // MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+    // cudaSetDevice(1+rank);
+
     // initialize the library
 	openfpm_init(&argc,&argv);
 
-- 
GitLab