Commit 34449ccd authored by Pietro Incardona

Adding some comments to the example

parent fc5810a3
@@ -112,6 +112,9 @@
#define OPENMPI
//! \cond [using_openmpi] \endcond
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
//! \cond [kernel_translate_fill_prop] \endcond
@@ -139,6 +142,20 @@ __global__ void translate_fill_prop(vector_type vd)
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The same applies in the multi-node case: each node
	// has a group of processes, and that group is distributed across the GPUs available on that node.
	// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
	// A code snippet in case we want to skip GPU 0 (a fuller standalone sketch follows the diff):
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
//! \cond [cpu_like_gpu] \endcond
// initialize the library
......
@@ -40,8 +40,10 @@
#ifdef __NVCC__
#define PRINT_STACKTRACE
#define STOP_ON_ERROR
//#define STOP_ON_ERROR
#define OPENMPI
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
#include <math.h>
@@ -698,6 +700,20 @@ inline void sensor_pressure(Vector & vd,
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The same applies in the multi-node case: each node
	// has a group of processes, and that group is distributed across the GPUs available on that node.
	// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
	// A code snippet in case we want to skip GPU 0:
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
// initialize the library
openfpm_init(&argc,&argv);
......
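For reference, a minimal standalone sketch of the device-selection pattern described in the comments above, compiled with nvcc like the rest of the example. It assumes a CUDA build of OpenFPM; the +1 offset (to skip GPU 0) and the empty simulation body are illustrative, not part of the original example.

#define EXTERNAL_SET_GPU            // must be defined before including the OpenFPM headers
#include "Vector/vector_dist.hpp"

int main(int argc, char* argv[])
{
    // VCluster is not available before openfpm_init, so query the rank with plain MPI
    MPI_Init(&argc,&argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    // Bind this process to a device, skipping GPU 0 (the +1 offset is illustrative)
    cudaSetDevice(1+rank);

    // Because EXTERNAL_SET_GPU is defined, openfpm_init keeps the device chosen above
    openfpm_init(&argc,&argv);

    // ... simulation code ...

    openfpm_finalize();
    return 0;
}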