Commit 34449ccd authored by incardon

Adding some comments to the example

parent fc5810a3
@@ -112,6 +112,9 @@
#define OPENMPI
//! \cond [using_openmpi] \endcond
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
//! \cond [kernel_translate_fill_prop] \endcond
@@ -139,6 +142,20 @@ __global__ void translate_fill_prop(vector_type vd)
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The multi-node case works the same way:
// each node has a group of processes, and that group is distributed across the GPUs available on that node.
// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and use
// cudaSetDevice to select the GPU for that particular process before openfpm_init.
// Note: to get the process number, call MPI_Init and then MPI_Comm_rank; VCluster is not available before openfpm_init.
// A code snippet in case we want to skip GPU 0:
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
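// A slightly fuller sketch of the same override (a hedged illustration, not part of the original example:
// cudaGetDeviceCount is used here only to keep the device index in range, and the modulo wrap-around is an
// assumption about how to handle ranks exceeding the number of visible GPUs):
//
//   // at the very top of the file, before the OpenFPM include:
//   // #define EXTERNAL_SET_GPU
//
//   MPI_Init(&argc,&argv);
//   int rank;
//   MPI_Comm_rank(MPI_COMM_WORLD,&rank);
//   int ndev;
//   cudaGetDeviceCount(&ndev);
//   cudaSetDevice((1 + rank) % ndev);   // skip GPU 0, wrap around if rank+1 exceeds the device count
//   openfpm_init(&argc,&argv);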
//! \cond [cpu_like_gpu] \endcond
// initialize the library
......
@@ -40,8 +40,10 @@
#ifdef __NVCC__
#define PRINT_STACKTRACE
#define STOP_ON_ERROR
//#define STOP_ON_ERROR
#define OPENMPI
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
#include <math.h>
@@ -698,6 +700,20 @@ inline void sensor_pressure(Vector & vd,
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The multi-node case works the same way:
// each node has a group of processes, and that group is distributed across the GPUs available on that node.
// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and use
// cudaSetDevice to select the GPU for that particular process before openfpm_init.
// Note: to get the process number, call MPI_Init and then MPI_Comm_rank; VCluster is not available before openfpm_init.
// A code snippet in case we want to skip GPU 0:
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
// initialize the library
openfpm_init(&argc,&argv);
......