Commit 34449ccd authored by Pietro Incardona

Adding some comments to the example

parent fc5810a3
@@ -112,6 +112,9 @@
#define OPENMPI
//! \cond [using_openmpi] \endcond
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
//! \cond [kernel_translate_fill_prop] \endcond
@@ -139,6 +142,20 @@ __global__ void translate_fill_prop(vector_type vd)
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The same applies in the multi-node case: each node
	// has a group of processes, and that group is distributed across the GPUs available on that node.
	// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
	// A code snippet in case we want to skip GPU 0 (a fuller standalone sketch follows the diff):
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
//! \cond [cpu_like_gpu] \endcond
// initialize the library
......
@@ -40,8 +40,10 @@
#ifdef __NVCC__
#define PRINT_STACKTRACE
#define STOP_ON_ERROR
//#define STOP_ON_ERROR
#define OPENMPI
//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
#include "Vector/vector_dist.hpp"
#include <math.h>
@@ -698,6 +700,20 @@ inline void sensor_pressure(Vector & vd,
int main(int argc, char* argv[])
{
// OpenFPM GPU distribution
	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The same applies in the multi-node case: each node
	// has a group of processes, and that group is distributed across the GPUs available on that node.
	// If you want to override this behaviour, #define EXTERNAL_SET_GPU at the very beginning of the program and call
	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
	// A code snippet in case we want to skip GPU 0:
// MPI_Init(&argc,&argv);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
// cudaSetDevice(1+rank);
// initialize the library
openfpm_init(&argc,&argv);
......
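For reference, a minimal standalone sketch of the device-selection pattern described in the comments above, compiled with nvcc like the rest of the example. It assumes a CUDA build of OpenFPM; the +1 offset (to skip GPU 0) and the empty simulation body are illustrative, not part of the original example.

#define EXTERNAL_SET_GPU            // must be defined before including the OpenFPM headers
#include "Vector/vector_dist.hpp"

int main(int argc, char* argv[])
{
    // VCluster is not available before openfpm_init, so query the rank with plain MPI
    MPI_Init(&argc,&argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    // Bind this process to a device, skipping GPU 0 (the +1 offset is illustrative)
    cudaSetDevice(1+rank);

    // Because EXTERNAL_SET_GPU is defined, openfpm_init keeps the device chosen above
    openfpm_init(&argc,&argv);

    // ... simulation code ...

    openfpm_finalize();
    return 0;
}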