diff --git a/example/Vector/1_gpu_first_step/main.cu b/example/Vector/1_gpu_first_step/main.cu
index a648b975fe6e14a76e805d6682442840dc29fe99..cedab9c868ad1b728563444aaa212e4001fda76a 100644
--- a/example/Vector/1_gpu_first_step/main.cu
+++ b/example/Vector/1_gpu_first_step/main.cu
@@ -112,6 +112,9 @@
 #define OPENMPI
 //! \cond [using_openmpi] \endcond
 
+//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
+//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
+
 #include "Vector/vector_dist.hpp"
 
 //! \cond [kernel_translate_fill_prop] \endcond
@@ -139,6 +142,20 @@ __global__ void translate_fill_prop(vector_type vd)
 
 int main(int argc, char* argv[])
 {
+	// OpenFPM GPU distribution
+
+	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The multi-node case works the same way: each node
+	// hosts a group of processes, and that group is distributed across the GPUs available on that node.
+
+	// If you want to override this behaviour, add #define EXTERNAL_SET_GPU at the very beginning of the program and call
+	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
+	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
+	// A code snippet in case we want to skip GPU 0:
+	// MPI_Init(&argc,&argv);
+	// int rank;
+	// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+	// cudaSetDevice(1+rank);
+
 	//! \cond [cpu_like_gpu] \endcond
 
 	// initialize the library
diff --git a/example/Vector/7_SPH_dlb_gpu_opt/main.cu b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
index 9f43f97fbad0132a7b3268185f5e051fd3c53908..3da62650fe451423ac9070018b51e486a529b185 100644
--- a/example/Vector/7_SPH_dlb_gpu_opt/main.cu
+++ b/example/Vector/7_SPH_dlb_gpu_opt/main.cu
@@ -40,8 +40,10 @@
 #ifdef __NVCC__
 
 #define PRINT_STACKTRACE
-#define STOP_ON_ERROR
+//#define STOP_ON_ERROR
 #define OPENMPI
+//#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX; use the CUB library for scan
+//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
 
 #include "Vector/vector_dist.hpp"
 #include <math.h>
@@ -698,6 +700,20 @@ inline void sensor_pressure(Vector & vd,
 
 int main(int argc, char* argv[])
 {
+	// OpenFPM GPU distribution
+
+	// By default OpenFPM selects GPU 0 for process 0, GPU 1 for process 1, and so on. The multi-node case works the same way: each node
+	// hosts a group of processes, and that group is distributed across the GPUs available on that node.
+
+	// If you want to override this behaviour, add #define EXTERNAL_SET_GPU at the very beginning of the program and call
+	// cudaSetDevice to select the GPU for that particular process before openfpm_init.
+	// Note: to get the process number, call MPI_Init and then MPI_Comm_rank. VCluster is not available before openfpm_init.
+	// A code snippet in case we want to skip GPU 0:
+	// MPI_Init(&argc,&argv);
+	// int rank;
+	// MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+	// cudaSetDevice(1+rank);
+
 	// initialize the library
 	openfpm_init(&argc,&argv);
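
Putting the commented snippet from the patch together, a minimal standalone sketch of the manual GPU assignment could look like the following. It assumes, based on the comments above, that EXTERNAL_SET_GPU must be defined before including vector_dist.hpp and that openfpm_init then leaves the externally selected device untouched; it is not part of the patch itself.

// Sketch (not part of the patch): skip GPU 0 by mapping process rank r to GPU r+1.
#define EXTERNAL_SET_GPU   // tell OpenFPM not to apply its default GPU-per-process mapping
//#define SCAN_WITH_CUB    // optionally switch the scan backend from MODERNGPU to CUB

#include <mpi.h>
#include "Vector/vector_dist.hpp"

int main(int argc, char* argv[])
{
	// MPI has to be initialized by hand here: VCluster (and with it the process
	// rank as seen by OpenFPM) is not available before openfpm_init.
	MPI_Init(&argc,&argv);

	int rank;
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);

	// Bind this process to a GPU of our choosing before openfpm_init;
	// rank 0 -> GPU 1, rank 1 -> GPU 2, ... so GPU 0 stays free.
	cudaSetDevice(1 + rank);

	// With EXTERNAL_SET_GPU defined, openfpm_init should keep the device selected above.
	openfpm_init(&argc,&argv);

	// ... simulation code ...

	openfpm_finalize();
}

When using a mapping like 1+rank, the number of processes per node must of course not exceed the number of GPUs minus one, otherwise cudaSetDevice is handed an invalid device index.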