diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 253cccd8c..3da3b9819 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,7 +15,12 @@ before_script:
       echo " PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE. Splitting shared memory domains on processor-level!";
       export PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE;
     else
-      echo "Splitting shared memory domains on node-level! Set variable DO_CORE_SPLIT=T to force core-level shared memory splitting for all regression tests.";
+      if [ -n "${DO_NODE_SPLIT}" ]; then
+        echo " PICLAS_SPLIT_TYPE=PICLAS_COMM_TYPE_NODE. Splitting shared memory domains on sub-node-level with 2 cores per node!";
+        export PICLAS_SPLIT_TYPE=PICLAS_COMM_TYPE_NODE;
+      else
+        echo "Splitting shared memory domains on node-level! Set variable DO_CORE_SPLIT=T to force core-level OR DO_NODE_SPLIT=T to force sub-node-level shared memory splitting for all regression tests.";
+      fi
     fi
 # ----------------------------------------------------------------------------------------------------------------------------------------------------
 # Stages
diff --git a/docs/documentation/userguide/workflow.md b/docs/documentation/userguide/workflow.md
index 18e8be75c..c2811b80b 100644
--- a/docs/documentation/userguide/workflow.md
+++ b/docs/documentation/userguide/workflow.md
@@ -40,6 +40,14 @@ the CMake configuration file for HDF5 (optional).
   output files into the VTK format
 * ``POSTI_USE_PARAVIEW``: Enables the compilation of the ParaView plugin, which enables the direct read-in of output files within
   ParaView
+* ``PICLAS_SHARED_MEMORY``: Split type for creating new communicators based on colors and keys (requires MPI 3 or higher).
+  Options with the prefix OMPI_ are specific to Open MPI.
+  * ``MPI_COMM_TYPE_SHARED``: creates one shared memory domain per physical node (default)
+  * ``OMPI_COMM_TYPE_CORE``: creates one shared memory domain per MPI thread
+  * ``PICLAS_COMM_TYPE_NODE``: creates one shared memory domain per X MPI threads, where X is defined by ``PICLAS_SHARED_MEMORY_CORES``
+* ``PICLAS_SHARED_MEMORY_CORES``: Number of MPI threads per virtual node (default is 2). Assumes that all MPI threads run on the
+  same physical node.
+
 
 (sec:solver-settings)=
 ## Solver settings
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 992e93df8..269f83cdd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -43,18 +43,34 @@ SET_PROPERTY(CACHE PICLAS_TIMEDISCMETHOD PROPERTY STRINGS Euler-Explicit
 # =========================================================================
 # Shared memory region splitting
 # =========================================================================
-# Get environment variable
+# Get environment variable, e.g. PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE
 SET(PICLAS_SPLIT_TYPE "$ENV{PICLAS_SPLIT_TYPE}")
 IF("${PICLAS_SPLIT_TYPE}" STREQUAL "")
   SET(PICLAS_SPLIT_TYPE "MPI_COMM_TYPE_SHARED")
 ELSE()
-  IF("${PICLAS_SPLIT_TYPE}" STREQUAL "${PICLAS_SHARED_MEMORY}")
-    MESSAGE(STATUS "Using user-defined environment variable PICLAS_SPLIT_TYPE=${PICLAS_SPLIT_TYPE} for shared memory communicator splitting of variale PICLAS_SHARED_MEMORY")
-  ENDIF()
+  #IF("${PICLAS_SPLIT_TYPE}" STREQUAL "${PICLAS_SHARED_MEMORY}")
+    MESSAGE(STATUS "Using user-defined environment variable [PICLAS_SPLIT_TYPE = ${PICLAS_SPLIT_TYPE}] for shared memory communicator splitting. Setting [PICLAS_SHARED_MEMORY = ${PICLAS_SPLIT_TYPE}]")
+  #ENDIF()
 ENDIF()
 SET(PICLAS_SHARED_MEMORY "${PICLAS_SPLIT_TYPE}" CACHE STRING "Split type for creating new communicators based on colors and keys (requires MPI 3 or higher). Options with the prefix OMPI_ are specific to Open MPI.")
 SET_PROPERTY(CACHE PICLAS_SHARED_MEMORY PROPERTY STRINGS MPI_COMM_TYPE_SHARED
-                                                         OMPI_COMM_TYPE_CORE)
+                                                         OMPI_COMM_TYPE_CORE
+                                                         PICLAS_COMM_TYPE_NODE)
+
+ADD_DEFINITIONS(-DSharedMemoryMethod=${PICLAS_SHARED_MEMORY})
+IF(PICLAS_SHARED_MEMORY STREQUAL "MPI_COMM_TYPE_SHARED")
+  UNSET(PICLAS_SHARED_MEMORY_CORES CACHE)
+  ADD_DEFINITIONS(-DCORE_SPLIT=0)
+  MESSAGE(STATUS "Shared memory split type for subcommunicators set to node-level")
+ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "PICLAS_COMM_TYPE_NODE")
+  SET(PICLAS_SHARED_MEMORY_CORES "2" CACHE STRING "Number of cores per node when setting PICLAS_SHARED_MEMORY=PICLAS_COMM_TYPE_NODE. All cores must be on the same physical node!")
+  ADD_DEFINITIONS(-DCORE_SPLIT=${PICLAS_SHARED_MEMORY_CORES})
+  MESSAGE(STATUS "Shared memory split type for subcommunicators set to sub-node-level with user-specified value [PICLAS_SHARED_MEMORY_CORES = ${PICLAS_SHARED_MEMORY_CORES}] cores per node")
+ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "OMPI_COMM_TYPE_CORE")
+  UNSET(PICLAS_SHARED_MEMORY_CORES CACHE)
+  ADD_DEFINITIONS(-DCORE_SPLIT=1)
+  MESSAGE(STATUS "Shared memory split type for subcommunicators set to core-level")
+ENDIF()
 
 # =========================================================================
 # MISC
@@ -306,18 +322,6 @@ ELSE()
   ADD_DEFINITIONS(-DUSE_HDG=0)
 ENDIF(PICLAS_HDG)
 
-# =========================================================================
-# Shared memory split type
-# =========================================================================
-ADD_DEFINITIONS(-DSharedMemoryMethod=${PICLAS_SHARED_MEMORY})
-IF(PICLAS_SHARED_MEMORY STREQUAL "MPI_COMM_TYPE_SHARED")
-  ADD_DEFINITIONS(-DUSE_CORE_SPLIT=0)
-  MESSAGE(STATUS "Shared memory split type for subcommunicators set to node-level")
-ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "OMPI_COMM_TYPE_CORE")
-  ADD_DEFINITIONS(-DUSE_CORE_SPLIT=1)
-  MESSAGE(STATUS "Shared memory split type for subcommunicators set to core-level")
-ENDIF()
-
 # ========================================================================
 # LOADBALANCE
 # =========================================================================
diff --git a/src/loadbalance/loaddistribution.f90 b/src/loadbalance/loaddistribution.f90
index bf068e887..ff7ae7308 100644
--- a/src/loadbalance/loaddistribution.f90
+++ b/src/loadbalance/loaddistribution.f90
@@ -1279,11 +1279,11 @@ SUBROUTINE WriteElemTimeStatistics(WriteHeader,time_opt,iter_opt)
 
 ! Convert kB to GB
 memory=memory/1048576.
-#if USE_CORE_SPLIT
+#if ! (CORE_SPLIT==0)
 ! When core-level splitting is used, it is not clear how many cores are on the same physical compute node.
 ! Therefore, the values are set to -1.
 memory(2:3) = -1.
-#endif /*USE_CORE_SPLIT*/
+#endif /*! (CORE_SPLIT==0)*/
 
 ! Either create new file or add info to existing file
 !> create new file
diff --git a/src/mpi/mpi.f90 b/src/mpi/mpi.f90
index 61f05514b..3462f2a31 100644
--- a/src/mpi/mpi.f90
+++ b/src/mpi/mpi.f90
@@ -143,8 +143,9 @@ SUBROUTINE InitMPIvars()
 USE MOD_Globals
 USE MOD_PreProc
 USE MOD_MPI_Vars
-USE MOD_Interpolation_Vars,ONLY:InterpolationInitIsDone
-USE MOD_Readintools, ONLY:GETINT
+USE MOD_Interpolation_Vars ,ONLY: InterpolationInitIsDone
+USE MOD_Readintools        ,ONLY: GETINT
+USE MOD_MPI_Shared_Vars    ,ONLY: nProcessors_Global
 ! IMPLICIT VARIABLE HANDLING
 IMPLICIT NONE
 !-----------------------------------------------------------------------------------------------------------------------------------
@@ -155,11 +156,7 @@ SUBROUTINE InitMPIvars()
 ! LOCAL VARIABLES
 INTEGER :: color,groupsize
 !===================================================================================================================================
-IF(.NOT.InterpolationInitIsDone)THEN
-  CALL Abort(&
-      __STAMP__&
-      ,'InitMPITypes called before InitInterpolation')
-END IF
+IF(.NOT.InterpolationInitIsDone) CALL Abort(__STAMP__,'InitMPITypes called before InitInterpolation')
 ALLOCATE(SendRequest_U(nNbProcs)   )
 ALLOCATE(SendRequest_U2(nNbProcs)  )
 ALLOCATE(SendRequest_GEO(nNbProcs) )
@@ -198,12 +195,25 @@ SUBROUTINE InitMPIvars()
 GroupSize=GETINT('GroupSize','0')
 IF(GroupSize.LT.1)THEN ! group procs by node
   ! Split the node communicator (shared memory) from the global communicator on physical processor or node level
-#if USE_CORE_SPLIT
+#if (CORE_SPLIT==1)
   CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,myRank,0,MPI_COMM_NODE,iError)
-#else
+#elif (CORE_SPLIT==0)
   ! Note that using SharedMemoryMethod=OMPI_COMM_TYPE_CORE somehow does not work in every case (intel/amd processors)
   ! Also note that OMPI_COMM_TYPE_CORE is undefined when not using OpenMPI
   CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,SharedMemoryMethod,0,MPI_INFO_NULL,MPI_COMM_NODE,IERROR)
+#else
+  ! Check if more nodes than procs are required or
+  ! if the resulting split would create unequal procs per node
+  IF((CORE_SPLIT.GE.nProcessors_Global).OR.(MOD(nProcessors_Global,CORE_SPLIT).GT.0))THEN
+    SWRITE (*,'(A,I0,A,I0,A,F0.2,A)') ' WARNING: Either more nodes than cores selected (nodes: ',CORE_SPLIT,', cores: ',&
+        nProcessors_Global,') OR unequal number of cores per node (=',REAL(nProcessors_Global)/REAL(CORE_SPLIT),&
+        '). Setting 1 core per node for MPI_COMM_NODE!'
+    color = myRank
+  ELSE
+    ! Group procs so that every CORE_SPLIT procs are in the same group
+    color = INT(REAL(myrank*CORE_SPLIT)/REAL(nProcessors_Global))+1
+  END IF ! (CORE_SPLIT.GE.nProcessors_Global).OR.(MOD().GT.0)
+  CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,color,0,MPI_COMM_NODE,iError)
 #endif
 ELSE ! use groupsize
   color=myRank/GroupSize
@@ -213,6 +223,14 @@ SUBROUTINE InitMPIvars()
 CALL MPI_COMM_SIZE(MPI_COMM_NODE,nLocalProcs,iError)
 MPILocalRoot=(myLocalRank.EQ.0)
 
+IF (nProcessors_Global.EQ.nLocalProcs) THEN
+  SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting gathered I/O communication with ',nLocalProcs,' procs in ',1,' group'
+ELSE
+  SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A,I0,A)') ' | Starting gathered I/O communication with ',nLocalProcs,' procs each in ',&
+      nProcessors_Global/nLocalProcs,' groups for a total number of ',&
+      nProcessors_Global,' procs'
+END IF
+
 ! now split global communicator into small group leaders and the others
 MPI_COMM_LEADERS=MPI_COMM_NULL
 MPI_COMM_WORKERS=MPI_COMM_NULL
diff --git a/src/mpi/mpi_shared.f90 b/src/mpi/mpi_shared.f90
index 8990b3599..b47f0ecb3 100644
--- a/src/mpi/mpi_shared.f90
+++ b/src/mpi/mpi_shared.f90
@@ -126,12 +126,25 @@ SUBROUTINE InitMPIShared()
 nProcessors_Global = nProcessors
 
 ! Split the node communicator (shared memory) from the global communicator on physical processor or node level
-#if USE_CORE_SPLIT
+#if (CORE_SPLIT==1)
 CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,myRank,0,MPI_COMM_SHARED,iError)
-#else
+#elif (CORE_SPLIT==0)
 ! Note that using SharedMemoryMethod=OMPI_COMM_TYPE_CORE somehow does not work in every case (intel/amd processors)
 ! Also note that OMPI_COMM_TYPE_CORE is undefined when not using OpenMPI
 CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,SharedMemoryMethod,0,MPI_INFO_NULL,MPI_COMM_SHARED,IERROR)
+#else
+! Check if more nodes than procs are required or
+! if the resulting split would create unequal procs per node
+IF((CORE_SPLIT.GE.nProcessors_Global).OR.(MOD(nProcessors_Global,CORE_SPLIT).GT.0))THEN
+  SWRITE (*,'(A,I0,A,I0,A,F0.2,A)') ' WARNING: Either more nodes than cores selected (nodes: ',CORE_SPLIT,', cores: ',&
+      nProcessors_Global,') OR unequal number of cores per node (=',REAL(nProcessors_Global)/REAL(CORE_SPLIT),&
+      '). Setting 1 core per node for MPI_COMM_SHARED!'
+  color = myRank
+ELSE
+  ! Group procs so that every CORE_SPLIT procs are in the same group
+  color = INT(REAL(myrank*CORE_SPLIT)/REAL(nProcessors_Global))+1
+END IF ! (CORE_SPLIT.GE.nProcessors_Global).OR.(MOD().GT.0)
+CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,color,0,MPI_COMM_SHARED,iError)
 #endif
 
 ! Find my rank on the shared communicator, comm size and proc name
@@ -142,11 +155,12 @@ SUBROUTINE InitMPIShared()
 IF (MOD(nProcessors_Global,nComputeNodeProcessors).NE.0) &
   CALL ABORT(__STAMP__,'MPI shared communication currently only supported with equal procs per node!')
 
-IF (nProcessors_Global/nComputeNodeProcessors.EQ.1) THEN
+IF (nProcessors_Global.EQ.nComputeNodeProcessors) THEN
   SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ',1,' node'
 ELSE
-  SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ', &
-    nProcessors_Global/nComputeNodeProcessors,' nodes'
+  SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ', &
+      nProcessors_Global/nComputeNodeProcessors,' nodes for a total number of ',&
+      nProcessors_Global,' procs'
 END IF
 
 ! Send rank of compute node root to all procs on shared comm
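
Reviewer note: the following is a minimal, standalone sketch (not part of the patch) that mirrors only the guard condition and the color formula from the new #else branches and prints the resulting grouping, so the sub-node splitting can be tried outside of piclas. The program name, the CORE_SPLIT parameter (standing in here for the -DCORE_SPLIT compile definition derived from PICLAS_SHARED_MEMORY_CORES) and the output format are illustrative assumptions.

PROGRAM core_split_demo
  USE mpi
  IMPLICIT NONE
  INTEGER, PARAMETER :: CORE_SPLIT = 2   ! stands in for the -DCORE_SPLIT compile definition
  INTEGER :: myRank, nProcessors_Global, color, iError
  INTEGER :: MPI_COMM_SHARED, myLocalRank, nLocalProcs

  CALL MPI_INIT(iError)
  CALL MPI_COMM_RANK(MPI_COMM_WORLD, myRank, iError)
  CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nProcessors_Global, iError)

  ! Guard from the patch: if the requested split exceeds the job size or does not
  ! divide it evenly, fall back to one rank per group (color = global rank).
  IF ((CORE_SPLIT.GE.nProcessors_Global) .OR. (MOD(nProcessors_Global,CORE_SPLIT).GT.0)) THEN
    color = myRank
  ELSE
    ! Color formula from the patched #else branch: consecutive global ranks are
    ! binned into equally sized blocks that share one color.
    color = INT(REAL(myRank*CORE_SPLIT)/REAL(nProcessors_Global)) + 1
  END IF

  ! Ranks with the same color end up in the same sub-communicator.
  CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, color, 0, MPI_COMM_SHARED, iError)
  CALL MPI_COMM_RANK(MPI_COMM_SHARED, myLocalRank, iError)
  CALL MPI_COMM_SIZE(MPI_COMM_SHARED, nLocalProcs, iError)

  WRITE(*,'(A,I0,A,I0,A,I0,A,I0)') ' global rank ',myRank,' -> color ',color, &
      ', local rank ',myLocalRank,' of ',nLocalProcs
  CALL MPI_FINALIZE(iError)
END PROGRAM core_split_demo

Built with an MPI Fortran wrapper (e.g. mpif90 core_split_demo.f90) and run with mpirun -np 8 ./a.out, ranks reporting the same color share one sub-communicator; with the values above this yields two groups of four consecutive ranks each.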