Commit

Merge branch 'feature.user.specific.shared.memory.splitting' into 'master.dev'

Added a new CMake option, PICLAS_SHARED_MEMORY, for splitting the shared memory communicator

See merge request piclas/piclas!635
scopplestone committed Apr 20, 2022
2 parents 6efed3e + 2c5dfca commit bdbd8de
Showing 6 changed files with 83 additions and 34 deletions.
7 changes: 6 additions & 1 deletion .gitlab-ci.yml
@@ -15,7 +15,12 @@ before_script:
echo " PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE. Splitting shared memory domains on processor-level!";
export PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE;
else
echo "Splitting shared memory domains on node-level! Set variable DO_CORE_SPLIT=T to force core-level shared memory splitting for all regression tests.";
if [ -n "${DO_NODE_SPLIT}" ]; then
echo " PICLAS_SPLIT_TYPE=PICLAS_COMM_TYPE_NODE. Splitting shared memory domains on sub-node-level with 2 cores per node!";
export PICLAS_SPLIT_TYPE=PICLAS_COMM_TYPE_NODE;
else
echo "Splitting shared memory domains on node-level! Set variable DO_CORE_SPLIT=T to force core-level OR DO_NODE_SPLIT=T to force sub-node-level shared memory splitting for all regression tests.";
fi
fi
# ----------------------------------------------------------------------------------------------------------------------------------------------------
# Stages
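
As a sketch of how the hook above is used (it reacts only to the DO_CORE_SPLIT and DO_NODE_SPLIT variables named in the diff; the surrounding runner setup is assumed):

    # Force sub-node-level splitting; the before_script then exports PICLAS_SPLIT_TYPE=PICLAS_COMM_TYPE_NODE
    export DO_NODE_SPLIT=T
    # ...or force core-level splitting instead (exports PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE)
    # export DO_CORE_SPLIT=T
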
8 changes: 8 additions & 0 deletions docs/documentation/userguide/workflow.md
@@ -40,6 +40,14 @@ the CMake configuration file for HDF5 (optional).
output files into the VTK format
* ``POSTI_USE_PARAVIEW``: Enables the compilation of the ParaView plugin, which enables the direct read-in of output files within ParaView

* ``PICLAS_SHARED_MEMORY``: Split type for creating new communicators based on colors and keys (requires MPI 3 or higher).
Options with the prefix OMPI_ are specific to Open MPI.
* ``MPI_COMM_TYPE_SHARED``: creates one shared memory domain per physical node (default)
* ``OMPI_COMM_TYPE_CORE``: creates one shared memory domain per MPI thread
* ``PICLAS_COMM_TYPE_NODE``: creates one shared memory domain per group of X MPI threads, where X is defined by ``PICLAS_SHARED_MEMORY_CORES``
* ``PICLAS_SHARED_MEMORY_CORES``: Number of MPI threads per virtual node (default is 2). Assumes that all MPI threads run on the
same physical node.

(sec:solver-settings)=
## Solver settings

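
The options documented above can be set directly at configure time; a minimal sketch, assuming an out-of-source build directory and an illustrative value of 4 (the documented default is 2):

    cmake -DPICLAS_SHARED_MEMORY=PICLAS_COMM_TYPE_NODE -DPICLAS_SHARED_MEMORY_CORES=4 ..
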
38 changes: 21 additions & 17 deletions src/CMakeLists.txt
@@ -43,18 +43,34 @@ SET_PROPERTY(CACHE PICLAS_TIMEDISCMETHOD PROPERTY STRINGS Euler-Explicit
# =========================================================================
# Shared memory region splitting
# =========================================================================
# Get environment variable
# Get environment variable, e.g. PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE
SET(PICLAS_SPLIT_TYPE "$ENV{PICLAS_SPLIT_TYPE}")
IF("${PICLAS_SPLIT_TYPE}" STREQUAL "")
SET(PICLAS_SPLIT_TYPE "MPI_COMM_TYPE_SHARED")
ELSE()
IF("${PICLAS_SPLIT_TYPE}" STREQUAL "${PICLAS_SHARED_MEMORY}")
MESSAGE(STATUS "Using user-defined environment variable PICLAS_SPLIT_TYPE=${PICLAS_SPLIT_TYPE} for shared memory communicator splitting of variale PICLAS_SHARED_MEMORY")
ENDIF()
#IF("${PICLAS_SPLIT_TYPE}" STREQUAL "${PICLAS_SHARED_MEMORY}")
MESSAGE(STATUS "Using user-defined environment variable [PICLAS_SPLIT_TYPE = ${PICLAS_SPLIT_TYPE}] for shared memory communicator splitting. Setting [PICLAS_SHARED_MEMORY = ${PICLAS_SPLIT_TYPE}]")
#ENDIF()
ENDIF()
SET(PICLAS_SHARED_MEMORY "${PICLAS_SPLIT_TYPE}" CACHE STRING "Split type for creating new communicators based on colors and keys (requires MPI 3 or higher). Options with the prefix OMPI_ are specific to Open MPI.")
SET_PROPERTY(CACHE PICLAS_SHARED_MEMORY PROPERTY STRINGS MPI_COMM_TYPE_SHARED
OMPI_COMM_TYPE_CORE)
OMPI_COMM_TYPE_CORE
PICLAS_COMM_TYPE_NODE)

ADD_DEFINITIONS(-DSharedMemoryMethod=${PICLAS_SHARED_MEMORY})
IF(PICLAS_SHARED_MEMORY STREQUAL "MPI_COMM_TYPE_SHARED")
UNSET(PICLAS_SHARED_MEMORY_CORES CACHE)
ADD_DEFINITIONS(-DCORE_SPLIT=0)
MESSAGE(STATUS "Shared memory split type for subcommunicators set to node-level")
ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "PICLAS_COMM_TYPE_NODE")
SET(PICLAS_SHARED_MEMORY_CORES "2" CACHE STRING "Number of cores per node when setting PICLAS_SHARED_MEMORY=PICLAS_COMM_TYPE_NODE. All cores must be on the same physical node!")
ADD_DEFINITIONS(-DCORE_SPLIT=${PICLAS_SHARED_MEMORY_CORES})
MESSAGE(STATUS "Shared memory split type for subcommunicators set to sub-node-level with user-specific value [PICLAS_SHARED_MEMORY_CORES = ${PICLAS_SHARED_MEMORY_CORES}] cores per node")
ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "OMPI_COMM_TYPE_CORE")
UNSET(PICLAS_SHARED_MEMORY_CORES CACHE)
ADD_DEFINITIONS(-DCORE_SPLIT=1)
MESSAGE(STATUS "Shared memory split type for subcommunicators set to core-level")
ENDIF()

# =========================================================================
# MISC
@@ -306,18 +322,6 @@ ELSE()
ADD_DEFINITIONS(-DUSE_HDG=0)
ENDIF(PICLAS_HDG)

# =========================================================================
# Shared memory split type
# =========================================================================
ADD_DEFINITIONS(-DSharedMemoryMethod=${PICLAS_SHARED_MEMORY})
IF(PICLAS_SHARED_MEMORY STREQUAL "MPI_COMM_TYPE_SHARED")
ADD_DEFINITIONS(-DUSE_CORE_SPLIT=0)
MESSAGE(STATUS "Shared memory split type for subcommunicators set to node-level")
ELSEIF(PICLAS_SHARED_MEMORY STREQUAL "OMPI_COMM_TYPE_CORE")
ADD_DEFINITIONS(-DUSE_CORE_SPLIT=1)
MESSAGE(STATUS "Shared memory split type for subcommunicators set to core-level")
ENDIF()

# ========================================================================
# LOADBALANCE
# =========================================================================
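
Alternatively, the CMake logic above picks the split type up from the environment on a fresh configure; a minimal sketch, assuming an empty build directory:

    # PICLAS_SPLIT_TYPE preseeds the PICLAS_SHARED_MEMORY cache entry (see src/CMakeLists.txt above)
    export PICLAS_SPLIT_TYPE=OMPI_COMM_TYPE_CORE
    cmake ..
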
4 changes: 2 additions & 2 deletions src/loadbalance/loaddistribution.f90
@@ -1279,11 +1279,11 @@ SUBROUTINE WriteElemTimeStatistics(WriteHeader,time_opt,iter_opt)

! Convert kB to GB
memory=memory/1048576.
#if USE_CORE_SPLIT
#if ! (CORE_SPLIT==0)
! When core-level splitting is used, it is not clear how many cores are on the same physical compute node.
! Therefore, the values are set to -1.
memory(2:3) = -1.
#endif /*USE_CORE_SPLIT*/
#endif /*! (CORE_SPLIT==0)*/

! Either create new file or add info to existing file
!> create new file
36 changes: 27 additions & 9 deletions src/mpi/mpi.f90
@@ -143,8 +143,9 @@ SUBROUTINE InitMPIvars()
USE MOD_Globals
USE MOD_PreProc
USE MOD_MPI_Vars
USE MOD_Interpolation_Vars,ONLY:InterpolationInitIsDone
USE MOD_Readintools, ONLY:GETINT
USE MOD_Interpolation_Vars ,ONLY: InterpolationInitIsDone
USE MOD_Readintools ,ONLY: GETINT
USE MOD_MPI_Shared_Vars ,ONLY: nProcessors_Global
! IMPLICIT VARIABLE HANDLING
IMPLICIT NONE
!-----------------------------------------------------------------------------------------------------------------------------------
@@ -155,11 +156,7 @@ SUBROUTINE InitMPIvars()
! LOCAL VARIABLES
INTEGER :: color,groupsize
!===================================================================================================================================
IF(.NOT.InterpolationInitIsDone)THEN
CALL Abort(&
__STAMP__&
,'InitMPITypes called before InitInterpolation')
END IF
IF(.NOT.InterpolationInitIsDone) CALL Abort(__STAMP__,'InitMPITypes called before InitInterpolation')
ALLOCATE(SendRequest_U(nNbProcs) )
ALLOCATE(SendRequest_U2(nNbProcs) )
ALLOCATE(SendRequest_GEO(nNbProcs) )
@@ -198,12 +195,25 @@ SUBROUTINE InitMPIvars()
GroupSize=GETINT('GroupSize','0')
IF(GroupSize.LT.1)THEN ! group procs by node
! Split the node communicator (shared memory) from the global communicator on physical processor or node level
#if USE_CORE_SPLIT
#if (CORE_SPLIT==1)
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,myRank,0,MPI_COMM_NODE,iError)
#else
#elif (CORE_SPLIT==0)
! Note that using SharedMemoryMethod=OMPI_COMM_TYPE_CORE somehow does not work in every case (intel/amd processors)
! Also note that OMPI_COMM_TYPE_CORE is undefined when not using OpenMPI
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,SharedMemoryMethod,0,MPI_INFO_NULL,MPI_COMM_NODE,IERROR)
#else
! Check if more nodes than procs are required or
! if the resulting split would create unequal procs per node
IF((CORE_SPLIT.GE.nProcessors_Global).OR.(MOD(nProcessors_Global,CORE_SPLIT).GT.0))THEN
SWRITE (*,'(A,I0,A,I0,A,F0.2,A)') ' WARNING: Either more nodes than cores selected (nodes: ',CORE_SPLIT,', cores: ',&
nProcessors_Global,') OR unequal number of cores per node (=',REAL(nProcessors_Global)/REAL(CORE_SPLIT),&
'). Setting 1 core per node for MPI_COMM_NODE!'
color = myRank
ELSE
! Group procs so that every CORE_SPLIT procs are in the same group
color = INT(REAL(myrank*CORE_SPLIT)/REAL(nProcessors_Global))+1
END IF ! (CORE_SPLIT.GE.nProcessors_Global).OR.(MOD().GT.0)
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,color,0,MPI_COMM_NODE,iError)
#endif
ELSE ! use groupsize
color=myRank/GroupSize
@@ -213,6 +223,14 @@ SUBROUTINE InitMPIvars()
CALL MPI_COMM_SIZE(MPI_COMM_NODE,nLocalProcs,iError)
MPILocalRoot=(myLocalRank.EQ.0)

IF (nProcessors_Global.EQ.nLocalProcs) THEN
SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting gathered I/O communication with ',nLocalProcs,' procs in ',1,' group'
ELSE
SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A,I0,A)') ' | Starting gathered I/O communication with ',nLocalProcs,' procs each in ',&
nProcessors_Global/nLocalProcs,' groups for a total number of ',&
nProcessors_Global,' procs'
END IF

! now split global communicator into small group leaders and the others
MPI_COMM_LEADERS=MPI_COMM_NULL
MPI_COMM_WORKERS=MPI_COMM_NULL
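
Purely to illustrate the arithmetic of the new grouping formula, color = INT(REAL(myrank*CORE_SPLIT)/REAL(nProcessors_Global))+1, here is a small shell sketch with assumed values (4 global ranks, CORE_SPLIT=2):

    nprocs=4; core_split=2
    for rank in $(seq 0 $((nprocs - 1))); do
      echo "rank $rank -> color $(( rank * core_split / nprocs + 1 ))"
    done
    # prints colors 1 1 2 2, i.e. two groups of two ranks passed as colors to MPI_COMM_SPLIT
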
24 changes: 19 additions & 5 deletions src/mpi/mpi_shared.f90
@@ -126,12 +126,25 @@ SUBROUTINE InitMPIShared()
nProcessors_Global = nProcessors

! Split the node communicator (shared memory) from the global communicator on physical processor or node level
#if USE_CORE_SPLIT
#if (CORE_SPLIT==1)
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,myRank,0,MPI_COMM_SHARED,iError)
#else
#elif (CORE_SPLIT==0)
! Note that using SharedMemoryMethod=OMPI_COMM_TYPE_CORE somehow does not work in every case (intel/amd processors)
! Also note that OMPI_COMM_TYPE_CORE is undefined when not using OpenMPI
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,SharedMemoryMethod,0,MPI_INFO_NULL,MPI_COMM_SHARED,IERROR)
#else
! Check if more nodes than procs are required or
! if the resulting split would create unequal procs per node
IF((CORE_SPLIT.GE.nProcessors_Global).OR.(MOD(nProcessors_Global,CORE_SPLIT).GT.0))THEN
SWRITE (*,'(A,I0,A,I0,A,F0.2,A)') ' WARNING: Either more nodes than cores selected (nodes: ',CORE_SPLIT,', cores: ',&
nProcessors_Global,') OR unequal number of cores per node (=',REAL(nProcessors_Global)/REAL(CORE_SPLIT),&
'). Setting 1 core per node for MPI_COMM_SHARED!'
color = myRank
ELSE
! Group procs so that every CORE_SPLIT procs are in the same group
color = INT(REAL(myrank*CORE_SPLIT)/REAL(nProcessors_Global))+1
END IF ! (CORE_SPLIT.GE.nProcessors_Global).OR.(MOD().GT.0)
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,color,0,MPI_COMM_SHARED,iError)
#endif

! Find my rank on the shared communicator, comm size and proc name
@@ -142,11 +155,12 @@ SUBROUTINE InitMPIShared()
IF (MOD(nProcessors_Global,nComputeNodeProcessors).NE.0) &
CALL ABORT(__STAMP__,'MPI shared communication currently only supported with equal procs per node!')

IF (nProcessors_Global/nComputeNodeProcessors.EQ.1) THEN
IF (nProcessors_Global.EQ.nComputeNodeProcessors) THEN
SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ',1,' node'
ELSE
SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ', &
nProcessors_Global/nComputeNodeProcessors,' nodes'
SWRITE(UNIT_stdOUt,'(A,I0,A,I0,A,I0,A)') ' | Starting shared communication with ',nComputeNodeProcessors,' procs on ', &
nProcessors_Global/nComputeNodeProcessors,' nodes for a total number of ',&
nProcessors_Global,' procs'
END IF

! Send rank of compute node root to all procs on shared comm
