Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[develop] Add Gaea C5 to supported platforms #898

Merged
merged 20 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ pipeline {
parameters {
// Allow job runner to filter based on platform
// Use the line below to enable all PW clusters
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// Use the line below to enable the PW AWS cluster
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'hera', 'jet-epic', 'orion'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion'], description: 'Specify the platform(s) to use')
// Allow job runner to filter based on compiler
choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 'Specify the compiler(s) to use to build')
// Uncomment the following line to re-enable comprehensive tests
Expand Down Expand Up @@ -77,8 +77,8 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
// values 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'hera', 'jet-epic', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
// values 'cheyenne', 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'gaea_c5', 'hera', 'jet-epic', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand All @@ -92,7 +92,7 @@ pipeline {
exclude {
axis {
name 'SRW_PLATFORM'
values 'gaea', 'jet-epic', 'orion' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'gaea_c5', 'jet-epic', 'orion' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand Down
2 changes: 1 addition & 1 deletion devbuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ set -eu
# automatically determine compiler
if [ -z "${COMPILER}" ] ; then
case ${PLATFORM} in
jet|hera|gaea) COMPILER=intel ;;
jet|hera|gaea|gaea_c5) COMPILER=intel ;;
orion) COMPILER=intel ;;
wcoss2) COMPILER=intel ;;
cheyenne) COMPILER=intel ;;
Expand Down
3 changes: 3 additions & 0 deletions etc/lmod-setup.csh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ else if ( "$L_MACHINE" == singularity ) then
else if ( "$L_MACHINE" == gaea ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh

else if ( "$L_MACHINE" == gaea_c5 ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.csh
natalie-perlin marked this conversation as resolved.
Show resolved Hide resolved

else if ( "$L_MACHINE" == odin ) then
module unload modules
unset -f module
Expand Down
3 changes: 3 additions & 0 deletions etc/lmod-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ elif [ "$L_MACHINE" = singularity ]; then
elif [ "$L_MACHINE" = gaea ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh

elif [ "$L_MACHINE" = gaea_c5 ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.sh

elif [ "$L_MACHINE" = odin ]; then
module unload modules
unset -f module
Expand Down
30 changes: 30 additions & 0 deletions modulefiles/build_gaea_c5_intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
help([[
This module loads libraries for building the UFS SRW App on
the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0
]])

whatis([===[Loads libraries needed for building the UFS SRW App on Gaea C5 ]===])

-- Load module `name` at the version held in environment variable `ver_var`,
-- or at `fallback` when that variable is not set.
local function load_pinned(name, ver_var, fallback)
   load(pathJoin(name, os.getenv(ver_var) or fallback))
end

-- cmake is requested before the hpc-stack tree is put on MODULEPATH.
load_pinned("cmake", "cmake_ver", "3.23.1")

prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/hpc-stack/intel-classic-2023.1.0/modulefiles/stack")

-- Compiler / MPI stack; entries are loaded strictly in the order listed.
local stack = {
   { "hpc",               "hpc_ver",               "1.2.0"    },
   { "intel-classic",     "intel_classic_ver",     "2023.1.0" },
   { "cray-mpich",        "cray_mpich_ver",        "8.1.25"   },
   { "hpc-intel-classic", "hpc_intel_classic_ver", "2023.1.0" },
   { "hpc-cray-mpich",    "hpc_cray_mpich_ver",    "8.1.25"   },
}
for _, entry in ipairs(stack) do
   load_pinned(entry[1], entry[2], entry[3])
end

-- Library set shared by the SRW build modulefiles.
load("srw_common")

unload("darshan-runtime/3.4.0")

-- Build-time environment, exported in the order listed.
-- NOTE(review): -diag-disable=10441 presumably silences the Intel classic
-- compiler deprecation remark -- confirm against the compiler documentation.
local exported = {
   { "CFLAGS",                 "-diag-disable=10441" },
   { "FFLAGS",                 "-diag-disable=10441" },
   { "CC",                     "cc"                  },
   { "FC",                     "ftn"                 },
   { "CXX",                    "CC"                  },
   { "CMAKE_C_COMPILER",       "cc"                  },
   { "CMAKE_Fortran_COMPILER", "ftn"                 },
   { "CMAKE_CXX_COMPILER",     "CC"                  },
   { "CMAKE_Platform",         "gaea_c5.intel"       },
}
for _, pair in ipairs(exported) do
   setenv(pair[1], pair[2])
end

22 changes: 11 additions & 11 deletions modulefiles/srw_common.lua
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
load("jasper/2.0.25")
load("zlib/1.2.11")
load_any("png/1.6.35", "libpng/1.6.37")
load_any("jasper/2.0.25","jasper/2.0.32")
load_any("zlib/1.2.11","zlib/1.2.13")
load("libpng/1.6.37")

load_any("netcdf/4.7.4", "netcdf-c/4.7.4")
load_any("netcdf/4.7.4", "netcdf-fortran/4.5.4")
load_any("pio/2.5.7", "parallelio/2.5.2")
load_any("esmf/8.3.0b09", "esmf/8.2.0")
load("fms/2022.04")
load_any("netcdf/4.9.2", "netcdf-c/4.9.2")
load_any("netcdf/4.9.2", "netcdf-fortran/4.6.0")
load_any("pio/2.5.10","parallelio/2.5.10")
load("esmf/8.4.2")
load("fms/2023.01")

load("bufr/11.7.0")
load("bacio/2.4.1")
Expand All @@ -19,14 +19,14 @@ load("w3emc/2.9.2")

load_any("gftl-shared/v1.5.0", "gftl-shared/1.5.0")
load_any("yafyaml/v0.5.1", "yafyaml/0.5.1")
load_any("mapl/2.22.0-esmf-8.3.0b09", "mapl/2.11.0-esmf-8.2.0")
load("mapl/2.35.2-esmf-8.4.2")

load("nemsio/2.5.4")
load("sfcio/1.4.1")
load("sigio/2.3.2")
load("w3nco/2.4.1")
load("wrf_io/1.2.0")
load_any("wrf_io/1.2.0","wrf-io/1.2.0")

load("ncdiag/1.1.1")
--load("ncdiag/1.1.1")
load("ncio/1.1.2")
load("wgrib2/2.0.8")
4 changes: 4 additions & 0 deletions modulefiles/tasks/gaea_c5/plot_allvars.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Task-local module settings for plot_allvars on Gaea C5.
-- Put the C5 miniconda3 modulefile directory at the front of MODULEPATH so
-- the load below can be resolved from it.
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
-- Load miniconda3 at the version in $miniconda3_ver, or 4.12.0 when unset.
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

-- Export SRW_ENV=regional_workflow.
-- NOTE(review): presumably the conda environment name that the task wrapper
-- activates -- confirm against ush/load_modules_run_task.sh.
setenv("SRW_ENV", "regional_workflow")
5 changes: 5 additions & 0 deletions modulefiles/tasks/gaea_c5/python_srw.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Python environment module for SRW tasks on Gaea C5.
-- Drop any miniconda3 module that is already loaded before loading the C5 one.
unload("miniconda3")
-- Put the C5 miniconda3 modulefile directory at the front of MODULEPATH.
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
-- Load miniconda3 at the version in $miniconda3_ver, or 4.12.0 when unset.
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

-- Export SRW_ENV=workflow_tools.
-- NOTE(review): presumably the conda environment name that the task wrapper
-- activates -- confirm against ush/load_modules_run_task.sh.
setenv("SRW_ENV", "workflow_tools")
6 changes: 6 additions & 0 deletions modulefiles/tasks/gaea_c5/run_vx.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--[[
Task-local module settings for the verification (run_vx) tasks on Gaea C5.
Compiler-specific modules are used for met and metplus libraries
--]]
-- met and metplus versions come from $met_ver / $metplus_ver when set,
-- otherwise the defaults below (10.1.2 and 4.1.3) are used.
load(pathJoin("met", os.getenv("met_ver") or "10.1.2"))
load(pathJoin("metplus", os.getenv("metplus_ver") or "4.1.3"))
-- Load the python_srw module after met/metplus.
load("python_srw")
23 changes: 23 additions & 0 deletions modulefiles/wflow_gaea_c5.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Lmod modulefile that prepares the workflow (rocoto + conda) environment
-- for running the UFS SRW App on Gaea C5.
help([[
This module loads python environment for running the UFS SRW App on
the NOAA RDHPC machine Gaea C5
]])

whatis([===[Loads libraries needed for running the UFS SRW App on Gaea C5 ]===])

-- Remove any loaded python module, then load set_pythonpath.
unload("python")
load("set_pythonpath")
-- miniconda3 comes from the C5 contrib tree; version is $miniconda3_ver,
-- or 4.12.0 when that variable is unset.
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))
-- rocoto is also taken from the C5 contrib tree (default version).
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/rocoto/modulefiles")
load("rocoto")

-- pushenv (rather than setenv) so the previous MKLROOT value is restored
-- when this module is unloaded.
pushenv("MKLROOT", "/opt/intel/oneapi/mkl/2023.1.0/")
-- pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20230601")
-- setenv("PMI_NO_PREINITIALIZE","1")

-- Only print the activation hint when the module is being loaded, not on
-- unload or other evaluation modes.
if mode() == "load" then
   LmodMsgRaw([===[Please do the following to activate conda:
> conda activate workflow_tools
]===])
end
10 changes: 10 additions & 0 deletions tests/WE2E/machine_suites/coverage.gaea_c5
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
community
grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_RAP
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR
grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_SUBCONUS_Ind_3km_ics_RAP_lbcs_RAP_suite_RRFS_v1beta_plot
nco_ensemble
nco_grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
2 changes: 1 addition & 1 deletion tests/WE2E/setup_WE2E_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function usage {

}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne orion wcoss2 gaea gaea_c5 odin singularity macos noaacloud )

if [ "$1" = "-h" ] ; then usage ; fi
[[ $# -le 2 ]] && usage
Expand Down
2 changes: 1 addition & 1 deletion tests/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function usage() {
exit 1
}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne orion wcoss2 gaea gaea_c5 odin singularity macos noaacloud )

[[ $# -gt 4 ]] && usage

Expand Down
1 change: 1 addition & 0 deletions ush/load_modules_run_task.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ module list

if [ -n "${SRW_ENV:-}" ] ; then
set +u
conda deactivate
conda activate ${SRW_ENV}
set -u
fi
Expand Down
51 changes: 51 additions & 0 deletions ush/machine/gaea_c5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Machine configuration for Gaea C5 (ush/machine/gaea_c5.yaml): platform settings.
platform:
# Workflow manager, cores per node and batch scheduler.
WORKFLOW_MANAGER: rocoto
NCORES_PER_NODE: 128
SCHED: slurm
# Observation directories (CCPA, MRMS, NDAS) and pregenerated domain files.
TEST_CCPA_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc
TEST_MRMS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc
TEST_NDAS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc
DOMAIN_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
# Queue and partition names.
QUEUE_DEFAULT: normal
QUEUE_FCST: normal
QUEUE_HPSS: normal
REMOVE_MEMORY: True
PARTITION_HPSS: eslogin_c5
# Launch commands: the MPI steps go through srun with PMI2; the serial command is just `time`.
RUN_CMD_FCST: srun --mpi=pmi2 -n ${PE_MEMBER01}
RUN_CMD_POST: srun --mpi=pmi2 -n $nprocs
RUN_CMD_PRDGEN: srun --mpi=pmi2 -n $nprocs
RUN_CMD_SERIAL: time
RUN_CMD_UTILS: srun --mpi=pmi2 -n $nprocs
# Extra scheduler flags: cluster c5 / partition batch by default;
# the HPSS variant uses cluster es / partition eslogin_c5 with --export=NONE.
SCHED_NATIVE_CMD: --clusters=c5 --partition=batch
SCHED_NATIVE_CMD_HPSS: --clusters=es --partition=eslogin_c5 --export=NONE
# Shell snippet that lifts the stack-size limit and prints all limits.
# NOTE(review): presumably executed before each task -- confirm.
PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }'
# Staged data locations used by the tests.
TEST_EXTRN_MDL_SOURCE_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data
TEST_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
# Template selects .../fcst_ens when global.NUM_ENS_MEMBERS > 0, else .../fcst_det,
# and appends a literal {{workflow.PREDEF_GRID_NAME}} placeholder.
TEST_VX_FCST_INPUT_BASEDIR: '{{ "/lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}'
# Fixed-file directories.
FIXaer: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_aer
FIXgsi: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_gsi
FIXgsm: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_am
FIXlut: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_lut
FIXorg: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_orog
FIXsfc: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo
FIXshp: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/NaturalEarth
# Only the aws data store is listed for this machine.
EXTRN_MDL_DATA_STORES: aws
# On-disk locations of external model data for initial/lateral boundary
# conditions, keyed by model (FV3GFS additionally by file format).
# NOTE(review): ${yyyymmdd}${hh} are left as literal placeholders here;
# presumably substituted with the cycle date and hour at run time -- confirm.
data:
ics_lbcs:
FV3GFS:
nemsio: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
grib2: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
HRRR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh}
# Rocoto resource settings for the ensemble forecast task on this machine.
rocoto:
tasks:
metatask_run_ensemble:
task_run_fcst_mem#mem#:
# Core count is task_run_fcst.PE_MEMBER01 (integer-divided by 1).
cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}'
# Native scheduler flags: cpus-per-task from OMP_NUM_THREADS_RUN_FCST, exclusive
# nodes, followed by the platform-wide SCHED_NATIVE_CMD.
native: '--cpus-per-task {{ task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}'
# The node-based keys below are given empty values.
# NOTE(review): presumably to blank out inherited node-based requests in favor
# of the cores request above -- confirm against the default workflow definition.
nodes:
nnodes:
nodesize:
ppn:
2 changes: 1 addition & 1 deletion ush/valid_param_vals.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
valid_vals_RUN_ENVIR: ["nco", "community"]
valid_vals_VERBOSE: [True, False]
valid_vals_DEBUG: [True, False]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA_C5"]
valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"]
valid_vals_FCST_MODEL: ["ufs-weather-model"]
valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]
Expand Down