From 1a5d0b51642eb70f71e693355e86212d3607b7b3 Mon Sep 17 00:00:00 2001 From: Walter Kolczynski - NOAA Date: Fri, 20 Oct 2023 15:54:33 +0000 Subject: [PATCH 1/9] Split clean-up into separate job (#1906) Moves the clean-up that was previously done in the archive jobs into their own separate job. The clean-up is also streamlined considerably by using only `COM_TOP` instead of going through every template. There is also additional streamlining/corrections in the function that does the actual removing. Some settings used by both jobs were elevated to `config.base`. Others only needed for cleanup were moved to the new config for that job. Also corrects a small error encountered when attempting to rerun an ensemble forecast. Resolves #583 Resolves #1872 --- jobs/JGLOBAL_CLEANUP | 17 ++ jobs/rocoto/arch.sh | 1 - jobs/rocoto/cleanup.sh | 19 +++ parm/config/gfs/config.arch | 9 - parm/config/gfs/config.base.emc.dyn | 5 +- parm/config/gfs/config.cleanup | 25 +++ parm/config/gfs/config.resources | 9 +- scripts/exgdas_enkf_earc.sh | 168 ------------------ scripts/exgdas_enkf_fcst.sh | 2 + scripts/exglobal_archive.sh | 189 --------------------- scripts/exglobal_cleanup.sh | 106 ++++++++++++ workflow/applications/gfs_cycled.py | 6 +- workflow/applications/gfs_forecast_only.py | 4 +- workflow/rocoto/gfs_tasks.py | 17 ++ workflow/rocoto/tasks.py | 2 +- 15 files changed, 204 insertions(+), 375 deletions(-) create mode 100755 jobs/JGLOBAL_CLEANUP create mode 100755 jobs/rocoto/cleanup.sh create mode 100644 parm/config/gfs/config.cleanup create mode 100755 scripts/exglobal_cleanup.sh diff --git a/jobs/JGLOBAL_CLEANUP b/jobs/JGLOBAL_CLEANUP new file mode 100755 index 0000000000..ad938ccf60 --- /dev/null +++ b/jobs/JGLOBAL_CLEANUP @@ -0,0 +1,17 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" +source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base cleanup" + +"${HOMEgfs}/scripts/exglobal_cleanup.sh" +status=$? +[[ ${status} -ne 0 ]] && exit "${status}" + +########################################## +# Remove the Temporary working directory +########################################## +cd "${DATAROOT}" || (echo "${DATAROOT} does not exist. ABORT!"; exit 1) +[[ ${KEEPDATA} = "NO" ]] && rm -rf "${DATA}" + +exit 0 + diff --git a/jobs/rocoto/arch.sh b/jobs/rocoto/arch.sh index 2f62d8b354..d949b7d76f 100755 --- a/jobs/rocoto/arch.sh +++ b/jobs/rocoto/arch.sh @@ -16,5 +16,4 @@ export jobid="${job}.$$" "${HOMEgfs}"/jobs/JGLOBAL_ARCHIVE status=$? - exit "${status}" diff --git a/jobs/rocoto/cleanup.sh b/jobs/rocoto/cleanup.sh new file mode 100755 index 0000000000..96303fde57 --- /dev/null +++ b/jobs/rocoto/cleanup.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Source FV3GFS workflow modules +. "${HOMEgfs}"/ush/load_fv3gfs_modules.sh +status=$? +[[ ${status} -ne 0 ]] && exit "${status}" + +export job="cleanup" +export jobid="${job}.$$" + +############################################################### +# Execute the JJOB +"${HOMEgfs}"/jobs/JGLOBAL_CLEANUP +status=$? + +exit "${status}" diff --git a/parm/config/gfs/config.arch b/parm/config/gfs/config.arch index 31a3713fb1..a23bcce6ae 100644 --- a/parm/config/gfs/config.arch +++ b/parm/config/gfs/config.arch @@ -12,13 +12,4 @@ export ARCH_GAUSSIAN="YES" export ARCH_GAUSSIAN_FHMAX=${FHMAX_GFS} export ARCH_GAUSSIAN_FHINC=${FHOUT_GFS} -#--online archive of nemsio files for fit2obs verification -export FITSARC="YES" -export FHMAX_FITS=132 -[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} - -#--starting and ending hours of previous cycles to be removed from rotating directory -export RMOLDSTD=144 -export RMOLDEND=24 - echo "END: config.arch" diff --git a/parm/config/gfs/config.base.emc.dyn b/parm/config/gfs/config.base.emc.dyn index 09d8897a31..b77787794c 100644 --- a/parm/config/gfs/config.base.emc.dyn +++ b/parm/config/gfs/config.base.emc.dyn @@ -394,6 +394,9 @@ export ARCH_CYC=00 # Archive data at this cycle for warm_start capabil export ARCH_WARMICFREQ=4 # Archive frequency in days for warm_start capability export ARCH_FCSTICFREQ=1 # Archive frequency in days for gdas and gfs forecast-only capability -export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arch.sh and earc.sh. +#--online archive of nemsio files for fit2obs verification +export FITSARC="YES" +export FHMAX_FITS=132 +[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} echo "END: config.base" diff --git a/parm/config/gfs/config.cleanup b/parm/config/gfs/config.cleanup new file mode 100644 index 0000000000..1908c91bb5 --- /dev/null +++ b/parm/config/gfs/config.cleanup @@ -0,0 +1,25 @@ +#! /usr/bin/env bash + +########## config.cleanup ########## +echo "BEGIN: config.cleanup" + +# Get task specific resources +source "${EXPDIR}/config.resources" cleanup + +export CLEANUP_COM="YES" # NO=retain ROTDIR. YES default in cleanup.sh + +#--starting and ending hours of previous cycles to be removed from rotating directory +export RMOLDSTD=144 +export RMOLDEND=24 + +# Specify the list of files to exclude from the first stage of cleanup +# Because arrays cannot be exported, list is a single string of comma- +# separated values. This string is split to form an array at runtime. +case ${RUN} in + gdas | gfs) exclude_string="*prepbufr*, *cnvstat*, *atmanl.nc" ;; + enkf*) exclude_string="*f006.ens*" ;; + *) exclude_string="" ;; +esac +export exclude_string + +echo "END: config.cleanup" \ No newline at end of file diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index d6654b61ed..6503ae5523 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -14,7 +14,7 @@ if [[ $# -ne 1 ]]; then echo "atmensanlinit atmensanlrun atmensanlfinal" echo "landanl" echo "aeroanlinit aeroanlrun aeroanlfinal" - echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres" + echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch cleanup echgres" echo "eobs ediag eomg eupd ecen esfc efcs epos earc" echo "init_chem mom6ic ocnpost" echo "waveinit waveprep wavepostsbs wavepostbndpnt wavepostbndpntbll wavepostpnt" @@ -773,6 +773,13 @@ elif [[ ${step} = "arch" || ${step} = "earc" || ${step} = "getic" ]]; then eval "export memory_${step}=50GB" fi +elif [[ ${step} == "cleanup" ]]; then + export wtime_cleanup="01:00:00" + export npe_cleanup=1 + export npe_node_cleanup=1 + export nth_cleanup=1 + export memory_cleanup="4096M" + elif [[ ${step} = "stage_ic" ]]; then export wtime_stage_ic="00:15:00" diff --git a/scripts/exgdas_enkf_earc.sh b/scripts/exgdas_enkf_earc.sh index 1bb941f888..a1bcba4d79 100755 --- a/scripts/exgdas_enkf_earc.sh +++ b/scripts/exgdas_enkf_earc.sh @@ -133,172 +133,4 @@ if [ "${ENSGRP}" -eq 0 ]; then "gsistat.${RUN}.${PDY}${cyc}.ensmean" fi - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then - exit 0 -fi - -############################################################### -# ENSGRP 0 also does clean-up -############################################################### -if [[ "${ENSGRP}" -eq 0 ]]; then - function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! -d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - if (( ${#file_list[@]} == 0 )); then return; fi - done - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true - } - - # Start start and end dates to remove - GDATEEND=$(${NDATE} -"${RMOLDEND_ENKF:-24}" "${PDY}${cyc}") - GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") - - while [ "${GDATE}" -le "${GDATEEND}" ]; do - - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - - if [[ -d ${COM_TOP} ]]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [[ -f "${rocotolog}" ]]; then - set +e - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? - set_strict - if [ "${rc}" -eq 0 ]; then - case ${CDUMP} in - gdas) nmem="${NMEM_ENS}";; - gfs) nmem="${NMEM_ENS_GFS}";; - *) - echo "FATAL ERROR: Unknown CDUMP ${CDUMP} during cleanup" - exit 10 - ;; - esac - - readarray memlist< <(seq --format="mem%03g" 1 "${nmem}") - memlist+=("ensstat") - - for mem in "${memlist[@]}"; do - # Atmos - exclude_list="f006.ens" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - done - fi - fi - fi - - # Remove any empty directories - YMD=${gPDY} HH=${gcyc} generate_com target_dir:COM_TOP_TMPL - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - # Advance to next cycle - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") - done -fi - -# Remove enkf*.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -clist="enkfgdas enkfgfs" -for ctype in ${clist}; do - COMIN="${ROTDIR}/${ctype}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" -done - -############################################################### - - exit 0 diff --git a/scripts/exgdas_enkf_fcst.sh b/scripts/exgdas_enkf_fcst.sh index 85344e4e35..7eb2a3a711 100755 --- a/scripts/exgdas_enkf_fcst.sh +++ b/scripts/exgdas_enkf_fcst.sh @@ -122,7 +122,9 @@ for imem in $(seq "${ENSBEG}" "${ENSEND}"); do skip_mem="NO" if [[ -f ${EFCSGRP}.fail ]]; then + set +e memstat=$(grep "MEMBER ${ENSMEM}" "${EFCSGRP}.fail" | grep -c "PASS") + set_strict [[ ${memstat} -eq 1 ]] && skip_mem="YES" fi diff --git a/scripts/exglobal_archive.sh b/scripts/exglobal_archive.sh index 5fea07f4ed..78a6d60b65 100755 --- a/scripts/exglobal_archive.sh +++ b/scripts/exglobal_archive.sh @@ -284,193 +284,4 @@ if [[ ${HPSSARCH} = "YES" || ${LOCALARCH} = "YES" ]]; then fi ##end of HPSS archive ############################################################### - - -############################################################### -# Clean up previous cycles; various depths -# PRIOR CYCLE: Leave the prior cycle alone -GDATE=$(${NDATE} -"${assim_freq}" "${PDY}${cyc}") - -# PREVIOUS to the PRIOR CYCLE -GDATE=$(${NDATE} -"${assim_freq}" "${GDATE}") -gPDY="${GDATE:0:8}" -gcyc="${GDATE:8:2}" - -# Remove the TMPDIR directory -# TODO Only prepbufr is currently using this directory, and all jobs should be -# cleaning up after themselves anyway -COMIN="${DATAROOT}/${GDATE}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then - exit 0 -fi - -# Step back every assim_freq hours and remove old rotating directories -# for successful cycles (defaults from 24h to 120h). -# Retain files needed by Fit2Obs -# TODO: This whole section needs to be revamped to remove marine component -# directories and not look at the rocoto log. -GDATEEND=$(${NDATE} -"${RMOLDEND:-24}" "${PDY}${cyc}") -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -RTOFS_DATE=$(${NDATE} -48 "${PDY}${cyc}") -function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! -d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - # echo "Number of files to remove before exclusions: ${#file_list[@]}" - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - # echo "Number of files to remove after exclusion: ${#file_list[@]}" - if (( ${#file_list[@]} == 0 )); then return; fi - done - # echo "Number of files to remove after exclusions: ${#file_list[@]}" - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true -} - -while [ "${GDATE}" -le "${GDATEEND}" ]; do - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - COMINrtofs="${ROTDIR}/rtofs.${gPDY}" - if [ -d "${COM_TOP}" ]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [ -f "${rocotolog}" ]; then - set +e - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? - set_strict - - if [ "${rc}" -eq 0 ]; then - # Obs - exclude_list="prepbufr" - templates="COM_OBS" - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Atmos - exclude_list="cnvstat atmanl.nc" - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - if [ -d "${COMINrtofs}" ] && [ "${GDATE}" -lt "${RTOFS_DATE}" ]; then rm -rf "${COMINrtofs}" ; fi - fi - fi - fi - - # Remove mdl gfsmos directory - if [ "${RUN}" = "gfs" ]; then - COMIN="${ROTDIR}/gfsmos.${gPDY}" - if [ -d "${COMIN}" ] && [ "${GDATE}" -lt "${CDATE_MOS}" ]; then rm -rf "${COMIN}" ; fi - fi - - # Remove any empty directories - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") -done - -# Remove archived gaussian files used for Fit2Obs in $VFYARC that are -# $FHMAX_FITS plus a delta before $CDATE. Touch existing archived -# gaussian files to prevent the files from being removed by automatic -# scrubber present on some machines. - -if [ "${RUN}" = "gfs" ]; then - fhmax=$((FHMAX_FITS+36)) - RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") - rPDY=$(echo "${RDATE}" | cut -c1-8) - COMIN="${VFYARC}/${RUN}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - TDATE=$(${NDATE} -"${FHMAX_FITS}" "${PDY}${cyc}") - while [ "${TDATE}" -lt "${PDY}${cyc}" ]; do - tPDY=$(echo "${TDATE}" | cut -c1-8) - tcyc=$(echo "${TDATE}" | cut -c9-10) - TDIR=${VFYARC}/${RUN}.${tPDY}/${tcyc} - [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(${NDATE} +6 "${TDATE}") - done -fi - -# Remove $RUN.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -COMIN="${ROTDIR}/${RUN}.${rPDY}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - -############################################################### - - exit 0 diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh new file mode 100755 index 0000000000..5d7c0a9788 --- /dev/null +++ b/scripts/exglobal_cleanup.sh @@ -0,0 +1,106 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Clean up previous cycles; various depths +# PRIOR CYCLE: Leave the prior cycle alone +# shellcheck disable=SC2153 +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${assim_freq} hours") +# PREVIOUS to the PRIOR CYCLE +GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") + +# Remove the TMPDIR directory +# TODO Only prepbufr is currently using this directory, and all jobs should be +# cleaning up after themselves anyway +COMIN="${DATAROOT}/${GDATE}" +[[ -d ${COMIN} ]] && rm -rf "${COMIN}" + +if [[ "${CLEANUP_COM:-YES}" == NO ]] ; then + exit 0 +fi + +# Step back every assim_freq hours and remove old rotating directories +# for successful cycles (defaults from 24h to 120h). +# Retain files needed by Fit2Obs +last_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) +first_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +last_rtofs=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDRTOFS:-48} hours") +function remove_files() { + local directory=$1 + shift + if [[ ! -d ${directory} ]]; then + echo "No directory ${directory} to remove files from, skiping" + return + fi + local find_exclude_string="" + for exclude in "$@"; do + find_exclude_string+="${find_exclude_string} -name ${exclude} -or" + done + # Chop off any trailing or + find_exclude_string="${find_exclude_string[*]/%-or}" + # Remove all regular files that do not match + # shellcheck disable=SC2086 + find "${directory}" -type f -not \( ${find_exclude_string} \) -delete + # Remove all symlinks that do not match + # shellcheck disable=SC2086 + find "${directory}" -type l -not \( ${find_exclude_string} \) -delete + # Remove any empty directories + find "${directory}" -type d -empty -delete +} + +for (( current_date=first_date; current_date <= last_date; \ + current_date=$(date --utc +%Y%m%d%H -d "${current_date:0:8} ${current_date:8:2} +${assim_freq} hours") )); do + current_PDY="${current_date:0:8}" + current_cyc="${current_date:8:2}" + rtofs_dir="${ROTDIR}/rtofs.${current_PDY}" + rocotolog="${EXPDIR}/logs/${current_date}.log" + if [[ -f "${rocotolog}" ]]; then + # TODO: This needs to be revamped to not look at the rocoto log. + # shellcheck disable=SC2312 + if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then + YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP + if [[ -d "${COM_TOP}" ]]; then + IFS=", " read -r -a exclude_list <<< "${exclude_string:-}" + remove_files "${COM_TOP}" "${exclude_list[@]:-}" + fi + if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi + fi + fi + + # Remove mdl gfsmos directory + if [[ "${RUN}" == "gfs" ]]; then + mos_dir="${ROTDIR}/gfsmos.${current_PDY}" + if [[ -d "${mos_dir}" ]] && (( current_date < CDATE_MOS )); then rm -rf "${mos_dir}" ; fi + fi +done + +# Remove archived gaussian files used for Fit2Obs in $VFYARC that are +# $FHMAX_FITS plus a delta before $CDATE. Touch existing archived +# gaussian files to prevent the files from being removed by automatic +# scrubber present on some machines. + +if [[ "${RUN}" == "gfs" ]]; then + fhmax=$((FHMAX_FITS + 36)) + RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${fhmax} hours") + verify_dir="${ROTDIR}/vrfyarch/${RUN}.${RDATE:0:8}" + [[ -d ${verify_dir} ]] && rm -rf "${verify_dir}" + + touch_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_FITS} hours") + while (( touch_date < "${PDY}${cyc}" )); do + touch_PDY="${touch_date:0:8}" + touch_cyc="${touch_date:8:2}" + touch_dir="${ROTDIR}/vrfyarch/${RUN}.${touch_PDY}/${touch_cyc}" + [[ -d ${touch_dir} ]] && touch "${touch_dir}"/* + touch_date=$(date --utc +%Y%m%d%H -d "${touch_PDY} ${touch_cyc} +6 hours") + done +fi + +# Remove $RUN.$rPDY for the older of GDATE or RDATE +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_GFS} hours") +if (( GDATE < RDATE )); then + RDATE=${GDATE} +fi +deletion_target="${ROTDIR}/${RUN}.${RDATE:0:8}" +if [[ -d ${deletion_target} ]]; then rm -rf "${deletion_target}"; fi diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py index 633e93bac0..6eff929d5f 100644 --- a/workflow/applications/gfs_cycled.py +++ b/workflow/applications/gfs_cycled.py @@ -47,7 +47,7 @@ def _get_app_configs(self): if self.do_ocean: configs += ['ocnpost'] - configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch'] + configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch', 'cleanup'] if self.do_hybvar: if self.do_jediatmens: @@ -106,7 +106,7 @@ def get_task_names(self): # gdas_gfs_common_tasks_after_fcst += ['ocnpost'] gdas_gfs_common_tasks_after_fcst += ['vrfy'] - gdas_gfs_common_cleanup_tasks = ['arch'] + gdas_gfs_common_cleanup_tasks = ['arch', 'cleanup'] if self.do_jediatmvar: gdas_gfs_common_tasks_before_fcst += ['prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal'] @@ -137,7 +137,7 @@ def get_task_names(self): else: hybrid_tasks += ['eobs', 'eupd', 'echgres'] hybrid_tasks += ['ediag'] if self.lobsdiag_forenkf else ['eomg'] - hybrid_after_eupd_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc'] + hybrid_after_eupd_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc', 'cleanup'] # Collect all "gdas" cycle tasks gdas_tasks = gdas_gfs_common_tasks_before_fcst.copy() diff --git a/workflow/applications/gfs_forecast_only.py b/workflow/applications/gfs_forecast_only.py index e6d1ab35a2..73e17ee7aa 100644 --- a/workflow/applications/gfs_forecast_only.py +++ b/workflow/applications/gfs_forecast_only.py @@ -15,7 +15,7 @@ def _get_app_configs(self): Returns the config_files that are involved in the forecast-only app """ - configs = ['stage_ic', 'fcst', 'arch'] + configs = ['stage_ic', 'fcst', 'arch', 'cleanup'] if self.do_atm: configs += ['post', 'vrfy'] @@ -109,6 +109,6 @@ def get_task_names(self): if self.do_wafs: tasks += ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', 'wafsblending', 'wafsblending0p25'] - tasks += ['arch'] # arch **must** be the last task + tasks += ['arch', 'cleanup'] # arch and cleanup **must** be the last tasks return {f"{self._base['CDUMP']}": tasks} diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index 33e2ec82f3..e41e4ebcc1 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -944,6 +944,23 @@ def arch(self): return task + # Cleanup + def cleanup(self): + deps = [] + if 'enkf' in self.cdump: + dep_dict = {'type': 'metatask', 'name': 'enkfgdaseamn'} + deps.append(rocoto.add_dependency(dep_dict)) + else: + dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} + deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('cleanup') + task = create_wf_task('cleanup', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + # Start of ensemble tasks def eobs(self): deps = [] diff --git a/workflow/rocoto/tasks.py b/workflow/rocoto/tasks.py index b9716c938e..29ed57daf2 100644 --- a/workflow/rocoto/tasks.py +++ b/workflow/rocoto/tasks.py @@ -11,7 +11,7 @@ class Tasks: SERVICE_TASKS = ['arch', 'earc'] VALID_TASKS = ['aerosol_init', 'stage_ic', - 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', + 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', "cleanup", 'prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal', 'ocnanalprep', 'ocnanalbmat', 'ocnanalrun', 'ocnanalchkpt', 'ocnanalpost', 'ocnanalvrfy', 'earc', 'ecen', 'echgres', 'ediag', 'efcs', From 08ce4f8d3ed8e07b4d488a80b5054c6206b04404 Mon Sep 17 00:00:00 2001 From: Walter Kolczynski - NOAA Date: Fri, 20 Oct 2023 16:14:53 +0000 Subject: [PATCH 2/9] Fix enkfgfs cleanup dependency (#1941) When #1906 was merged, the dependency for enkf cycles was hard-coded to use the enkfgdas archive instead of depending on the `RUN`. --- workflow/rocoto/gfs_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index e41e4ebcc1..56449cb9d5 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -948,7 +948,7 @@ def arch(self): def cleanup(self): deps = [] if 'enkf' in self.cdump: - dep_dict = {'type': 'metatask', 'name': 'enkfgdaseamn'} + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}eamn'} deps.append(rocoto.add_dependency(dep_dict)) else: dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} From 4b5cd0bc435fc158258ca38c3d5f44add6b60469 Mon Sep 17 00:00:00 2001 From: Rahul Mahajan Date: Mon, 23 Oct 2023 12:04:26 -0400 Subject: [PATCH 3/9] Fix nth_eupd in gfs/config.resources. remove sections of jobs not run as part of gefs from gefs/config.resources (#1952) --- parm/config/gefs/config.resources | 407 +----------------------------- parm/config/gfs/config.resources | 4 +- 2 files changed, 3 insertions(+), 408 deletions(-) diff --git a/parm/config/gefs/config.resources b/parm/config/gefs/config.resources index 91699a9886..33156a768a 100644 --- a/parm/config/gefs/config.resources +++ b/parm/config/gefs/config.resources @@ -9,17 +9,12 @@ if [[ $# -ne 1 ]]; then echo "Must specify an input task argument to set resource variables!" echo "argument can be any one of the following:" echo "coupled_ic aerosol_init" - echo "atmanlinit atmanlrun atmanlfinal" - echo "atmensanlinit atmensanlrun atmensanlfinal" - echo "landanlprep landanlinit landanlrun landanlfinal" - echo "aeroanlinit aeroanlrun aeroanlfinal" - echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres" - echo "eobs ediag eomg eupd ecen esfc efcs epos earc" + echo "sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres" + echo "ecen esfc efcs epos earc" echo "init_chem mom6ic ocnpost" echo "waveinit waveprep wavepostsbs wavepostbndpnt wavepostbndpntbll wavepostpnt" echo "wavegempak waveawipsbulls waveawipsgridded" echo "postsnd awips gempak" - echo "ocnanalprep ocnanalbmat ocnanalrun ocnanalchkpt ocnanalpost ocnanalvrfy" exit 1 fi @@ -168,303 +163,6 @@ elif [[ ${step} = "waveawipsgridded" ]]; then export NTASKS=${npe_waveawipsgridded} export memory_waveawipsgridded_gfs="1GB" -elif [[ "${step}" = "atmanlinit" ]]; then - - export wtime_atmanlinit="00:10:00" - export npe_atmanlinit=1 - export nth_atmanlinit=1 - npe_node_atmanlinit=$(echo "${npe_node_max} / ${nth_atmanlinit}" | bc) - export npe_node_atmanlinit - export memory_atmanlinit="3072M" - -elif [[ "${step}" = "atmanlrun" ]]; then - - # make below case dependent later - export layout_x=1 - export layout_y=1 - - export wtime_atmanlrun="00:30:00" - npe_atmanlrun=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_atmanlrun - npe_atmanlrun_gfs=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_atmanlrun_gfs - export nth_atmanlrun=1 - export nth_atmanlrun_gfs=${nth_atmanlrun} - npe_node_atmanlrun=$(echo "${npe_node_max} / ${nth_atmanlrun}" | bc) - export npe_node_atmanlrun - export is_exclusive=True - -elif [[ "${step}" = "atmanlfinal" ]]; then - - export wtime_atmanlfinal="00:30:00" - export npe_atmanlfinal=${npe_node_max} - export nth_atmanlfinal=1 - npe_node_atmanlfinal=$(echo "${npe_node_max} / ${nth_atmanlfinal}" | bc) - export npe_node_atmanlfinal - export is_exclusive=True - -elif [[ "${step}" = "landanlprep" || "${step}" = "landanlinit" || "${step}" = "landanlrun" || "${step}" = "landanlfinal" ]]; then - # below lines are for creating JEDI YAML - case ${CASE} in - C768) - layout_x=6 - layout_y=6 - ;; - C384) - layout_x=5 - layout_y=5 - ;; - C192 | C96 | C48) - layout_x=1 - layout_y=1 - ;; - *) - echo "FATAL ERROR: Resolution not supported for land analysis'" - exit 1 - esac - - export layout_x - export layout_y - - if [[ "${step}" = "landanlinit" || "${step}" = "landanlfinal" ]]; then - declare -x "wtime_${step}"="00:10:00" - declare -x "npe_${step}"=1 - declare -x "nth_${step}"=1 - temp_stepname="nth_${step}" - declare -x "npe_node_${step}"="$(echo "${npe_node_max} / ${!temp_stepname}" | bc)" - declare -x "memory_${step}"="3072M" - elif [[ "${step}" = "landanlrun" ]]; then - export wtime_landanlrun="00:30:00" - npe_landanlrun=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_landanlrun - export nth_landanlrun=1 - npe_node_landanlrun=$(echo "${npe_node_max} / ${nth_landanlrun}" | bc) - export npe_node_landanlrun - export is_exclusive=True - elif [[ "${step}" = "landanlprep" ]]; then - export wtime_landanlprep="00:30:00" - npe_landanlprep=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_landanlprep - export nth_landanlprep=1 - npe_node_landanlprep=$(echo "${npe_node_max} / ${nth_landanlprep}" | bc) - export npe_node_landanlprep - export is_exclusive=True - fi - -elif [[ "${step}" = "aeroanlinit" ]]; then - - # below lines are for creating JEDI YAML - case ${CASE} in - C768) - layout_x=6 - layout_y=6 - ;; - C384) - layout_x=5 - layout_y=5 - ;; - C192 | C96 | C48) - layout_x=8 - layout_y=8 - ;; - *) - echo "FATAL ERROR: Resolution not supported for aerosol analysis'" - exit 1 - esac - - export layout_x - export layout_y - - export wtime_aeroanlinit="00:10:00" - export npe_aeroanlinit=1 - export nth_aeroanlinit=1 - npe_node_aeroanlinit=$(echo "${npe_node_max} / ${nth_aeroanlinit}" | bc) - export npe_node_aeroanlinit - export memory_aeroanlinit="3072M" - -elif [[ "${step}" = "aeroanlrun" ]]; then - - case ${CASE} in - C768) - layout_x=6 - layout_y=6 - ;; - C384) - layout_x=5 - layout_y=5 - ;; - C192 | C96 | C48) - layout_x=8 - layout_y=8 - ;; - *) - echo "FATAL ERROR: Resolution ${CASE} is not supported, ABORT!" - exit 1 - esac - - export layout_x - export layout_y - - export wtime_aeroanlrun="00:30:00" - npe_aeroanlrun=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_aeroanlrun - npe_aeroanlrun_gfs=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_aeroanlrun_gfs - export nth_aeroanlrun=1 - export nth_aeroanlrun_gfs=1 - npe_node_aeroanlrun=$(echo "${npe_node_max} / ${nth_aeroanlrun}" | bc) - export npe_node_aeroanlrun - export is_exclusive=True - -elif [[ "${step}" = "aeroanlfinal" ]]; then - - export wtime_aeroanlfinal="00:10:00" - export npe_aeroanlfinal=1 - export nth_aeroanlfinal=1 - npe_node_aeroanlfinal=$(echo "${npe_node_max} / ${nth_aeroanlfinal}" | bc) - export npe_node_aeroanlfinal - export memory_aeroanlfinal="3072M" - -elif [[ "${step}" = "ocnanalprep" ]]; then - - export wtime_ocnanalprep="00:10:00" - export npe_ocnanalprep=1 - export nth_ocnanalprep=1 - npe_node_ocnanalprep=$(echo "${npe_node_max} / ${nth_ocnanalprep}" | bc) - export npe_node_ocnanalprep - export memory_ocnanalprep="24GB" - -elif [[ "${step}" = "ocnanalbmat" ]]; then - npes=16 - case ${CASE} in - C384) - npes=480 - ;; - C48) - npes=16 - ;; - *) - echo "FATAL: Resolution not supported'" - exit 1 - esac - - export wtime_ocnanalbmat="00:30:00" - export npe_ocnanalbmat=${npes} - export nth_ocnanalbmat=1 - export is_exclusive=True - npe_node_ocnanalbmat=$(echo "${npe_node_max} / ${nth_ocnanalbmat}" | bc) - export npe_node_ocnanalbmat - -elif [[ "${step}" = "ocnanalrun" ]]; then - npes=16 - case ${CASE} in - C384) - npes=480 - ;; - C48) - npes=16 - ;; - *) - echo "FATAL: Resolution not supported'" - exit 1 - esac - - export wtime_ocnanalrun="00:30:00" - export npe_ocnanalrun=${npes} - export nth_ocnanalrun=1 - export is_exclusive=True - npe_node_ocnanalrun=$(echo "${npe_node_max} / ${nth_ocnanalrun}" | bc) - export npe_node_ocnanalrun - -elif [[ "${step}" = "ocnanalchkpt" ]]; then - - export wtime_ocnanalchkpt="00:10:00" - export npe_ocnanalchkpt=1 - export nth_ocnanalchkpt=1 - npe_node_ocnanalchkpt=$(echo "${npe_node_max} / ${nth_ocnanalchkpt}" | bc) - export npe_node_ocnanalchkpt - case ${CASE} in - C384) - export memory_ocnanalchkpt="128GB" - ;; - C48) - export memory_ocnanalchkpt="32GB" - ;; - *) - echo "FATAL: Resolution not supported'" - exit 1 - esac - -elif [[ "${step}" = "ocnanalpost" ]]; then - - export wtime_ocnanalpost="00:30:00" - export npe_ocnanalpost=${npe_node_max} - export nth_ocnanalpost=1 - npe_node_ocnanalpost=$(echo "${npe_node_max} / ${nth_ocnanalpost}" | bc) - export npe_node_ocnanalpost - -elif [[ "${step}" = "ocnanalvrfy" ]]; then - - export wtime_ocnanalvrfy="00:35:00" - export npe_ocnanalvrfy=1 - export nth_ocnanalvrfy=1 - npe_node_ocnanalvrfy=$(echo "${npe_node_max} / ${nth_ocnanalvrfy}" | bc) - export npe_node_ocnanalvrfy - export memory_ocnanalvrfy="24GB" - -elif [[ ${step} = "anal" ]]; then - - export wtime_anal="00:50:00" - export wtime_anal_gfs="00:40:00" - export npe_anal=780 - export nth_anal=5 - export npe_anal_gfs=825 - export nth_anal_gfs=5 - if [[ "${machine}" = "WCOSS2" ]]; then - export nth_anal=8 - export nth_anal_gfs=8 - fi - if [[ ${CASE} = "C384" ]]; then - export npe_anal=160 - export npe_anal_gfs=160 - export nth_anal=10 - export nth_anal_gfs=10 - if [[ ${machine} = "S4" ]]; then - #On the S4-s4 partition, this is accomplished by increasing the task - #count to a multiple of 32 - if [[ ${PARTITION_BATCH} = "s4" ]]; then - export npe_anal=416 - export npe_anal_gfs=416 - fi - #S4 is small, so run this task with just 1 thread - export nth_anal=1 - export nth_anal_gfs=1 - export wtime_anal="02:00:00" - fi - fi - if [[ ${CASE} = "C192" || ${CASE} = "C96" || ${CASE} = "C48" ]]; then - export npe_anal=84 - export npe_anal_gfs=84 - if [[ ${machine} = "S4" ]]; then - export nth_anal=4 - export nth_anal_gfs=4 - #Adjust job count for S4 - if [[ ${PARTITION_BATCH} = "s4" ]]; then - export npe_anal=88 - export npe_anal_gfs=88 - elif [[ ${PARTITION_BATCH} = "ivy" ]]; then - export npe_anal=90 - export npe_anal_gfs=90 - fi - fi - fi - npe_node_anal=$(echo "${npe_node_max} / ${nth_anal}" | bc) - export npe_node_anal - export nth_cycle=${nth_anal} - npe_node_cycle=$(echo "${npe_node_max} / ${nth_cycle}" | bc) - export npe_node_cycle - export is_exclusive=True - elif [[ ${step} = "analcalc" ]]; then export wtime_analcalc="00:10:00" @@ -726,107 +424,6 @@ elif [[ ${step} = "coupled_ic" ]]; then export nth_coupled_ic=1 export is_exclusive=True -elif [[ "${step}" = "atmensanlinit" ]]; then - - export wtime_atmensanlinit="00:10:00" - export npe_atmensanlinit=1 - export nth_atmensanlinit=1 - npe_node_atmensanlinit=$(echo "${npe_node_max} / ${nth_atmensanlinit}" | bc) - export npe_node_atmensanlinit - export memory_atmensanlinit="3072M" - -elif [[ "${step}" = "atmensanlrun" ]]; then - - # make below case dependent later - export layout_x=1 - export layout_y=1 - - export wtime_atmensanlrun="00:30:00" - npe_atmensanlrun=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_atmensanlrun - npe_atmensanlrun_gfs=$(echo "${layout_x} * ${layout_y} * 6" | bc) - export npe_atmensanlrun_gfs - export nth_atmensanlrun=1 - export nth_atmensanlrun_gfs=${nth_atmensanlrun} - npe_node_atmensanlrun=$(echo "${npe_node_max} / ${nth_atmensanlrun}" | bc) - export npe_node_atmensanlrun - export is_exclusive=True - -elif [[ "${step}" = "atmensanlfinal" ]]; then - - export wtime_atmensanlfinal="00:30:00" - export npe_atmensanlfinal=${npe_node_max} - export nth_atmensanlfinal=1 - npe_node_atmensanlfinal=$(echo "${npe_node_max} / ${nth_atmensanlfinal}" | bc) - export npe_node_atmensanlfinal - export is_exclusive=True - -elif [[ ${step} = "eobs" || ${step} = "eomg" ]]; then - - export wtime_eobs="00:15:00" - export wtime_eomg="01:00:00" - if [[ ${CASE} = "C768" ]]; then - export npe_eobs=200 - elif [[ ${CASE} = "C384" ]]; then - export npe_eobs=100 - elif [[ ${CASE} = "C192" || ${CASE} = "C96" || ${CASE} = "C48" ]]; then - export npe_eobs=40 - fi - export npe_eomg=${npe_eobs} - export nth_eobs=2 - export nth_eomg=${nth_eobs} - npe_node_eobs=$(echo "${npe_node_max} / ${nth_eobs}" | bc) - export npe_node_eobs - export npe_node_eomg=${npe_node_eobs} - export is_exclusive=True - #The number of tasks and cores used must be the same for eobs - #For S4, this is accomplished by running 10 tasks/node - if [[ ${machine} = "S4" ]]; then - export npe_node_eobs=10 - fi - -elif [[ ${step} = "ediag" ]]; then - - export wtime_ediag="00:15:00" - export npe_ediag=48 - export nth_ediag=1 - npe_node_ediag=$(echo "${npe_node_max} / ${nth_ediag}" | bc) - export npe_node_ediag - export memory_ediag="30GB" - -elif [[ ${step} = "eupd" ]]; then - - export wtime_eupd="00:30:00" - if [[ ${CASE} = "C768" ]]; then - export npe_eupd=480 - export nth_eupd=6 - if [[ "${machine}" = "WCOSS2" ]]; then - export npe_eupd=315 - export nth_eupd=14 - fi - elif [[ ${CASE} = "C384" ]]; then - export npe_eupd=270 - export nth_eupd=8 - if [[ "${machine}" = "WCOSS2" ]]; then - export npe_eupd=315 - export nth_eupd=14 - elif [[ "${machine}" = "HERA" || "${machine}" = "JET" ]]; then - export nth_eupd=8 - elif [[ ${machine} = "S4" ]]; then - export npe_eupd=160 - export nth_eupd=2 - fi - elif [[ ${CASE} = "C192" || ${CASE} = "C96" || ${CASE} = "C48" ]]; then - export npe_eupd=42 - export nth_eupd=2 - if [[ "${machine}" = "HERA" || "${machine}" = "JET" ]]; then - export nth_eupd=4 - fi - fi - npe_node_eupd=$(echo "${npe_node_max} / ${nth_eupd}" | bc) - export npe_node_eupd - export is_exclusive=True - elif [[ ${step} = "ecen" ]]; then export wtime_ecen="00:10:00" diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 6503ae5523..9919b81b7e 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -868,12 +868,10 @@ elif [[ ${step} = "eupd" ]]; then fi elif [[ ${CASE} = "C384" ]]; then export npe_eupd=270 - export nth_eupd=2 + export nth_eupd=8 if [[ "${machine}" = "WCOSS2" ]]; then export npe_eupd=315 export nth_eupd=14 - elif [[ "${machine}" = "HERA" || "${machine}" = "JET" ]]; then - export nth_eupd=8 elif [[ ${machine} = "S4" ]]; then export npe_eupd=160 export nth_eupd=2 From 8940adddfe9d21189740e71487603fb2acee2336 Mon Sep 17 00:00:00 2001 From: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:02:29 -0400 Subject: [PATCH 4/9] Optimize the checkout script (#1956) * Multithread the checkout script #1953 --- sorc/checkout.sh | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/sorc/checkout.sh b/sorc/checkout.sh index 9c9addad1d..3554cd99da 100755 --- a/sorc/checkout.sh +++ b/sorc/checkout.sh @@ -34,7 +34,7 @@ function checkout() { # logdir [default: $(pwd)]: where you want logfiles written # CLEAN [default: NO]: whether to delete existing directories and create a fresh clone # - # Usage: checkout + # Usage: checkout # # Arguments # dir: Directory for the clone @@ -48,7 +48,8 @@ function checkout() { dir="$1" remote="$2" version="$3" - recursive=${4:-"YES"} + cpus="${4:-1}" # Default 1 thread + recursive=${5:-"YES"} name=$(echo "${dir}" | cut -d '.' -f 1) echo "Performing checkout of ${name}" @@ -90,7 +91,7 @@ function checkout() { fi if [[ "${recursive}" == "YES" ]]; then echo "|-- Updating submodules (if any)" - git submodule update --init --recursive >> "${logfile}" 2>&1 + git submodule update --init --recursive -j "${cpus}" >> "${logfile}" 2>&1 status=$? if ((status > 0)); then echo " WARNING: Error while updating submodules of ${name}" @@ -149,25 +150,33 @@ source "${topdir}/../workflow/gw_setup.sh" # The checkout version should always be a speciifc commit (hash or tag), not a branch errs=0 -checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" ; errs=$((errs + $?)) -checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" ; errs=$((errs + $?)) -checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" ; errs=$((errs + $?)) -checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" ; errs=$((errs + $?)) -checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" ; errs=$((errs + $?)) +# Checkout UFS submodules in parallel +checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" "8" ; errs=$((errs + $?)) + +# Run all other checkouts simultaneously with just 1 core each to handle submodules. +checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" & +checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" & +checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" & +checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" & if [[ ${checkout_gsi} == "YES" ]]; then - checkout "gsi_enkf.fd" "https://github.com/NOAA-EMC/GSI.git" "ca19008" "NO"; errs=$((errs + $?)) + checkout "gsi_enkf.fd" "https://github.com/NOAA-EMC/GSI.git" "ca19008" "1" "NO" & fi if [[ ${checkout_gdas} == "YES" ]]; then - checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22"; errs=$((errs + $?)) + checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22" & fi if [[ ${checkout_gsi} == "YES" || ${checkout_gdas} == "YES" ]]; then - checkout "gsi_utils.fd" "https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b"; errs=$((errs + $?)) - checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3"; errs=$((errs + $?)) + checkout "gsi_utils.fd" "https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b" & + checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3" & fi +# Go through each PID and verify no errors were reported. +for checkout_pid in $(jobs -p); do + wait "${checkout_pid}" || errs=$((errs + $?)) +done + if (( errs > 0 )); then echo "WARNING: One or more errors encountered during checkout process, please check logs before building" fi From e2c624d8904cd988394c73d0edb22fa593229d3f Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA <26926959+RussTreadon-NOAA@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:32:52 -0400 Subject: [PATCH 5/9] Refactor UFSDA ATM var and ens layout (#1945) --- parm/config/gfs/config.atmanl | 7 ++----- parm/config/gfs/config.atmensanl | 7 ++----- parm/config/gfs/config.resources | 13 +++++++++++++ parm/config/gfs/yaml/defaults.yaml | 8 ++++++++ 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/parm/config/gfs/config.atmanl b/parm/config/gfs/config.atmanl index 0d388f94bd..abfbd80734 100644 --- a/parm/config/gfs/config.atmanl +++ b/parm/config/gfs/config.atmanl @@ -13,11 +13,8 @@ export STATICB_TYPE="gsibec" export BERROR_YAML=${HOMEgfs}/sorc/gdas.cd/parm/atm/berror/staticb_${STATICB_TYPE}.yaml export INTERP_METHOD='barycentric' -export layout_x=1 -export layout_y=1 - -export io_layout_x=1 -export io_layout_y=1 +export io_layout_x=@IO_LAYOUT_X@ +export io_layout_y=@IO_LAYOUT_Y@ export JEDIEXE=${HOMEgfs}/exec/fv3jedi_var.x diff --git a/parm/config/gfs/config.atmensanl b/parm/config/gfs/config.atmensanl index 7a696fa734..58fd7b6e22 100644 --- a/parm/config/gfs/config.atmensanl +++ b/parm/config/gfs/config.atmensanl @@ -10,11 +10,8 @@ export OBS_LIST=${HOMEgfs}/sorc/gdas.cd/parm/atm/obs/lists/lgetkf_prototype.yaml export ATMENSYAML=${HOMEgfs}/sorc/gdas.cd/parm/atm/lgetkf/lgetkf.yaml export INTERP_METHOD='barycentric' -export layout_x=1 -export layout_y=1 - -export io_layout_x=1 -export io_layout_y=1 +export io_layout_x=@IO_LAYOUT_X@ +export io_layout_y=@IO_LAYOUT_Y@ export JEDIEXE=${HOMEgfs}/exec/fv3jedi_letkf.x diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 9919b81b7e..fc6624df98 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -189,6 +189,15 @@ elif [[ ${step} = "waveawipsgridded" ]]; then elif [[ "${step}" = "atmanlinit" ]]; then + # make below case dependent later + export layout_x=1 + export layout_y=1 + + layout_gsib_x=$(echo "${layout_x} * 3" | bc) + export layout_gsib_x + layout_gsib_y=$(echo "${layout_y} * 2" | bc) + export layout_gsib_y + export wtime_atmanlinit="00:10:00" export npe_atmanlinit=1 export nth_atmanlinit=1 @@ -790,6 +799,10 @@ elif [[ ${step} = "stage_ic" ]]; then elif [[ "${step}" = "atmensanlinit" ]]; then + # make below case dependent later + export layout_x=1 + export layout_y=1 + export wtime_atmensanlinit="00:10:00" export npe_atmensanlinit=1 export nth_atmensanlinit=1 diff --git a/parm/config/gfs/yaml/defaults.yaml b/parm/config/gfs/yaml/defaults.yaml index 8c2b4ff22b..c0298edb18 100644 --- a/parm/config/gfs/yaml/defaults.yaml +++ b/parm/config/gfs/yaml/defaults.yaml @@ -6,6 +6,14 @@ base: DO_JEDILANDDA: "NO" DO_MERGENSST: "NO" +atmanl: + IO_LAYOUT_X: 1 + IO_LAYOUT_Y: 1 + +atmensanl: + IO_LAYOUT_X: 1 + IO_LAYOUT_Y: 1 + aeroanl: IO_LAYOUT_X: 1 IO_LAYOUT_Y: 1 From 1b00224e18842cd873eb1779be08f96687e49e1f Mon Sep 17 00:00:00 2001 From: Kate Friedman Date: Tue, 24 Oct 2023 14:48:24 -0400 Subject: [PATCH 6/9] Set SENDCOM=YES for tracker/genesis tasks (#1971) Set SENDCOM to YES in config.vrfy to get outputs copied back to COM. Will reevaluate the need for SENDCOM when moving the tracker/genesis jobs out of the vrfy job with issue #235 work. Refs #1947 --- parm/config/gfs/config.vrfy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parm/config/gfs/config.vrfy b/parm/config/gfs/config.vrfy index 0f0ce4ff9d..8754609c50 100644 --- a/parm/config/gfs/config.vrfy +++ b/parm/config/gfs/config.vrfy @@ -66,6 +66,8 @@ fi # Cyclone genesis and cyclone track verification #------------------------------------------------- +export SENDCOM="YES" # Needed by tracker/genesis scripts still + export HOMEens_tracker=$BASE_GIT/TC_tracker/${tracker_ver} if [[ "${VRFYTRAK}" = "YES" ]]; then From c58deae0cf078d1ee093529064d74f60482aa3f4 Mon Sep 17 00:00:00 2001 From: "Henry R. Winterbottom" <49202169+HenryWinterbottom-NOAA@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:19:04 -0600 Subject: [PATCH 7/9] Updates for NOAA CSP AWS global-workflow related file paths. (#1970) Co-authored-by: henrywinterbottom-wxdev --- docs/source/noaa_csp.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/noaa_csp.rst b/docs/source/noaa_csp.rst index 3af8956293..66317efe92 100644 --- a/docs/source/noaa_csp.rst +++ b/docs/source/noaa_csp.rst @@ -183,14 +183,14 @@ the global-workflow. The software stack supporting the ``develop`` branch of the global-workflow is provided for the user and is located beneath -``/contrib/global-workflow/spack-stack``. The modules required for the +``/contrib/emc_static/spack-stack``. The modules required for the global-workflow execution may be loaded as follows. .. code-block:: bash user@host:$ module unuse /opt/cray/craype/default/modulefiles user@host:$ module unuse /opt/cray/modulefiles - user@host:$ module use /contrib/global-workflow/spack-stack/miniconda/modulefiles/miniconda + user@host:$ module use /contrib/emc_static/spack-stack/miniconda/modulefiles/miniconda user@host:$ module load py39_4.12.0 user@host:$ module load rocoto/1.3.3 From 7cdfad4eaa7abe0769ff13396c54e6d93afebf8f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Tue, 24 Oct 2023 22:19:46 +0000 Subject: [PATCH 8/9] Build GDASapp for CI tests (#1964) * added -u to global checkout so CI test builds tests for GDASapps * Update check_ci.sh needed more quotes --------- Co-authored-by: TerrenceMcGuinness-NOAA --- ci/scripts/check_ci.sh | 4 ++-- ci/scripts/clone-build_ci.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh index d5cf6a20bd..097e20ced4 100755 --- a/ci/scripts/check_ci.sh +++ b/ci/scripts/check_ci.sh @@ -89,9 +89,9 @@ for pr in ${pr_list}; do # Check to see if this PR that was opened by the weekly tests and if so close it if it passed on all platforms weekly_labels=$(${GH} pr view "${pr}" --repo "${REPO_URL}" --json headRefName,labels,author --jq 'select(.author.login | contains("emcbot")) | select(.headRefName | contains("weekly_ci")) | .labels[].name ') || true if [[ -n "${weekly_labels}" ]]; then - num_platforms=$(find ../platforms -type f -name "config.*" | wc -l) + num_platforms=$(find "${ROOT_DIR}/ci/platforms" -type f -name "config.*" | wc -l) passed=0 - for platforms in ../platforms/config.*; do + for platforms in "${ROOT_DIR}"/ci/platforms/config.*; do machine=$(basename "${platforms}" | cut -d. -f2) if [[ "${weekly_labels}" == *"CI-${machine^}-Passed"* ]]; then ((passed=passed+1)) diff --git a/ci/scripts/clone-build_ci.sh b/ci/scripts/clone-build_ci.sh index 796e4b7014..03eff13158 100755 --- a/ci/scripts/clone-build_ci.sh +++ b/ci/scripts/clone-build_ci.sh @@ -79,7 +79,7 @@ echo "${commit}" > "../commit" cd sorc || exit 1 set +e # TODO enable -u later when GDASApp tests are added -./checkout.sh -c -g >> log.checkout 2>&1 +./checkout.sh -c -g -u >> log.checkout 2>&1 checkout_status=$? if [[ ${checkout_status} != 0 ]]; then { From e817f5dd38c26a88f76d90eb71124f1acbfc5a8f Mon Sep 17 00:00:00 2001 From: Walter Kolczynski - NOAA Date: Wed, 25 Oct 2023 02:03:14 +0000 Subject: [PATCH 9/9] Fix path for marine products (#1966) When PR #1823 was merged, the name of the marine product template was not updated in ocnpost, leading the products to be placed in the wrong location and missed by the archive job. Resolves #1902 --- jobs/rocoto/ocnpost.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs/rocoto/ocnpost.sh b/jobs/rocoto/ocnpost.sh index 0766ac3b37..5a2dc091cf 100755 --- a/jobs/rocoto/ocnpost.sh +++ b/jobs/rocoto/ocnpost.sh @@ -29,7 +29,7 @@ YMD=${PDY} HH=${cyc} generate_com -rx COM_OCEAN_HISTORY COM_OCEAN_2D COM_OCEAN_3 COM_OCEAN_XSECT COM_ICE_HISTORY for grid in "0p50" "0p25"; do - YMD=${PDY} HH=${cyc} GRID=${grid} generate_com -rx "COM_OCEAN_GRIB_${grid}:COM_OCEAN_GRIB_TMPL" + YMD=${PDY} HH=${cyc} GRID=${grid} generate_com -rx "COM_OCEAN_GRIB_${grid}:COM_OCEAN_GRIB_GRID_TMPL" done for outdir in COM_OCEAN_2D COM_OCEAN_3D COM_OCEAN_XSECT COM_OCEAN_GRIB_0p25 COM_OCEAN_GRIB_0p50; do