From f159d39a3b28dfcc120cdcdf87d11a611c75061f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Fri, 21 Apr 2023 15:46:35 -0400 Subject: [PATCH] Add CI cron jobs (#1476) As a maintainer of the CI framework, I need a set of cron jobs that will fully automate the CI pipeline so that whenever the appropriate label on GitHub is created the PR gets cloned and built followed by a set of functional experiments that are executed and reported on. --- ci/cases/C96C48_hybatmDA.yaml | 15 +++ ci/cases/C96_atm3DVar.yaml | 14 ++ ci/experiments/C96C48_hybatmDA.yaml | 19 --- ci/experiments/C96C48_hybatmDA_also.yaml | 19 --- ci/{environments => platforms}/hera.sh | 3 - ci/{environments => platforms}/orion.sh | 4 +- ci/scripts/check_ci.sh | 115 +++++++++++++++++ ci/scripts/clone-build_ci.sh | 89 +++++++------ ci/scripts/create_experiment.py | 38 ++---- ci/scripts/driver.sh | 121 +++++++----------- ci/scripts/pygw | 1 + ci/scripts/run_ci.sh | 71 ++++++++++ modulefiles/module_gwci.hera.lua | 15 +++ modulefiles/module_gwci.orion.lua | 21 +++ ...setup.hera.lua => module_gwsetup.hera.lua} | 3 +- modulefiles/module_gwsetup.orion.lua | 17 +++ test/diff_grib_files.py | 1 + 17 files changed, 387 insertions(+), 179 deletions(-) create mode 100644 ci/cases/C96C48_hybatmDA.yaml create mode 100644 ci/cases/C96_atm3DVar.yaml delete mode 100644 ci/experiments/C96C48_hybatmDA.yaml delete mode 100644 ci/experiments/C96C48_hybatmDA_also.yaml rename ci/{environments => platforms}/hera.sh (57%) rename ci/{environments => platforms}/orion.sh (65%) create mode 100755 ci/scripts/check_ci.sh create mode 120000 ci/scripts/pygw create mode 100755 ci/scripts/run_ci.sh create mode 100644 modulefiles/module_gwci.hera.lua create mode 100644 modulefiles/module_gwci.orion.lua rename modulefiles/{module_setup.hera.lua => module_gwsetup.hera.lua} (99%) create mode 100644 modulefiles/module_gwsetup.orion.lua diff --git a/ci/cases/C96C48_hybatmDA.yaml b/ci/cases/C96C48_hybatmDA.yaml new file mode 100644 index 0000000000..9efce40900 --- /dev/null +++ b/ci/cases/C96C48_hybatmDA.yaml @@ -0,0 +1,15 @@ +experiment: + mode: cycled + +arguments: + app: ATM + resdet: 96 + resens: 48 + comrot: ${RUNTESTS}/${pslot}/COMROT + expdir: ${RUNTESTS}/${pslot}/EXPDIR + icsdir: ${ICSDIR_ROOT}/C96C48 + idate: 2021122018 + edate: 2021122200 + nens: 2 + gfs_cyc: 1 + start: cold diff --git a/ci/cases/C96_atm3DVar.yaml b/ci/cases/C96_atm3DVar.yaml new file mode 100644 index 0000000000..1648432e09 --- /dev/null +++ b/ci/cases/C96_atm3DVar.yaml @@ -0,0 +1,14 @@ +experiment: + mode: cycled + +arguments: + app: ATM + resdet: 96 + comrot: ${RUNTESTS}/${pslot}/COMROT + expdir: ${RUNTESTS}/${pslot}/EXPDIR + icsdir: ${ICSDIR_ROOT}/C96C48 + idate: 2021122018 + edate: 2021122100 + nens: 0 + gfs_cyc: 1 + start: cold diff --git a/ci/experiments/C96C48_hybatmDA.yaml b/ci/experiments/C96C48_hybatmDA.yaml deleted file mode 100644 index cf291e5081..0000000000 --- a/ci/experiments/C96C48_hybatmDA.yaml +++ /dev/null @@ -1,19 +0,0 @@ -environment: - HOMEgfs: ${HOMEGFS} # TODO - using the env var ${HOMEgfs} will cause - # the rocoto XML file to have HOMEgfs set to None -experiment: - mode: cycled - -arguments: - pslot: ${pslot} # TODO - same bug as above with HOMEgfs - app: ATM - resdet: 96 - resens: 48 - comrot: ${RUNTEST} - expdir: ${RUNTEST}/expdir - icsdir: ${ICSDIR_ROOT}/C96C48 - idate: 2021122018 - edate: 2021122200 - nens: 2 - gfs_cyc: 1 - start: cold diff --git a/ci/experiments/C96C48_hybatmDA_also.yaml b/ci/experiments/C96C48_hybatmDA_also.yaml deleted file mode 
100644 index cf291e5081..0000000000 --- a/ci/experiments/C96C48_hybatmDA_also.yaml +++ /dev/null @@ -1,19 +0,0 @@ -environment: - HOMEgfs: ${HOMEGFS} # TODO - using the env var ${HOMEgfs} will cause - # the rocoto XML file to have HOMEgfs set to None -experiment: - mode: cycled - -arguments: - pslot: ${pslot} # TODO - same bug as above with HOMEgfs - app: ATM - resdet: 96 - resens: 48 - comrot: ${RUNTEST} - expdir: ${RUNTEST}/expdir - icsdir: ${ICSDIR_ROOT}/C96C48 - idate: 2021122018 - edate: 2021122200 - nens: 2 - gfs_cyc: 1 - start: cold diff --git a/ci/environments/hera.sh b/ci/platforms/hera.sh similarity index 57% rename from ci/environments/hera.sh rename to ci/platforms/hera.sh index 843b8b103b..35fe7bca91 100644 --- a/ci/environments/hera.sh +++ b/ci/platforms/hera.sh @@ -1,10 +1,7 @@ #!/usr/bin/bash export GFS_CI_ROOT=/scratch1/NCEPDEV/global/Terry.McGuinness/GFS_CI_ROOT -export GFS_MODULE_USE="${GFS_CI_ROOT}/global-workflow/modulefiles" export SLURM_ACCOUNT=fv3-cpu export SALLOC_ACCOUNT="${SLURM_ACCOUNT}" export SBATCH_ACCOUNT="${SLURM_ACCOUNT}" export SLURM_QOS=debug -export repo_url="https://github.com/NOAA-EMC/global-workflow.git" -#export repo_url="https://github.com/TerrenceMcGuinness-NOAA/global-workflow.git" export ICSDIR_ROOT="/scratch1/NCEPDEV/global/glopara/data/ICSDIR" diff --git a/ci/environments/orion.sh b/ci/platforms/orion.sh similarity index 65% rename from ci/environments/orion.sh rename to ci/platforms/orion.sh index 901d9488e9..7d69a3b276 100644 --- a/ci/environments/orion.sh +++ b/ci/platforms/orion.sh @@ -1,7 +1,7 @@ #!/usr/bin/bash -export GFS_CI_ROOT="TDB" #TODO -export GFS_MODULE_USE="${GFS_CI_ROOT}/global-workflow/modulefiles" +export GFS_CI_ROOT=/work2/noaa/global/mterry/GFS_CI_ROOT +export ICSDIR_ROOT=/work/noaa/global/glopara/data/ICSDIR +export SLURM_ACCOUNT=fv3-cpu export SALLOC_ACCOUNT=${SLURM_ACCOUNT} export SBATCH_ACCOUNT=${SLURM_ACCOUNT} diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh new file mode 100755 index 0000000000..aa48e9f894 --- /dev/null +++ b/ci/scripts/check_ci.sh @@ -0,0 +1,115 @@ +#!/bin/bash +set -eux +##################################################################################### +# +# Script description: BASH script for checking for cases in a given PR and +# running rocotostat on each to determine if the experiment has +# succeeded or failed. This script is intended +# to run from within a cron job in the CI Managers account +# Abstract TODO +##################################################################################### + +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" +scriptname=$(basename "${BASH_SOURCE[0]}") +echo "Begin ${scriptname} at $(date -u)" || true +export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' + +GH=${HOME}/bin/gh +REPO_URL=${REPO_URL:-"https://github.com/NOAA-EMC/global-workflow.git"} + +######################################################################### +# Set up runtime environment variables for accounts on supported machines +######################################################################### + +source "${HOMEgfs}/ush/detect_machine.sh" +case ${MACHINE_ID} in + hera | orion) + echo "Running Automated Testing on ${MACHINE_ID}" + source "${HOMEgfs}/ci/platforms/${MACHINE_ID}.sh" + ;; + *) + echo "Unsupported platform. Exiting with error." 
+ exit 1 + ;; +esac +set +x +source "${HOMEgfs}/ush/module-setup.sh" +module use "${HOMEgfs}/modulefiles" +module load "module_gwsetup.${MACHINE_ID}" +module list +set -x +rocotostat=$(which rocotostat) +if [[ -z ${rocotostat} ]]; then + echo "rocotostat not found on system" + exit 1 +else + echo "rocotostat being used from ${rocotostat}" +fi + +pr_list_file="open_pr_list" + +if [[ -s "${GFS_CI_ROOT}/${pr_list_file}" ]]; then + pr_list=$(cat "${GFS_CI_ROOT}/${pr_list_file}") +else + echo "no PRs to process .. exit" + exit 0 +fi + +############################################################# +# Loop through all PRs in the PR list and look for experiments in +# the RUNTESTS dir and for each one run rocotostat on them +############################################################# + +for pr in ${pr_list}; do + id=$("${GH}" pr view "${pr}" --repo "${REPO_URL}" --json id --jq '.id') + echo "Processing Pull Request #${pr} and looking for cases" + pr_dir="${GFS_CI_ROOT}/PR/${pr}" + + # If there is no RUNTESTS dir for this PR then cases have not been made yet + if [[ ! -d "${pr_dir}/RUNTESTS" ]]; then + continue + fi + num_cases=$(find "${pr_dir}/RUNTESTS" -mindepth 1 -maxdepth 1 -type d | wc -l) || true + + # Check for PR success when ${pr_dir}/RUNTESTS is void of subfolders + # since all successful ones were previously removed + if [[ "${num_cases}" -eq 0 ]] && [[ -d "${pr_dir}/RUNTESTS" ]]; then + "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Passed" + "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + sed -i "/${pr}/d" "${GFS_CI_ROOT}/${pr_list_file}" + # Completely remove the PR and its cloned repo on success of all cases + rm -Rf "${pr_dir}" + continue + fi + + for cases in "${pr_dir}/RUNTESTS/"*; do + pslot=$(basename "${cases}") + xml="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.xml" + db="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.db" + rocoto_stat_output=$("${rocotostat}" -w "${xml}" -d "${db}" -s | grep -v CYCLE) || true + num_cycles=$(echo "${rocoto_stat_output}" | wc -l) || true + num_done=$(echo "${rocoto_stat_output}" | grep -c Done) || true + num_succeeded=$("${rocotostat}" -w "${xml}" -d "${db}" -a | grep -c SUCCEEDED) || true + echo "${pslot} Total Cycles: ${num_cycles} number done: ${num_done}" || true + num_failed=$("${rocotostat}" -w "${xml}" -d "${db}" -a | grep -c -E 'FAIL|DEAD') || true + if [[ ${num_failed} -ne 0 ]]; then + { + echo "Experiment ${pslot} Terminated: *FAILED*" + echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true + } >> "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed" + "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + sed -i "/${pr}/d" "${GFS_CI_ROOT}/${pr_list_file}" + fi + if [[ "${num_done}" -eq "${num_cycles}" ]]; then + { + echo "Experiment ${pslot} completed: *SUCCESS*" + echo "Experiment ${pslot} Completed at $(date)" || true + echo -n "with ${num_succeeded} successfully completed jobs" || true + } >> "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + # Remove experiment cases that completed successfully + rm -Rf "${pr_dir}/RUNTESTS/${pslot}" + fi + done +done diff --git a/ci/scripts/clone-build_ci.sh 
b/ci/scripts/clone-build_ci.sh index e6b5eb53ef..022cc44378 100755 --- a/ci/scripts/clone-build_ci.sh +++ b/ci/scripts/clone-build_ci.sh @@ -1,11 +1,6 @@ #!/bin/bash set -eux -################################################################# -# TODO using static build for GitHub CLI until fixed in HPC-Stack -################################################################# -GH=/home/Terry.McGuinness/bin/gh -repo_url=${repo_url:-"https://github.com/global-workflow.git"} ##################################################################### # Usage and arguments for specfifying cloned directgory ##################################################################### @@ -45,25 +40,14 @@ while getopts "p:d:o:h" opt; do esac done -#################################################################### -# start output file -{ - echo "Automated global-workflow Testing Results:" - echo "Machine: ${CI_HOST}" - echo '```' - echo "Start: $(date) on $(hostname)" || true - echo "---------------------------------------------------" -} >> "${outfile}" -###################################################################### - -cd "${repodir}" +cd "${repodir}" || exit 1 # clone copy of repo if [[ -d global-workflow ]]; then rm -Rf global-workflow fi -git clone "${repo_url}" -cd global-workflow +git clone "${REPO_URL}" +cd global-workflow || exit 1 pr_state=$(gh pr view "${PR}" --json state --jq '.state') if [[ "${pr_state}" != "OPEN" ]]; then @@ -73,34 +57,63 @@ if [[ "${pr_state}" != "OPEN" ]]; then fi # checkout pull request -"${GH}" pr checkout "${PR}" --repo "${repo_url}" +"${GH}" pr checkout "${PR}" --repo "${REPO_URL}" +HOMEgfs="${PWD}" +source "${HOMEgfs}/ush/detect_machine.sh" + +#################################################################### +# start output file +{ + echo "Automated global-workflow Testing Results:" + echo '```' + echo "Machine: ${MACHINE_ID^}" + echo "Start: $(date) on $(hostname)" || true + echo "---------------------------------------------------" +} >> "${outfile}" +###################################################################### # get commit hash commit=$(git log --pretty=format:'%h' -n 1) echo "${commit}" > "../commit" -# run build script -cd sorc +# run checkout script +cd sorc || exit 1 +set +e +./checkout.sh -c -g -u &>> log.checkout +checkout_status=$? +if [[ ${checkout_status} != 0 ]]; then + { + echo "Checkout: *FAILED*" + echo "Checkout: Failed at $(date)" || true + echo "Checkout: see output at ${PWD}/log.checkout" + } >> "${outfile}" + exit "${checkout_status}" +else + { + echo "Checkout: *SUCCESS*" + echo "Checkout: Completed at $(date)" || true + } >> "${outfile}" +fi + +# build full cycle +source "${HOMEgfs}/ush/module-setup.sh" export BUILD_JOBS=8 rm -rf log.build -./checkout.sh -g -c -# build full cycle -./build_all.sh -g &>> log.build - -# Validations +./build_all.sh &>> log.build build_status=$? 
-if [[ ${build_status} -eq 0 ]]; then -{ - echo "Build: *SUCCESS*" - echo "Build: Completed at $(date)" || true -} >> "${outfile}" + +if [[ ${build_status} != 0 ]]; then + { + echo "Build: *FAILED*" + echo "Build: Failed at $(date)" || true + echo "Build: see output at ${PWD}/log.build" + } >> "${outfile}" + exit "${build_status}" else -{ - echo "Build: *FAILED*" - echo "Build: Failed at $(date)" || true - echo "Build: see output at ${PWD}/log.build" -} - echo '```' >> "${outfile}" + { + echo "Build: *SUCCESS*" + echo "Build: Completed at $(date)" || true + } >> "${outfile}" fi ./link_workflow.sh diff --git a/ci/scripts/create_experiment.py b/ci/scripts/create_experiment.py index 6b946f3a4a..ce95714d48 100755 --- a/ci/scripts/create_experiment.py +++ b/ci/scripts/create_experiment.py @@ -10,14 +10,14 @@ ${HOMEgfs}/workflow/setup_expt.py ${HOMEgfs}/workflow/setup_xml.py -The yaml file are simply the argments for these two scripts. +The yaml file are simply the arguments for these two scripts. After this scripts runs these two the use will have an experiment ready for launching Output ------ -Functionally an experement is setup as a result running the two scripts discribed above -with an error code of 0 apon success. +Functionally an experiment is setup as a result running the two scripts described above +with an error code of 0 upon success. """ import sys @@ -30,8 +30,6 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from workflow.hosts import Host - logger = Logger(level='DEBUG', colored_log=True) @@ -47,12 +45,7 @@ def input_args(): Description ----------- - A full path to a YAML file with the following format with required sections: environment, experiment, arguments - - environment: - HOMEgfs: ${HOMEGFS} - used to pass the environment variable $HOMEGFS - of the path to the global-workflow repo being tested + A full path to a YAML file with the following format with required sections: experiment, arguments experiment: mode: @@ -60,7 +53,7 @@ def input_args(): arguments: holds all the remaining key values pairs for all requisite arguments documented for setup_expt.py - + Note: the argument pslot is derived from the basename of the yamlfile itself Returns ------- @@ -68,10 +61,10 @@ def input_args(): args: Namespace Namespace with the value of the file path to a yaml file from the key yaml - +:w """ - description = """Single agument as a yaml file containing the + description = """Single argument as a yaml file containing the key value pairs as arguments to setup_expt.py """ @@ -79,6 +72,7 @@ def input_args(): formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('--yaml', help='yaml configuration file per experiment', type=str, required=True) + parser.add_argument('--dir', help='full path to top level of repo of global-workflow', type=str, required=True) args = parser.parse_args() return args @@ -87,17 +81,10 @@ def input_args(): if __name__ == '__main__': user_inputs = input_args() - - try: - host = Host() - logger.info(f'Running on HOST:{host.machine}') - except NotImplementedError: - logger.error(f'HOST:{socket.gethostname()} is not currently supported') - sys.exit(1) - setup_expt_args = YAMLFile(path=user_inputs.yaml) - HOMEgfs = setup_expt_args.environment.HOMEgfs + HOMEgfs = user_inputs.dir + pslot = Path(user_inputs.yaml).stem mode = setup_expt_args.experiment.mode setup_expt_cmd = Executable(Path.absolute(Path.joinpath(Path(HOMEgfs), 'workflow', 'setup_expt.py'))) @@ -107,11 +94,14 @@ def input_args(): 
setup_expt_cmd.add_default_arg(f'--{conf}') setup_expt_cmd.add_default_arg(str(value)) + setup_expt_cmd.add_default_arg('--pslot') + setup_expt_cmd.add_default_arg(pslot) + logger.info(f'Run command: {setup_expt_cmd.command}') setup_expt_cmd(output='stdout_expt', error='stderr_expt') setup_xml_cmd = Executable(Path.absolute(Path.joinpath(Path(HOMEgfs), 'workflow', 'setup_xml.py'))) - expdir = Path.absolute(Path.joinpath(Path(setup_expt_args.arguments.expdir), Path(setup_expt_args.arguments.pslot))) + expdir = Path.absolute(Path.joinpath(Path(setup_expt_args.arguments.expdir), Path(pslot))) setup_xml_cmd.add_default_arg(str(expdir)) logger.info(f'Run command: {setup_xml_cmd.command}') diff --git a/ci/scripts/driver.sh b/ci/scripts/driver.sh index 60634c3352..0bd90db36c 100755 --- a/ci/scripts/driver.sh +++ b/ci/scripts/driver.sh @@ -1,5 +1,6 @@ -#!/bin/bash --login -# +#!/bin/bash +set -eux + ##################################################################################### # # Script description: Top level driver script for checking PR @@ -7,7 +8,7 @@ # # Abstract: # -# This script uses GitHub CLI to check for Pull Requests with {machine}-CI tags on the +# This script uses GitHub CLI to check for Pull Requests with CI-Ready-${machine} tags on the # development branch for the global-workflow repo. It then stages tests directories per # PR number and calls clone-build_ci.sh to perform a clone and full build from $(HOMEgfs)/sorc # of the PR. It then is ready to run a suite of regression tests with various @@ -17,50 +18,26 @@ ################################################################# # TODO using static build for GitHub CLI until fixed in HPC-Stack ################################################################# -GH=/home/Terry.McGuinness/bin/gh -repo_url=${repo_url:-"https://github.com/NOAA-EMC/global-workflow.git"} +export GH=${HOME}/bin/gh +export REPO_URL=${REPO_URL:-"https://github.com/NOAA-EMC/global-workflow.git"} ################################################################ # Setup the reletive paths to scripts and PS4 for better logging ################################################################ -WF_ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" scriptname=$(basename "${BASH_SOURCE[0]}") echo "Begin ${scriptname} at $(date -u)" || true export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' - -usage() { - set +x - echo - echo "Usage: $0 -h" - echo - echo " -h display this message and quit" - echo - echo "This is top level script to run CI tests on the global-workflow repo" - if [[ -n "${TARGET+x}" ]]; then - echo "on the DEFAULT: ${TARGET} machine" - fi - echo - exit 0 -} - - ######################################################################### # Set up runtime environment varibles for accounts on supproted machines ######################################################################### -source "${WF_ROOT_DIR}/ush/detect_machine.sh" -if [[ "${MACHINE_ID}" != "UNKNOWN" ]]; then - TARGET="${MACHINE_ID}" -else - echo "Unsupported platform. Exiting with error." - exit 1 -fi - -case ${TARGET} in +source "${HOMEgfs}/ush/detect_machine.sh" +case ${MACHINE_ID} in hera | orion) - echo "Running Automated Testing on ${TARGET}" - source "${WF_ROOT_DIR}/ci/environments/${TARGET}.sh" + echo "Running Automated Testing on ${MACHINE_ID}" + source "${HOMEgfs}/ci/platforms/${MACHINE_ID}.sh" ;; *) echo "Unsupported platform. Exiting with error." 
@@ -68,15 +45,22 @@ case ${TARGET} in ;; esac +###################################################### +# setup runtime env for correct python install and git +###################################################### +set +x +source "${HOMEgfs}/ush/module-setup.sh" +module use "${HOMEgfs}/modulefiles" +module load "module_gwsetup.${MACHINE_ID}" +set -x + ############################################################ # query repo and get list of open PRs with tags {machine}-CI ############################################################ -set -eux -export CI_HOST="${TARGET^}" pr_list_file="open_pr_list" -rm -f "${pr_list_file}" -list=$(${GH} pr list --repo "${repo_url}" --label "${CI_HOST}-CI" --state "open") -list=$(echo "${list}" | awk '{print $1;}' > "${GFS_CI_ROOT}/${pr_list_file}") +touch "${GFS_CI_ROOT}/${pr_list_file}" +list=$(${GH} pr list --repo "${REPO_URL}" --label "CI-${MACHINE_ID^}-Ready" --state "open") +list=$(echo "${list}" | awk '{print $1;}' >> "${GFS_CI_ROOT}/${pr_list_file}") if [[ -s "${GFS_CI_ROOT}/${pr_list_file}" ]]; then pr_list=$(cat "${GFS_CI_ROOT}/${pr_list_file}") @@ -84,71 +68,64 @@ else echo "no PRs to process .. exit" exit 0 fi - + ############################################################# # Loop throu all open PRs -# Clone, checkout, build, creat set of experiments, for each +# Clone, checkout, build, creat set of cases, for each ############################################################# -cd "${GFS_CI_ROOT}" for pr in ${pr_list}; do - "${GH}" pr edit --repo "${repo_url}" "${pr}" --remove-label "${CI_HOST}-CI" --add-label "${CI_HOST}-Running" + + "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Ready" --add-label "CI-${MACHINE_ID^}-Building" echo "Processing Pull Request #${pr}" pr_dir="${GFS_CI_ROOT}/PR/${pr}" mkdir -p "${pr_dir}" # call clone-build_ci to clone and build PR - id=$("${GH}" pr view "${pr}" --repo "${repo_url}" --json id --jq '.id') - "${WF_ROOT_DIR}/ci/scripts/clone-build_ci.sh" -p "${pr}" -d "${pr_dir}" -o "${pr_dir}/output_${id}" + id=$("${GH}" pr view "${pr}" --repo "${REPO_URL}" --json id --jq '.id') + set +e + "${HOMEgfs}/ci/scripts/clone-build_ci.sh" -p "${pr}" -d "${pr_dir}" -o "${pr_dir}/output_${id}" ci_status=$? + set -e if [[ ${ci_status} -eq 0 ]]; then - #setup runtime env for correct python install - export HOMEGFS="${pr_dir}/global-workflow" - module use "${HOMEGFS}/modulefiles" - module load "module_setup.${TARGET}" - module list #setup space to put an experiment - export RUNTEST="${pr_dir}/RUNTEST" - rm -Rf "${RUNTEST:?}"/* - mkdir -p "${RUNTEST}" - #make links to the python packages used in the PR'ed repo - cd "${WF_ROOT_DIR}/ci/scripts" - if [[ ! -L workflow ]]; then - ln -s "${HOMEGFS}/workflow" workflow - fi - if [[ ! 
-L pygw ]]; then - ln -s "${HOMEGFS}/ush/python/pygw/src/pygw" pygw - fi + # export RUNTESTS for yaml case files to pickup + export RUNTESTS="${pr_dir}/RUNTESTS" + rm -Rf "${pr_dir:?}/RUNTESTS/"* + ############################################################# - # loop over every yaml file in ${WF_ROOT_DIR}/ci/experiments + # loop over every yaml file in ${HOMEgfs}/ci/cases # and create an run directory for each one for this PR loop ############################################################# - for yaml_config in "${WF_ROOT_DIR}/ci/experiments/"*.yaml; do + for yaml_config in "${HOMEgfs}/ci/cases/"*.yaml; do pslot=$(basename "${yaml_config}" .yaml) || true export pslot - "${WF_ROOT_DIR}/ci/scripts/create_experiment.py" --yaml "${WF_ROOT_DIR}/ci/experiments/${pslot}.yaml" + set +e + "${HOMEgfs}/ci/scripts/create_experiment.py" --yaml "${HOMEgfs}/ci/cases/${pslot}.yaml" --dir "${pr_dir}/global-workflow" ci_status=$? + set -e if [[ ${ci_status} -eq 0 ]]; then { - echo "Created experiment" - echo "Experiment setup: Completed at $(date) for expirment ${pslot}" || true + echo "Created experiment: *SUCCESS*" + echo "Case setup: Completed at $(date) for experiment ${pslot}" || true } >> "${GFS_CI_ROOT}/PR/${pr}/output_${id}" + "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Running" else { - echo "Failed on createing experiment ${pslot}" + echo "Failed to create experiment}: *FAIL* ${pslot}" echo "Experiment setup: failed at $(date) for experiment ${pslot}" || true } >> "${GFS_CI_ROOT}/PR/${pr}/output_${id}" - "${GH}" pr edit "${pr}" --repo "${repo_url}" --remove-label "${CI_HOST}-Running" --add-label "${CI_HOST}-Failed" + "${GH}" pr edit "${pr}" --repo "${REPO_URL}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Failed" fi done - "${GH}" pr comment "${pr}" --repo "${repo_url}" --body-file "${GFS_CI_ROOT}/PR/${pr}/output_${id}" - "${GH}" pr edit --repo "${repo_url}" "${pr}" --remove-label "${CI_HOST}-Running" --add-label "${CI_HOST}-Passed" + else { echo "Failed on cloning and building global-workflowi PR: ${pr}" - echo "CI on ${CI_HOST} failed to build on $(date) for repo ${repo_url}}" || true + echo "CI on ${MACHINE_ID^} failed to build on $(date) for repo ${REPO_URL}}" || true } >> "${GFS_CI_ROOT}/PR/${pr}/output_${id}" - "${GH}" pr edit "${pr}" --repo "${repo_url}" --remove-label "${CI_HOST}-Running" --add-label "${CI_HOST}-Failed" + "${GH}" pr edit "${pr}" --repo "${REPO_URL}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Failed" fi + "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${GFS_CI_ROOT}/PR/${pr}/output_${id}" done # looping over each open and labeled PR diff --git a/ci/scripts/pygw b/ci/scripts/pygw new file mode 120000 index 0000000000..77d784f6ca --- /dev/null +++ b/ci/scripts/pygw @@ -0,0 +1 @@ +../../ush/python/pygw/src/pygw \ No newline at end of file diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh new file mode 100755 index 0000000000..c79ea06e77 --- /dev/null +++ b/ci/scripts/run_ci.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -eux + +##################################################################################### +# +# Script description: BASH script for checking for cases in a given PR and +# simply running rocotorun on each. 
This script is intended + to run from within a cron job in the CI Managers account + Abstract TODO +##################################################################################### + +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" +scriptname=$(basename "${BASH_SOURCE[0]}") +echo "Begin ${scriptname} at $(date -u)" || true +export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' + +######################################################################### +# Set up runtime environment variables for accounts on supported machines +######################################################################### + +source "${HOMEgfs}/ush/detect_machine.sh" +case ${MACHINE_ID} in + hera | orion) + echo "Running Automated Testing on ${MACHINE_ID}" + source "${HOMEgfs}/ci/platforms/${MACHINE_ID}.sh" + ;; + *) + echo "Unsupported platform. Exiting with error." + exit 1 + ;; +esac +set +x +source "${HOMEgfs}/ush/module-setup.sh" +module use "${HOMEgfs}/modulefiles" +module load "module_gwsetup.${MACHINE_ID}" +module list +set -eux +rocotorun=$(which rocotorun) +if [[ -n ${rocotorun} ]]; then + echo "rocotorun being used from ${rocotorun}" +else + echo "rocotorun not found on system" + exit 1 +fi + +pr_list_file="open_pr_list" + +if [[ -s "${GFS_CI_ROOT}/${pr_list_file}" ]]; then + pr_list=$(cat "${GFS_CI_ROOT}/${pr_list_file}") +else + echo "no PRs to process .. exit" + exit 0 +fi + +############################################################# +# Loop through all PRs in the PR list and look for experiments in +# the RUNTESTS dir and for each one run rocotorun on them +############################################################# + +for pr in ${pr_list}; do + echo "Processing Pull Request #${pr} and looking for cases" + pr_dir="${GFS_CI_ROOT}/PR/${pr}" + for cases in "${pr_dir}/RUNTESTS/"*; do + pslot=$(basename "${cases}") + xml="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.xml" + db="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.db" + echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" + "${rocotorun}" -v 10 -w "${xml}" -d "${db}" + done +done + diff --git a/modulefiles/module_gwci.hera.lua b/modulefiles/module_gwci.hera.lua new file mode 100644 index 0000000000..f4b62a5fd2 --- /dev/null +++ b/modulefiles/module_gwci.hera.lua @@ -0,0 +1,15 @@ +help([[ +Load environment to run GFS workflow setup scripts on Hera +]]) + +prepend_path("MODULEPATH", "/scratch2/NCEPDEV/nwprod/hpc-stack/libs/hpc-stack/modulefiles/stack") + +load(pathJoin("hpc", "1.1.0")) +load(pathJoin("hpc-intel", "18.0.5.274")) +load(pathJoin("hpc-impi", "2018.0.4")) + +load(pathJoin("netcdf","4.7.4")) +load(pathJoin("nccmp","1.8.7.0")) +load(pathJoin("wgrib2", "2.0.8")) + +whatis("Description: GFS run setup CI environment") diff --git a/modulefiles/module_gwci.orion.lua b/modulefiles/module_gwci.orion.lua new file mode 100644 index 0000000000..779e80a454 --- /dev/null +++ b/modulefiles/module_gwci.orion.lua @@ -0,0 +1,21 @@ +help([[ +Load environment to run GFS workflow ci scripts on Orion +]]) + +prepend_path("MODULEPATH", "/apps/contrib/NCEP/hpc-stack/libs/hpc-stack/modulefiles/stack") + +load(pathJoin("hpc", "1.1.0")) +load(pathJoin("hpc-intel", "2018.4")) +load(pathJoin("hpc-impi", "2018.4")) +load(pathJoin("netcdf","4.7.4")) +load(pathJoin("nccmp","1.8.7.0")) +load(pathJoin("contrib","0.1")) +load(pathJoin("wgrib2","3.0.2")) + +prepend_path("MODULEPATH", "/work2/noaa/global/wkolczyn/save/hpc-stack/modulefiles/stack") +load(pathJoin("hpc", "1.2.0")) +load(pathJoin("hpc-intel", "2018.4")) 
+load(pathJoin("hpc-miniconda3", "4.6.14")) +load(pathJoin("gfs_workflow", "1.0.0")) + +whatis("Description: GFS run ci top-level sripts environment") diff --git a/modulefiles/module_setup.hera.lua b/modulefiles/module_gwsetup.hera.lua similarity index 99% rename from modulefiles/module_setup.hera.lua rename to modulefiles/module_gwsetup.hera.lua index 4971a3f2d9..a07b32b6a6 100644 --- a/modulefiles/module_setup.hera.lua +++ b/modulefiles/module_gwsetup.hera.lua @@ -2,10 +2,9 @@ help([[ Load environment to run GFS workflow setup scripts on Hera ]]) --- Temporary until official hpc-stack is updated - load(pathJoin("rocoto")) +-- Temporary until official hpc-stack is updated prepend_path("MODULEPATH", "/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/modulefiles/stack") load(pathJoin("hpc", "1.2.0")) load(pathJoin("hpc-miniconda3", "4.6.14")) diff --git a/modulefiles/module_gwsetup.orion.lua b/modulefiles/module_gwsetup.orion.lua new file mode 100644 index 0000000000..37f3187fb4 --- /dev/null +++ b/modulefiles/module_gwsetup.orion.lua @@ -0,0 +1,17 @@ +help([[ +Load environment to run GFS workflow ci scripts on Orion +]]) + +-- Temporary until official hpc-stack is updated + +prepend_path("MODULEPATH", "/apps/modulefiles/core") +load(pathJoin("contrib","0.1")) +load(pathJoin("rocoto","1.3.3")) +load(pathJoin("git","2.28.0")) + +prepend_path("MODULEPATH", "/work2/noaa/global/wkolczyn/save/hpc-stack/modulefiles/stack") +load(pathJoin("hpc", "1.2.0")) +load(pathJoin("hpc-miniconda3", "4.6.14")) +load(pathJoin("gfs_workflow", "1.0.0")) + +whatis("Description: GFS run ci top-level sripts environment") diff --git a/test/diff_grib_files.py b/test/diff_grib_files.py index e0eb7936db..9c01afbb18 100755 --- a/test/diff_grib_files.py +++ b/test/diff_grib_files.py @@ -72,4 +72,5 @@ def count_nonid_corr(test_string: str, quiet=False): wgrib2_cmd = f"wgrib2 {fileA} -var -rpn 'sto_1' -import_grib {fileB} -rpn 'rcl_1:print_corr'" string = subprocess.run(wgrib2_cmd, shell=True, stdout=subprocess.PIPE).stdout.decode("utf-8") + count_nonid_corr(string)