Skip to content

Commit

Permalink
Add CI cron jobs (#1476)
Browse files Browse the repository at this point in the history
As a maintainer of the CI framework, I need a set of cron jobs that will fully automate the CI pipeline so that whenever the appropriate label on GitHub is created the PR gets cloned and built followed by a set of functional experiments that are executed and reported on.
  • Loading branch information
TerrenceMcGuinness-NOAA authored Apr 21, 2023
1 parent 587e469 commit f159d39
Show file tree
Hide file tree
Showing 17 changed files with 387 additions and 179 deletions.
15 changes: 15 additions & 0 deletions ci/cases/C96C48_hybatmDA.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
experiment:
mode: cycled

arguments:
app: ATM
resdet: 96
resens: 48
comrot: ${RUNTESTS}/${pslot}/COMROT
expdir: ${RUNTESTS}/${pslot}/EXPDIR
icsdir: ${ICSDIR_ROOT}/C96C48
idate: 2021122018
edate: 2021122200
nens: 2
gfs_cyc: 1
start: cold
14 changes: 14 additions & 0 deletions ci/cases/C96_atm3DVar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
experiment:
mode: cycled

arguments:
app: ATM
resdet: 96
comrot: ${RUNTESTS}/${pslot}/COMROT
expdir: ${RUNTESTS}/${pslot}/EXPDIR
icsdir: ${ICSDIR_ROOT}/C96C48
idate: 2021122018
edate: 2021122100
nens: 0
gfs_cyc: 1
start: cold
19 changes: 0 additions & 19 deletions ci/experiments/C96C48_hybatmDA.yaml

This file was deleted.

19 changes: 0 additions & 19 deletions ci/experiments/C96C48_hybatmDA_also.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions ci/environments/hera.sh → ci/platforms/hera.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
#!/usr/bin/bash
export GFS_CI_ROOT=/scratch1/NCEPDEV/global/Terry.McGuinness/GFS_CI_ROOT
export GFS_MODULE_USE="${GFS_CI_ROOT}/global-workflow/modulefiles"
export SLURM_ACCOUNT=fv3-cpu
export SALLOC_ACCOUNT="${SLURM_ACCOUNT}"
export SBATCH_ACCOUNT="${SLURM_ACCOUNT}"
export SLURM_QOS=debug
export repo_url="https://github.com/NOAA-EMC/global-workflow.git"
#export repo_url="https://github.com/TerrenceMcGuinness-NOAA/global-workflow.git"
export ICSDIR_ROOT="/scratch1/NCEPDEV/global/glopara/data/ICSDIR"
4 changes: 2 additions & 2 deletions ci/environments/orion.sh → ci/platforms/orion.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/bash

export GFS_CI_ROOT="TDB" #TODO
export GFS_MODULE_USE="${GFS_CI_ROOT}/global-workflow/modulefiles"
export GFS_CI_ROOT=/work2/noaa/global/mterry/GFS_CI_ROOT
export ICSDIR_ROOT=/work/noaa/global/glopara/data/ICSDIR
export SLURM_ACCOUNT=fv3-cpu
export SALLOC_ACCOUNT=${SLURM_ACCOUNT}
export SBATCH_ACCOUNT=${SLURM_ACCOUNT}
Expand Down
115 changes: 115 additions & 0 deletions ci/scripts/check_ci.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/bin/bash
set -eux
#####################################################################################
#
# Script description: BASH script that checks for cases in a given PR and runs
#                     rocotostat on each one to determine whether the experiment
#                     has succeeded or failed. This script is intended to run
#                     from within a cron job in the CI Manager's account.
# Abstract TODO
#####################################################################################

# Repository root: two directory levels above the directory holding this script
HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd)"
scriptname="$(basename "${BASH_SOURCE[0]}")"
echo "Begin ${scriptname} at $(date -u)" || true
# Prefix every traced command with the script name and line number
export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]'

# GitHub CLI executable, taken from the bin directory under ${HOME}
GH="${HOME}/bin/gh"
# Keep a caller-supplied REPO_URL; otherwise fall back to the NOAA-EMC repository
: "${REPO_URL:=https://github.com/NOAA-EMC/global-workflow.git}"

#########################################################################
# Set up runtime environment variables for accounts on supported machines
#########################################################################

source "${HOMEgfs}/ush/detect_machine.sh"
if [[ "${MACHINE_ID}" == "hera" || "${MACHINE_ID}" == "orion" ]]; then
  echo "Running Automated Testing on ${MACHINE_ID}"
  source "${HOMEgfs}/ci/platforms/${MACHINE_ID}.sh"
else
  echo "Unsupported platform. Exiting with error."
  exit 1
fi

# Command tracing is switched off while the module environment is loaded
set +x
source "${HOMEgfs}/ush/module-setup.sh"
module use "${HOMEgfs}/modulefiles"
module load "module_gwsetup.${MACHINE_ID}"
module list
set -x
# Locate the rocotostat executable made available by the loaded modules.
# The lookup is guarded with "|| true" so that, under "set -e", a missing
# executable reaches the explicit error message below instead of aborting
# silently on the failed command substitution. The test checks for an empty
# value: the variable is always *set* after the assignment, so a
# "${rocotostat+x}" style check could never detect a failed lookup.
rocotostat=$(command -v rocotostat) || true
if [[ -z "${rocotostat}" ]]; then
  echo "rocotostat not found on system"
  exit 1
else
  echo "rocotostat being used from ${rocotostat}"
fi

# Name of the file under ${GFS_CI_ROOT} that holds the PR numbers to check
pr_list_file="open_pr_list"

# Nothing to do when the list file is missing or empty
if [[ ! -s "${GFS_CI_ROOT}/${pr_list_file}" ]]; then
  echo "no PRs to process .. exit"
  exit 0
fi
pr_list=$(< "${GFS_CI_ROOT}/${pr_list_file}")

#############################################################
# Loop through all PRs in the PR list and look for experiments
# in the RUNTESTS dir; for each one found, run rocotostat on it
#############################################################

# pr_list is intentionally unquoted so that it word-splits into PR numbers
for pr in ${pr_list}; do
  id=$("${GH}" pr view "${pr}" --repo "${REPO_URL}" --json id --jq '.id')
  echo "Processing Pull Request #${pr} and looking for cases"
  pr_dir="${GFS_CI_ROOT}/PR/${pr}"
  # Report file that accumulates results and is posted as the PR comment body
  output_file="${pr_dir}/output_${id}"

  # If there is no RUNTESTS dir for this PR then cases have not been made yet
  if [[ ! -d "${pr_dir}/RUNTESTS" ]]; then
    continue
  fi
  num_cases=$(find "${pr_dir}/RUNTESTS" -mindepth 1 -maxdepth 1 -type d | wc -l) || true

  # The PR has passed when ${pr_dir}/RUNTESTS has no subfolders left,
  # since every successful case was removed on an earlier pass
  if [[ "${num_cases}" -eq 0 ]]; then
    "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Passed"
    "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_file}"
    # Match the PR number as a whole word so that removing e.g. PR 147 does
    # not also remove the entry for PR 1476
    sed -i "/\<${pr}\>/d" "${GFS_CI_ROOT}/${pr_list_file}"
    # Completely remove the PR and its cloned repo on success of all cases
    rm -Rf "${pr_dir}"
    continue
  fi

  for cases in "${pr_dir}/RUNTESTS/"*; do
    # Only directories are cases (this mirrors how num_cases is counted)
    [[ -d "${cases}" ]] || continue
    pslot=$(basename "${cases}")
    xml="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.xml"
    db="${pr_dir}/RUNTESTS/${pslot}/EXPDIR/${pslot}/${pslot}.db"

    # Summary output with the CYCLE header line filtered out:
    # count the remaining lines and how many of them contain "Done"
    rocoto_stat_output=$("${rocotostat}" -w "${xml}" -d "${db}" -s | grep -v CYCLE) || true
    num_cycles=$(echo "${rocoto_stat_output}" | wc -l) || true
    num_done=$(echo "${rocoto_stat_output}" | grep -c Done) || true

    # Query the -a output once so that the succeeded and failed counts are
    # taken from the same snapshot of the workflow database
    rocoto_all_output=$("${rocotostat}" -w "${xml}" -d "${db}" -a) || true
    num_succeeded=$(grep -c SUCCEEDED <<< "${rocoto_all_output}") || true
    echo "${pslot} Total Cycles: ${num_cycles} number done: ${num_done}" || true
    num_failed=$(grep -c -E 'FAIL|DEAD' <<< "${rocoto_all_output}") || true

    # Any FAIL/DEAD line marks the PR as failed and drops it from the PR list
    if [[ ${num_failed} -ne 0 ]]; then
      {
        echo "Experiment ${pslot} Terminated: *FAILED*"
        echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true
      } >> "${output_file}"
      "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed"
      "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_file}"
      sed -i "/\<${pr}\>/d" "${GFS_CI_ROOT}/${pr_list_file}"
    fi

    # Every cycle reports Done: record the success and retire the case
    if [[ "${num_done}" -eq "${num_cycles}" ]]; then
      {
        echo "Experiment ${pslot} completed: *SUCCESS*"
        echo "Experiment ${pslot} Completed at $(date)" || true
        # Terminated with a newline so the next report appended to this
        # file starts on its own line
        echo "with ${num_succeeded} successfully completed jobs" || true
      } >> "${output_file}"
      "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_file}"
      # Remove experiment cases that completed successfully
      rm -Rf "${pr_dir}/RUNTESTS/${pslot}"
    fi
  done
done
89 changes: 51 additions & 38 deletions ci/scripts/clone-build_ci.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#!/bin/bash
set -eux

#################################################################
# TODO using static build for GitHub CLI until fixed in HPC-Stack
#################################################################
GH=/home/Terry.McGuinness/bin/gh
repo_url=${repo_url:-"https://github.com/global-workflow.git"}
#####################################################################
# Usage and arguments for specifying cloned directory
#####################################################################
Expand Down Expand Up @@ -45,25 +40,14 @@ while getopts "p:d:o:h" opt; do
esac
done

####################################################################
# start output file
{
echo "Automated global-workflow Testing Results:"
echo "Machine: ${CI_HOST}"
echo '```'
echo "Start: $(date) on $(hostname)" || true
echo "---------------------------------------------------"
} >> "${outfile}"
######################################################################

cd "${repodir}"
cd "${repodir}" || exit 1
# clone copy of repo
if [[ -d global-workflow ]]; then
rm -Rf global-workflow
fi

git clone "${repo_url}"
cd global-workflow
git clone "${REPO_URL}"
cd global-workflow || exit 1

pr_state=$(gh pr view "${PR}" --json state --jq '.state')
if [[ "${pr_state}" != "OPEN" ]]; then
Expand All @@ -73,34 +57,63 @@ if [[ "${pr_state}" != "OPEN" ]]; then
fi

# checkout pull request
"${GH}" pr checkout "${PR}" --repo "${repo_url}"
"${GH}" pr checkout "${PR}" --repo "${REPO_URL}"
HOMEgfs="${PWD}"
source "${HOMEgfs}/ush/detect_machine.sh"

####################################################################
# start output file
{
echo "Automated global-workflow Testing Results:"
echo '```'
echo "Machine: ${MACHINE_ID^}"
echo "Start: $(date) on $(hostname)" || true
echo "---------------------------------------------------"
} >> "${outfile}"
######################################################################

# get commit hash
commit=$(git log --pretty=format:'%h' -n 1)
echo "${commit}" > "../commit"

# run build script
cd sorc
# Run the checkout script from the sorc directory, appending its stdout and
# stderr to log.checkout, and record the outcome in ${outfile}
cd sorc || exit 1
# Disable exit-on-error so the checkout exit status can be captured and reported
set +e
./checkout.sh -c -g -u &>> log.checkout
checkout_status=$?
if [[ ${checkout_status} != 0 ]]; then
# Checkout failed: report it with the log location, then exit with its status
{
echo "Checkout: *FAILED*"
echo "Checkout: Failed at $(date)" || true
echo "Checkout: see output at ${PWD}/log.checkout"
} >> "${outfile}"
exit "${checkout_status}"
else
# Checkout succeeded: note the completion time in the report
{
echo "Checkout: *SUCCESS*"
echo "Checkout: Completed at $(date)" || true
} >> "${outfile}"
fi

# build full cycle
source "${HOMEgfs}/ush/module-setup.sh"
export BUILD_JOBS=8
rm -rf log.build
./checkout.sh -g -c
# build full cycle
./build_all.sh -g &>> log.build

# Validations
./build_all.sh &>> log.build
build_status=$?
if [[ ${build_status} -eq 0 ]]; then
{
echo "Build: *SUCCESS*"
echo "Build: Completed at $(date)" || true
} >> "${outfile}"

if [[ ${build_status} != 0 ]]; then
{
echo "Build: *FAILED*"
echo "Build: Failed at $(date)" || true
echo "Build: see output at ${PWD}/log.build"
} >> "${outfile}"
exit "${build_status}"
else
{
echo "Build: *FAILED*"
echo "Build: Failed at $(date)" || true
echo "Build: see output at ${PWD}/log.build"
}
echo '```' >> "${outfile}"
{
echo "Build: *SUCCESS*"
echo "Build: Completed at $(date)" || true
} >> "${outfile}"
fi

./link_workflow.sh
Expand Down
Loading

0 comments on commit f159d39

Please sign in to comment.