-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16357 from cevich/cron_auto_rerun
[CI:DOCS] [WIP] GHA: Auto. re-run failed cirrus-cron builds once
- Loading branch information
Showing
6 changed files
with
304 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
|
||
|
||
# Send text to stderr.
# Arguments: any number of words; they are joined with single spaces.
# Outputs:   the joined text followed by a newline on stderr, nothing on stdout.
msg() {
    # Join with a space regardless of the caller's IFS (matches echo).
    local IFS=' '
    # printf rather than echo, so a leading "-n" or "-e" argument is printed
    # literally instead of being consumed as an option.  >&2 duplicates the
    # existing descriptor; "> /dev/stderr" would re-open it, which truncates
    # the target when stderr is redirected to a regular file.
    printf '%s\n' "$*" >&2
}
|
||
# Emit a GitHub Actions error annotation on stderr, then exit with status 1.
# Must be called from top-level of script, not another function: the
# annotation reports ${BASH_SOURCE[1]} (the file of whoever called err) and
# ${BASH_LINENO[0]} (the line of that call).
# Arguments: the words of the error message.
err() {
    # Ref: https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions
    # "$*" keeps the whole annotation a single word; "$@" embedded in a longer
    # string splits it into several arguments after the first message word.
    msg "::error file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}::$*"
    exit 1
}
|
||
# Encode a GraphQL query/mutation as a JSON string literal.
# Using python3 here is a compromise for readability and
# properly handling quote, control and unicode character encoding.
# Arguments: $1 - the query/mutation text (may span multiple lines)
# Outputs:   the JSON-quoted text on stdout, without a trailing newline
# Returns:   non-zero if the encoding pipeline fails
escape_query() {
    local json_string
    # Assume it's okay to squash repeated whitespaces inside the query.
    # Line breaks become spaces (not nothing) so tokens on adjacent lines
    # are never fused together.  The squeeze set is the bare '[:space:]'
    # class: wrapping it in an extra pair of brackets would add literal
    # '[' and ']' to the set and collapse nested list literals like "[[".
    json_string=$(printf '%s' "$1" \
        | tr '\r\n' '  ' \
        | tr -s '[:space:]' \
        | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read()))') \
        || return
    # The $json_string in message is already quoted
    printf '%s' "$json_string"
}
|
||
# Given a GraphQL query/mutation, fire it at the API.
# and return the output on stdout.  The optional
# second parameter may contain a jq filter-string.
# When provided, if the GQL result is empty, null,
# fails to parse, or does not match the filter-string,
# non-zero will be returned.
#
# Globals:   SECRET_CIRRUS_API_KEY (read) - bearer token sent to the API
# Arguments: $1 - GraphQL query or mutation text
#            $2 - optional jq filter the reply must satisfy (default: '.')
# Outputs:   stdout - the complete reply, pretty-printed (success only)
#            stderr - progress, the query, the reply and any error
#                     annotation, wrapped in a GitHub Actions log group
# Returns:   0 on success
#            1 if the encoded query is not valid JSON
#            2 if the reply did not pass the filter
#            3 if the request failed or the reply was empty
gql() {
    local e_query query
    local filter
    local output
    local filtered
    local formatted
    local where
    # An omitted or empty filter only requires a parseable, non-null reply.
    filter="${2:-.}"
    # Annotations point at the caller of gql, not at this library.
    where="file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}"

    e_query=$(escape_query "$1")
    query="{\"query\": $e_query}"

    msg "::group::Posting GraphQL Query and checking result"
    msg "query: "
    if ! jq -e . <<<"$query" >&2; then
        msg "::error ${where}::Invalid query JSON: $query"
        # Close the log group on this path too, otherwise all later
        # output of the workflow step is folded into it.
        msg "::endgroup::"
        return 1
    fi

    # NOTE(review): the API key is passed on curl's command line and is
    # therefore visible in the process list while the request runs.
    if output=$(curl \
                --request POST \
                --silent \
                --show-error \
                --location \
                --header 'content-type: application/json' \
                --header "Authorization: Bearer $SECRET_CIRRUS_API_KEY" \
                --url 'https://api.cirrus-ci.com/graphql' \
                --data "$query") && [[ -n "$output" ]]; then

        if filtered=$(jq -e "$filter" <<<"$output") && [[ -n "$filtered" ]]; then
            msg "result:"
            # Make debugging easier w/ formatted output
            # to stderr for display, stdout for consumption by caller
            formatted=$(jq --indent 2 . <<<"$output")
            printf '%s\n' "$formatted" >&2
            printf '%s\n' "$formatted"
            msg "::endgroup::"
            return 0
        fi

        msg "::error ${where}::Query result did not pass filter '$filter': '$output'"
        msg "::endgroup::"
        return 2
    fi

    msg "::error ${where}::Query failed or result empty: '$output'"
    msg "::endgroup::"
    return 3
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash

set -eo pipefail

# Intended to be executed from a github action workflow step.
# Input: File listing space separated failed cron build names and IDs,
#        one "<name> <build id>" pair per line, at the path given by
#        $NAME_ID_FILEPATH.  $GITHUB_REPOSITORY and $GITHUB_WORKFLOW
#        must also be set.
# Output: ./artifacts/email_body.txt, relative to the current directory
#         (presumably $GITHUB_WORKSPACE when run by the workflow).

# Quoted so a checkout path containing spaces still resolves.
source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"

_errfmt="Expecting %s value to not be empty"
if [[ -z "$GITHUB_REPOSITORY" ]]; then
    # shellcheck disable=SC2059  # the format string is the constant above
    err "$(printf "$_errfmt" "\$GITHUB_REPOSITORY")"
elif [[ -z "$GITHUB_WORKFLOW" ]]; then
    # shellcheck disable=SC2059
    err "$(printf "$_errfmt" "\$GITHUB_WORKFLOW")"
elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then
    err "Expecting \$NAME_ID_FILEPATH value ($NAME_ID_FILEPATH) to be a readable file"
fi

mkdir -p artifacts
(
    echo "Detected one or more Cirrus-CI cron-triggered jobs have failed recently:"
    echo ""

    # The extra test keeps a final line that lacks a trailing newline;
    # read returns non-zero for it but still fills in the variables.
    while read -r NAME BID || [[ -n "$NAME" ]]; do
        # A blank line would otherwise produce a link with no build ID.
        [[ -n "$NAME" ]] || continue
        echo "Cron build '$NAME' Failed: https://cirrus-ci.com/build/$BID"
    done < "$NAME_ID_FILEPATH"

    echo ""
    echo "# Source: ${GITHUB_WORKFLOW} workflow on ${GITHUB_REPOSITORY}."
    # Separate content from sendgrid.com automatic footer.
    echo ""
    echo ""
) > ./artifacts/email_body.txt
112 changes: 112 additions & 0 deletions
112
.github/actions/check_cirrus_cron/rerun_failed_tasks.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/bin/bash

set -eo pipefail

# Intended to be executed from a github action workflow step.
# Input: File listing space separated failed cron build names and IDs,
#        at the path given by $NAME_ID_FILEPATH.  $SECRET_CIRRUS_API_KEY
#        must hold the token used for the Cirrus-CI API.
# Output: ./artifacts/rerun_tids.txt listing, one per line, the ID of
#         every task that was selected for re-run (across all builds).
#
# HOW TO TEST: This script may be manually tested assuming you have
# access to the github containers-org. Cirrus API key.  With that in-hand,
# this script may be manually run by:
# 1. export SECRET_CIRRUS_API_KEY=<value>
# 2. Find an old podman build that failed on `main` or another **branch**.
#    For example, from https://cirrus-ci.com/github/containers/podman/main
#    (pick an old one from the bottom, since re-running it won't affect anybody)
# 3. Create a temp. file, like /tmp/fail with a single line, of the form:
#    <branch> <cirrus build id number>
# 4. export NAME_ID_FILEPATH=/tmp/fail
# 5. execute this script, and refresh the build in the WebUI, all unsuccessful
#    tasks should change status to running or scheduled.  Note: some later
#    tasks may remain red as they wait for dependencies to run and pass.
# 6. After each run, cleanup with 'rm -rf ./artifacts'
#    (unless you want to examine them)

# Quoted so a checkout path containing spaces still resolves.
source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"

_errfmt="Expecting %s value to not be empty"
if [[ -z "$SECRET_CIRRUS_API_KEY" ]]; then
    # shellcheck disable=SC2059  # the format string is the constant above
    err "$(printf "$_errfmt" "\$SECRET_CIRRUS_API_KEY")"
elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then  # output from cron_failures.sh
    err "$(printf "Expecting %s value to be a readable file" "\$NAME_ID_FILEPATH")"
fi

mkdir -p artifacts
# Start from an empty list so the file exists even if nothing is re-run.
truncate -s 0 ./artifacts/rerun_tids.txt

# Task IDs selected for the build currently being processed.
declare -a rerun_tasks

# The file is redirected into the loop (no "cat |" subshell), and the extra
# test keeps a final line that lacks a trailing newline.
while read -r NAME BID || [[ -n "$NAME" ]]; do
    if [[ -z "$NAME" ]]; then
        # shellcheck disable=SC2059
        err "$(printf "$_errfmt" "\$NAME")"
    elif [[ -z "$BID" ]]; then
        # shellcheck disable=SC2059
        err "$(printf "$_errfmt" "\$BID")"
    fi

    id_status_q="
        query {
          build(id: \"$BID\") {
            tasks {
              id,
              status
            }
          }
        }
    "
    task_id_status=$(gql "$id_status_q" '.data.build.tasks[0]')
    # Expected query result like:
    # {
    #   "data": {
    #     "build": {
    #       "tasks": [
    #         {
    #           "id": "6321184690667520",
    #           "status": "COMPLETED"
    #         },
    #         ...
    msg "::group::Selecting failed/aborted tasks to re-run"
    # Name both fields explicitly instead of joining the object's values,
    # so the "<id> <status>" order cannot depend on the reply's key order.
    # Captured first so a jq failure still aborts the script under 'set -e'.
    id_status_lines=$(jq -r -e '.data.build.tasks[] | "\(.id) \(.status)"' <<<"$task_id_status")

    # Reset for every build; otherwise tasks of earlier builds would be
    # submitted again together with this build's tasks.
    rerun_tasks=()
    while read -r TID STATUS; do
        if [[ -z "$TID" ]] || [[ -z "$STATUS" ]]; then
            # assume empty line and/or end of file
            msg "Skipping TID '$TID' with status '$STATUS'"
            continue
        # Failed task dependencies will have 'aborted' status
        elif [[ "$STATUS" == "FAILED" ]] || [[ "$STATUS" == "ABORTED" ]]; then
            msg "Rerunning build $BID task $TID"
            rerun_tasks+=("$TID")
            # Keep the cumulative record of everything selected for re-run
            echo "$TID" >> ./artifacts/rerun_tids.txt
        fi
    done <<<"$id_status_lines"
    msg "::endgroup::"

    if [[ "${#rerun_tasks[@]}" -eq 0 ]]; then
        msg "No tasks to re-run for build $BID"
        continue
    fi

    msg "::warning::Rerunning ${#rerun_tasks[@]} tasks for build $BID"
    # Check-value returned if the gql call was successful
    canary=$(uuidgen)
    # Build a JSON list of quoted IDs.  Ensure the trailing ',' is stripped
    # from the end (would be invalid JSON).
    task_ids=$(printf '"%s",' "${rerun_tasks[@]}")
    task_ids="[${task_ids%,}]"
    rerun_m="
        mutation {
          batchReRun(input: {
            clientMutationId: \"$canary\",
            taskIds: $task_ids
            }
          ) {
            clientMutationId
          }
        }
    "
    filter='.data.batchReRun.clientMutationId'
    result=$(gql "$rerun_m" "$filter")
    if [[ $(jq -r -e "$filter" <<<"$result") != "$canary" ]]; then
        err "Attempt to re-run tasks for build $BID failed: ${rerun_tasks[*]}"
    fi
done < "$NAME_ID_FILEPATH"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.