Skip to content

Commit

Permalink
Share workflow failure alerting between ci.yml and pkgci.yml. (#19444)
Browse files Browse the repository at this point in the history
Progress on #9305.

Changes included:

1. Extracts the code for parsing the results of multiple jobs and
optionally posting alerts to Discord from `ci.yml` into a new [reusable
workflow](https://docs.github.com/en/actions/sharing-automations/reusing-workflows)
in the `workflow_summary.yml` file.
2. Uses the new reusable workflow in `pkgci.yml`.
3. Renames the `summary` job to `ci_summary` and `pkgci_summary` to
disambiguate "required checks". You'd think GitHub would use keys that
aren't ambiguous for required checks but nooooope:

![image](https://github.com/user-attachments/assets/5c3665bc-7933-41d0-8d5a-c6ecf966a3b4)
  • Loading branch information
ScottTodd authored Dec 11, 2024
2 parents cec3f69 + bbb4e5c commit e6266f7
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 33 deletions.
36 changes: 6 additions & 30 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,39 +198,15 @@ jobs:

##############################################################################

# Depends on all the other jobs to provide a single anchor that indicates the
# final status. Status reporting will become more sophisticated in the future
# and we can hopefully avoid the need to explicitly list every single job...
summary:
# Even if you have an explicit if condition, you still need to override
# GitHub's default behavior of not running if any dependencies failed.
# Aggregate job status and alerting on failures.
ci_summary:
if: always()
runs-on: ubuntu-20.04
needs:
- setup
- runtime
- runtime_small
- runtime_tracing
steps:
- name: "Checking out repository"
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Getting failed jobs
id: failed_jobs
run: |
echo '${{ toJson(needs) }}'
FAILED_JOBS="$(echo '${{ toJson(needs) }}' \
| jq --raw-output \
'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
)"
echo "failed-jobs=${FAILED_JOBS}" >> $GITHUB_OUTPUT
if [[ "${FAILED_JOBS}" != "" ]]; then
echo "The following jobs failed: ${FAILED_JOBS}"
exit 1
fi
- name: Posting to Discord
uses: sarisia/actions-status-discord@ce8cc68e4e626000136b3c702d049a154243e490 # v1.14.7
if: failure() && github.ref_name == 'main'
with:
webhook: ${{ secrets.DISCORD_WEBHOOK }}
description: "The following jobs failed: ${{ steps.failed_jobs.outputs.failed-jobs }}"
url: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }}"
uses: ./.github/workflows/workflow_summary.yml
secrets: inherit
with:
jobs-json: ${{ toJson(needs) }}
25 changes: 25 additions & 0 deletions .github/workflows/pkgci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,28 @@ jobs:
needs: [setup, build_packages]
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_pjrt')
uses: ./.github/workflows/pkgci_test_pjrt.yml

##############################################################################

# Aggregate job status and alerting on failures.
#
# Delegates to the reusable `workflow_summary.yml` workflow, handing it the
# results of every job listed under `needs`.
pkgci_summary:
# `always()` overrides GitHub's default behavior of not running a job when any
# of its dependencies failed, so failures can still be reported.
if: always()
needs:
- setup
- build_packages
- unit_test
- regression_test
- test_amd_mi250
- test_amd_mi300
- test_amd_w7900
# NOTE(review): test_nvidia_t4 is commented out, so its result is not part of
# the aggregated status - confirm this is intentional.
# - test_nvidia_t4
- test_android
- test_riscv64
- test_onnx
- test_sharktank
- test_tensorflow
- test_pjrt
uses: ./.github/workflows/workflow_summary.yml
# Forward the caller's secrets; the reusable workflow reads
# `secrets.DISCORD_WEBHOOK` when it posts an alert.
secrets: inherit
with:
# JSON serialization of the `needs` context (one entry per job listed above).
jobs-json: ${{ toJson(needs) }}
63 changes: 63 additions & 0 deletions .github/workflows/workflow_summary.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Checks the result status of each job provided by 'jobs-json' and sends an
# alert if at least one job failed.
#
# Usage:
# ```yml
# jobs:
# job_1:
# ...
# job_2:
# ...
# my_summary:
# if: always()
# needs:
# - job_1
# - job_2
# uses: ./.github/workflows/workflow_summary.yml
# secrets: inherit
# with:
# jobs-json: ${{ toJson(needs) }}
# ```

name: Workflow Summary

on:
  workflow_call:
    inputs:
      jobs-json:
        type: string
        description: The output of `toJson(needs)`

permissions:
  contents: read

jobs:
  # Fails if any job in `jobs-json` neither succeeded nor was skipped, and
  # posts an alert to Discord for failures on the `main` branch.
  summary:
    # The job only needs bash and jq. The previously used `ubuntu-20.04`
    # GitHub-hosted image has been retired, so use a supported image.
    runs-on: ubuntu-24.04
    steps:
      - name: Getting failed jobs
        id: failed_jobs
        # Pass the JSON through an environment variable instead of expanding
        # `${{ ... }}` inside the script: the expression is substituted before
        # the shell runs, so a single quote in any job output would terminate
        # the quoted string and break (or inject commands into) the script.
        env:
          JOBS_JSON: ${{ inputs.jobs-json }}
        run: |
          # Log the raw input to make failures easier to debug.
          printf '%s\n' "${JOBS_JSON}"
          # Comma-separated names of jobs whose result is neither "success"
          # nor "skipped".
          FAILED_JOBS="$(printf '%s\n' "${JOBS_JSON}" \
            | jq --raw-output \
            'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
          )"
          # Expose the list to later steps before (possibly) failing.
          echo "failed-jobs=${FAILED_JOBS}" >> "${GITHUB_OUTPUT}"
          if [[ "${FAILED_JOBS}" != "" ]]; then
            echo "The following jobs failed: ${FAILED_JOBS}"
            exit 1
          fi
      - name: Posting to Discord
        uses: sarisia/actions-status-discord@ce8cc68e4e626000136b3c702d049a154243e490 # v1.14.7
        # Only alert for failures on `main` in the upstream organization, so
        # forks and pull request branches do not post.
        if: failure() && github.ref_name == 'main' && github.repository_owner == 'iree-org'
        with:
          webhook: ${{ secrets.DISCORD_WEBHOOK }}
          description: "The following jobs failed: ${{ steps.failed_jobs.outputs.failed-jobs }}"
          url: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }}"
12 changes: 9 additions & 3 deletions build_tools/github_actions/configure_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
import fnmatch
import json
import os
import re
import pathlib
import re
import string
import subprocess
import sys
Expand Down Expand Up @@ -111,7 +111,12 @@ def contains(cls, val):
RUNNER_ENV_DEFAULT = "prod"
RUNNER_ENV_OPTIONS = [RUNNER_ENV_DEFAULT, "testing"]

CONTROL_JOBS = frozenset(["setup", "summary"])
# Job names matching any of these patterns are treated as control jobs and are
# removed from the set of jobs parsed out of a workflow file.
#
# The patterns are applied with `regex.match()`, which anchors only at the
# start of the string. Each pattern therefore ends with `\Z` so that it must
# consume the whole job name: "setup" matches only the job named exactly
# "setup" (not e.g. "setup_foo"), and ".*summary" matches only names that end
# in "summary" (e.g. "ci_summary", "pkgci_summary").
CONTROL_JOB_REGEXES = frozenset(
    [
        re.compile(r"setup\Z"),
        re.compile(r".*summary\Z"),
    ]
)

# Jobs to run only on postsubmit by default.
# They may also run on presubmit only under certain conditions.
Expand Down Expand Up @@ -380,7 +385,8 @@ def parse_jobs_from_workflow_file(workflow_file: pathlib.Path) -> Set[str]:

workflow = yaml.load(workflow_file.read_text(), Loader=yaml.SafeLoader)
all_jobs = set(workflow["jobs"].keys())
all_jobs -= CONTROL_JOBS
for regex in CONTROL_JOB_REGEXES:
all_jobs = {j for j in all_jobs if not regex.match(j)}

if ALL_KEY in all_jobs:
raise ValueError(f"Workflow has job with reserved name '{ALL_KEY}'")
Expand Down

0 comments on commit e6266f7

Please sign in to comment.