Skip to content

Commit

Permalink
github: notify Slack when CI fails on merge to main/release (#24690)
Browse files Browse the repository at this point in the history
This change creates a reusable workflow for notifying Slack on CI
failures. The message will include useful links and information
about the failure, so product engineers can investigate and fix
any problems.

The new workflow is used by selected workflows which trigger on
merges to main or release/* branches. The notification is only
sent on failure and when the event was a push (PR merge) meaning
the number of notifications should be minimal.

The aim is to help identify and draw attention to failure across
our release branches, in particular when automated processes
happen.
  • Loading branch information
jrasell authored Dec 18, 2024
1 parent 30e57c3 commit e3ac00f
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,25 @@ jobs:
echo "::group::Smoke test binary"
docker run --rm -v "$PWD:/src" registry.access.redhat.com/ubi7/ubi-minimal:7.9-1057 /src/nomad version
handle-failure:
needs:
- get-go-version
- get-product-version
- generate-metadata-file
- build-other
- build-linux
- build-darwin
- build-docker
- minimum-os
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}

permissions:
contents: read
id-token: write
15 changes: 15 additions & 0 deletions .github/workflows/security-scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,18 @@ jobs:
uses: github/codeql-action/upload-sarif@8fd294e26a0e458834582b0fe4988d79966c7c0a # codeql-bundle-v2.18.4
with:
sarif_file: results.sarif

handle-failure:
permissions:
contents: read
id-token: write
needs:
- scan
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}
16 changes: 16 additions & 0 deletions .github/workflows/test-core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,22 @@ jobs:
sudo -E env "PATH=$PATH" hc-install install -version ${{env.CONSUL_VERSION}} -path '${{env.GOPATH}}/bin' consul
sudo sed -i 's!Defaults!#Defaults!g' /etc/sudoers
sudo -E env "PATH=$PATH" make test-nomad
handle-failure:
needs:
- checks
- compile
- tests-api
- tests-groups
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}

permissions:
contents: read
id-token: write
13 changes: 13 additions & 0 deletions .github/workflows/test-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,19 @@ jobs:
sudo sed -i 's!Defaults!#Defaults!g' /etc/sudoers
sudo -E env "PATH=$PATH" make integration-test-consul
handle-failure:
needs:
- test-e2e-vault
- test-e2e-consul
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}

permissions:
contents: read
id-token: write
75 changes: 75 additions & 0 deletions .github/workflows/test-failure-notification.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
---
name: CI Test Failure Notification

on:
workflow_call:
inputs:
actor:
description: 'Triggering actor of the failed the workflow'
required: true
type: string
git-branch:
description: 'Git branch name'
required: true
type: string
workflow-name:
description: 'Name of the GitHub Action workflow'
required: true
type: string
workflow-run-id:
description: 'GitHub Action run ID that failed'
required: true
type: string

jobs:
send-notification:
runs-on: ${{ endsWith(github.repository, '-enterprise') && fromJSON('["self-hosted", "ondemand", "linux"]') || 'ubuntu-latest' }}
steps:
- name: Retrieve Vault-hosted Secrets
if: endsWith(github.repository, '-enterprise')
id: vault
uses: hashicorp/vault-action@d1720f055e0635fd932a1d2a48f87a666a57906c # v3.0.0
with:
url: ${{ vars.CI_VAULT_URL }}
method: ${{ vars.CI_VAULT_METHOD }}
path: ${{ vars.CI_VAULT_PATH }}
jwtGithubAudience: ${{ vars.CI_VAULT_AUD }}
secrets: |-
kv/data/teams/nomad/slack-webhooks feed-nomad | SLACK_FEED_NOMAD ;
- name: Send Slack notification
uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0
with:
webhook: "${{ env.SLACK_FEED_NOMAD || secrets.SLACK_FEED_NOMAD_CI_FAILURE }}"
webhook-type: incoming-webhook
payload: |
{
"text": ":x: CI Workflow '${{ inputs.workflow-name }}' has failed",
"attachments": [
{
"color": "#C41E3A",
"blocks": [
{
"type": "section",
"fields": [
{
"type": "mrkdwn",
"text": "*Branch:* <${{ github.server_url }}/${{ github.repository }}/tree/${{ inputs.git-branch }}|${{ inputs.git-branch }}>"
},
{
"type": "mrkdwn",
"text": "*From:* @${{ inputs.actor }}"
},
{
"type": "mrkdwn",
"text": "*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ inputs.workflow-run-id }}|${{ inputs.workflow-run-id }}>"
}
]
}
]
}
]
}
permissions:
contents: read
id-token: write
15 changes: 15 additions & 0 deletions .github/workflows/test-ui.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ jobs:
PERCY_TOKEN: ${{ env.PERCY_TOKEN || secrets.PERCY_TOKEN }}
PERCY_PARALLEL_NONCE: ${{ needs.pre-test.outputs.nonce }}
run: yarn percy build:finalize

handle-failure:
needs:
- pre-test
- tests
- finalize
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}

permissions:
contents: read
id-token: write
16 changes: 16 additions & 0 deletions .github/workflows/test-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,21 @@ jobs:
with:
name: results.xml
path: results.xml

handle-failure:
permissions:
contents: read
id-token: write
needs:
- test-windows
if: always() && github.event_name == 'push' && contains(needs.*.result, 'failure')
uses: ./.github/workflows/test-failure-notification.yml
secrets: inherit
with:
actor: ${{ github.triggering_actor }}
git-branch: ${{ github.ref_name }}
workflow-run-id: ${{ github.run_id }}
workflow-name: ${{ github.workflow }}

permissions:
contents: read

0 comments on commit e3ac00f

Please sign in to comment.