Skip to content

[loader-v2] Fixing global cache reads & read-before-write on publish #67723

[loader-v2] Fixing global cache reads & read-before-write on publish

[loader-v2] Fixing global cache reads & read-before-write on publish #67723

## IMPORTANT NOTE TO EDITORS OF THIS FILE ##
## Note that when you create a PR the jobs in this file are triggered off the
## `pull_request_target` event instead of `pull_request` event. This is because
## the `pull_request` event makes secrets only available to PRs from branches,
## not from forks, and some of these jobs require secrets. So with `pull_request_target`
## we're making secrets available to fork-based PRs too. Using `pull_request_target"
## has a side effect, which is that the workflow execution will be driven by the
## state of the <workflow>.yaml on the `main` (=target) branch, even if you edited
## the <workflow>.yaml in your PR. So when you for example add a new job here, you
## won't see that job appear in the PR itself. It will only become effective once
## you merge the PR to main. Therefore, if you want to add a new job here and want
## to test it's functionality prior to a merge to main, you have to to _temporarily_
## change the trigger event from `pull_request_target` to `pull_request`.
## Additionally, because `pull_request_target` gets secrets injected for forked PRs
## we use `https://github.com/sushichop/action-repository-permission` to ensure these
## jobs are only executed when a repo member with "write" permission has triggered
## the workflow (directly through a push or indirectly by applying a label or enabling
## auto_merge).
name: "Build+Test Docker Images"
on: # build on main branch OR when a PR is labeled with `CICD:build-images`
# Allow us to run this specific workflow without a PR
workflow_dispatch:
pull_request_target:
types: [labeled, opened, synchronize, reopened, auto_merge_enabled]
push:
branches:
- main
# release branches
- devnet
- testnet
- mainnet
- aptos-node-v*
- aptos-release-v*
# experimental branches
- performance_benchmark
- preview
- preview-networking
# grpc release branches
- aptos-indexer-grpc-devnet
- aptos-indexer-grpc-testnet
- aptos-indexer-grpc-mainnet
- aptos-indexer-grpc-v*
# cancel redundant builds
concurrency:
# for push and workflow_dispatch events we use `github.sha` in the concurrency group and don't really cancel each other out/limit concurrency
# for pull_request events newer jobs cancel earlier jobs to save on CI etc.
group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }}
cancel-in-progress: true
env:
AWS_ECR_ACCOUNT_NUM: ${{ secrets.ENV_ECR_AWS_ACCOUNT_NUM }}
# In case of pull_request events by default github actions merges main into the PR branch and then runs the tests etc
# on the prospective merge result instead of only on the tip of the PR.
# For more info also see https://github.com/actions/checkout#checkout-pull-request-head-commit-instead-of-merge-commit
GIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
# TARGET_CACHE_ID is used as part of the docker tag / cache key inside our bake.hcl docker bake files.
# The goal here is to have a branch or PR-local cache such that consecutive pushes to a shared branch or a specific PR can
# reuse layers from a previous docker build/commit.
# We use `pr-<pr_number>` as cache-id for PRs and simply <branch_name> otherwise.
TARGET_CACHE_ID: ${{ github.event.number && format('pr-{0}', github.event.number) || github.ref_name }}
# On PRs, only build and push to GCP
# On push, build and push to all remote registries
TARGET_REGISTRY: ${{ github.event_name == 'pull_request_target' && 'remote' || 'remote-all' }}
permissions:
contents: read
id-token: write #required for GCP Workload Identity federation which we use to login into Google Artifact Registry
issues: write
pull-requests: write
# Note on the job-level `if` conditions:
# This workflow is designed such that:
# 1. Run ALL jobs when a 'push', 'workflow_dispatch' triggered the workflow or on 'pull_request's which have set auto_merge=true or have the label "CICD:run-e2e-tests".
# 2. Run ONLY the docker image building jobs on PRs with the "CICD:build[-<PROFILE/FEATURE>]-images" label.
# 3. Run ONLY the forge-e2e-test job on PRs with the "CICD:run-forge-e2e-perf" label.
# 4. Run NOTHING when neither 1. or 2. or 3. conditions are satisfied.
jobs:
permission-check:
if: |
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(join(github.event.pull_request.labels.*.name, ','), 'CICD:build-') ||
contains(join(github.event.pull_request.labels.*.name, ','), 'CICD:run-') ||
github.event.pull_request.auto_merge != null ||
contains(github.event.pull_request.body, '#e2e')
runs-on: ubuntu-latest
steps:
- name: Check repository permission for user which triggered workflow
uses: sushichop/action-repository-permission@13d208f5ae7a6a3fc0e5a7c2502c214983f0241c
with:
required-permission: write
comment-not-permitted: Sorry, you don't have permission to trigger this workflow.
# Because the docker build happens in a reusable workflow, have a separate job that collects the right metadata
# for the subsequent docker builds. Reusable workflows do not currently have the "env" context: https://github.com/orgs/community/discussions/26671
determine-docker-build-metadata:
needs: [permission-check]
runs-on: ubuntu-latest
steps:
- name: collect metadata
run: |
echo "GIT_SHA: ${GIT_SHA}"
echo "TARGET_CACHE_ID: ${TARGET_CACHE_ID}"
echo "TARGET_REGISTRY: ${TARGET_REGISTRY}"
outputs:
gitSha: ${{ env.GIT_SHA }}
targetCacheId: ${{ env.TARGET_CACHE_ID }}
targetRegistry: ${{ env.TARGET_REGISTRY }}
# This job determines which files were changed
file_change_determinator:
needs: [permission-check]
runs-on: ubuntu-latest
outputs:
only_docs_changed: ${{ steps.determine_file_changes.outputs.only_docs_changed }}
steps:
- uses: actions/checkout@v4
- name: Run the file change determinator
id: determine_file_changes
uses: ./.github/actions/file-change-determinator
# This job determines which tests to run
test-target-determinator:
needs: [permission-check]
runs-on: ubuntu-latest
outputs:
run_framework_upgrade_test: ${{ steps.determine_test_targets.outputs.run_framework_upgrade_test }}
steps:
- uses: actions/checkout@v4
- name: Run the test target determinator
id: determine_test_targets
uses: ./.github/actions/test-target-determinator
# This is a PR required job.
rust-images:
needs: [permission-check, determine-docker-build-metadata]
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-docker-rust-build.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
TARGET_CACHE_ID: ${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
PROFILE: release
BUILD_ADDL_TESTING_IMAGES: true
TARGET_REGISTRY: ${{ needs.determine-docker-build-metadata.outputs.targetRegistry }}
rust-images-failpoints:
needs: [permission-check, determine-docker-build-metadata]
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-docker-rust-build.yaml@main
if: |
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:build-failpoints-images')
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
TARGET_CACHE_ID: ${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
PROFILE: release
FEATURES: failpoints
BUILD_ADDL_TESTING_IMAGES: true
TARGET_REGISTRY: ${{ needs.determine-docker-build-metadata.outputs.targetRegistry }}
rust-images-performance:
needs: [permission-check, determine-docker-build-metadata]
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-docker-rust-build.yaml@main
if: |
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:build-performance-images')
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
TARGET_CACHE_ID: ${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
PROFILE: performance
BUILD_ADDL_TESTING_IMAGES: true
TARGET_REGISTRY: ${{ needs.determine-docker-build-metadata.outputs.targetRegistry }}
rust-images-consensus-only-perf-test:
needs: [permission-check, determine-docker-build-metadata]
if: |
contains(github.event.pull_request.labels.*.name, 'CICD:build-consensus-only-image') ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-consensus-only-perf-test')
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-docker-rust-build.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
TARGET_CACHE_ID: ${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
PROFILE: release
FEATURES: consensus-only-perf-test
BUILD_ADDL_TESTING_IMAGES: true
TARGET_REGISTRY: ${{ needs.determine-docker-build-metadata.outputs.targetRegistry }}
# This is a PR required job.
node-api-compatibility-tests:
needs: [permission-check, rust-images, determine-docker-build-metadata, file_change_determinator] # runs with the default release docker build variant "rust-images"
if: |
(
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
github.event.pull_request.auto_merge != null) ||
contains(github.event.pull_request.body, '#e2e'
)
uses: ./.github/workflows/node-api-compatibility-tests.yaml
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
SKIP_JOB: ${{ needs.file_change_determinator.outputs.only_docs_changed == 'true' }}
# This is a PR required job.
cli-e2e-tests:
needs: [permission-check, rust-images, determine-docker-build-metadata, file_change_determinator] # runs with the default release docker build variant "rust-images"
if: |
(
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
github.event.pull_request.auto_merge != null) ||
contains(github.event.pull_request.body, '#e2e'
)
uses: aptos-labs/aptos-core/.github/workflows/cli-e2e-tests.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
SKIP_JOB: ${{ needs.file_change_determinator.outputs.only_docs_changed == 'true' }}
faucet-tests-main:
needs: [permission-check, rust-images, determine-docker-build-metadata, file_change_determinator] # runs with the default release docker build variant "rust-images"
if: |
(
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
github.event.pull_request.auto_merge != null) ||
contains(github.event.pull_request.body, '#e2e'
)
uses: ./.github/workflows/faucet-tests-main.yaml
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
SKIP_JOB: ${{ needs.file_change_determinator.outputs.only_docs_changed == 'true' }}
# This is a PR required job.
forge-e2e-test:
needs:
- permission-check
- determine-docker-build-metadata
- rust-images
- rust-images-failpoints
- rust-images-performance
- rust-images-consensus-only-perf-test
- file_change_determinator
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' && (
(github.event_name == 'push' && github.ref_name != 'main') ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-forge-e2e-perf') ||
github.event.pull_request.auto_merge != null ||
contains(github.event.pull_request.body, '#e2e')
)
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: realistic_env_max_load
IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_RUNNER_DURATION_SECS: 480
COMMENT_HEADER: forge-e2e
# Use the cache ID as the Forge namespace so we can limit Forge test concurrency on k8s, since Forge
# test lifecycle is separate from that of GHA. This protects us from the case where many Forge tests are triggered
# by this GHA. If there is a Forge namespace collision, Forge will pre-empt the existing test running in the namespace.
FORGE_NAMESPACE: forge-e2e-${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
SKIP_JOB: ${{ needs.file_change_determinator.outputs.only_docs_changed == 'true' }}
SEND_RESULTS_TO_TRUNK: true
# This job determines the last released docker image tag, which is used by forge compat test.
fetch-last-released-docker-image-tag:
needs:
- permission-check
# runs only when need to run forge-compat-test or forge-framework-upgrade-test
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' && (
(github.event_name == 'push' && github.ref_name != 'main') ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-framework-upgrade-test') ||
github.event.pull_request.auto_merge != null ||
contains(github.event.pull_request.body, '#e2e')
)
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ github.ref }}
# actions/get-latest-docker-image-tag requires docker utilities and having authenticated to internal docker image registries
- uses: aptos-labs/aptos-core/.github/actions/docker-setup@main
id: docker-setup
with:
GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
GCP_SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}
EXPORT_GCP_PROJECT_VARIABLES: "false"
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DOCKER_ARTIFACT_REPO: ${{ secrets.AWS_DOCKER_ARTIFACT_REPO }}
GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}
- name: Get Docker Image Tag
uses: ./.github/actions/determine-or-use-target-branch-and-get-last-released-image
id: get-docker-image-tag
with:
base-branch: ${{ github.base_ref }}
variants: "failpoints performance"
- name: Add Image Tag to Step Summary
run: |
echo "## Image Tag for compat tests" >> $GITHUB_STEP_SUMMARY
echo "IMAGE_TAG: ${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }}" >> $GITHUB_STEP_SUMMARY
echo "TARGET_BRANCH: ${{ steps.get-docker-image-tag.outputs.TARGET_BRANCH }}" >> $GITHUB_STEP_SUMMARY
outputs:
IMAGE_TAG: ${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }}
# Run e2e compat test against testnet branch. This is a PR required job.
forge-compat-test:
needs:
- permission-check
- fetch-last-released-docker-image-tag
- determine-docker-build-metadata
- rust-images
- rust-images-failpoints
- rust-images-performance
- rust-images-consensus-only-perf-test
- file_change_determinator
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' && (
(github.event_name == 'push' && github.ref_name != 'main') ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-e2e-tests') ||
github.event.pull_request.auto_merge != null ||
contains(github.event.pull_request.body, '#e2e')
)
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: compat
IMAGE_TAG: ${{ needs.fetch-last-released-docker-image-tag.outputs.IMAGE_TAG }}
FORGE_RUNNER_DURATION_SECS: 300
COMMENT_HEADER: forge-compat
FORGE_NAMESPACE: forge-compat-${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
SKIP_JOB: ${{ needs.file_change_determinator.outputs.only_docs_changed == 'true' }}
SEND_RESULTS_TO_TRUNK: true
# Run forge framework upgradability test. This is a PR required job.
forge-framework-upgrade-test:
needs:
- permission-check
- fetch-last-released-docker-image-tag
- determine-docker-build-metadata
- rust-images
- rust-images-failpoints
- rust-images-performance
- rust-images-consensus-only-perf-test
- test-target-determinator
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' && (
(github.event_name == 'push' && github.ref_name != 'main') ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'CICD:run-framework-upgrade-test') ||
github.event.pull_request.auto_merge != null
)
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: framework_upgrade
IMAGE_TAG: ${{ needs.fetch-last-released-docker-image-tag.outputs.IMAGE_TAG }}
FORGE_RUNNER_DURATION_SECS: 3600
COMMENT_HEADER: forge-framework-upgrade
FORGE_NAMESPACE: forge-framework-upgrade-${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
SKIP_JOB: ${{ !contains(github.event.pull_request.labels.*.name, 'CICD:run-framework-upgrade-test') && (needs.test-target-determinator.outputs.run_framework_upgrade_test == 'false') }}
SEND_RESULTS_TO_TRUNK: true
forge-consensus-only-perf-test:
needs:
- permission-check
- determine-docker-build-metadata
- rust-images
- rust-images-failpoints
- rust-images-performance
- rust-images-consensus-only-perf-test
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' &&
contains(github.event.pull_request.labels.*.name, 'CICD:run-consensus-only-perf-test')
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: consensus_only_realistic_env_max_tps
IMAGE_TAG: consensus_only_perf_test_${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_RUNNER_DURATION_SECS: 300
COMMENT_HEADER: consensus-only-realistic-env-max-tps
FORGE_NAMESPACE: forge-consensus-only-realistic-env-max-tps-${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
# Run forge multiregion test. This test uses the multiregion forge cluster that deploys pods in three GCP regions.
forge-multiregion-test:
needs:
- permission-check
- determine-docker-build-metadata
- rust-images
- rust-images-failpoints
- rust-images-performance
- rust-images-consensus-only-perf-test
if: |
!failure() && !cancelled() && needs.permission-check.result == 'success' &&
contains(github.event.pull_request.labels.*.name, 'CICD:run-multiregion-test')
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: multiregion_benchmark_test
IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_RUNNER_DURATION_SECS: 300
COMMENT_HEADER: forge-multiregion-test
FORGE_NAMESPACE: forge-multiregion-test-${{ needs.determine-docker-build-metadata.outputs.targetCacheId }}
FORGE_CLUSTER_NAME: forge-multiregion