Skip to content

Fix backlog reported by periodicsequence in python and go sdks to not include future outputs #29516

Fix backlog reported by periodicsequence in python and go sdks to not include future outputs

Fix backlog reported by periodicsequence in python and go sdks to not include future outputs #29516

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# To learn more about GitHub Actions in Apache Beam check the CI.md
name: Build python source distribution and wheels
on:
schedule:
- cron: '10 2 * * *'
push:
branches: ['master', 'release-*']
tags: 'v*'
pull_request:
branches: ['master', 'release-*']
tags: 'v*'
paths: ['sdks/python/**', 'model/**', 'release/**']
workflow_dispatch:
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login}}'
cancel-in-progress: true
env:
GCP_PATH: "gs://${{ secrets.GCP_PYTHON_WHEELS_BUCKET }}/${GITHUB_REF##*/}/${GITHUB_SHA}-${GITHUB_RUN_ID}/"
jobs:
check_env_variables:
timeout-minutes: 5
name: "Check environment variables"
runs-on: [self-hosted, ubuntu-20.04, main]
env:
EVENT_NAME: ${{ github.event_name }}
# Keep in sync with py_version matrix value below - if changed, change that as well.
PY_VERSIONS_FULL: "cp38-* cp39-* cp310-* cp311-* cp312-*"
outputs:
gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }}
py-versions-full: ${{ steps.set-py-versions.outputs.py-versions-full }}
py-versions-test: ${{ steps.set-py-versions.outputs.py-versions-test }}
steps:
- uses: actions/checkout@v4
- name: "Check are GCP variables set"
run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
id: check_gcp_variables
env:
GCP_SA_EMAIL: "not used by self hosted runner"
GCP_SA_KEY: "not used by self hosted runner"
GCP_PYTHON_WHEELS_BUCKET: ${{ secrets.GCP_PYTHON_WHEELS_BUCKET }}
GCP_PROJECT_ID: "not-needed-here"
GCP_REGION: "not-needed-here"
GCP_TESTING_BUCKET: "not-needed-here"
- name: Set Python Versions for different environments
id: set-py-versions
run: |
set -xeu
if [ $EVENT_NAME == "pull_request" ]; then
# run highest supported version on pull request.
echo "py-versions-test=${PY_VERSIONS_FULL##* }" >> $GITHUB_OUTPUT
else
# run full version for push and cron jobs.
echo "py-versions-test=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT
fi
# Output full set of versions so that we can test all languages on pull requests for certain platforms.
echo "py-versions-full=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT
build_source:
runs-on: [self-hosted, ubuntu-20.04, main]
name: Build python source distribution
outputs:
is_rc: ${{ steps.is_rc.outputs.is_rc }}
rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install python
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Get tag
id: get_tag
run: |
echo "TAG=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT
- name: Check whether an -RC tag was applied to the commit.
id: is_rc
run: |
echo ${{ steps.get_tag.outputs.TAG }} > temp
OUTPUT=$( if grep -e '-RC.' -q temp; then echo 1; else echo 0; fi)
echo "is_rc=$OUTPUT" >> $GITHUB_OUTPUT
- name: Get RELEASE_VERSION and RC_NUM
if: steps.is_rc.outputs.is_rc == 1
id: get_rc_version
run: |
RC_NUM=$(sed -n "s/^.*-RC\([0-9]*\)/\1/p" temp)
RELEASE_VERSION=$(sed -n "s/^v\(.*\)-RC[0-9]/\1/p" temp)
echo "RC_NUM=$RC_NUM" >> $GITHUB_OUTPUT
echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT
- name: Build source
working-directory: ./sdks/python
run: pip install -U build && python -m build --sdist
- name: Add checksums
working-directory: ./sdks/python/dist
run: |
file=$(ls | grep .tar.gz | head -n 1)
sha512sum $file > ${file}.sha512
- name: Unzip source
working-directory: ./sdks/python
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
- name: Rename source directory
working-directory: ./sdks/python
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names.
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source
- name: Upload source as artifact
uses: actions/upload-artifact@v4
with:
name: source
path: sdks/python/apache-beam-source
- name: Upload compressed sources as artifacts
uses: actions/upload-artifact@v4
with:
name: source_zip
path: sdks/python/dist
- name: Clear dist
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: |
rm -r ./dist
rm -rd apache-beam-source
- name: Rewrite SDK version to include RC number
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: |
RELEASE_VERSION=${{ steps.get_rc_version.outputs.RELEASE_VERSION }}
RC_NUM=${{ steps.get_rc_version.outputs.RC_NUM }}
sed -i -e "s/${RELEASE_VERSION}/${RELEASE_VERSION}rc${RC_NUM}/g" apache_beam/version.py
- name: Build RC source
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: pip install -U build && python -m build --sdist
- name: Add RC checksums
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python/dist
run: |
file=$(ls | grep .tar.gz | head -n 1)
sha512sum $file > ${file}.sha512
- name: Unzip RC source
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
- name: Rename RC source directory
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names.
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source-rc
- name: Upload RC source as artifact
if: steps.is_rc.outputs.is_rc == 1
uses: actions/upload-artifact@v4
with:
name: source_rc${{ steps.get_rc_version.outputs.RC_NUM }}
path: sdks/python/apache-beam-source-rc
- name: Upload compressed RC sources as artifacts
if: steps.is_rc.outputs.is_rc == 1
uses: actions/upload-artifact@v4
with:
name: source_zip_rc${{ steps.get_rc_version.outputs.RC_NUM }}
path: sdks/python/dist
prepare_gcs:
name: Prepare GCS
needs:
- build_source
- check_env_variables
runs-on: [self-hosted, ubuntu-20.04, main]
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
steps:
- name: Remove existing files on GCS bucket
run: gsutil rm -r ${{ env.GCP_PATH }} || true
upload_source_to_gcs:
name: Upload python source distribution to GCS bucket
needs:
- prepare_gcs
- check_env_variables
runs-on: [self-hosted, ubuntu-20.04, main]
if: needs.check_env_variables.outputs.gcp-variables-set == 'true'
steps:
- name: Download compressed sources from artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/[email protected]
with:
name: source_zip
path: source/
- name: Copy sources to GCS bucket
run: gsutil cp -r -a public-read source/* ${{ env.GCP_PATH }}
build_wheels:
name: Build python wheels on ${{matrix.arch}} for ${{ matrix.os_python.os }}
needs:
- check_env_variables
- build_source
env:
CIBW_ARCHS_LINUX: ${{matrix.os_python.arch}}
runs-on: ${{ matrix.os_python.runner }}
timeout-minutes: 480
strategy:
matrix:
os_python: [
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-full }}", arch: "auto" },
# Temporarily pin to macos-13 because macos-latest breaks this build
# TODO(https://github.com/apache/beam/issues/31114)
{"os": "macos-13", "runner": "macos-13", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" },
{"os": "windows-latest", "runner": "windows-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" },
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "aarch64" }
]
# Keep in sync with PY_VERSIONS_FULL env var abvove - if changed, change that as well.
py_version: ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*"]
steps:
- name: Download python source distribution from artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/[email protected]
with:
name: source
path: apache-beam-source
- name: Download Python SDK RC source distribution from artifacts
if: ${{ needs.build_source.outputs.is_rc == 1 }}
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/[email protected]
with:
name: source_rc${{ needs.build_source.outputs.rc_num }}
path: apache-beam-source-rc
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: 3.9
- uses: docker/setup-qemu-action@v1
if: ${{matrix.os_python.arch == 'aarch64'}}
name: Set up QEMU
- name: Install cibuildwheel
# note: sync cibuildwheel version with gradle task sdks:python:bdistPy* steps
run: pip install cibuildwheel==2.17.0 setuptools
- name: Build wheel
# Only build wheel if it is one of the target versions for this platform, otherwise no-op
if: ${{ contains(matrix.os_python.python, matrix.py_version) }}
working-directory: apache-beam-source
env:
CIBW_BUILD: ${{ matrix.py_version }}
# TODO: https://github.com/apache/beam/issues/23048
CIBW_SKIP: "*-musllinux_*"
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
shell: bash
- name: install sha512sum on MacOS
if: startsWith(matrix.os_python.os, 'macos')
run: brew install coreutils
- name: Add checksums
if: ${{ contains(matrix.os_python.python, matrix.py_version) }}
working-directory: apache-beam-source/wheelhouse/
run: |
for file in *.whl; do
sha512sum $file > ${file}.sha512
done
shell: bash
- name: Upload wheels as artifacts
if: ${{ contains(matrix.os_python.python, matrix.py_version) }}
# Pinned to v3 because of https://github.com/actions/upload-artifact?tab=readme-ov-file#breaking-changes
uses: actions/upload-artifact@v3
with:
name: wheelhouse-${{ matrix.os_python.os }}${{ (matrix.os_python.arch == 'aarch64' && '-aarch64') || '' }}
path: apache-beam-source/wheelhouse/
- name: Build RC wheels
# Only build wheel if it is one of the target versions for this platform, otherwise no-op
if: ${{ needs.build_source.outputs.is_rc == 1 && contains(matrix.os_python.python, matrix.py_version) }}
working-directory: apache-beam-source-rc
env:
CIBW_BUILD: ${{ matrix.py_version }}
# TODO: https://github.com/apache/beam/issues/23048
CIBW_SKIP: "*-musllinux_*"
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
shell: bash
- name: Add RC checksums
if: ${{ needs.build_source.outputs.is_rc == 1 }}
working-directory: apache-beam-source-rc/wheelhouse/
run: |
for file in *.whl; do
sha512sum $file > ${file}.sha512
done
shell: bash
- name: Upload RC wheels as artifacts
if: ${{ needs.build_source.outputs.is_rc == 1 }}
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v4
with:
name: wheelhouse-rc${{ needs.build_source.outputs.rc_num }}-${{ matrix.os_python.os }}${{ (matrix.arch == 'aarch64' && '-aarch64') || '' }}
path: apache-beam-source-rc/wheelhouse/
upload_wheels_to_gcs:
name: Upload wheels to GCS bucket
needs:
- build_wheels
- check_env_variables
runs-on: [self-hosted, ubuntu-20.04, main]
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
strategy:
matrix:
# Temporarily pin to macos-13 because macos-latest breaks this build
# TODO(https://github.com/apache/beam/issues/31114)
os : [ubuntu-20.04, macos-13, windows-latest]
arch: [auto]
include:
- os: ubuntu-20.04
arch: aarch64
steps:
- name: Download wheels from artifacts
# Pinned to v3 because of https://github.com/actions/upload-artifact?tab=readme-ov-file#breaking-changes
uses: actions/download-artifact@v3
with:
name: wheelhouse-${{ matrix.os }}${{ (matrix.arch == 'aarch64' && '-aarch64') || '' }}
path: wheelhouse/
- name: Copy wheels to GCS bucket
run: gsutil cp -r -a public-read wheelhouse/* ${{ env.GCP_PATH }}
- name: Create github action information file on GCS bucket
run: |
cat > github_action_info <<EOF
GITHUB_WORKFLOW=$GITHUB_WORKFLOW
GITHUB_RUN_ID=$GITHUB_RUN_ID
GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
GITHUB_ACTION=$GITHUB_ACTION
GITHUB_ACTOR=$GITHUB_ACTOR
GITHUB_REPOSITORY=$GITHUB_REPOSITORY
GITHUB_EVENT_NAME=$GITHUB_EVENT_NAME
GITHUB_SHA=$GITHUB_SHA
GITHUB_REF=$GITHUB_REF
# only for forked repositiories
GITHUB_HEAD_REF=$GITHUB_HEAD_REF
GITHUB_BASE_REF=$GITHUB_BASE_REF
EOF
echo $(cat github_action_info)
gsutil cp -a public-read github_action_info ${{ env.GCP_PATH }}
- name: Upload GitHub event file to GCS bucket
run: gsutil cp -a public-read ${GITHUB_EVENT_PATH} ${{ env.GCP_PATH }}
list_files_on_gcs:
name: List files on Google Cloud Storage Bucket
needs:
- upload_wheels_to_gcs
- check_env_variables
runs-on: [self-hosted, ubuntu-20.04, main]
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
steps:
- name: List file on Google Cloud Storage Bucket
run: gsutil ls "${{ env.GCP_PATH }}*"
branch_repo_nightly:
permissions:
contents: write
name: Branch repo nightly
needs:
- build_source
- build_wheels
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 60
if: github.repository_owner == 'apache' && github.event_name == 'schedule'
steps:
- name: Checkout code on master branch
uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Branch commit
run: |
BRANCH_NAME=${GITHUB_REF##*/}
echo "Updating nightly-${BRANCH_NAME}"
git branch -f nightly-${BRANCH_NAME} HEAD
- name: Push branch
uses: ./.github/actions/github-push-action
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
force: true
branch: nightly-${{ github.ref }}