Skip to content

Commit

Permalink
Refactor pytest workflow to allow for optional tests.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 665287930
  • Loading branch information
fineguy authored and The TensorFlow Datasets Authors committed Aug 20, 2024
1 parent 62c9456 commit 17b828b
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 123 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/pytest-optional.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Optional test matrix: re-runs the shared pytest template on additional
# TensorFlow package / runner OS combinations.
name: 'Unittests [Optional]'

on:
  # Triggered whenever a run of the `Unittests` workflow is requested.
  workflow_run:
    workflows: [Unittests]
    types: [requested]

jobs:
  call-pytest:
    strategy:
      matrix:
        tf-version: [tensorflow, tf-nightly]
        os-version: [ubuntu-latest, macos-latest]
        exclude:
          # One entry carrying BOTH keys removes only the
          # tensorflow/ubuntu-latest pair, which pytest.yml already runs through
          # the same template. Two separate one-key entries would instead remove
          # every `tensorflow` combination and every `ubuntu-latest`
          # combination, leaving tf-nightly/macos-latest as the only job.
          - tf-version: tensorflow
            os-version: ubuntu-latest
    uses: ./.github/workflows/pytest-template.yml
    with:
      tf-version: ${{ matrix.tf-version }}
      os-version: ${{ matrix.os-version }}
139 changes: 139 additions & 0 deletions .github/workflows/pytest-template.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Reusable workflow (`workflow_call`) that holds the pytest jobs. Callers choose
# the TensorFlow package and the runner OS through the two required inputs.
name: Unittests Template

on:
  workflow_call:
    inputs:
      # Passed on to the local `./.github/actions/setup` action by the core
      # test job.
      tf-version:
        type: string
        required: true
      # Runner label used by the core test job (`runs-on`).
      os-version:
        type: string
        required: true
    outputs:
      # Re-exports the `status` output of the `activate-tests` job to callers.
      activate-tests:
        description: 'Flag showing whether the tests should be run'
        value: ${{ jobs.activate-tests.outputs.status }}

env:
  # Controls tests sharding enabled by `pytest-shard`
  PYTEST_NUM_SHARDS: 4

jobs:
activate-tests:
  name: Check if tests should be run
  runs-on: ubuntu-latest

  # Publishes the step result so dependent jobs can read
  # `needs.activate-tests.outputs.status` (the string 'true' or 'false').
  outputs:
    status: ${{ steps.check.outputs.status }}

  steps:
    - name: Check
      id: check
      # For merged PR, activate testing only on the master branch, based on:
      # https://github.community/t/trigger-workflow-only-on-pull-request-merge/17359
      env:
        # Folded scalar: the lines below are joined into one expression.
        SHOULD_RUN: >-
          ${{
          github.ref == 'refs/heads/master'
          || (
          github.event.action != 'closed'
          && github.event.pull_request.merged == false
          )
          }}
      run: |
        echo "status=${SHOULD_RUN}" >> $GITHUB_OUTPUT

shards-job:
  name: Generate shards
  needs: activate-tests
  # Job outputs are always strings, and any non-empty string (including
  # 'false') is truthy in an `if:` expression. Compare against 'true' explicitly
  # so the job is really skipped when the check step reports false.
  if: needs.activate-tests.outputs.status == 'true'
  runs-on: ubuntu-latest

  steps:
    - name: Create variables
      id: create-vars
      # Both values are written as compact JSON arrays so the test job can feed
      # them to `fromJson` in its matrix:
      #   num-shards -> [N]
      #   shard-ids  -> [1, 2, ..., N]   (1-based)
      # where N is PYTEST_NUM_SHARDS.
      run: |
        echo "num-shards=$(jq -n -c '[${{ env.PYTEST_NUM_SHARDS }}]')" >> $GITHUB_OUTPUT
        echo "shard-ids=$(jq -n -c '[range(1;${{ env.PYTEST_NUM_SHARDS }}+1)]')" >> $GITHUB_OUTPUT
  outputs:
    num-shards: ${{ steps.create-vars.outputs.num-shards }}
    shard-ids: ${{ steps.create-vars.outputs.shard-ids }}

# Core test job: one run per (python-version, shard-id) on the requested
# TensorFlow package and runner OS. Skipped when `shards-job` is skipped.
pytest-job:
  needs: shards-job

  name: '[${{ matrix.os-version }}][${{ matrix.tf-version }}][Python ${{ matrix.python-version }}][${{ matrix.shard-id }}/${{ matrix.num-shards }}] Core TFDS tests'
  runs-on: ${{ matrix.os-version }}
  timeout-minutes: 30
  strategy:
    # Do not cancel in-progress jobs if any matrix job fails.
    fail-fast: false
    matrix:
      # The expressions are quoted because `{` and `}` are flow indicators:
      # an unquoted `${{ ... }}` inside `[...]` is not a valid plain scalar
      # and strict YAML parsers reject it.
      tf-version: ['${{ inputs.tf-version }}']
      # Can't reference env variables in matrix
      num-shards: ${{ fromJson(needs.shards-job.outputs.num-shards) }}
      shard-id: ${{ fromJson(needs.shards-job.outputs.shard-ids) }}
      # TF supported versions: https://www.tensorflow.org/install/pip#software_requirements
      python-version: ['3.10', '3.11', '3.12']
      os-version: ['${{ inputs.os-version }}']

  steps:
    - uses: actions/checkout@v3
    - uses: ./.github/actions/setup
      with:
        tf-version: ${{ matrix.tf-version }}
        python-version: ${{ matrix.python-version }}

    # Run tests
    # Ignores:
    # * Nsynth is run in isolation due to dependency conflict (crepe).
    # * Lsun tests are disabled because the tensorflow_io used in open-source
    #   is linked to static libraries compiled against a specific TF version,
    #   which makes the tests fail with a linking error
    #   (libtensorflow_io_golang.so).
    # * imagenet2012_corrupted requires imagemagick binary.
    # * import_without_tf_test.py, because the test relies on TensorFlow not being imported.
    # * github_api is run separately to not overuse API quota.
    # * wmt is run separately to avoid worker hanging.
    # * Huggingface requires `datasets` library.
    # NOTE(review): build_api_docs_test.py is also ignored below, but no reason
    # is recorded in this list.
    - name: Run core tests
      # `matrix.shard-id` is 1-based; `--shard-id` is given the value minus one.
      run: |
        pytest --durations=100 -vv -n auto --shard-id=$((${{ matrix.shard-id }} - 1)) --num-shards=${{ env.PYTEST_NUM_SHARDS }} \
          --ignore="tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py" \
          --ignore="tensorflow_datasets/image/lsun_test.py" \
          --ignore="tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py" \
          --ignore="tensorflow_datasets/scripts/documentation/build_api_docs_test.py" \
          --ignore="tensorflow_datasets/import_without_tf_test.py" \
          --ignore="tensorflow_datasets/core/github_api/github_path_test.py" \
          --ignore="tensorflow_datasets/translate/wmt19_test.py" \
          --ignore="tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py" \
          --ignore="tensorflow_datasets/core/utils/huggingface_utils_test.py"

    # Run tests without any pytest plugins. The tests should be triggered for a single shard only.
    - name: Run leftover tests
      if: ${{ matrix.shard-id == 1 }}
      uses: nick-fields/retry@v2
      with:
        timeout_minutes: 1
        max_attempts: 2
        retry_on: timeout
        command: |
          pytest -vv -o faulthandler_timeout=10 tensorflow_datasets/translate/wmt19_test.py
huggingface-pytest-job:
  # HuggingFace tests need to be run separately because they're disabled without installed
  # `datasets` library.
  name: 'HuggingFace Python 3.10 tests'
  needs: activate-tests
  # Job outputs are strings and the non-empty string 'false' is truthy in an
  # `if:` expression, so compare against 'true' explicitly.
  if: needs.activate-tests.outputs.status == 'true'
  # This job is pinned to ubuntu-latest / `tensorflow` / Python 3.10 and does
  # not read the workflow's `tf-version` or `os-version` inputs.
  runs-on: ubuntu-latest
  timeout-minutes: 30

  steps:
    - uses: actions/checkout@v3
    - uses: ./.github/actions/setup
      with:
        tf-version: tensorflow
        python-version: '3.10'
        extras: huggingface

    - name: Run HuggingFace tests
      run: |
        pytest -vv -n auto \
          tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py \
          tensorflow_datasets/core/utils/huggingface_utils_test.py
132 changes: 9 additions & 123 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,129 +33,15 @@ env:
PYTEST_NUM_SHARDS: 4 # Controls tests sharding enabled by `pytest-shard`

jobs:
activate-tests:
name: Check if tests should be run
runs-on: ubuntu-latest

steps:
- name: Check
id: check
# For merged PR, activate testing only on the master branch, based on:
# https://github.community/t/trigger-workflow-only-on-pull-request-merge/17359
run: |
echo "status=${{ github.ref == 'refs/heads/master' || (
github.event.action != 'closed'
&& github.event.pull_request.merged == false
) }}" >> $GITHUB_OUTPUT
outputs:
status: ${{ steps.check.outputs.status }}

shards-job:
needs: activate-tests
if: ${{ needs.activate-tests.outputs.status }}

name: Generate shards
runs-on: ubuntu-latest

steps:
- name: Create variables
id: create-vars
run: |
echo "num-shards=$(jq -n -c '[${{ env.PYTEST_NUM_SHARDS }}]')" >> $GITHUB_OUTPUT
echo "shard-ids=$(jq -n -c '[range(1;${{ env.PYTEST_NUM_SHARDS }}+1)]')" >> $GITHUB_OUTPUT
outputs:
num-shards: ${{ steps.create-vars.outputs.num-shards }}
shard-ids: ${{ steps.create-vars.outputs.shard-ids }}

pytest-job:
needs: shards-job

name: '[${{ matrix.os-version }}][${{ matrix.tf-version }}][Python ${{ matrix.python-version }}][${{ matrix.shard-id }}/${{ matrix.num-shards }}] Core TFDS tests'
runs-on: ${{ matrix.os-version }}
timeout-minutes: 30
strategy:
# Do not cancel in-progress jobs if any matrix job fails.
fail-fast: false
matrix:
tf-version: ['tensorflow']
# Can't reference env variables in matrix
num-shards: ${{ fromJson(needs.shards-job.outputs.num-shards) }}
shard-id: ${{ fromJson(needs.shards-job.outputs.shard-ids) }}
# TF suppported versions: https://www.tensorflow.org/install/pip#software_requirements
python-version: ['3.10', '3.11', '3.12']
os-version: [ubuntu-latest]

steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/setup
with:
tf-version: ${{ matrix.tf-version }}
python-version: ${{ matrix.python-version }}

# Run tests
# Ignores:
# * Nsynth is run in isolation due to dependency conflict (crepe).
# * Lsun tests is disabled because the tensorflow_io used in open-source
# is linked to static libraries compiled again specific TF version, which
# makes test fails with linking error (libtensorflow_io_golang.so).
# * imagenet2012_corrupted requires imagemagick binary.
# * import_without_tf_test.py, because the test relies on TensorFlow not being imported.
# * github_api is run separately to not overuse API quota.
# * wmt is run separately to avoid worker hanging.
# * Huggingface requires `datasets` library.
- name: Run core tests
run: |
pytest --durations=100 -vv -n auto --shard-id=$((${{ matrix.shard-id }} - 1)) --num-shards=${{ env.PYTEST_NUM_SHARDS }} \
--ignore="tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py" \
--ignore="tensorflow_datasets/image/lsun_test.py" \
--ignore="tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py" \
--ignore="tensorflow_datasets/scripts/documentation/build_api_docs_test.py" \
--ignore="tensorflow_datasets/import_without_tf_test.py" \
--ignore="tensorflow_datasets/core/github_api/github_path_test.py" \
--ignore="tensorflow_datasets/translate/wmt19_test.py" \
--ignore="tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py" \
--ignore="tensorflow_datasets/core/utils/huggingface_utils_test.py"
# Run tests without any pytest plugins. The tests should be triggered for a single shard only.
- name: Run leftover tests
if: ${{ matrix.shard-id == 1 }}
uses: nick-fields/retry@v2
with:
timeout_minutes: 1
max_attempts: 2
retry_on: timeout
command: |
pytest -vv -o faulthandler_timeout=10 tensorflow_datasets/translate/wmt19_test.py
huggingface-pytest-job:
needs: activate-tests
if: ${{ needs.activate-tests.outputs.status }}

# HuggingFace tests need to be run separately because they're disabled without installed
# `datasets` library.
name: 'HuggingFace Python 3.10 tests'
runs-on: ubuntu-latest
timeout-minutes: 30

steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/setup
with:
tf-version: tensorflow
python-version: '3.10'
extras: huggingface

- name: Run HuggingFace tests
run: |
pytest -vv -n auto \
tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py \
tensorflow_datasets/core/utils/huggingface_utils_test.py
# Runs the shared pytest template with the `tensorflow` package on
# `ubuntu-latest`.
call-pytest:
  uses: ./.github/workflows/pytest-template.yml
  with:
    os-version: ubuntu-latest
    tf-version: tensorflow

githubapi-pytest-job:
needs: activate-tests
if: ${{ needs.activate-tests.outputs.status }}
needs: call-pytest
if: ${{ needs.call-pytest.outputs.activate-tests }}

name: 'Github API tests'
runs-on: ubuntu-latest
Expand All @@ -173,8 +59,8 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

notebook-test-job:
needs: activate-tests
if: ${{ needs.activate-tests.outputs.status }}
needs: call-pytest
if: ${{ needs.call-pytest.outputs.activate-tests }}

name: 'Notebook tests'
runs-on: ubuntu-latest
Expand Down

0 comments on commit 17b828b

Please sign in to comment.