Skip to content

Add the Flyte agent to provision and manage K8s (data) service for deep learning (GNN) use cases #14525

Add the Flyte agent to provision and manage K8s (data) service for deep learning (GNN) use cases

Add the Flyte agent to provision and manage K8s (data) service for deep learning (GNN) use cases #14525

Workflow file for this run

name: Build
# Schedule runs to run twice a day
on:
push:
branches:
- master
- 'release-v**'
pull_request:
schedule:
- cron: "0 13 * * *" # This schedule runs at 1pm UTC every day
env:
FLYTE_SDK_LOGGING_LEVEL: 10 # debug
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
detect-python-versions:
runs-on: ubuntu-latest
outputs:
python-versions: ${{ env.python_versions }}
steps:
- name: Set Python versions for run
run: |
if [[ ${{ github.event_name }} == "schedule" ]]; then
echo "python_versions=[\"3.9\",\"3.10\",\"3.11\",\"3.12\"]" >> $GITHUB_ENV
else
echo "python_versions=[\"3.9\", \"3.12\"]" >> $GITHUB_ENV
fi
build:
needs:
- detect-python-versions
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
steps:
- uses: actions/checkout@v4
- name: "Clear action cache"
uses: ./.github/actions/clear-action-cache
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.in', 'requirements.in')) }}
- name: Install dependencies
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Test with coverage
run: |
make unit_test_codecov
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
files: coverage.xml
build-with-extras:
needs:
- detect-python-versions
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
steps:
- uses: actions/checkout@v4
- name: "Clear action cache"
uses: ./.github/actions/clear-action-cache
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.in', 'requirements.in')) }}
- name: Install dependencies
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Run extras unit tests with coverage
# Skip this step if running on python 3.12 due to https://github.com/tensorflow/tensorflow/issues/62003
# and https://github.com/pytorch/pytorch/issues/110436
if: ${{ matrix.python-version != '3.12' }}
run: |
make unit_test_extras_codecov
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
files: coverage.xml
build-with-pandas:
needs:
- detect-python-versions
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
pandas: ["pandas<2.0.0", "pandas>=2.0.0"]
numpy: ["numpy<2.0.0", "numpy>=2.0.0"]
exclude:
- numpy: "numpy>=2.0.0"
pandas: "pandas<2.0.0"
- numpy: "numpy<2.0.0"
pandas: "pandas>=2.0.0"
- numpy: "numpy>=2.0.0"
python-version: "3.8"
steps:
- uses: actions/checkout@v4
- name: "Clear action cache"
uses: ./.github/actions/clear-action-cache
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.in', 'requirements.in')) }}
- name: Install dependencies
run: |
pip install uv
make setup-global-uv
uv pip install --system --force-reinstall "${{ matrix.pandas }}" "${{ matrix.numpy }}"
uv pip freeze
- name: Test with coverage
run: |
make unit_test_codecov
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
files: coverage.xml
test-serialization:
needs:
- detect-python-versions
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.in', 'requirements.in')) }}
- name: Install dependencies
run: |
pip install uv
make setup-global-uv
uv pip freeze
- name: Test with coverage
run: |
make test_serialization_codecov
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
files: coverage.xml
integration:
needs:
- detect-python-versions
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
makefile-cmd:
[integration_test_codecov, integration_test_lftransfers_codecov]
steps:
# As described in https://github.com/pypa/setuptools_scm/issues/414, SCM needs git history
# and tags to work.
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: "Clear action cache"
uses: ./.github/actions/clear-action-cache # sandbox has disk pressure, so we need to clear the cache to get more disk space.
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.in', 'requirements.in')) }}
- name: Install dependencies
run: |
pip install uv
make setup-global-uv
uv pip freeze
- name: Install FlyteCTL
uses: unionai-oss/flytectl-setup-action@master
- name: Setup Flyte Sandbox
run: |
flytectl demo start
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:master
network=host
- name: Build and push to local registry
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile.dev
build-args: |
PYTHON_VERSION=${{ matrix.python-version }}
PSEUDO_VERSION=1.999.0dev0
push: true
tags: localhost:30000/flytekit:dev
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Integration Test with coverage
env:
FLYTEKIT_IMAGE: localhost:30000/flytekit:dev
FLYTEKIT_CI: 1
PYTEST_OPTS: -n2
AWS_ENDPOINT_URL: 'http://localhost:30002'
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: miniostorage
run: |
make ${{ matrix.makefile-cmd }}
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
build-plugins:
needs:
- build
- detect-python-versions
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}}
plugin-names:
# Please maintain an alphabetical order in the following list
- flytekit-airflow
- flytekit-async-fsspec
- flytekit-aws-athena
- flytekit-aws-batch
- flytekit-aws-sagemaker
- flytekit-bigquery
- flytekit-comet-ml
- flytekit-dask
- flytekit-data-fsspec
- flytekit-dbt
- flytekit-deck-standard
# TODO: remove dolt plugin - https://github.com/flyteorg/flyte/issues/5350
# flytekit-dolt
- flytekit-duckdb
- flytekit-envd
- flytekit-flyteinteractive
- flytekit-greatexpectations
- flytekit-hive
- flytekit-huggingface
- flytekit-identity-aware-proxy
- flytekit-inference
- flytekit-k8s-pod
- flytekit-kf-mpi
- flytekit-kf-pytorch
- flytekit-kf-tensorflow
- flytekit-memray
- flytekit-mlflow
- flytekit-mmcloud
- flytekit-modin
- flytekit-neptune
- flytekit-onnx-pytorch
- flytekit-onnx-scikitlearn
# onnx-tensorflow needs a version of tensorflow that does not work with protobuf>4.
# The issue is being tracked on the tensorflow side in https://github.com/tensorflow/tensorflow/issues/53234#issuecomment-1330111693
# flytekit-onnx-tensorflow
- flytekit-omegaconf
- flytekit-openai
- flytekit-pandera
- flytekit-papermill
- flytekit-polars
- flytekit-ray
- flytekit-snowflake
- flytekit-spark
- flytekit-sqlalchemy
- flytekit-vaex
- flytekit-whylogs
exclude:
- python-version: 3.9
plugin-names: "flytekit-aws-sagemaker"
# flytekit-modin depends on ray which does not have a 3.11 wheel yet.
# Issue tracked in https://github.com/ray-project/ray/issues/27881
- python-version: 3.11
plugin-names: "flytekit-modin"
- python-version: 3.11
plugin-names: "flytekit-ray"
# Great-expectations does not support python 3.11 due to sqlachemy>=2.0.0
# not being supported yet:
# https://github.com/great-expectations/great_expectations/issues/7020
- python-version: 3.11
plugin-names: "flytekit-greatexpectations"
# onnxruntime does not support python 3.10 yet
# https://github.com/microsoft/onnxruntime/issues/9782
- python-version: 3.11
plugin-names: "flytekit-onnx-pytorch"
- python-version: 3.11
plugin-names: "flytekit-onnx-scikitlearn"
- python-version: 3.11
plugin-names: "flytekit-onnx-tensorflow"
# vaex currently doesn't support python 3.11
- python-version: 3.11
plugin-names: "flytekit-vaex"
# whylogs does support python 3.11 dataclass restrictions
# See: https://github.com/flyteorg/flytekit/actions/runs/4493746408/jobs/7905368664
- python-version: 3.11
plugin-names: "flytekit-whylogs"
# apache-beam, one of flytekit-airflow dependencies, does not support python 3.12: https://github.com/apache/beam/issues/29149
- python-version: 3.12
plugin-names: "flytekit-airflow"
# Tensorflow is a dependency of flytekit-mlflow tests and that is not supported yet: https://github.com/tensorflow/tensorflow/issues/62003
- python-version: 3.12
plugin-names: "flytekit-mlflow"
# modin[ray] is a dependency needed to run the tests, unfortunately this is not supported in python 3.12 yet
- python-version: 3.12
plugin-names: "flytekit-modin"
# vaex currently doesn't support python 3.12
- python-version: 3.12
plugin-names: "flytekit-vaex"
# Ray does not support python 3.12 yet: https://github.com/ray-project/ray/issues/40211
- python-version: 3.12
plugin-names: "flytekit-ray"
# Segmentation fault on python 3.12: https://github.com/flyteorg/flyte/issues/5020
- python-version: 3.12
plugin-names: "flytekit-kf-pytorch"
steps:
- uses: actions/checkout@v4
- name: "Clear action cache"
uses: ./.github/actions/clear-action-cache
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/cache@v3
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements files
key: ${{ format('{0}-pip-{1}', runner.os, hashFiles('dev-requirements.txt', format('plugins/{0}/requirements.txt', matrix.plugin-names ))) }}
- name: Install dependencies
run: |
pip install uv
# TODO: double-check if checking out all tags solves the issue
export SETUPTOOLS_SCM_PRETEND_VERSION="3.0.0"
make setup-global-uv
cd plugins/${{ matrix.plugin-names }}
uv pip install --system .
if [ -f dev-requirements.in ]; then uv pip install --system -r dev-requirements.in; fi
# TODO: move to protobuf>=5. Github issue: https://github.com/flyteorg/flyte/issues/5448
uv pip install --system -U $GITHUB_WORKSPACE "protobuf<5" "git+https://github.com/flyteorg/flyte.git@master#subdirectory=flyteidl"
# TODO: remove this when numpy v2 in onnx has been resolved
if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-sqlalchemy" || ${{ matrix.plugin-names }} == "flytekit-pandera" ]]; then
uv pip install --system "numpy<2.0.0"
fi
uv pip freeze
- name: Test with coverage
run: |
cd plugins/${{ matrix.plugin-names }}
# onnx plugins does not support protobuf>4 yet (in fact it is tensorflow that
# does not support that yet). More details in https://github.com/onnx/onnx/issues/4239.
if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-whylogs" || ${{ matrix.plugin-names }} == "flytekit-mlflow" ]]; then
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python coverage run -m pytest tests --cov=./ --cov-report=xml --cov-append
else
coverage run -m pytest tests --cov=./ --cov-report=xml --cov-append
fi
- name: Codecov
uses: codecov/[email protected]
with:
fail_ci_if_error: false
lint:
runs-on: ubuntu-latest
steps:
- name: Fetch the code
uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v4
with:
python-version: 3.12
- uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/dev-requirements.in') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install uv
make setup-global-uv
uv pip freeze
- name: Lint
run: |
make lint