Skip to content

Commit

Permalink
Some minor CI cleanup (Lightning-AI#10088)
Browse files Browse the repository at this point in the history
  • Loading branch information
carmocca authored and ninginthecloud committed Oct 27, 2021
1 parent bcad89d commit baa43f6
Show file tree
Hide file tree
Showing 23 changed files with 64 additions and 151 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_dockers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
fail-fast: false
matrix:
python_version: ["3.7"]
xla_version: ["1.6", "1.8", "nightly"]
xla_version: ["1.6", "1.8", "1.10"]
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
27 changes: 8 additions & 19 deletions .github/workflows/ci_test-base.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# this job runs `pytest` over the source directory. It does not install any extra dependencies.
# this is useful to catch errors where an import has been added which is not part of the basic dependencies.
name: CI basic testing

# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
Expand All @@ -13,10 +15,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
# max-parallel: 6
matrix:
os: [ubuntu-20.04, windows-2019, macOS-10.15]
python-version: [3.8]
os: [ubuntu-20.04]
# this will install stable torch
python-version: [3.9]

# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 20
Expand All @@ -27,12 +29,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libomp # https://github.com/pytorch/pytorch/issues/20030
- name: Weekly reset caching
run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
id: times
Expand All @@ -54,21 +50,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade --user pip
pip install --requirement ./requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip install "pytest>6.0" "pytest-cov>2.10" --upgrade-strategy only-if-needed
python --version
python -m pip install --upgrade --user pip
pip --version
pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip install --requirement requirements/test.txt
pip list
shell: bash

- name: Cache datasets
uses: actions/cache@v2
with:
path: Datasets # This path is specific to Ubuntu
# Look to see if there is a cache hit for the corresponding requirements file
key: PL-dataset

- name: Test Package [only]
run: |
# NOTE: run coverage on tests does not propagate failure status for Win, https://github.com/nedbat/coveragepy/issues/1003
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_test-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: PyTorch & Conda
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on: # Trigger the workflow on push or pull request, but only for the master branch
push:
branches: [ master, "release/*" ]
branches: [master, "release/*"]
pull_request:
branches: [master, "release/*"]

Expand Down
50 changes: 18 additions & 32 deletions .github/workflows/ci_test-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,36 +45,24 @@ jobs:
run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
id: times

- name: Update pip
- name: Upgrade pip
run: |
python --version
# needed for `pip cache` command
pip install --quiet "pip==20.2" --upgrade --user
pip install --upgrade pip --user
pip --version
# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libomp # https://github.com/pytorch/pytorch/issues/20030
brew install openmpi libuv # Horovod on macOS requires OpenMPI, Gloo not currently supported
- name: Setup Windows
if: runner.os == 'windows'
run: |
python .github/prune-packages.py requirements/extra.txt "horovod"
# todo: re-enable when allow testing py 3.9 with min config, atm some Hydra issues
#- name: Adjust minimal for Python 3.9
# if: matrix.requires == 'minimal' && matrix.python-version == 3.9
# run: |
# import re
# def _req(fname, ptn, ver):
# req = re.sub(ptn, ver, open(fname).read())
# open(fname, 'w').write(req)
#
# _req('requirements.txt', r'torch>=[\d\.]+', 'torch>=1.8.0')
# _req('requirements/extra.txt', r'onnxruntime>=[\d\.]+', 'onnxruntime>=1.7.0')
# shell: python

- name: Set min. dependencies
if: matrix.requires == 'minimal'
run: |
Expand Down Expand Up @@ -104,37 +92,35 @@ jobs:
unzip -o checkpoints.zip
ls -l checkpoints/
# todo: re-enable testing with Horovod
- name: py3.9 - temp skip Horovod
if: matrix.python-version == 3.9
- name: Install dependencies
run: |
# pip uninstall -y horovod
python .github/prune-packages.py requirements/extra.txt "horovod"
flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1)
pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade $flag
# adjust versions according to the installed Torch version
python ./requirements/adjust_versions.py requirements/examples.txt
pip install --requirement requirements/examples.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip install --requirement requirements/test.txt --upgrade
pip list
shell: bash

- name: Install dependencies
- name: Install extra dependencies
env:
# MAKEFLAGS: "-j2"
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
HOROVOD_WITHOUT_TENSORFLOW: 1
run: |
python --version
pip --version
# python -m pip install --upgrade --user pip
flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1)
pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade $flag
# adjust versions according to the installed Torch version
python ./requirements/adjust_versions.py requirements/extra.txt
python ./requirements/adjust_versions.py requirements/examples.txt
pip install --requirement ./requirements/devel.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip install --requirement ./requirements/extra.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip list
shell: bash

- name: Reinstall Horovod if necessary
# todo: re-enable horovod on py3.9 when it will be supported
if: runner.os != 'windows' && matrix.python-version != 3.9
if: runner.os != 'windows'
env:
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
HOROVOD_WITHOUT_TENSORFLOW: 1
run: |
HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@master
- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9
- name: Install mypy
run: |
grep mypy requirements/test.txt | xargs -0 pip install
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/docs-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
# lfs: true
- uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.9

- name: Weekly reset caching
run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
Expand Down Expand Up @@ -66,7 +65,7 @@ jobs:
# lfs: true
- uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.9

# Note: This uses an internal pip API and may not always work
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
Expand All @@ -83,7 +82,6 @@ jobs:
sudo apt-get update
sudo apt-get install -y cmake pandoc
pip --version
# pip install --requirement requirements.txt --upgrade-strategy only-if-needed --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet
pip install --requirement requirements/docs.txt
# install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux
sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/events-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9

- name: Install dependencies
run: >-
Expand Down Expand Up @@ -54,7 +54,7 @@ jobs:
fail-fast: false
matrix:
python_version: ["3.7"]
xla_version: ["1.6", "1.7", "1.8", "1.9"] # todo: , "nightly"
xla_version: ["1.6", "1.7", "1.8", "1.9"]

steps:
- name: Checkout
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/legacy-checkpoints.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:

- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9

- name: Install dependencies
run: |
Expand Down
24 changes: 1 addition & 23 deletions .github/workflows/release-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:

- name: Publish Latest to Docker
uses: docker/[email protected]
# only on releases AND latest PT and py
# only on releases and latest Python and PyTorch
if: matrix.python_version == 3.9 && matrix.pytorch_version == 1.9
with:
repository: pytorchlightning/pytorch_lightning
Expand All @@ -48,25 +48,3 @@ jobs:
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }}
tags: "latest"
timeout-minutes: 55

# nvidia-PL:
# runs-on: ubuntu-20.04
# if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'release'
# steps:
# - name: Checkout
# uses: actions/checkout@v2
#
# - name: Get release version
# id: get_version
# run: echo "::set-output name=RELEASE_VERSION::$(echo ${GITHUB_REF##*/})"
#
# - name: Publish Releases to Docker
# uses: docker/[email protected]
# with:
# repository: nvcr.io/pytorchlightning/pytorch_lightning
# username: ${{ secrets.DOCKER_USERNAME }}
# password: ${{ secrets.DOCKER_PASSWORD }}
# dockerfile: dockers/nvidia/Dockerfile
# build_args: LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }}
# tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-nvidia"
# timeout-minutes: 55
4 changes: 2 additions & 2 deletions .github/workflows/release-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9

- name: Install dependencies
run: >-
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:

- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9

- name: Weekly reset caching
run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
Expand Down
9 changes: 1 addition & 8 deletions dockers/base-conda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Existing images:
# --build-arg PYTHON_VERSION=3.7 --build-arg PYTORCH_VERSION=1.8
# --build-arg PYTHON_VERSION=3.7 --build-arg PYTORCH_VERSION=1.6

ARG CUDA_VERSION=11.1

#FROM ubuntu:20.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04

ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.6
ARG CONDA_VERSION=4.9.2

SHELL ["/bin/bash", "-c"]
Expand Down Expand Up @@ -70,14 +66,11 @@ ENV \

COPY environment.yml environment.yml

ARG PYTORCH_VERSION=1.6

# conda init
RUN conda update -n base -c defaults conda && \
conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \
conda init bash && \
# NOTE: this requires that the channel is presented in the yaml before packages
# replace channel to nightly if needed, fix PT version and remove Horovod as it will be installed later
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- python[>=]+[\d\.]+', '# - python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
cat environment.yml && \
Expand Down
14 changes: 1 addition & 13 deletions dockers/base-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Existing images:
# --build-arg PYTHON_VERSION=3.7 --build-arg PYTORCH_VERSION=1.7 --build-arg CUDA_VERSION=10.2
# --build-arg PYTHON_VERSION=3.7 --build-arg PYTORCH_VERSION=1.6 --build-arg CUDA_VERSION=10.2

ARG CUDA_VERSION=10.2

# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04
# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04

ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.6

SHELL ["/bin/bash", "-c"]
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
Expand Down Expand Up @@ -80,8 +75,6 @@ ENV \
COPY ./requirements.txt requirements.txt
COPY ./requirements/ ./requirements/

ARG PYTORCH_VERSION=1.6

# conda init
RUN \
wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \
Expand All @@ -90,15 +83,11 @@ RUN \

# Disable cache
pip config set global.cache-dir false && \
# eventually use pre-release
#pip install "torch==${PYTORCH_VERSION}.*" --pre && \
# set particular PyTorch version
python ./requirements/adjust_versions.py requirements.txt ${PYTORCH_VERSION} && \
python ./requirements/adjust_versions.py requirements/extra.txt ${PYTORCH_VERSION} && \
python ./requirements/adjust_versions.py requirements/examples.txt ${PYTORCH_VERSION} && \
# Install all requirements
# todo: find a way to install nightly PT version
# --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${cuda_ver[0]}${cuda_ver[1]}/torch_nightly.html
pip install -r requirements/devel.txt --no-cache-dir && \
rm -rf requirements.* requirements/

Expand All @@ -112,7 +101,6 @@ RUN \
python -c 'from nvidia.dali.pipeline import Pipeline' ; \
fi


RUN \
# install NVIDIA apex
pip install --no-cache-dir --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \
Expand Down
Loading

0 comments on commit baa43f6

Please sign in to comment.