# [CI] Fix nightly build #2452
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Unit-test workflow for Linux runners.
# Defines four jobs that all delegate to the pytorch/test-infra reusable
# workflow `linux_job_v2.yml` and run the same four scripts under
# .github/unittest/linux/scripts/:
#   test-gpu         TORCH_VERSION=nightly, CUDA runner
#   test-cpu         TORCH_VERSION=nightly, CPU runner
#   test-stable-gpu  TORCH_VERSION=stable,  CUDA runner
#   test-stable-cpu  TORCH_VERSION=stable,  CPU runner
name: Unit-tests on Linux

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
  workflow_dispatch:

env:
  CHANNEL: "nightly"

concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that is only available on
  # pull_request/pull_request_target triggers, so ${{ github.ref }} is used instead.
  # On main, the group key includes the commit SHA, so each commit gets its own
  # group: we want all builds to complete even if merging happens faster, to
  # make it easier to discover at which point something broke.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true

jobs:
  test-gpu:
    strategy:
      matrix:
        python_version: ["3.10"]
        cuda_arch_version: ["12.1"]
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      repository: pytorch/tensordict
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda_arch_version }}
      # NOTE(review): unlike the other three jobs (timeout: 90), no `timeout`
      # is set here, so the reusable workflow's default applies. Confirm this
      # is intended.
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION="${{ matrix.python_version }}"
        # GPU build: derive CU_VERSION from the matrix CUDA version by dropping
        # the dots (e.g. "12.1" -> "cu121") rather than slicing fixed offsets.
        export CUDA_ARCH_VERSION="${{ matrix.cuda_arch_version }}"
        export CU_VERSION="cu${CUDA_ARCH_VERSION//./}"
        export TORCH_VERSION=nightly
        # CPU fallback: if the GPU tests stop working inside docker, comment out
        # the two CUDA lines above and uncomment the line below.
        #export CU_VERSION="cpu"
        export ARCH=x86_64
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        # Run the CI scripts in order, starting with setup_env.sh
        bash .github/unittest/linux/scripts/setup_env.sh
        bash .github/unittest/linux/scripts/install.sh
        bash .github/unittest/linux/scripts/run_test.sh
        bash .github/unittest/linux/scripts/post_process.sh

  test-cpu:
    strategy:
      matrix:
        python_version: ["3.9", "3.10", "3.11", "3.12"]
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      runner: linux.12xlarge
      repository: pytorch/tensordict
      timeout: 90
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION="${{ matrix.python_version }}"
        export CU_VERSION="cpu"
        export TORCH_VERSION=nightly
        export ARCH=x86_64
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        # Run the CI scripts in order, starting with setup_env.sh
        bash .github/unittest/linux/scripts/setup_env.sh
        bash .github/unittest/linux/scripts/install.sh
        bash .github/unittest/linux/scripts/run_test.sh
        bash .github/unittest/linux/scripts/post_process.sh

  test-stable-gpu:
    strategy:
      matrix:
        python_version: ["3.10"]
        cuda_arch_version: ["12.1"]
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      repository: pytorch/tensordict
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda_arch_version }}
      timeout: 90
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION="${{ matrix.python_version }}"
        # GPU build: derive CU_VERSION from the matrix CUDA version by dropping
        # the dots (e.g. "12.1" -> "cu121") rather than slicing fixed offsets.
        export CUDA_ARCH_VERSION="${{ matrix.cuda_arch_version }}"
        export CU_VERSION="cu${CUDA_ARCH_VERSION//./}"
        export TORCH_VERSION=stable
        # CPU fallback: if the GPU tests stop working inside docker, comment out
        # the two CUDA lines above and uncomment the line below.
        #export CU_VERSION="cpu"
        export ARCH=x86_64
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        # Run the CI scripts in order, starting with setup_env.sh
        bash .github/unittest/linux/scripts/setup_env.sh
        bash .github/unittest/linux/scripts/install.sh
        bash .github/unittest/linux/scripts/run_test.sh
        bash .github/unittest/linux/scripts/post_process.sh

  test-stable-cpu:
    strategy:
      matrix:
        python_version: ["3.9", "3.12"]
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      runner: linux.12xlarge
      repository: pytorch/tensordict
      timeout: 90
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION="${{ matrix.python_version }}"
        export CU_VERSION="cpu"
        export TORCH_VERSION=stable
        export ARCH=x86_64
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        # Run the CI scripts in order, starting with setup_env.sh
        bash .github/unittest/linux/scripts/setup_env.sh
        bash .github/unittest/linux/scripts/install.sh
        bash .github/unittest/linux/scripts/run_test.sh
        bash .github/unittest/linux/scripts/post_process.sh