From fe2c83a5232eb711db4732fb6b537020f252bc31 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 4 Dec 2024 23:50:32 +0800 Subject: [PATCH 01/11] Get decompositions only for CIA ops (#3297) --- py/torch_tensorrt/dynamo/lowering/_decompositions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/dynamo/lowering/_decompositions.py b/py/torch_tensorrt/dynamo/lowering/_decompositions.py index 0c4e19d902..945d1d0d2a 100644 --- a/py/torch_tensorrt/dynamo/lowering/_decompositions.py +++ b/py/torch_tensorrt/dynamo/lowering/_decompositions.py @@ -4,8 +4,11 @@ import torch from torch._decomp import register_decomposition +from torch._export.utils import ( + _collect_all_valid_cia_ops_for_aten_namespace, + _get_decomp_for_cia, +) from torch._ops import OpOverload -from torch.export import default_decompositions from torch_tensorrt.dynamo._defaults import default_device from torch_tensorrt.dynamo.conversion.converter_utils import get_positive_dim from torch_tensorrt.dynamo.utils import to_torch_device @@ -432,7 +435,10 @@ def get_decompositions( return {**CORE_ATEN_DECOMPOSITIONS_FILTERED, **TORCH_TRT_DECOMPOSITIONS} else: # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/135080 - decomp_table = default_decompositions() + decomp_table = {} + for op in _collect_all_valid_cia_ops_for_aten_namespace(): + decomp_table[op] = _get_decomp_for_cia(op) + DECOMP_TABLE_FILTERED: Dict[OpOverload, Callable[[Any], Any]] = { decomp: decomp_table[decomp] for decomp in decomp_table From f37151cadbe0539da1faf661db6c0075fc1998b5 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:05:06 -0700 Subject: [PATCH 02/11] Update docgen.yml base container Updated to pytorch/manylinux2_28-builder:cuda12.6 --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 
593c4e90f0..29f8e83679 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -14,7 +14,7 @@ jobs: if: ${{ ! contains(github.actor, 'pytorchbot') }} environment: pytorchbot-env container: - image: docker.io/pytorch/manylinux-builder:cuda12.4 + image: docker.io/pytorch/manylinux2_28-builder:cuda12.6 options: --gpus all env: CUDA_HOME: /usr/local/cuda-12.4 From b1f01b20612c74ec0b5dfb82ee57608d4564921d Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:51:22 -0700 Subject: [PATCH 03/11] Update docgen.yml --- .github/workflows/docgen.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 29f8e83679..df8422e260 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -17,9 +17,9 @@ jobs: image: docker.io/pytorch/manylinux2_28-builder:cuda12.6 options: --gpus all env: - CUDA_HOME: /usr/local/cuda-12.4 - VERSION_SUFFIX: cu124 - CU_VERSION: cu124 + CUDA_HOME: /usr/local/cuda-12.6 + VERSION_SUFFIX: cu126 + CU_VERSION: cu126 CHANNEL: nightly CI_BUILD: 1 steps: From 13a2f64720b6ed4028d472d5d466318f9eb80ea3 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:45:35 -0700 Subject: [PATCH 04/11] Update docgen.yml --- .github/workflows/docgen.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index df8422e260..ce7ffef5d6 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -46,7 +46,9 @@ jobs: - name: Generate New Docs run: | cd docsrc - yum install -y doxygen pandoc + dnf clean all + dnf update + dnf install -y doxygen pandoc python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html From 7f22d7bf413ae3aaa38002e3cbac98a13121de42 Mon Sep 17 00:00:00 2001 
From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:53:40 -0700 Subject: [PATCH 05/11] Update docgen.yml --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index ce7ffef5d6..72da08949b 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -48,7 +48,7 @@ jobs: cd docsrc dnf clean all dnf update - dnf install -y doxygen pandoc + dnf install -y doxygen pandoc --allowerasing python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html From 5cca6522cedd9ff56101f4f9c8f7f6db108f23f3 Mon Sep 17 00:00:00 2001 From: Hoonkyung Cho Date: Thu, 5 Dec 2024 21:16:40 +0900 Subject: [PATCH 06/11] fix: cumsum add_constant bug fix (add dtype for np zeros) (#3258) Co-authored-by: Hoonkyung Cho --- .../dynamo/conversion/impl/slice/ops.py | 2 +- py/torch_tensorrt/dynamo/utils.py | 26 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py index b58435b489..3274d78c2b 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py @@ -370,7 +370,7 @@ def cumsum( ) else: new_dims = tuple(data.shape) - zeros = np.zeros(new_dims) + zeros = np.zeros(new_dims, dtype=np.float32) zero_trttensor = get_trt_tensor(ctx, zeros, f"{name}_initial_value") running_sum = loop.add_recurrence(zero_trttensor) diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index 85aa663809..95e5f30e4d 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import warnings from dataclasses import fields, replace from enum import Enum from typing import Any, Callable, 
Dict, List, Optional, Sequence, Tuple, Union @@ -10,6 +11,8 @@ import tensorrt as trt import torch from torch._subclasses.fake_tensor import FakeTensor + +from packaging import version from torch_tensorrt._Device import Device from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES @@ -19,8 +22,6 @@ from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings -from packaging import version - from .types import TRTDataType logger = logging.getLogger(__name__) @@ -494,6 +495,27 @@ def parse_dynamo_kwargs( if "options" in kwargs and len(kwargs) == 1: kwargs = kwargs["options"] + if "truncate_long_and_double" in kwargs: + if ( + "truncate_double" in kwargs + and kwargs["truncate_double"] is not _defaults.TRUNCATE_DOUBLE + ): + raise ValueError( + 'Provided configuration for "truncate_double" and deprecated API "truncate_long_and_double". ' + 'Please only use "truncate_double".' + ) + else: + kwargs["truncate_double"] = kwargs["truncate_long_and_double"] + warnings.warn( + 'Compiler option "truncate_long_and_double" is deprecated in favor of "truncate_double" as int64 is now natively supported. 
' + "This option will be removed in the next version.", + DeprecationWarning, + stacklevel=2, + ) + del kwargs[ + "truncate_long_and_double" + ] # Remove deprecated key after handling + valid_attrs = {attr.name for attr in fields(settings)} valid_kwargs = {k: v for k, v in kwargs.items() if k in valid_attrs} settings = replace(settings, **valid_kwargs) From 6f0b5be8a55ce0bbedb40e6e86a5741c67f31eac Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:04:12 -0700 Subject: [PATCH 07/11] Update docgen.yml --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 72da08949b..fe2220fbbb 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -48,7 +48,7 @@ jobs: cd docsrc dnf clean all dnf update - dnf install -y doxygen pandoc --allowerasing + dnf install -y doxygen pandoc --allowerasing --nobest python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html From 1af20d33cf03deb716fba4013844678d09c05653 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Thu, 5 Dec 2024 13:34:17 -0800 Subject: [PATCH 08/11] chore: update install doxygen in the docs container Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/workflows/docgen.yml | 115 ++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index fe2220fbbb..8002e0bcac 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -5,64 +5,67 @@ name: Generate Docs # Controls when the action will run. Triggers the workflow on push or pull request # events but only for the master branch on: - push: - branches: [ main ] + push: + branches: [main] jobs: - build-docs: - runs-on: linux.g5.4xlarge.nvidia.gpu - if: ${{ ! 
contains(github.actor, 'pytorchbot') }} - environment: pytorchbot-env - container: - image: docker.io/pytorch/manylinux2_28-builder:cuda12.6 - options: --gpus all - env: - CUDA_HOME: /usr/local/cuda-12.6 - VERSION_SUFFIX: cu126 - CU_VERSION: cu126 - CHANNEL: nightly - CI_BUILD: 1 - steps: - - uses: actions/checkout@v3 - with: - ref: ${{github.head_ref}} - token: ${{ secrets.GH_PYTORCHBOT_TOKEN }} - - name: Select Python / CUDA - run: | - git config --global --add safe.directory /__w/TensorRT/TensorRT - echo "/opt/python/cp311-cp311/bin/" >> $GITHUB_PATH + build-docs: + runs-on: linux.g5.4xlarge.nvidia.gpu + if: ${{ ! contains(github.actor, 'pytorchbot') }} + environment: pytorchbot-env + container: + image: docker.io/pytorch/manylinux2_28-builder:cuda12.6 + options: --gpus all + env: + CUDA_HOME: /usr/local/cuda-12.6 + VERSION_SUFFIX: cu126 + CU_VERSION: cu126 + CHANNEL: nightly + CI_BUILD: 1 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{github.head_ref}} + token: ${{ secrets.GH_PYTORCHBOT_TOKEN }} + - name: Select Python / CUDA + run: | + git config --global --add safe.directory /__w/TensorRT/TensorRT + echo "/opt/python/cp311-cp311/bin/" >> $GITHUB_PATH - - name: Install base deps - run: | - python3 -m pip install pip --upgrade - python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu124 - ./packaging/pre_build_script.sh - - name: Get HEAD SHA - id: vars - run: echo "sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - - name: Build Python Package - run: | - python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu124 - - name: Generate New Docs - run: | - cd docsrc - dnf clean all - dnf update - dnf install -y doxygen pandoc --allowerasing --nobest - python3 -m pip install -r requirements.txt - python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" - make html - cd .. 
- - uses: stefanzweifel/git-auto-commit-action@v4 - with: - # Required - commit_message: "docs: [Automated] Regenerating documenation for ${{ steps.vars.outputs.sha }}" - commit_options: "--no-verify --signoff" - file_pattern: docs/ - commit_user_name: Torch-TensorRT Github Bot - commit_user_email: torch-tensorrt.github.bot@nvidia.com - commit_author: Torch-TensorRT Github Bot + - name: Install base deps + run: | + python3 -m pip install pip --upgrade + python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu124 + ./packaging/pre_build_script.sh + - name: Get HEAD SHA + id: vars + run: echo "sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + - name: Build Python Package + run: | + python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu124 + - name: Generate New Docs + run: | + cd docsrc + dnf clean all + dnf makecache --refresh + dnf install yum-utils -y + dnf config-manager --set-enabled powertools + dnf update --skip-broken --nobest + dnf install -y doxygen pandoc + python3 -m pip install -r requirements.txt + python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" + make html + cd .. 
+ - uses: stefanzweifel/git-auto-commit-action@v4 + with: + # Required + commit_message: "docs: [Automated] Regenerating documenation for ${{ steps.vars.outputs.sha }}" + commit_options: "--no-verify --signoff" + file_pattern: docs/ + commit_user_name: Torch-TensorRT Github Bot + commit_user_email: torch-tensorrt.github.bot@nvidia.com + commit_author: Torch-TensorRT Github Bot concurrency: - group: ${{ github.workflow }}-${{ github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref_name }} + cancel-in-progress: true From 62d4fcb041040e5d12cccd2abf2474a1398804c1 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Thu, 5 Dec 2024 13:54:01 -0800 Subject: [PATCH 09/11] chore: dropped -y in update Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 8002e0bcac..bb6adc1034 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -50,7 +50,7 @@ jobs: dnf makecache --refresh dnf install yum-utils -y dnf config-manager --set-enabled powertools - dnf update --skip-broken --nobest + dnf update --skip-broken --nobest -y dnf install -y doxygen pandoc python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" From 5dbb84835512129ee55ab5deebfff7df7254a0f3 Mon Sep 17 00:00:00 2001 From: "Zewen (Evan) Li" Date: Fri, 6 Dec 2024 09:47:58 -0800 Subject: [PATCH 10/11] fix: change docker img from manylinux to manylinux2_28 for all CUDA versions (#3312) --- .../scripts/generate_binary_build_matrix.py | 701 ++++++++++++++++++ .github/workflows/build-test-linux.yml | 2 +- .../workflows/build-test-tensorrt-linux.yml | 2 +- .../workflows/build-test-tensorrt-windows.yml | 2 +- .github/workflows/build-test-windows.yml | 2 +- .../generate_binary_build_matrix.yml | 112 +++ .github/workflows/release-linux.yml | 2 +- 
.github/workflows/release-windows.yml | 2 +- 8 files changed, 819 insertions(+), 6 deletions(-) create mode 100644 .github/scripts/generate_binary_build_matrix.py create mode 100644 .github/workflows/generate_binary_build_matrix.yml diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py new file mode 100644 index 0000000000..4ba7e0faeb --- /dev/null +++ b/.github/scripts/generate_binary_build_matrix.py @@ -0,0 +1,701 @@ +#!/usr/bin/env python3 + +"""Generates a matrix to be utilized through github actions + +Will output a condensed version of the matrix if on a pull request that only +includes the latest version of python we support built on four different +architectures: + * CPU + * Latest CUDA + * Latest ROCM + * Latest XPU +""" + + +import argparse +import json +import os +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple + +PYTHON_ARCHES_DICT = { + "nightly": ["3.9", "3.10", "3.11", "3.12"], + "test": ["3.9", "3.10", "3.11", "3.12"], + "release": ["3.9", "3.10", "3.11", "3.12"], +} +CUDA_ARCHES_DICT = { + "nightly": ["11.8", "12.4", "12.6"], + "test": ["11.8", "12.1", "12.4"], + "release": ["11.8", "12.1", "12.4"], +} +ROCM_ARCHES_DICT = { + "nightly": ["6.1", "6.2"], + "test": ["6.1", "6.2"], + "release": ["6.1", "6.2"], +} + +CUDA_CUDDN_VERSIONS = { + "11.8": {"cuda": "11.8.0", "cudnn": "9"}, + "12.1": {"cuda": "12.1.1", "cudnn": "9"}, + "12.4": {"cuda": "12.4.1", "cudnn": "9"}, + "12.6": {"cuda": "12.6.2", "cudnn": "9"}, +} + +PACKAGE_TYPES = ["wheel", "conda", "libtorch"] +PRE_CXX11_ABI = "pre-cxx11" +CXX11_ABI = "cxx11-abi" +RELEASE = "release" +DEBUG = "debug" +NIGHTLY = "nightly" +TEST = "test" + +# OS constants +LINUX = "linux" +LINUX_AARCH64 = "linux-aarch64" +MACOS_ARM64 = "macos-arm64" +WINDOWS = "windows" + +# Accelerator architectures +CPU = "cpu" +CPU_AARCH64 = "cpu-aarch64" +CUDA_AARCH64 = "cuda-aarch64" +CUDA = "cuda" +ROCM = "rocm" +XPU = "xpu" + + 
+CURRENT_NIGHTLY_VERSION = "2.6.0" +CURRENT_CANDIDATE_VERSION = "2.5.1" +CURRENT_STABLE_VERSION = "2.5.1" +CURRENT_VERSION = CURRENT_STABLE_VERSION + +# By default use Nightly for CUDA arches +CUDA_ARCHES = CUDA_ARCHES_DICT[NIGHTLY] +ROCM_ARCHES = ROCM_ARCHES_DICT[NIGHTLY] +PYTHON_ARCHES = PYTHON_ARCHES_DICT[NIGHTLY] + +# Container images +LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] +WHEEL_CONTAINER_IMAGES: Dict[str, str] + +LINUX_GPU_RUNNER = "linux.g5.4xlarge.nvidia.gpu" +LINUX_CPU_RUNNER = "linux.2xlarge" +LINUX_AARCH64_RUNNER = "linux.arm64.2xlarge" +LINUX_AARCH64_GPU_RUNNER = "linux.arm64.m7g.4xlarge" +WIN_GPU_RUNNER = "windows.g4dn.xlarge" +WIN_CPU_RUNNER = "windows.4xlarge" +MACOS_M1_RUNNER = "macos-m1-stable" + +PACKAGES_TO_INSTALL_WHL = "torch torchvision torchaudio" +WHL_INSTALL_BASE = "pip3 install" +DOWNLOAD_URL_BASE = "https://download.pytorch.org" + +ENABLE = "enable" +DISABLE = "disable" + + +def arch_type(arch_version: str) -> str: + if arch_version in CUDA_ARCHES: + return CUDA + elif arch_version in ROCM_ARCHES: + return ROCM + elif arch_version == CPU_AARCH64: + return CPU_AARCH64 + elif arch_version == CUDA_AARCH64: + return CUDA_AARCH64 + elif arch_version == XPU: + return XPU + else: # arch_version should always be CPU in this case + return CPU + + +def validation_runner(arch_type: str, os: str) -> str: + if os == LINUX: + if arch_type == CUDA: + return LINUX_GPU_RUNNER + else: + return LINUX_CPU_RUNNER + elif os == LINUX_AARCH64: + if arch_type == CUDA_AARCH64: + return LINUX_AARCH64_GPU_RUNNER + else: + return LINUX_AARCH64_RUNNER + elif os == WINDOWS: + if arch_type == CUDA: + return WIN_GPU_RUNNER + else: + return WIN_CPU_RUNNER + elif os == MACOS_ARM64: + return MACOS_M1_RUNNER + else: # default to linux cpu runner + return LINUX_CPU_RUNNER + + +def initialize_globals(channel: str, build_python_only: bool) -> None: + global CURRENT_VERSION, CUDA_ARCHES, ROCM_ARCHES, PYTHON_ARCHES + global WHEEL_CONTAINER_IMAGES, 
LIBTORCH_CONTAINER_IMAGES + if channel == TEST: + CURRENT_VERSION = CURRENT_CANDIDATE_VERSION + else: + CURRENT_VERSION = CURRENT_STABLE_VERSION + + CUDA_ARCHES = CUDA_ARCHES_DICT[channel] + ROCM_ARCHES = ROCM_ARCHES_DICT[channel] + if build_python_only: + # Only select the oldest version of python if building a python only package + PYTHON_ARCHES = [PYTHON_ARCHES_DICT[channel][0]] + else: + PYTHON_ARCHES = PYTHON_ARCHES_DICT[channel] + WHEEL_CONTAINER_IMAGES = { + "11.8": "pytorch/manylinux2_28-builder:cuda11.8", + "12.1": "pytorch/manylinux2_28-builder:cuda12.1", + "12.4": "pytorch/manylinux2_28-builder:cuda12.4", + "12.6": "pytorch/manylinux2_28-builder:cuda12.6", + **{ + gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}" + for gpu_arch in ROCM_ARCHES + }, + CPU: "pytorch/manylinux-builder:cpu", + XPU: "pytorch/manylinux2_28-builder:xpu", + # TODO: Migrate CUDA_AARCH64 image to manylinux2_28_aarch64-builder:cuda12.4 + CPU_AARCH64: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64", + CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.4", + } + LIBTORCH_CONTAINER_IMAGES = { + **{ + (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}" + for gpu_arch in CUDA_ARCHES + }, + **{ + (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}" + for gpu_arch in CUDA_ARCHES + }, + **{ + (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:rocm{gpu_arch}" + for gpu_arch in ROCM_ARCHES + }, + **{ + (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}" + for gpu_arch in ROCM_ARCHES + }, + (CPU, PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu", + (CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu", + } + + +def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str: + return { + CPU: "cpu", + CPU_AARCH64: CPU, + CUDA_AARCH64: "cu124", + CUDA: f"cu{gpu_arch_version.replace('.', '')}", + ROCM: f"rocm{gpu_arch_version}", + XPU: "xpu", + }.get(gpu_arch_type, gpu_arch_version) + + +def 
list_without(in_list: List[str], without: List[str]) -> List[str]: + return [item for item in in_list if item not in without] + + +def get_base_download_url_for_repo( + repo: str, channel: str, gpu_arch_type: str, desired_cuda: str +) -> str: + base_url_for_type = f"{DOWNLOAD_URL_BASE}/{repo}" + base_url_for_type = ( + base_url_for_type if channel == RELEASE else f"{base_url_for_type}/{channel}" + ) + + if gpu_arch_type != CPU: + base_url_for_type = f"{base_url_for_type}/{desired_cuda}" + else: + base_url_for_type = f"{base_url_for_type}/{gpu_arch_type}" + + return base_url_for_type + + +def get_libtorch_install_command( + os: str, + channel: str, + gpu_arch_type: str, + libtorch_variant: str, + devtoolset: str, + desired_cuda: str, + libtorch_config: str, +) -> str: + prefix = "libtorch" if os != WINDOWS else "libtorch-win" + _libtorch_variant = ( + f"{libtorch_variant}-{libtorch_config}" + if libtorch_config == "debug" + else libtorch_variant + ) + build_name = ( + f"{prefix}-{devtoolset}-{_libtorch_variant}-latest.zip" + if devtoolset == "cxx11-abi" + else f"{prefix}-{_libtorch_variant}-latest.zip" + ) + + if os == MACOS_ARM64: + arch = "arm64" + build_name = f"libtorch-macos-{arch}-latest.zip" + if channel in [RELEASE, TEST]: + build_name = f"libtorch-macos-{arch}-{CURRENT_VERSION}.zip" + + elif os == LINUX and (channel in (RELEASE, TEST)): + build_name = ( + f"{prefix}-{devtoolset}-{_libtorch_variant}-{CURRENT_VERSION}%2B{desired_cuda}.zip" + if devtoolset == "cxx11-abi" + else f"{prefix}-{_libtorch_variant}-{CURRENT_VERSION}%2B{desired_cuda}.zip" + ) + elif os == WINDOWS and (channel in (RELEASE, TEST)): + build_name = ( + f"{prefix}-shared-with-deps-debug-{CURRENT_VERSION}%2B{desired_cuda}.zip" + if libtorch_config == "debug" + else f"{prefix}-shared-with-deps-{CURRENT_VERSION}%2B{desired_cuda}.zip" + ) + elif os == WINDOWS and channel == NIGHTLY: + build_name = ( + f"{prefix}-shared-with-deps-debug-latest.zip" + if libtorch_config == "debug" + else 
f"{prefix}-shared-with-deps-latest.zip" + ) + + return f"{get_base_download_url_for_repo('libtorch', channel, gpu_arch_type, desired_cuda)}/{build_name}" + + +def get_wheel_install_command( + os: str, + channel: str, + gpu_arch_type: str, + gpu_arch_version: str, + desired_cuda: str, + python_version: str, + use_only_dl_pytorch_org: bool, + use_split_build: bool = False, +) -> str: + if use_split_build: + if (gpu_arch_version in CUDA_ARCHES) and (os == LINUX) and (channel == NIGHTLY): + return f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL} --index-url {get_base_download_url_for_repo('whl', channel, gpu_arch_type, desired_cuda)}_pypi_pkg" # noqa: E501 + else: + raise ValueError( + "Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6 on Linux nightly builds." # noqa: E501 + ) + if ( + channel == RELEASE + and (not use_only_dl_pytorch_org) + and ( + (gpu_arch_version == "12.4" and os == LINUX) + or (gpu_arch_type == CPU and os in [WINDOWS, MACOS_ARM64]) + or (os == LINUX_AARCH64) + ) + ): + return f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL}" + else: + whl_install_command = ( + f"{WHL_INSTALL_BASE} --pre {PACKAGES_TO_INSTALL_WHL}" + if channel == "nightly" + else f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL}" + ) + return f"{whl_install_command} --index-url {get_base_download_url_for_repo('whl', channel, gpu_arch_type, desired_cuda)}" # noqa: E501 + + +def generate_conda_matrix( + os: str, + channel: str, + with_cuda: str, + with_rocm: str, + with_cpu: str, + with_xpu: str, + limit_pr_builds: bool, + use_only_dl_pytorch_org: bool, + use_split_build: bool = False, + python_versions: Optional[List[str]] = None, +) -> List[Dict[str, str]]: + ret: List[Dict[str, str]] = [] + # return empty list. 
Conda builds are deprecated, see https://github.com/pytorch/pytorch/issues/138506 + return ret + + +def generate_libtorch_matrix( + os: str, + channel: str, + with_cuda: str, + with_rocm: str, + with_cpu: str, + with_xpu: str, + limit_pr_builds: bool, + use_only_dl_pytorch_org: bool, + use_split_build: bool = False, + python_versions: Optional[List[str]] = None, + abi_versions: Optional[List[str]] = None, + arches: Optional[List[str]] = None, + libtorch_variants: Optional[List[str]] = None, +) -> List[Dict[str, str]]: + ret: List[Dict[str, str]] = [] + + if arches is None: + arches = [] + + if with_cpu == ENABLE: + arches += [CPU] + + if with_cuda == ENABLE and os in (LINUX, WINDOWS): + arches += CUDA_ARCHES + + if with_rocm == ENABLE and os == LINUX: + arches += ROCM_ARCHES + + if abi_versions is None: + if os == WINDOWS: + abi_versions = [RELEASE, DEBUG] + elif os == LINUX: + abi_versions = [PRE_CXX11_ABI, CXX11_ABI] + elif os in [MACOS_ARM64]: + abi_versions = [CXX11_ABI] + else: + abi_versions = [] + + if libtorch_variants is None: + libtorch_variants = [ + "shared-with-deps", + ] + + global LIBTORCH_CONTAINER_IMAGES + + for abi_version in abi_versions: + for arch_version in arches: + for libtorch_variant in libtorch_variants: + # one of the values in the following list must be exactly + # CXX11_ABI, but the precise value of the other one doesn't + # matter + gpu_arch_type = arch_type(arch_version) + gpu_arch_version = "" if arch_version == CPU else arch_version + + desired_cuda = translate_desired_cuda(gpu_arch_type, gpu_arch_version) + devtoolset = abi_version if os != WINDOWS else "" + libtorch_config = abi_version if os == WINDOWS else "" + ret.append( + { + "gpu_arch_type": gpu_arch_type, + "gpu_arch_version": gpu_arch_version, + "desired_cuda": desired_cuda, + "libtorch_variant": libtorch_variant, + "libtorch_config": libtorch_config, + "devtoolset": devtoolset, + "container_image": ( + LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)] + if os != 
WINDOWS + else "" + ), + "package_type": "libtorch", + "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace( # noqa: E501 + ".", "_" + ), + # Please noe since libtorch validations are minimal, we use CPU runners + "validation_runner": validation_runner(CPU, os), + "installation": get_libtorch_install_command( + os, + channel, + gpu_arch_type, + libtorch_variant, + devtoolset, + desired_cuda, + libtorch_config, + ), + "channel": channel, + "stable_version": CURRENT_VERSION, + } + ) + return ret + + +def generate_wheels_matrix( + os: str, + channel: str, + with_cuda: str, + with_rocm: str, + with_cpu: str, + with_xpu: str, + limit_pr_builds: bool, + use_only_dl_pytorch_org: bool, + use_split_build: bool = False, + python_versions: Optional[List[str]] = None, + arches: Optional[List[str]] = None, +) -> List[Dict[str, str]]: + package_type = "wheel" + + if not python_versions: + # Define default python version + python_versions = list(PYTHON_ARCHES) + + # If the list of python versions is set explicitly by the caller, stick with it instead + # of trying to add more versions behind the scene + if channel == NIGHTLY and (os in (LINUX, MACOS_ARM64, LINUX_AARCH64)): + python_versions += ["3.13"] + + if os == LINUX: + # NOTE: We only build manywheel packages for linux + package_type = "manywheel" + + upload_to_base_bucket = "yes" + if arches is None: + # Define default compute architectures + arches = [] + + if with_cpu == ENABLE: + arches += [CPU] + + if os == LINUX_AARCH64: + # Only want the one arch as the CPU type is different and + # uses different build/test scripts + arches = [CPU_AARCH64, CUDA_AARCH64] + + if with_cuda == ENABLE: + upload_to_base_bucket = "no" + if os in (LINUX, WINDOWS): + arches += CUDA_ARCHES + + if with_rocm == ENABLE and os == LINUX: + arches += ROCM_ARCHES + + if with_xpu == ENABLE and os in (LINUX, WINDOWS): + arches += [XPU] + + if limit_pr_builds: + python_versions = [python_versions[0]] + + 
global WHEEL_CONTAINER_IMAGES + + ret: List[Dict[str, Any]] = [] + for python_version in python_versions: + for arch_version in arches: + + # TODO: Enable Python 3.13 support for ROCM + if arch_version in ROCM_ARCHES and python_version == "3.13": + continue + + gpu_arch_type = arch_type(arch_version) + gpu_arch_version = ( + "" if arch_version in [CPU, CPU_AARCH64, XPU] else arch_version + ) + + desired_cuda = translate_desired_cuda(gpu_arch_type, gpu_arch_version) + entry = { + "python_version": python_version, + "gpu_arch_type": gpu_arch_type, + "gpu_arch_version": gpu_arch_version, + "desired_cuda": desired_cuda, + "container_image": WHEEL_CONTAINER_IMAGES[arch_version], + "package_type": package_type, + "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( + ".", "_" + ), + "validation_runner": validation_runner(gpu_arch_type, os), + "installation": get_wheel_install_command( + os, + channel, + gpu_arch_type, + gpu_arch_version, + desired_cuda, + python_version, + use_only_dl_pytorch_org, + ), + "channel": channel, + "upload_to_base_bucket": upload_to_base_bucket, + "stable_version": CURRENT_VERSION, + "use_split_build": False, + } + ret.append(entry) + if ( + use_split_build + and (gpu_arch_version in CUDA_ARCHES) + and (os == LINUX) + and (channel == NIGHTLY) + ): + entry = entry.copy() + entry["build_name"] = ( + f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-split".replace( + ".", "_" + ) + ) + entry["use_split_build"] = True + ret.append(entry) + + return ret + + +GENERATING_FUNCTIONS_BY_PACKAGE_TYPE: Dict[str, Callable[..., List[Dict[str, str]]]] = { + "wheel": generate_wheels_matrix, + "conda": generate_conda_matrix, + "libtorch": generate_libtorch_matrix, +} + + +def generate_build_matrix( + package_type: str, + operating_system: str, + channel: str, + with_cuda: str, + with_rocm: str, + with_cpu: str, + with_xpu: str, + limit_pr_builds: str, + use_only_dl_pytorch_org: str, + 
build_python_only: str, + use_split_build: str = "false", + python_versions: Optional[List[str]] = None, +) -> Dict[str, List[Dict[str, str]]]: + includes = [] + + package_types = package_type.split(",") + if len(package_types) == 1: + package_types = PACKAGE_TYPES if package_type == "all" else [package_type] + + channels = CUDA_ARCHES_DICT.keys() if channel == "all" else [channel] + + for channel in channels: + for package in package_types: + initialize_globals(channel, build_python_only == ENABLE) + includes.extend( + GENERATING_FUNCTIONS_BY_PACKAGE_TYPE[package]( + operating_system, + channel, + with_cuda, + with_rocm, + with_cpu, + with_xpu, + limit_pr_builds == "true", + use_only_dl_pytorch_org == "true", + use_split_build == "true", + python_versions, + ) + ) + + return {"include": includes} + + +def main(args: List[str]) -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--package-type", + help="Package type to lookup for, also supports comma separated values", + type=str, + default=os.getenv("PACKAGE_TYPE", "wheel"), + ) + parser.add_argument( + "--operating-system", + help="Operating system to generate for", + type=str, + default=os.getenv("OS", LINUX), + ) + parser.add_argument( + "--channel", + help="Channel to use, default nightly", + type=str, + choices=["nightly", "test", "release", "all"], + default=os.getenv("CHANNEL", "nightly"), + ) + parser.add_argument( + "--with-cuda", + help="Build with Cuda?", + type=str, + choices=[ENABLE, DISABLE], + default=os.getenv("WITH_CUDA", ENABLE), + ) + parser.add_argument( + "--with-rocm", + help="Build with Rocm?", + type=str, + choices=[ENABLE, DISABLE], + default=os.getenv("WITH_ROCM", ENABLE), + ) + parser.add_argument( + "--with-cpu", + help="Build with CPU?", + type=str, + choices=[ENABLE, DISABLE], + default=os.getenv("WITH_CPU", ENABLE), + ) + parser.add_argument( + "--with-xpu", + help="Build with XPU?", + type=str, + choices=[ENABLE, DISABLE], + default=os.getenv("WITH_XPU", ENABLE), 
+ ) + # By default this is false for this script but expectation is that the caller + # workflow will default this to be true most of the time, where a pull + # request is synchronized and does not contain the label "ciflow/binaries/all" + parser.add_argument( + "--limit-pr-builds", + help="Limit PR builds to single python/cuda config", + type=str, + choices=["true", "false"], + default=os.getenv("LIMIT_PR_BUILDS", "false"), + ) + # This is used when testing release builds to test release binaries + # only from download.pytorch.org. When pipy binaries are not released yet. + parser.add_argument( + "--use-only-dl-pytorch-org", + help="Use only download.pytorch.org when gen wheel install command?", + type=str, + choices=["true", "false"], + default=os.getenv("USE_ONLY_DL_PYTORCH_ORG", "false"), + ) + # Generates a single version python for building python packages only + # This basically makes it so that we only generate a matrix including the oldest + # version of python that we support + # For packages that look similar to torchtune-0.0.1-py3-none-any.whl + parser.add_argument( + "--build-python-only", + help="Build python only", + type=str, + choices=[ENABLE, DISABLE], + default=os.getenv("BUILD_PYTHON_ONLY", ENABLE), + ) + + parser.add_argument( + "--use-split-build", + help="Use split build for wheel", + type=str, + choices=["true", "false"], + default=os.getenv("USE_SPLIT_BUILD", DISABLE), + ) + + parser.add_argument( + "--python-versions", + help="Only build the select JSON-encoded list of python versions", + type=str, + default=os.getenv("PYTHON_VERSIONS", "[]"), + ) + + options = parser.parse_args(args) + try: + python_versions = json.loads(options.python_versions) + except json.JSONDecodeError: + python_versions = None + + assert ( + options.with_cuda or options.with_rocm or options.with_xpu or options.with_cpu + ), "Must build with either CUDA, ROCM, XPU, or CPU support." 
+ + build_matrix = generate_build_matrix( + options.package_type, + options.operating_system, + options.channel, + options.with_cuda, + options.with_rocm, + options.with_cpu, + options.with_xpu, + options.limit_pr_builds, + options.use_only_dl_pytorch_org, + options.build_python_only, + options.use_split_build, + python_versions, + ) + + print(json.dumps(build_matrix)) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index a660fc4ef2..72d7e21b5c 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -15,7 +15,7 @@ on: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml with: package-type: wheel os: linux diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 3f4abb9add..cfad7274dc 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -10,7 +10,7 @@ permissions: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml with: package-type: wheel os: linux diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index b6eb1d765c..d2be9febd7 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -10,7 +10,7 @@ permissions: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml with: package-type: wheel os: windows diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 0201ab5ff2..c2b05d8994 100644 --- 
a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -15,7 +15,7 @@ on: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml with: package-type: wheel os: windows diff --git a/.github/workflows/generate_binary_build_matrix.yml b/.github/workflows/generate_binary_build_matrix.yml new file mode 100644 index 0000000000..11281ece94 --- /dev/null +++ b/.github/workflows/generate_binary_build_matrix.yml @@ -0,0 +1,112 @@ +name: Generates the binary build matrix + +on: + workflow_call: + inputs: + package-type: + description: "Package type to build from (wheel, conda, libtorch)" + default: "wheel" + type: string + os: + description: "Operating system to generate for (linux, windows, macos, macos-arm64)" + default: "linux" + type: string + channel: + description: "Channel to use (nightly, test, release, all)" + default: "" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "main" + type: string + with-cuda: + description: "Build with Cuda?" + default: "enable" + type: string + with-rocm: + description: "Build with Rocm?" + default: "enable" + type: string + with-cpu: + description: "Build with CPU?" + default: "enable" + type: string + with-xpu: + description: "Build with XPU?" + default: "disable" + type: string + use-only-dl-pytorch-org: + description: "Use only download.pytorch.org when generating wheel install command?" + default: "false" + type: string + build-python-only: + description: "Generate binary build matrix for a python only package (i.e. only one python version)" + default: "disable" + type: string + python-versions: + description: "A JSON-encoded list of python versions to build. 
An empty list means building all supported versions" + default: "[]" + type: string + use_split_build: + description: | + [Experimental] Build a libtorch only wheel and build pytorch such that + it is built from the libtorch wheel. + required: false + type: boolean + default: false + + outputs: + matrix: + description: "Generated build matrix" + value: ${{ jobs.generate.outputs.matrix }} + +jobs: + generate: + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Checkout test-infra repository + uses: actions/checkout@v4 + with: + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + - uses: ./.github/actions/set-channel + - uses: actions/checkout@v4 + with: + repository: pytorch/tensorrt + - name: Generate test matrix + id: generate + env: + PACKAGE_TYPE: ${{ inputs.package-type }} + OS: ${{ inputs.os }} + CHANNEL: ${{ inputs.channel != '' && inputs.channel || env.CHANNEL }} + WITH_CUDA: ${{ inputs.with-cuda }} + WITH_ROCM: ${{ inputs.with-rocm }} + WITH_CPU: ${{ inputs.with-cpu }} + WITH_XPU: ${{ inputs.with-xpu }} + # limit pull request builds to one version of python unless ciflow/binaries/all is applied to the workflow + # should not affect builds that are from events that are not the pull_request event + LIMIT_PR_BUILDS: ${{ github.event_name == 'pull_request' && !contains( github.event.pull_request.labels.*.name, 'ciflow/binaries/all') }} + # This is used when testing release binaries only from download.pytorch.org, + # in cases when PyPI binaries are not published yet. 
+ USE_ONLY_DL_PYTORCH_ORG: ${{ inputs.use-only-dl-pytorch-org }} + BUILD_PYTHON_ONLY: ${{ inputs.build-python-only }} + USE_SPLIT_BUILD: ${{ inputs.use_split_build }} + PYTHON_VERSIONS: ${{ inputs.python-versions }} + run: | + set -eou pipefail + MATRIX_BLOB="$(python3 .github/scripts/generate_binary_build_matrix.py)" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.package-type }}-${{ inputs.os }}-${{ inputs.test-infra-repository }}-${{ inputs.test-infra-ref }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/release-linux.yml b/.github/workflows/release-linux.yml index 53ed569725..ca13b37443 100644 --- a/.github/workflows/release-linux.yml +++ b/.github/workflows/release-linux.yml @@ -15,7 +15,7 @@ permissions: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml if: ${{ contains(github.event.pull_request.labels.*.name, 'build-release-artifacts') || startsWith(github.event.ref, 'refs/tags/v') }} with: package-type: wheel diff --git a/.github/workflows/release-windows.yml b/.github/workflows/release-windows.yml index e9d393f544..271547cec3 100644 --- a/.github/workflows/release-windows.yml +++ b/.github/workflows/release-windows.yml @@ -15,7 +15,7 @@ permissions: jobs: generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + uses: ./.github/workflows/generate_binary_build_matrix.yml if: ${{ contains(github.event.pull_request.labels.*.name, 'build-release-artifacts') || startsWith(github.event.ref, 'refs/tags/v') }} with: package-type: wheel From 5f1d12f9d47b253bc23e9122dfa7c4eed3456bd4 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Fri, 6 Dec 2024 10:06:59 -0800 Subject: [PATCH 11/11] chore: updating conf to ignore rendering triton 
example Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- docsrc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docsrc/conf.py b/docsrc/conf.py index daa1a30100..ffe341c722 100644 --- a/docsrc/conf.py +++ b/docsrc/conf.py @@ -93,7 +93,7 @@ sphinx_gallery_conf = { "examples_dirs": "../examples", "gallery_dirs": "tutorials/_rendered_examples/", - "ignore_pattern": "utils.py", + "ignore_pattern": r"(triton/\w*.py)|(utils.py)", } # Setup the breathe extension