From 78f5d2ea8e9d21fa46d29037fc768c639d17722f Mon Sep 17 00:00:00 2001 From: dan_the_3rd <43445237+danthe3rd@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:36:00 +0000 Subject: [PATCH] Prepare release 0.0.25 (fairinternal/xformers#1058) * Prepare release 0.0.25 * Add note about supported PT version for binaries __original_commit__ = fairinternal/xformers@0510e145c8d950cffd31b47e47e36b0f6ef1e664 --- .github/workflows/conda.yml | 4 ++-- .github/workflows/wheels.yml | 8 ++++---- CHANGELOG.md | 12 +++++++++--- version.txt | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 42647802d2..577273c527 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -33,13 +33,13 @@ jobs: - "3.9" - "3.10" config: - - torch_version: "2.2.0" + - torch_version: "2.2.1" torch_channel: "pytorch" cuda_version: "12.1.0" cuda_dep_runtime: ">=12.0,<13.0" cuda_short_version: "121" - - torch_version: "2.2.0" + - torch_version: "2.2.1" torch_channel: "pytorch" cuda_version: "11.8.0" cuda_dep_runtime: ">=11.7,<11.9" diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 93d0889315..1a91c86064 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -27,7 +27,7 @@ jobs: - "3.10" - "3.11" torch_version: - - "2.2.0" + - "2.2.1" cuda_short_version: - "118" - "121" @@ -45,7 +45,7 @@ jobs: uses: ./.github/workflows/wheels_upload_pip.yml with: twine_username: __token__ - filter: "*torch2.2.0+cu121*" + filter: "*torch2.2.1+cu121*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.event_name != 'pull_request' }} secrets: twine_password: ${{ secrets.PYPI_TOKEN }} @@ -57,7 +57,7 @@ jobs: aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role" s3_path: s3://pytorch/whl/cu118/ aws_s3_cp_extra_args: --acl public-read - filter: "*torch2.2.0+cu118*" + filter: "*torch2.2.1+cu118*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} upload_pt_cu121: @@ -67,6 +67,6 @@ jobs: aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role" s3_path: s3://pytorch/whl/cu121/ aws_s3_cp_extra_args: --acl public-read - filter: "*torch2.2.0+cu121*" + filter: "*torch2.2.1+cu121*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 62e6a1e445..669f2f1f68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.0.25] - TBD +## [0.0.26] - TBD ### Added -- New merge_attentions function +### Improved +### Removed + +## [0.0.25] - 2024-03-14 +Pre-built binary wheels require PyTorch 2.2.1 +### Added +- New `merge_attentions` function ### Improved - fMHA: Updated Flash-Attention to v2.5.6: this has a performance improvement for multiquery. - fMHA: triton_splitk changed and expanded. Now amalgamates using LSE. Can autotune, supports causal with a small number of queries - not just 1. Experimental support for paged attention. @@ -18,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.24] - 2024-01-31 Pre-built binary wheels require PyTorch 2.2.0 ### Added -- Added components for model/sequence parallelism, as near-drop-in replacements for FairScale/Megatron Column&RowParallelLinear modules. They support fusing communication and computation for sequence parallelism, thus making the communication effectively free. +- Added components for model/sequence parallelism, as near-drop-in replacements for FairScale/Megatron Column&RowParallelLinear modules. They support fusing communication and computation for sequence parallelism, thus making the communication effectively free. [Read more](https://twitter.com/d_haziza/status/1753030654118211593) - Added kernels for training models with 2:4-sparsity. We introduced a very fast kernel for converting a matrix A into 24-sparse format, which can be used during training to sparsify weights dynamically, activations etc... xFormers also provides an API that is compatible with torch-compile, see `xformers.ops.sparsify24`. ### Improved - Make selective activation checkpointing be compatible with torch.compile. diff --git a/version.txt b/version.txt index 2678ff8d63..c4475d3bb7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.0.25 +0.0.26