From 47eaab4dc97b553443628a6a2f04cada2829f5a6 Mon Sep 17 00:00:00 2001 From: pbialecki Date: Wed, 23 Aug 2023 11:27:16 -0700 Subject: [PATCH 01/13] update CUDA to 12.1 U1 (#3563) Summary: Follow-up of: https://github.com/pytorch/builder/pull/1485 CC atalman Pull Request resolved: https://github.com/pytorch/audio/pull/3563 Reviewed By: kit1980 Differential Revision: D48610200 Pulled By: atalman fbshipit-source-id: 61c9981da5a343a3cbce97b0a77ab91f37560087 --- packaging/windows/internal/cuda_install.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat index 6c86834f23..facf186937 100644 --- a/packaging/windows/internal/cuda_install.bat +++ b/packaging/windows/internal/cuda_install.bat @@ -57,7 +57,7 @@ goto cuda_common :cuda121 -set CUDA_INSTALL_EXE=cuda_12.1.0_531.14_windows.exe +set CUDA_INSTALL_EXE=cuda_12.1.1_531.14_windows.exe if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 From 5ee254e350289ee2951cc61a1fdc3b361c48a265 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Mon, 28 Aug 2023 23:51:02 -0700 Subject: [PATCH 02/13] Remove random print statement (#3577) Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3577 Reviewed By: atalman Differential Revision: D48763580 Pulled By: mthrok fbshipit-source-id: 6ab155a5dd4cf11b2a58f26ced369107f0a2f08f --- torchaudio/_backend/ffmpeg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torchaudio/_backend/ffmpeg.py b/torchaudio/_backend/ffmpeg.py index 986367fecd..a50df07be4 100644 --- a/torchaudio/_backend/ffmpeg.py +++ b/torchaudio/_backend/ffmpeg.py @@ -241,7 +241,6 @@ def _type(spec): sample_fmt = _get_flac_sample_fmt(bps) if _type("ogg"): sample_fmt = _get_flac_sample_fmt(bps) - print(ext, format, encoding, 
bps, "===>", muxer, encoder, sample_fmt) return muxer, encoder, sample_fmt From 6fb68544e758eeceacebf6e215253187dcaf9983 Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Tue, 29 Aug 2023 16:43:44 -0700 Subject: [PATCH 03/13] Separate Test Token for Conda Uploads (#3582) Summary: We will use a separate token for uploading test binaries (instead of reusing the nightly token). This PR adds that token to the caller workflow. Pull Request resolved: https://github.com/pytorch/audio/pull/3582 Reviewed By: atalman Differential Revision: D48803009 Pulled By: osalpekar fbshipit-source-id: c2af57f6946da51a7b56c975614e60f243e3f6fb --- .github/workflows/build-conda-m1.yml | 1 + .github/workflows/build-conda-windows.yml | 1 + .github/workflows/build_conda_linux.yml | 1 + .github/workflows/build_conda_macos.yml | 1 + 4 files changed, 4 insertions(+) diff --git a/.github/workflows/build-conda-m1.yml b/.github/workflows/build-conda-m1.yml index 15c1ca895f..7e72979ca1 100644 --- a/.github/workflows/build-conda-m1.yml +++ b/.github/workflows/build-conda-m1.yml @@ -50,3 +50,4 @@ jobs: trigger-event: ${{ github.event_name }} secrets: CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build-conda-windows.yml b/.github/workflows/build-conda-windows.yml index 9781b40f41..ab652629bc 100644 --- a/.github/workflows/build-conda-windows.yml +++ b/.github/workflows/build-conda-windows.yml @@ -45,3 +45,4 @@ jobs: trigger-event: ${{ github.event_name }} secrets: CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build_conda_linux.yml b/.github/workflows/build_conda_linux.yml index 4245c4f08d..8fbf25bd80 100644 --- a/.github/workflows/build_conda_linux.yml +++ b/.github/workflows/build_conda_linux.yml @@ -49,3 +49,4 @@ jobs: trigger-event: ${{ github.event_name }} 
secrets: CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build_conda_macos.yml b/.github/workflows/build_conda_macos.yml index b51175f898..2fa598d9da 100644 --- a/.github/workflows/build_conda_macos.yml +++ b/.github/workflows/build_conda_macos.yml @@ -50,3 +50,4 @@ jobs: trigger-event: ${{ github.event_name }} secrets: CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} From bf77b2a032b4b7ce0e0c03ab6c66881cc83c62c9 Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Wed, 30 Aug 2023 13:08:30 -0700 Subject: [PATCH 04/13] Bump Version 2.1 -> 2.2 (#3585) Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3585 Reviewed By: atalman Differential Revision: D48832146 Pulled By: osalpekar fbshipit-source-id: c36dd79a0c0f407d490a488902207c8066c01383 --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index ecaf4eea7c..887948350c 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.1.0a0 +2.2.0a0 From 5cf7d2dbd231918c3f260f220459c9914914ac7b Mon Sep 17 00:00:00 2001 From: atalman Date: Wed, 30 Aug 2023 15:10:48 -0700 Subject: [PATCH 05/13] Revert "Enable ROCm RNN-T Loss (#2485)" (#3586) Summary: This reverts commit c5939616ddc17093747e896db6012b1f63792627. 
Unblock 2.1.0 rc Pull Request resolved: https://github.com/pytorch/audio/pull/3586 Reviewed By: osalpekar Differential Revision: D48842032 Pulled By: atalman fbshipit-source-id: bbdf9e45c9aa5fde00f315a2ff491ed050bc1707 --- .gitmodules | 3 - CMakeLists.txt | 10 ---- third_party/hipify_torch | 1 - torchaudio/csrc/CMakeLists.txt | 55 +------------------ torchaudio/csrc/rnnt/gpu/compute.cu | 4 -- torchaudio/csrc/rnnt/gpu/compute_alphas.cu | 4 -- torchaudio/csrc/rnnt/gpu/compute_betas.cu | 4 -- torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh | 12 ---- torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh | 22 -------- torchaudio/csrc/rnnt/gpu/gpu_transducer.h | 5 -- torchaudio/csrc/rnnt/gpu/kernel_utils.h | 4 -- torchaudio/csrc/rnnt/gpu/kernels.h | 5 -- torchaudio/csrc/rnnt/macros.h | 8 --- torchaudio/csrc/rnnt/options.h | 9 +-- torchaudio/csrc/rnnt/workspace.h | 16 +----- 15 files changed, 5 insertions(+), 157 deletions(-) delete mode 160000 third_party/hipify_torch diff --git a/.gitmodules b/.gitmodules index 25d307cea8..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "third_party/hipify_torch"] - path = third_party/hipify_torch - url = https://github.com/ROCmSoftwarePlatform/hipify_torch diff --git a/CMakeLists.txt b/CMakeLists.txt index d955c5da58..f0195c87b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,11 +76,6 @@ if(USE_ROCM) if(NOT PYTORCH_FOUND_HIP) set(USE_ROCM OFF) endif() - - if(CMAKE_VERSION VERSION_LESS 3.21.0) - message("Need at least CMake 3.21.0 to compile ROCm support.") - set(USE_ROCM OFF) - endif() endif() if(USE_CUDA) @@ -95,11 +90,6 @@ if(USE_CUDA) ) endif() -if(USE_ROCM) - enable_language(HIP) -endif() - -find_package(Torch REQUIRED) include(cmake/TorchAudioHelper.cmake) # https://github.com/pytorch/pytorch/issues/54174 diff --git a/third_party/hipify_torch b/third_party/hipify_torch deleted file mode 160000 index 083ff9b50c..0000000000 --- a/third_party/hipify_torch +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 
083ff9b50c7ed861f7f6eddd983cdedb72e8b964 diff --git a/torchaudio/csrc/CMakeLists.txt b/torchaudio/csrc/CMakeLists.txt index f05534f647..fc0c549493 100644 --- a/torchaudio/csrc/CMakeLists.txt +++ b/torchaudio/csrc/CMakeLists.txt @@ -1,29 +1,6 @@ ################################################################################ # libtorchaudio ################################################################################ - -if(USE_ROCM) - list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - FIND_PACKAGE(HIP REQUIRED) - MESSAGE(STATUS "hip found ${ROCM_FOUND}") - - list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/third_party/hipify_torch/cmake") - include(Hipify) - - set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - list( APPEND CMAKE_INSTALL_RPATH "/opt/rocm/llvm/lib" ) - set(OPENMP_LIBRARIES "/opt/rocm/llvm/lib/") - set(OpenMP_CXX "${CMAKE_CXX_COMPILER}") - set(OpenMP_CXX_FLAGS "-fopenmp=libomp") - #set(OpenMP_CXX_LIB_NAMES "omp") - set(OpenMP_omp_LIBRARY omp) - find_package(OpenMP REQUIRED) - -endif() - - set( sources lfilter.cpp @@ -62,37 +39,6 @@ if(BUILD_RNNT) rnnt/gpu/compute.cu ) endif() - - if (USE_ROCM) - hipify(CUDA_SOURCE_DIR ${PROJECT_SOURCE_DIR}/torchaudio/csrc/rnnt/gpu HIP_SOURCE_DIR ${PROJECT_SOURCE_DIR}/torchaudio/csrc/rnnt/hip) - if ( NOT HIP_ADD_LIBRARY_FOUND ) - list(APPEND CMAKE_MODULE_PATH /opt/rocm/hip/cmake) - find_package(HIP REQUIRED) - endif() - - list( - APPEND - sources - rnnt/hip/compute_alphas.hip - rnnt/hip/compute_betas.hip - rnnt/hip/compute.hip - ) - endif() -endif() - -if(USE_ROCM) - list( - APPEND - additional_libs - hip::host - hip::device - /opt/rocm/llvm/lib/libomp.so - ) - list( - APPEND - compile_definitions - USE_ROCM - ) endif() if(BUILD_RIR) @@ -141,6 +87,7 @@ endif() #------------------------------------------------------------------------------# # END OF CUSTOMIZATION LOGICS 
#------------------------------------------------------------------------------# + torchaudio_library( libtorchaudio "${sources}" diff --git a/torchaudio/csrc/rnnt/gpu/compute.cu b/torchaudio/csrc/rnnt/gpu/compute.cu index 3303053822..a6d389bf0b 100644 --- a/torchaudio/csrc/rnnt/gpu/compute.cu +++ b/torchaudio/csrc/rnnt/gpu/compute.cu @@ -1,10 +1,6 @@ #include #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#else #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/gpu/compute_alphas.cu b/torchaudio/csrc/rnnt/gpu/compute_alphas.cu index 22706f670d..918d442bf0 100644 --- a/torchaudio/csrc/rnnt/gpu/compute_alphas.cu +++ b/torchaudio/csrc/rnnt/gpu/compute_alphas.cu @@ -1,10 +1,6 @@ #include #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#else #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/gpu/compute_betas.cu b/torchaudio/csrc/rnnt/gpu/compute_betas.cu index d2a6134181..e1e4c1d90e 100644 --- a/torchaudio/csrc/rnnt/gpu/compute_betas.cu +++ b/torchaudio/csrc/rnnt/gpu/compute_betas.cu @@ -1,10 +1,6 @@ #include #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#else #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh b/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh index cb3c615770..e5f1cfc2df 100644 --- a/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh +++ b/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh @@ -2,11 +2,7 @@ #ifdef USE_CUDA -#ifdef __HIP_PLATFORM_AMD__ -#include -#else #include -#endif namespace torchaudio { namespace rnnt { @@ -43,11 +39,7 @@ __global__ void ReduceMax2D( CAST_DTYPE shf; for (int stride = (WARP_SIZE >> 1); stride > 0; stride >>= 1) { -#ifdef __HIP_PLATFORM_AMD__ - shf = __shfl_down(val, stride); -#else shf = __shfl_down_sync(0xFFFFFFFF, val, stride); -#endif if (threadIdx.x < stride && threadIdx.x + stride < dim) { if (shf > val) { val = shf; @@ -89,11 +81,7 @@ __global__ void 
ReduceLogSumExpGivenMax2D( CAST_DTYPE shf; for (int stride = (WARP_SIZE >> 1); stride > 0; stride >>= 1) { -#ifdef __HIP_PLATFORM_AMD__ - shf = __shfl_down(val, stride); -#else shf = __shfl_down_sync(0xFFFFFFFF, val, stride); -#endif if (threadIdx.x < stride && threadIdx.x + stride < dim) { val = val + shf; } diff --git a/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh b/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh index 2b7ef45df3..4ba04b68fc 100644 --- a/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh +++ b/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh @@ -4,15 +4,9 @@ #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#include -#include -#else #include #include #include -#endif namespace torchaudio { namespace rnnt { @@ -132,11 +126,7 @@ __device__ void ComputeAlphas( #pragma unroll for (int i = 1; i < warpSize; i <<= 1) { -#ifdef __HIP_PLATFORM_AMD__ - val = __shfl_up(skip_prob, i); -#else val = __shfl_up_sync(0xffffffff, skip_prob, i); -#endif if (i <= threadIdx.x) { skip_prob = skip_prob + val; } @@ -160,11 +150,7 @@ __device__ void ComputeAlphas( CAST_DTYPE out = val; for (int i = 1; i < warpSize; ++i) { -#ifdef __HIP_PLATFORM_AMD__ - val = __shfl_up(val, 1); -#else val = __shfl_up_sync(0xffffffff, val, 1); -#endif if (i == threadIdx.x) { val = math::lse(val + skip_prob, emit); out = val; @@ -239,11 +225,7 @@ __device__ void ComputeBetasCosts( #pragma unroll for (int i = 1; i < warpSize; i <<= 1) { -#ifdef __HIP_PLATFORM_AMD__ - val = __shfl_up(skip_prob, i); -#else val = __shfl_up_sync(0xffffffff, skip_prob, i); -#endif if (i <= threadIdx.x) { skip_prob = skip_prob + val; } @@ -266,11 +248,7 @@ __device__ void ComputeBetasCosts( CAST_DTYPE out = val; for (int i = 1; i < warpSize; ++i) { -#ifdef __HIP_PLATFORM_AMD__ - val = __shfl_up(val, 1); -#else val = __shfl_up_sync(0xffffffff, val, 1); -#endif if (i == threadIdx.x) { val = math::lse(val + skip_prob, emit); out = val; diff --git a/torchaudio/csrc/rnnt/gpu/gpu_transducer.h b/torchaudio/csrc/rnnt/gpu/gpu_transducer.h index 
32a731bafd..72759b39f4 100644 --- a/torchaudio/csrc/rnnt/gpu/gpu_transducer.h +++ b/torchaudio/csrc/rnnt/gpu/gpu_transducer.h @@ -3,13 +3,8 @@ #ifdef USE_CUDA #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#include -#else #include #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/gpu/kernel_utils.h b/torchaudio/csrc/rnnt/gpu/kernel_utils.h index 68136fcfa3..3b2989b073 100644 --- a/torchaudio/csrc/rnnt/gpu/kernel_utils.h +++ b/torchaudio/csrc/rnnt/gpu/kernel_utils.h @@ -2,11 +2,7 @@ #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#else #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/gpu/kernels.h b/torchaudio/csrc/rnnt/gpu/kernels.h index d22443fecb..db8bb5092b 100644 --- a/torchaudio/csrc/rnnt/gpu/kernels.h +++ b/torchaudio/csrc/rnnt/gpu/kernels.h @@ -2,13 +2,8 @@ #include -#ifdef __HIP_PLATFORM_AMD__ -#include -#include -#else #include #include -#endif namespace torchaudio { namespace rnnt { diff --git a/torchaudio/csrc/rnnt/macros.h b/torchaudio/csrc/rnnt/macros.h index e569d26241..abcbc39966 100644 --- a/torchaudio/csrc/rnnt/macros.h +++ b/torchaudio/csrc/rnnt/macros.h @@ -8,14 +8,6 @@ #define FORCE_INLINE __forceinline__ #include #include -#elif USE_ROCM -#define WARP_SIZE 32 -#define MAX_THREADS_PER_BLOCK 1024 -#define REDUCE_THREADS 256 -#define HOST_AND_DEVICE __host__ __device__ -#define FORCE_INLINE __forceinline__ -#include -#include #else #define HOST_AND_DEVICE #define FORCE_INLINE inline diff --git a/torchaudio/csrc/rnnt/options.h b/torchaudio/csrc/rnnt/options.h index ecf0714a3c..f70a3c8c07 100644 --- a/torchaudio/csrc/rnnt/options.h +++ b/torchaudio/csrc/rnnt/options.h @@ -4,12 +4,7 @@ #ifdef USE_CUDA #include -typedef cudaStream_t gpuStream_t; #endif // USE_CUDA -#ifdef USE_ROCM -#include -typedef hipStream_t gpuStream_t; -#endif // USE_ROCM #include #include @@ -20,9 +15,9 @@ namespace rnnt { typedef struct Options { // the device to compute transducer loss. 
device_t device_; -#if defined(USE_CUDA) || defined(USE_ROCM) +#ifdef USE_CUDA // the stream to launch kernels in when using GPU. - gpuStream_t stream_; + cudaStream_t stream_; #endif // The maximum number of threads that can be used. int numThreads_; diff --git a/torchaudio/csrc/rnnt/workspace.h b/torchaudio/csrc/rnnt/workspace.h index 14ae0047ba..e833ef1cdf 100644 --- a/torchaudio/csrc/rnnt/workspace.h +++ b/torchaudio/csrc/rnnt/workspace.h @@ -131,22 +131,10 @@ class IntWorkspace { ComputeSizeForBetaCounters(options_) * sizeof(int)); } #endif // USE_CUDA -#ifdef USE_ROCM - if (data_ != nullptr && options_.device_ == GPU) { - hipMemset( - GetPointerToAlphaCounters(), - 0, - ComputeSizeForAlphaCounters(options_) * sizeof(int)); - hipMemset( - GetPointerToBetaCounters(), - 0, - ComputeSizeForBetaCounters(options_) * sizeof(int)); - } -#endif // USE_ROCM } static int ComputeSizeForAlphaCounters(const Options& options) { // B * U -#if defined(USE_CUDA) || defined(USE_ROCM) +#ifdef USE_CUDA if (options.device_ == GPU) { return options.BU(); } else { @@ -157,7 +145,7 @@ class IntWorkspace { #endif // USE_CUDA } static int ComputeSizeForBetaCounters(const Options& options) { // B * U -#if defined(USE_CUDA) || defined(USE_ROCM) +#ifdef USE_CUDA if (options.device_ == GPU) { return options.BU(); } else { From af79867623c61b647d101139ede26b9f4398fcf8 Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Fri, 1 Sep 2023 16:03:43 -0700 Subject: [PATCH 06/13] Automate Release-Only Changes (#3590) Summary: First Pass for automating release-only changes. Tested this in audio to ensure it works. 
Pull Request resolved: https://github.com/pytorch/audio/pull/3590 Reviewed By: huydhn Differential Revision: D48921925 Pulled By: osalpekar fbshipit-source-id: 2920e4d984ffb0c1957dae6fb6bab10344e65ff7 --- packaging/cut_release.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 packaging/cut_release.sh diff --git a/packaging/cut_release.sh b/packaging/cut_release.sh new file mode 100644 index 0000000000..91e0e5ff15 --- /dev/null +++ b/packaging/cut_release.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Usage (run from root of project): +# TEST_INFRA_BRANCH=release/2.1 RELEASE_BRANCH=release/2.1 RELEASE_VERSION=2.1.0 packaging/cut_release.sh +# +# TEST_INFRA_BRANCH: The release branch of test-infra that houses all reusable +# workflows +# +# RELEASE_BRANCH: The name of the release branch for this repo +# +# RELEASE_VERSION: Version of this current release + +set -eou pipefail + +# Create and Check out to Release Branch +git checkout -b "${RELEASE_BRANCH}" + +# Change all GitHub Actions to reference the test-infra release branch +# as opposed to main. 
+for i in .github/workflows/*.yml; do + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' -e s#@main#@"${TEST_INFRA_BRANCH}"# $i; + sed -i '' -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i; + else + sed -i -e s#@main#@"${TEST_INFRA_BRANCH}"# $i; + sed -i -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i; + fi +done + +# Update the Release Version in version.txt +echo "${RELEASE_VERSION}" >version.txt + +# Optional +# git add .github/workflows/*.yml version.txt +# git commit -m "[RELEASE-ONLY CHANGES] Branch Cut for Release ${RELEASE_VERSION}" +# git push origin "${RELEASE_BRANCH}" From e057d7d144e2716588b80255f0a143662fd5c10d Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Fri, 1 Sep 2023 17:39:15 -0700 Subject: [PATCH 07/13] Moving to More Efficient Windows GPU Runner (#3587) Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3587 Reviewed By: huydhn Differential Revision: D48847224 Pulled By: osalpekar fbshipit-source-id: 15ccae56f0e5e8e366b5feb66fdb42bb5cba97e1 --- .github/workflows/unittest-windows-gpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-windows-gpu.yml b/.github/workflows/unittest-windows-gpu.yml index bc36124638..62f8640d79 100644 --- a/.github/workflows/unittest-windows-gpu.yml +++ b/.github/workflows/unittest-windows-gpu.yml @@ -14,7 +14,7 @@ jobs: uses: pytorch/test-infra/.github/workflows/windows_job.yml@main with: repository: pytorch/audio - runner: windows.8xlarge.nvidia.gpu + runner: windows.g5.4xlarge.nvidia.gpu timeout: 360 script: | # Mark Build Directory Safe From 3e7e696cc6b49f85e08d5f79db022f66d1c1a2fa Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Mon, 4 Sep 2023 06:34:52 -0700 Subject: [PATCH 08/13] Fix doc link (#3593) Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3593 Reviewed By: nateanl Differential Revision: D48933041 Pulled By: mthrok 
fbshipit-source-id: cd05d3cf5006206ba441fdc05548bcd922ce0598 --- docs/source/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 44d64a8d38..d6fccbce3b 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -30,7 +30,7 @@ Dependencies Optional Dependencies ~~~~~~~~~~~~~~~~~~~~~ -.. _ffmpeg: +.. _ffmpeg_dependency: * `FFmpeg `__ From 6fbc1e6869f339ebe50d94ea7b352eee9706eee7 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Mon, 4 Sep 2023 07:15:08 -0700 Subject: [PATCH 09/13] Add incremental decoding support to CTC decoder (#3594) Summary: Add incremental decoding support to CTC decoder. Resolves https://github.com/pytorch/audio/issues/3574 Pull Request resolved: https://github.com/pytorch/audio/pull/3594 Reviewed By: nateanl Differential Revision: D48940584 Pulled By: mthrok fbshipit-source-id: 31871614008cf197cf3900f7183ec6cff34d2905 --- ...asr_inference_with_ctc_decoder_tutorial.py | 41 +++++++ torchaudio/models/decoder/_ctc_decoder.py | 114 +++++++++++++++--- 2 files changed, 139 insertions(+), 16 deletions(-) diff --git a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py index 955dc3c029..624cd8066a 100644 --- a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py +++ b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py @@ -387,6 +387,47 @@ def forward(self, emission: torch.Tensor) -> List[str]: # and “shoktd”. # +###################################################################### +# Incremental decoding +# ~~~~~~~~~~~~~~~~~~~~ +# +# If the input speech is long, one can decode the emission in +# incremental manner. +# +# You need to first initialize the internal state of the decoder with +# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_begin`. 
+ +beam_search_decoder.decode_begin() + +###################################################################### +# Then, you can pass emissions to +# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_step`. +# Here we use the same emission but pass it to the decoder one frame +# at a time. + +for t in range(emission.size(1)): + beam_search_decoder.decode_step(emission[0, t:t + 1, :]) + +###################################################################### +# Finally, finalize the internal state of the decoder, and retrieve the +# result. + +beam_search_decoder.decode_end() +beam_search_result_inc = beam_search_decoder.get_final_hypothesis() + +###################################################################### +# The result of incremental decoding is identical to batch decoding. +# +beam_search_transcript_inc = " ".join(beam_search_result_inc[0].words).strip() +beam_search_wer_inc = torchaudio.functional.edit_distance( + actual_transcript, beam_search_result_inc[0].words) / len(actual_transcript) + +print(f"Transcript: {beam_search_transcript_inc}") +print(f"WER: {beam_search_wer_inc}") + +assert beam_search_result[0][0].words == beam_search_result_inc[0].words +assert beam_search_result[0][0].score == beam_search_result_inc[0].score +torch.testing.assert_close(beam_search_result[0][0].timesteps, beam_search_result_inc[0].timesteps) ###################################################################### # Timestep Alignments diff --git a/torchaudio/models/decoder/_ctc_decoder.py b/torchaudio/models/decoder/_ctc_decoder.py index 7fbb8769ab..4d45f12f52 100644 --- a/torchaudio/models/decoder/_ctc_decoder.py +++ b/torchaudio/models/decoder/_ctc_decoder.py @@ -261,10 +261,102 @@ def _get_timesteps(self, idxs: torch.IntTensor) -> torch.IntTensor: timesteps.append(i) return torch.IntTensor(timesteps) + def decode_begin(self): + """Initialize the internal state of the decoder. + + See :py:meth:`decode_step` for the usage. + + .. 
note:: + + This method is required only when performing online decoding. + It is not necessary when performing batch decoding with :py:meth:`__call__`. + """ + self.decoder.decode_begin() + + def decode_end(self): + """Finalize the internal state of the decoder. + + See :py:meth:`decode_step` for the usage. + + .. note:: + + This method is required only when performing online decoding. + It is not necessary when performing batch decoding with :py:meth:`__call__`. + """ + self.decoder.decode_end() + + def decode_step(self, emissions: torch.FloatTensor): + """Perform incremental decoding on top of the current internal state. + + .. note:: + + This method is required only when performing online decoding. + It is not necessary when performing batch decoding with :py:meth:`__call__`. + + Args: + emissions (torch.FloatTensor): CPU tensor of shape `(frame, num_tokens)` storing sequences of + probability distribution over labels; output of acoustic model. + + Example: + >>> decoder = torchaudio.models.decoder.ctc_decoder(...) + >>> decoder.decode_begin() + >>> decoder.decode_step(emission1) + >>> decoder.decode_step(emission2) + >>> decoder.decode_end() + >>> result = decoder.get_final_hypothesis() + """ + if emissions.dtype != torch.float32: + raise ValueError("emissions must be float32.") + + if not emissions.is_cpu: + raise RuntimeError("emissions must be a CPU tensor.") + + if not emissions.is_contiguous(): + raise RuntimeError("emissions must be contiguous.") + + if emissions.ndim != 2: + raise RuntimeError(f"emissions must be 2D. 
Found {emissions.shape}") + + T, N = emissions.size() + self.decoder.decode_step(emissions.data_ptr(), T, N) + + def _to_hypo(self, results) -> List[CTCHypothesis]: + return [ + CTCHypothesis( + tokens=self._get_tokens(result.tokens), + words=[self.word_dict.get_entry(x) for x in result.words if x >= 0], + score=result.score, + timesteps=self._get_timesteps(result.tokens), + ) + for result in results + ] + + def get_final_hypothesis(self) -> List[CTCHypothesis]: + """Get the final hypothesis + + Returns: + List[CTCHypothesis]: + List of sorted best hypotheses. + + .. note:: + + This method is required only when performing online decoding. + It is not necessary when performing batch decoding with :py:meth:`__call__`. + """ + results = self.decoder.get_all_final_hypothesis() + return self._to_hypo(results[: self.nbest]) + def __call__( self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None ) -> List[List[CTCHypothesis]]: """ + Performs batched offline decoding. + + .. note:: + + This method performs offline decoding in one go. To perform incremental decoding, + please refer to :py:meth:`decode_step`. + Args: emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of probability distribution over labels; output of acoustic model. @@ -279,13 +371,16 @@ def __call__( if emissions.dtype != torch.float32: raise ValueError("emissions must be float32.") - if emissions.is_cuda: + if not emissions.is_cpu: raise RuntimeError("emissions must be a CPU tensor.") if not emissions.is_contiguous(): raise RuntimeError("emissions must be contiguous.") - if lengths is not None and lengths.is_cuda: + if emissions.ndim != 3: + raise RuntimeError(f"emissions must be 3D. 
Found {emissions.shape}") + + if lengths is not None and not lengths.is_cpu: raise RuntimeError("lengths must be a CPU tensor.") B, T, N = emissions.size() @@ -298,20 +393,7 @@ def __call__( for b in range(B): emissions_ptr = emissions.data_ptr() + float_bytes * b * emissions.stride(0) results = self.decoder.decode(emissions_ptr, lengths[b], N) - - nbest_results = results[: self.nbest] - hypos.append( - [ - CTCHypothesis( - tokens=self._get_tokens(result.tokens), - words=[self.word_dict.get_entry(x) for x in result.words if x >= 0], - score=result.score, - timesteps=self._get_timesteps(result.tokens), - ) - for result in nbest_results - ] - ) - + hypos.append(self._to_hypo(results[: self.nbest])) return hypos def idxs_to_tokens(self, idxs: torch.LongTensor) -> List: From 7d37f69c5244dda1a2ce3bc2df8788fcc1b65947 Mon Sep 17 00:00:00 2001 From: hwangjeff Date: Mon, 4 Sep 2023 11:50:49 -0700 Subject: [PATCH 10/13] Fix decoder call in Device ASR/AVSR tutorials (#3572) Summary: Fixes decoder calls and related code in Device ASR/AVSR tutorials to account for changes to RNN-T decoder introduced in https://github.com/pytorch/audio/issues/3295. 
Pull Request resolved: https://github.com/pytorch/audio/pull/3572 Reviewed By: mthrok Differential Revision: D48629428 Pulled By: hwangjeff fbshipit-source-id: 63ede307fb4412aa28f88972d56dca8405607b7a --- examples/tutorials/device_asr.py | 11 +++++------ examples/tutorials/device_avsr.py | 9 ++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/examples/tutorials/device_asr.py b/examples/tutorials/device_asr.py index 281b596d8f..c78cfcfc61 100644 --- a/examples/tutorials/device_asr.py +++ b/examples/tutorials/device_asr.py @@ -206,16 +206,15 @@ def __init__(self, bundle: torchaudio.pipelines.RNNTBundle, beam_width: int = 10 self.beam_width = beam_width self.state = None - self.hypothesis = None + self.hypotheses = None def infer(self, segment: torch.Tensor) -> str: """Perform streaming inference""" features, length = self.feature_extractor(segment) - hypos, self.state = self.decoder.infer( - features, length, self.beam_width, state=self.state, hypothesis=self.hypothesis + self.hypotheses, self.state = self.decoder.infer( + features, length, self.beam_width, state=self.state, hypothesis=self.hypotheses ) - self.hypothesis = hypos[0] - transcript = self.token_processor(self.hypothesis[0], lstrip=False) + transcript = self.token_processor(self.hypotheses[0][0], lstrip=False) return transcript @@ -291,7 +290,7 @@ def infer(): chunk = q.get() segment = cacher(chunk[:, 0]) transcript = pipeline.infer(segment) - print(transcript, end="", flush=True) + print(transcript, end="\r", flush=True) import torch.multiprocessing as mp diff --git a/examples/tutorials/device_avsr.py b/examples/tutorials/device_avsr.py index 0bb7a5792d..cd013731a9 100644 --- a/examples/tutorials/device_avsr.py +++ b/examples/tutorials/device_avsr.py @@ -258,15 +258,14 @@ def __init__(self, preprocessor, model, decoder, token_processor): self.token_processor = token_processor self.state = None - self.hypothesis = None + self.hypotheses = None def forward(self, audio, video): audio, 
video = self.preprocessor(audio, video) feats = self.model(audio.unsqueeze(0), video.unsqueeze(0)) length = torch.tensor([feats.size(1)], device=audio.device) - hypos, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypothesis) - self.hypothesis = hypos[0] - transcript = self.token_processor(self.hypothesis[0], lstrip=False) + self.hypotheses, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypotheses) + transcript = self.token_processor(self.hypotheses[0][0], lstrip=False) return transcript @@ -370,7 +369,7 @@ def infer(): video, audio = cacher(video, audio) pipeline.state, pipeline.hypothesis = None, None transcript = pipeline(audio, video.float()) - print(transcript, end="", flush=True) + print(transcript, end="\r", flush=True) num_video_frames = 0 video_chunks = [] audio_chunks = [] From 454418d2a6da6ebd5ad85e9d4b1c09ea69531ed7 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Mon, 4 Sep 2023 12:16:20 -0700 Subject: [PATCH 11/13] [BC-Breaking] Remove legacy global backend switch (#3559) Summary: This PR removes the legacy backend switch mechanism. The implementation itself is still available. 
Merge after v2.1 release Pull Request resolved: https://github.com/pytorch/audio/pull/3559 Reviewed By: nateanl Differential Revision: D48353764 Pulled By: mthrok fbshipit-source-id: 4d3924dbe6f334ecebe2b12fcd4591c61c4aa656 --- docs/source/torchaudio.rst | 27 ++---- .../torchaudio_unittest/backend/utils_test.py | 36 -------- torchaudio/__init__.py | 30 +++---- torchaudio/_backend/__init__.py | 23 +++-- torchaudio/backend/__init__.py | 5 -- torchaudio/backend/_common.py | 2 +- torchaudio/backend/utils.py | 84 ------------------- 7 files changed, 31 insertions(+), 176 deletions(-) delete mode 100644 test/torchaudio_unittest/backend/utils_test.py delete mode 100644 torchaudio/backend/utils.py diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst index 9eeaae5c92..87abbc07ef 100644 --- a/docs/source/torchaudio.rst +++ b/docs/source/torchaudio.rst @@ -17,6 +17,7 @@ it easy to handle audio data. info load save + list_audio_backends .. _backend: @@ -91,30 +92,12 @@ please refer to https://github.com/pytorch/audio/issues/2950 * In 2.0, audio I/O backend dispatcher was introduced. Users can opt-in to using dispatcher by setting the environment variable ``TORCHAUDIO_USE_BACKEND_DISPATCHER=1``. -* In 2.1, the disptcher becomes the default mechanism for I/O. - Those who need to keep using the previous mechanism (global backend) can do - so by setting ``TORCHAUDIO_USE_BACKEND_DISPATCHER=0``. -* In 2.2, the legacy global backend mechanism will be removed. +* In 2.1, the disptcher became the default mechanism for I/O. +* In 2.2, the legacy global backend mechanism is removed. Utility functions :py:func:`get_audio_backend` and :py:func:`set_audio_backend` - become no-op. + became no-op. -Furthermore, we are removing file-like object support from libsox backend, as this +Furthermore, we removed file-like object support from libsox backend, as this is better supported by FFmpeg backend and makes the build process simpler. 
Therefore, beginning with 2.1, FFmpeg and Soundfile are the sole backends that support file-like objects. - -Backend Utilities ------------------ - -The following functions are effective only when backend dispatcher is disabled. - -Note that the changes in 2.1 marks :py:func:`get_audio_backend` and -:py:func:`set_audio_backend` deprecated. - -.. autosummary:: - :toctree: generated - :nosignatures: - - list_audio_backends - get_audio_backend - set_audio_backend diff --git a/test/torchaudio_unittest/backend/utils_test.py b/test/torchaudio_unittest/backend/utils_test.py deleted file mode 100644 index ea5416cf1f..0000000000 --- a/test/torchaudio_unittest/backend/utils_test.py +++ /dev/null @@ -1,36 +0,0 @@ -import torchaudio -from torchaudio_unittest import common_utils - - -class BackendSwitchMixin: - """Test set/get_audio_backend works""" - - backend = None - backend_module = None - - def test_switch(self): - torchaudio.backend.utils.set_audio_backend(self.backend) - if self.backend is None: - assert torchaudio.backend.utils.get_audio_backend() is None - else: - assert torchaudio.backend.utils.get_audio_backend() == self.backend - assert torchaudio.load == self.backend_module.load - assert torchaudio.save == self.backend_module.save - assert torchaudio.info == self.backend_module.info - - -class TestBackendSwitch_NoBackend(BackendSwitchMixin, common_utils.TorchaudioTestCase): - backend = None - backend_module = torchaudio.backend.no_backend - - -@common_utils.skipIfNoSox -class TestBackendSwitch_SoXIO(BackendSwitchMixin, common_utils.TorchaudioTestCase): - backend = "sox_io" - backend_module = torchaudio.backend.sox_io_backend - - -@common_utils.skipIfNoModule("soundfile") -class TestBackendSwitch_soundfile(BackendSwitchMixin, common_utils.TorchaudioTestCase): - backend = "soundfile" - backend_module = torchaudio.backend.soundfile_backend diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py index 19cc69f6b6..a1ca98ac42 100644 --- a/torchaudio/__init__.py +++ 
b/torchaudio/__init__.py @@ -1,5 +1,10 @@ -from . import ( # noqa: F401 +# Initialize extension and backend first +from . import ( # noqa # usort: skip _extension, + _backend, +) +from . import ( # noqa: F401 + backend, # For BC compliance, datasets, functional, @@ -11,7 +16,7 @@ transforms, utils, ) -from ._backend.common import AudioMetaData # noqa +from ._backend import AudioMetaData, get_audio_backend, info, list_audio_backends, load, save, set_audio_backend try: from .version import __version__, git_version # noqa: F401 @@ -19,26 +24,11 @@ pass -def _is_backend_dispatcher_enabled(): - import os - - return os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER", default="1") == "1" - - -if _is_backend_dispatcher_enabled(): - from ._backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend -else: - from .backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend - - -_init_backend() - -# for backward compatibility. This has to happen after _backend is imported. -from . import backend # noqa: F401 - - __all__ = [ "AudioMetaData", + "load", + "info", + "save", "io", "compliance", "datasets", diff --git a/torchaudio/_backend/__init__.py b/torchaudio/_backend/__init__.py index e4855c3bbb..2872a81320 100644 --- a/torchaudio/_backend/__init__.py +++ b/torchaudio/_backend/__init__.py @@ -1,16 +1,24 @@ from typing import List, Optional -import torchaudio from torchaudio._internal.module_utils import deprecated +from . import utils +from .common import AudioMetaData -# TODO: Once legacy global backend is removed, move this to torchaudio.__init__ -def _init_backend(): - from . 
import utils +__all__ = [ + "AudioMetaData", + "load", + "info", + "save", + "list_audio_backends", + "get_audio_backend", + "set_audio_backend", +] - torchaudio.info = utils.get_info_func() - torchaudio.load = utils.get_load_func() - torchaudio.save = utils.get_save_func() + +info = utils.get_info_func() +load = utils.get_load_func() +save = utils.get_save_func() def list_audio_backends() -> List[str]: @@ -24,7 +32,6 @@ def list_audio_backends() -> List[str]: - Dispatcher mode: ``"ffmpeg"``, ``"sox"`` and ``"soundfile"``. - Legacy backend mode: ``"sox_io"``, ``"soundfile"``. """ - from . import utils return list(utils.get_available_backends().keys()) diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py index 8fa244f15b..9501a0f13b 100644 --- a/torchaudio/backend/__init__.py +++ b/torchaudio/backend/__init__.py @@ -3,11 +3,6 @@ # New things should be added to `torchaudio._backend`. # Only things related to backward compatibility should be placed here. -from .utils import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend - - -__all__ = ["_init_backend", "get_audio_backend", "list_audio_backends", "set_audio_backend"] - def __getattr__(name: str): if name == "common": diff --git a/torchaudio/backend/_common.py b/torchaudio/backend/_common.py index 09a79d991c..72f9b875dd 100644 --- a/torchaudio/backend/_common.py +++ b/torchaudio/backend/_common.py @@ -7,7 +7,7 @@ def __getattr__(name: str): "`torchaudio.AudioMetaData`. 
Please update the import path.", stacklevel=2, ) - from torchaudio._backend.common import AudioMetaData + from torchaudio import AudioMetaData return AudioMetaData raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py deleted file mode 100644 index 2b94db483f..0000000000 --- a/torchaudio/backend/utils.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Defines utilities for switching audio backends""" -import warnings -from typing import List, Optional - -import torchaudio -from torchaudio._backend import soundfile_backend -from torchaudio._internal import module_utils as _mod_utils - -from . import _no_backend as no_backend, _sox_io_backend as sox_io_backend - -__all__ = [ - "list_audio_backends", - "get_audio_backend", - "set_audio_backend", -] - - -def list_audio_backends() -> List[str]: - """List available backends - - Returns: - List[str]: The list of available backends. - """ - backends = [] - if _mod_utils.is_module_available("soundfile"): - backends.append("soundfile") - if torchaudio._extension._SOX_INITIALIZED: - backends.append("sox_io") - return backends - - -def set_audio_backend(backend: Optional[str]): - """Set the backend for I/O operation - - Args: - backend (str or None): Name of the backend. - One of ``"sox_io"`` or ``"soundfile"`` based on availability - of the system. If ``None`` is provided the current backend is unassigned. 
- """ - if backend is not None and backend not in list_audio_backends(): - raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.") - - if backend is None: - module = no_backend - elif backend == "sox_io": - module = sox_io_backend - elif backend == "soundfile": - module = soundfile_backend - else: - raise NotImplementedError(f'Unexpected backend "{backend}"') - - for func in ["save", "load", "info"]: - setattr(torchaudio, func, getattr(module, func)) - - -def _init_backend(): - warnings.warn( - "TorchAudio's global backend is now deprecated. " - "Please enable distpatcher by setting `TORCHAUDIO_USE_BACKEND_DISPATCHER=1`, " - "and specify backend when calling load/info/save function.", - stacklevel=3, - ) - backends = list_audio_backends() - if "sox_io" in backends: - set_audio_backend("sox_io") - elif "soundfile" in backends: - set_audio_backend("soundfile") - else: - set_audio_backend(None) - - -def get_audio_backend() -> Optional[str]: - """Get the name of the current backend - - Returns: - Optional[str]: The name of the current backend or ``None`` if no backend is assigned. - """ - if torchaudio.load == no_backend.load: - return None - if torchaudio.load == sox_io_backend.load: - return "sox_io" - if torchaudio.load == soundfile_backend.load: - return "soundfile" - raise ValueError("Unknown backend.") From 931598c1c9e5007291a8f4097825c4a20be47f33 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Tue, 5 Sep 2023 07:50:32 -0700 Subject: [PATCH 12/13] Fix backward compatibility layer in backend module (#3595) Summary: The PR https://github.com/pytorch/audio/issues/3549 re-organized the backend implementations and deprecated the direct access to torchaudio.backend. The change was supposed to be BC-compatible while issuing a warning to users, but the implementation of module-level `__getattr__` was not quite right. See an issue https://github.com/pyannote/pyannote-audio/pull/1456. 
This commit fixes it so that the following imports work; ```python from torchaudio.backend.common import AudioMetaData from torchaudio.backend import sox_io_backend from torchaudio.backend.sox_io_backend import save, load, info from torchaudio.backend import no_backend from torchaudio.backend.no_backend import save, load, info from torchaudio.backend import soundfile_backend from torchaudio.backend.soundfile_backend import save, load, info ``` Pull Request resolved: https://github.com/pytorch/audio/pull/3595 Reviewed By: nateanl Differential Revision: D48957446 Pulled By: mthrok fbshipit-source-id: ebb256461dd3032025fd27d0455ce980888f7778 --- .../backend/soundfile/info_test.py | 1 - torchaudio/__init__.py | 18 +++++++--- torchaudio/backend/__init__.py | 33 ++----------------- torchaudio/backend/_no_backend.py | 3 +- torchaudio/backend/{_common.py => common.py} | 4 +-- torchaudio/backend/no_backend.py | 14 ++++++++ torchaudio/backend/soundfile_backend.py | 14 ++++++++ torchaudio/backend/sox_io_backend.py | 14 ++++++++ 8 files changed, 61 insertions(+), 40 deletions(-) rename torchaudio/backend/{_common.py => common.py} (94%) create mode 100644 torchaudio/backend/no_backend.py create mode 100644 torchaudio/backend/soundfile_backend.py create mode 100644 torchaudio/backend/sox_io_backend.py diff --git a/test/torchaudio_unittest/backend/soundfile/info_test.py b/test/torchaudio_unittest/backend/soundfile/info_test.py index a7c1708ac2..a9acec6f05 100644 --- a/test/torchaudio_unittest/backend/soundfile/info_test.py +++ b/test/torchaudio_unittest/backend/soundfile/info_test.py @@ -117,7 +117,6 @@ class MockSoundFileInfo: with patch("soundfile.info", _mock_info_func): with warnings.catch_warnings(record=True) as w: info = soundfile_backend.info("foo") - assert len(w) == 1 assert "UNSEEN_SUBTYPE subtype is unknown to TorchAudio" in str(w[-1].message) assert info.bits_per_sample == 0 diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py index a1ca98ac42..90b411eae1 
100644 --- a/torchaudio/__init__.py +++ b/torchaudio/__init__.py @@ -1,10 +1,16 @@ # Initialize extension and backend first -from . import ( # noqa # usort: skip - _extension, - _backend, +from . import _extension # noqa # usort: skip +from ._backend import ( # noqa # usort: skip + AudioMetaData, + get_audio_backend, + info, + list_audio_backends, + load, + save, + set_audio_backend, ) + from . import ( # noqa: F401 - backend, # For BC compliance, datasets, functional, @@ -16,7 +22,9 @@ transforms, utils, ) -from ._backend import AudioMetaData, get_audio_backend, info, list_audio_backends, load, save, set_audio_backend + +# For BC +from . import backend # noqa # usort: skip try: from .version import __version__, git_version # noqa: F401 diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py index 9501a0f13b..84df7e7d69 100644 --- a/torchaudio/backend/__init__.py +++ b/torchaudio/backend/__init__.py @@ -3,35 +3,6 @@ # New things should be added to `torchaudio._backend`. # Only things related to backward compatibility should be placed here. +from . import common, no_backend, soundfile_backend, sox_io_backend # noqa -def __getattr__(name: str): - if name == "common": - from . import _common - - return _common - - if name in ["no_backend", "sox_io_backend", "soundfile_backend"]: - import warnings - - warnings.warn( - "Torchaudio's I/O functions now support par-call bakcend dispatch. " - "Importing backend implementation directly is no longer guaranteed to work. " - "Please use `backend` keyword with load/save/info function, instead of " - "calling the udnerlying implementation directly.", - stacklevel=2, - ) - - if name == "sox_io_backend": - from . import _sox_io_backend - - return _sox_io_backend - if name == "soundfile_backend": - from torchaudio._backend import soundfile_backend - - return soundfile_backend - - if name == "no_backend": - from . 
import _no_backend - - return _no_backend - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") +__all__ = [] diff --git a/torchaudio/backend/_no_backend.py b/torchaudio/backend/_no_backend.py index 5ebeb38708..fcbb2ad84a 100644 --- a/torchaudio/backend/_no_backend.py +++ b/torchaudio/backend/_no_backend.py @@ -2,6 +2,7 @@ from typing import Callable, Optional, Tuple, Union from torch import Tensor +from torchaudio import AudioMetaData def load( @@ -20,5 +21,5 @@ def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, chan raise RuntimeError("No audio I/O backend is available.") -def info(filepath: str) -> None: +def info(filepath: str) -> AudioMetaData: raise RuntimeError("No audio I/O backend is available.") diff --git a/torchaudio/backend/_common.py b/torchaudio/backend/common.py similarity index 94% rename from torchaudio/backend/_common.py rename to torchaudio/backend/common.py index 72f9b875dd..3f736bf401 100644 --- a/torchaudio/backend/_common.py +++ b/torchaudio/backend/common.py @@ -1,7 +1,7 @@ def __getattr__(name: str): - import warnings - if name == "AudioMetaData": + import warnings + warnings.warn( "`torchaudio.backend.common.AudioMetaData` has been moved to " "`torchaudio.AudioMetaData`. Please update the import path.", diff --git a/torchaudio/backend/no_backend.py b/torchaudio/backend/no_backend.py new file mode 100644 index 0000000000..2645a86bc8 --- /dev/null +++ b/torchaudio/backend/no_backend.py @@ -0,0 +1,14 @@ +def __getattr__(name: str): + import warnings + + warnings.warn( + "Torchaudio's I/O functions now support par-call bakcend dispatch. " + "Importing backend implementation directly is no longer guaranteed to work. " + "Please use `backend` keyword with load/save/info function, instead of " + "calling the udnerlying implementation directly.", + stacklevel=2, + ) + + from . 
import _no_backend + + return getattr(_no_backend, name) diff --git a/torchaudio/backend/soundfile_backend.py b/torchaudio/backend/soundfile_backend.py new file mode 100644 index 0000000000..5e81db372a --- /dev/null +++ b/torchaudio/backend/soundfile_backend.py @@ -0,0 +1,14 @@ +def __getattr__(name: str): + import warnings + + warnings.warn( + "Torchaudio's I/O functions now support par-call bakcend dispatch. " + "Importing backend implementation directly is no longer guaranteed to work. " + "Please use `backend` keyword with load/save/info function, instead of " + "calling the udnerlying implementation directly.", + stacklevel=2, + ) + + from torchaudio._backend import soundfile_backend + + return getattr(soundfile_backend, name) diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py new file mode 100644 index 0000000000..a361ab87a5 --- /dev/null +++ b/torchaudio/backend/sox_io_backend.py @@ -0,0 +1,14 @@ +def __getattr__(name: str): + import warnings + + warnings.warn( + "Torchaudio's I/O functions now support par-call bakcend dispatch. " + "Importing backend implementation directly is no longer guaranteed to work. " + "Please use `backend` keyword with load/save/info function, instead of " + "calling the udnerlying implementation directly.", + stacklevel=2, + ) + + from . 
import _sox_io_backend + + return getattr(_sox_io_backend, name) From ede4309a85733a78c4a9a270916fbac7fae957d8 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:29:12 -0700 Subject: [PATCH 13/13] Update FFmpeg arm64 CI (#3578) Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3578 Reviewed By: nateanl Differential Revision: D48971591 Pulled By: mthrok fbshipit-source-id: a76cf8f1287c076d7b6919d901eda0ce832bdb7d --- .github/workflows/ffmpeg.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ffmpeg.yml b/.github/workflows/ffmpeg.yml index 956ce4ef55..6ec4237c63 100644 --- a/.github/workflows/ffmpeg.yml +++ b/.github/workflows/ffmpeg.yml @@ -44,8 +44,7 @@ jobs: job-name: Build upload-artifact: ffmpeg-lgpl repository: pytorch/audio - runner: "linux.t4g.2xlarge" - no-sudo: true + runner: linux.arm64.2xlarge docker-image: pytorch/manylinuxaarch64-builder:cpu-aarch64 script: | export FFMPEG_VERSION="${{ matrix.ffmpeg_version }}"