From 47eaab4dc97b553443628a6a2f04cada2829f5a6 Mon Sep 17 00:00:00 2001
From: pbialecki <piotr.bialecki@hotmail.de>
Date: Wed, 23 Aug 2023 11:27:16 -0700
Subject: [PATCH 01/13] update CUDA to 12.1 U1 (#3563)

Summary:
Follow-up of: https://github.com/pytorch/builder/pull/1485

CC atalman

Pull Request resolved: https://github.com/pytorch/audio/pull/3563

Reviewed By: kit1980

Differential Revision: D48610200

Pulled By: atalman

fbshipit-source-id: 61c9981da5a343a3cbce97b0a77ab91f37560087
---
 packaging/windows/internal/cuda_install.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat
index 6c86834f23..facf186937 100644
--- a/packaging/windows/internal/cuda_install.bat
+++ b/packaging/windows/internal/cuda_install.bat
@@ -57,7 +57,7 @@ goto cuda_common
 
 :cuda121
 
-set CUDA_INSTALL_EXE=cuda_12.1.0_531.14_windows.exe
+set CUDA_INSTALL_EXE=cuda_12.1.1_531.14_windows.exe
 if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
     if errorlevel 1 exit /b 1

From 5ee254e350289ee2951cc61a1fdc3b361c48a265 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Mon, 28 Aug 2023 23:51:02 -0700
Subject: [PATCH 02/13] Remove random print statement (#3577)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3577

Reviewed By: atalman

Differential Revision: D48763580

Pulled By: mthrok

fbshipit-source-id: 6ab155a5dd4cf11b2a58f26ced369107f0a2f08f
---
 torchaudio/_backend/ffmpeg.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchaudio/_backend/ffmpeg.py b/torchaudio/_backend/ffmpeg.py
index 986367fecd..a50df07be4 100644
--- a/torchaudio/_backend/ffmpeg.py
+++ b/torchaudio/_backend/ffmpeg.py
@@ -241,7 +241,6 @@ def _type(spec):
             sample_fmt = _get_flac_sample_fmt(bps)
         if _type("ogg"):
             sample_fmt = _get_flac_sample_fmt(bps)
-    print(ext, format, encoding, bps, "===>", muxer, encoder, sample_fmt)
     return muxer, encoder, sample_fmt
 
 

From 6fb68544e758eeceacebf6e215253187dcaf9983 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Tue, 29 Aug 2023 16:43:44 -0700
Subject: [PATCH 03/13] Separate Test Token for Conda Uploads (#3582)

Summary:
We will use a separate token for uploading test binaries (instead of reusing the nightly token). This PR adds that token to the caller workflow.

Pull Request resolved: https://github.com/pytorch/audio/pull/3582

Reviewed By: atalman

Differential Revision: D48803009

Pulled By: osalpekar

fbshipit-source-id: c2af57f6946da51a7b56c975614e60f243e3f6fb
---
 .github/workflows/build-conda-m1.yml      | 1 +
 .github/workflows/build-conda-windows.yml | 1 +
 .github/workflows/build_conda_linux.yml   | 1 +
 .github/workflows/build_conda_macos.yml   | 1 +
 4 files changed, 4 insertions(+)

diff --git a/.github/workflows/build-conda-m1.yml b/.github/workflows/build-conda-m1.yml
index 15c1ca895f..7e72979ca1 100644
--- a/.github/workflows/build-conda-m1.yml
+++ b/.github/workflows/build-conda-m1.yml
@@ -50,3 +50,4 @@ jobs:
       trigger-event: ${{ github.event_name }}
     secrets:
       CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
diff --git a/.github/workflows/build-conda-windows.yml b/.github/workflows/build-conda-windows.yml
index 9781b40f41..ab652629bc 100644
--- a/.github/workflows/build-conda-windows.yml
+++ b/.github/workflows/build-conda-windows.yml
@@ -45,3 +45,4 @@ jobs:
       trigger-event: ${{ github.event_name }}
     secrets:
       CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
diff --git a/.github/workflows/build_conda_linux.yml b/.github/workflows/build_conda_linux.yml
index 4245c4f08d..8fbf25bd80 100644
--- a/.github/workflows/build_conda_linux.yml
+++ b/.github/workflows/build_conda_linux.yml
@@ -49,3 +49,4 @@ jobs:
       trigger-event: ${{ github.event_name }}
     secrets:
       CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
diff --git a/.github/workflows/build_conda_macos.yml b/.github/workflows/build_conda_macos.yml
index b51175f898..2fa598d9da 100644
--- a/.github/workflows/build_conda_macos.yml
+++ b/.github/workflows/build_conda_macos.yml
@@ -50,3 +50,4 @@ jobs:
       trigger-event: ${{ github.event_name }}
     secrets:
       CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}

From bf77b2a032b4b7ce0e0c03ab6c66881cc83c62c9 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Wed, 30 Aug 2023 13:08:30 -0700
Subject: [PATCH 04/13] Bump Version 2.1 -> 2.2 (#3585)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3585

Reviewed By: atalman

Differential Revision: D48832146

Pulled By: osalpekar

fbshipit-source-id: c36dd79a0c0f407d490a488902207c8066c01383
---
 version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version.txt b/version.txt
index ecaf4eea7c..887948350c 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-2.1.0a0
+2.2.0a0

From 5cf7d2dbd231918c3f260f220459c9914914ac7b Mon Sep 17 00:00:00 2001
From: atalman <atalman@fb.com>
Date: Wed, 30 Aug 2023 15:10:48 -0700
Subject: [PATCH 05/13] Revert "Enable ROCm RNN-T Loss (#2485)" (#3586)

Summary:
This reverts commit c5939616ddc17093747e896db6012b1f63792627.

Unblock 2.1.0 rc

Pull Request resolved: https://github.com/pytorch/audio/pull/3586

Reviewed By: osalpekar

Differential Revision: D48842032

Pulled By: atalman

fbshipit-source-id: bbdf9e45c9aa5fde00f315a2ff491ed050bc1707
---
 .gitmodules                                   |  3 -
 CMakeLists.txt                                | 10 ----
 third_party/hipify_torch                      |  1 -
 torchaudio/csrc/CMakeLists.txt                | 55 +------------------
 torchaudio/csrc/rnnt/gpu/compute.cu           |  4 --
 torchaudio/csrc/rnnt/gpu/compute_alphas.cu    |  4 --
 torchaudio/csrc/rnnt/gpu/compute_betas.cu     |  4 --
 torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh | 12 ----
 torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh      | 22 --------
 torchaudio/csrc/rnnt/gpu/gpu_transducer.h     |  5 --
 torchaudio/csrc/rnnt/gpu/kernel_utils.h       |  4 --
 torchaudio/csrc/rnnt/gpu/kernels.h            |  5 --
 torchaudio/csrc/rnnt/macros.h                 |  8 ---
 torchaudio/csrc/rnnt/options.h                |  9 +--
 torchaudio/csrc/rnnt/workspace.h              | 16 +-----
 15 files changed, 5 insertions(+), 157 deletions(-)
 delete mode 160000 third_party/hipify_torch

diff --git a/.gitmodules b/.gitmodules
index 25d307cea8..e69de29bb2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "third_party/hipify_torch"]
-	path = third_party/hipify_torch
-	url = https://github.com/ROCmSoftwarePlatform/hipify_torch
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d955c5da58..f0195c87b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,11 +76,6 @@ if(USE_ROCM)
   if(NOT PYTORCH_FOUND_HIP)
     set(USE_ROCM OFF)
   endif()
-
-  if(CMAKE_VERSION VERSION_LESS 3.21.0)
-    message("Need at least CMake 3.21.0 to compile ROCm support.")
-    set(USE_ROCM OFF)
-  endif()
 endif()
 
 if(USE_CUDA)
@@ -95,11 +90,6 @@ if(USE_CUDA)
   )
 endif()
 
-if(USE_ROCM)
-  enable_language(HIP)
-endif()
-
-find_package(Torch REQUIRED)
 include(cmake/TorchAudioHelper.cmake)
 
 # https://github.com/pytorch/pytorch/issues/54174
diff --git a/third_party/hipify_torch b/third_party/hipify_torch
deleted file mode 160000
index 083ff9b50c..0000000000
--- a/third_party/hipify_torch
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 083ff9b50c7ed861f7f6eddd983cdedb72e8b964
diff --git a/torchaudio/csrc/CMakeLists.txt b/torchaudio/csrc/CMakeLists.txt
index f05534f647..fc0c549493 100644
--- a/torchaudio/csrc/CMakeLists.txt
+++ b/torchaudio/csrc/CMakeLists.txt
@@ -1,29 +1,6 @@
 ################################################################################
 # libtorchaudio
 ################################################################################
-
-if(USE_ROCM)
-  list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
-  FIND_PACKAGE(HIP REQUIRED)
-  MESSAGE(STATUS "hip found ${ROCM_FOUND}")
-
-  list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/third_party/hipify_torch/cmake")
-  include(Hipify)
-
-  set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
-  set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
-  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
-  list( APPEND CMAKE_INSTALL_RPATH "/opt/rocm/llvm/lib" )
-  set(OPENMP_LIBRARIES "/opt/rocm/llvm/lib/")
-  set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
-  set(OpenMP_CXX_FLAGS "-fopenmp=libomp")
-  #set(OpenMP_CXX_LIB_NAMES "omp")
-  set(OpenMP_omp_LIBRARY omp)
-  find_package(OpenMP REQUIRED)
-
-endif()
-
-
 set(
   sources
   lfilter.cpp
@@ -62,37 +39,6 @@ if(BUILD_RNNT)
       rnnt/gpu/compute.cu
       )
   endif()
-
-  if (USE_ROCM)
-    hipify(CUDA_SOURCE_DIR ${PROJECT_SOURCE_DIR}/torchaudio/csrc/rnnt/gpu HIP_SOURCE_DIR ${PROJECT_SOURCE_DIR}/torchaudio/csrc/rnnt/hip)
-    if ( NOT HIP_ADD_LIBRARY_FOUND )
-      list(APPEND CMAKE_MODULE_PATH /opt/rocm/hip/cmake)
-      find_package(HIP REQUIRED)
-    endif()
-
-    list(
-      APPEND
-      sources 
-      rnnt/hip/compute_alphas.hip
-      rnnt/hip/compute_betas.hip
-      rnnt/hip/compute.hip
-      )
-  endif()
-endif()
-
-if(USE_ROCM)
-  list(
-    APPEND
-    additional_libs 
-    hip::host
-    hip::device
-    /opt/rocm/llvm/lib/libomp.so
-    )
-  list(
-    APPEND
-    compile_definitions 
-    USE_ROCM
-  )
 endif()
 
 if(BUILD_RIR)
@@ -141,6 +87,7 @@ endif()
 #------------------------------------------------------------------------------#
 # END OF CUSTOMIZATION LOGICS
 #------------------------------------------------------------------------------#
+
 torchaudio_library(
   libtorchaudio
   "${sources}"
diff --git a/torchaudio/csrc/rnnt/gpu/compute.cu b/torchaudio/csrc/rnnt/gpu/compute.cu
index 3303053822..a6d389bf0b 100644
--- a/torchaudio/csrc/rnnt/gpu/compute.cu
+++ b/torchaudio/csrc/rnnt/gpu/compute.cu
@@ -1,10 +1,6 @@
 #include <c10/cuda/CUDAStream.h>
 #include <torch/types.h>
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/gpu_transducer_hip.h>
-#else
 #include <torchaudio/csrc/rnnt/gpu/gpu_transducer.h>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/gpu/compute_alphas.cu b/torchaudio/csrc/rnnt/gpu/compute_alphas.cu
index 22706f670d..918d442bf0 100644
--- a/torchaudio/csrc/rnnt/gpu/compute_alphas.cu
+++ b/torchaudio/csrc/rnnt/gpu/compute_alphas.cu
@@ -1,10 +1,6 @@
 #include <c10/cuda/CUDAStream.h>
 #include <torch/types.h>
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/gpu_transducer_hip.h>
-#else
 #include <torchaudio/csrc/rnnt/gpu/gpu_transducer.h>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/gpu/compute_betas.cu b/torchaudio/csrc/rnnt/gpu/compute_betas.cu
index d2a6134181..e1e4c1d90e 100644
--- a/torchaudio/csrc/rnnt/gpu/compute_betas.cu
+++ b/torchaudio/csrc/rnnt/gpu/compute_betas.cu
@@ -1,10 +1,6 @@
 #include <c10/cuda/CUDAStream.h>
 #include <torch/types.h>
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/gpu_transducer_hip.h>
-#else
 #include <torchaudio/csrc/rnnt/gpu/gpu_transducer.h>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh b/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh
index cb3c615770..e5f1cfc2df 100644
--- a/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh
+++ b/torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh
@@ -2,11 +2,7 @@
 
 #ifdef USE_CUDA
 
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/math_hip.cuh>
-#else
 #include <torchaudio/csrc/rnnt/gpu/math.cuh>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
@@ -43,11 +39,7 @@ __global__ void ReduceMax2D(
 
   CAST_DTYPE shf;
   for (int stride = (WARP_SIZE >> 1); stride > 0; stride >>= 1) {
-#ifdef __HIP_PLATFORM_AMD__
-    shf = __shfl_down(val, stride);
-#else
     shf = __shfl_down_sync(0xFFFFFFFF, val, stride);
-#endif
     if (threadIdx.x < stride && threadIdx.x + stride < dim) {
       if (shf > val) {
         val = shf;
@@ -89,11 +81,7 @@ __global__ void ReduceLogSumExpGivenMax2D(
 
   CAST_DTYPE shf;
   for (int stride = (WARP_SIZE >> 1); stride > 0; stride >>= 1) {
-#ifdef __HIP_PLATFORM_AMD__
-    shf = __shfl_down(val, stride);
-#else
     shf = __shfl_down_sync(0xFFFFFFFF, val, stride);
-#endif
     if (threadIdx.x < stride && threadIdx.x + stride < dim) {
       val = val + shf;
     }
diff --git a/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh b/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh
index 2b7ef45df3..4ba04b68fc 100644
--- a/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh
+++ b/torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh
@@ -4,15 +4,9 @@
 
 #include <cassert>
 
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/kernel_utils.h>
-#include <torchaudio/csrc/rnnt/hip/kernels.h>
-#include <torchaudio/csrc/rnnt/hip/math_hip.cuh>
-#else
 #include <torchaudio/csrc/rnnt/gpu/kernel_utils.h>
 #include <torchaudio/csrc/rnnt/gpu/kernels.h>
 #include <torchaudio/csrc/rnnt/gpu/math.cuh>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
@@ -132,11 +126,7 @@ __device__ void ComputeAlphas(
 
 #pragma unroll
     for (int i = 1; i < warpSize; i <<= 1) {
-#ifdef __HIP_PLATFORM_AMD__
-      val = __shfl_up(skip_prob, i);
-#else
       val = __shfl_up_sync(0xffffffff, skip_prob, i);
-#endif
       if (i <= threadIdx.x) {
         skip_prob = skip_prob + val;
       }
@@ -160,11 +150,7 @@ __device__ void ComputeAlphas(
     CAST_DTYPE out = val;
 
     for (int i = 1; i < warpSize; ++i) {
-#ifdef __HIP_PLATFORM_AMD__
-      val = __shfl_up(val, 1);
-#else
       val = __shfl_up_sync(0xffffffff, val, 1);
-#endif
       if (i == threadIdx.x) {
         val = math::lse(val + skip_prob, emit);
         out = val;
@@ -239,11 +225,7 @@ __device__ void ComputeBetasCosts(
 
 #pragma unroll
     for (int i = 1; i < warpSize; i <<= 1) {
-#ifdef __HIP_PLATFORM_AMD__
-      val = __shfl_up(skip_prob, i);
-#else
       val = __shfl_up_sync(0xffffffff, skip_prob, i);
-#endif
       if (i <= threadIdx.x) {
         skip_prob = skip_prob + val;
       }
@@ -266,11 +248,7 @@ __device__ void ComputeBetasCosts(
     CAST_DTYPE out = val;
 
     for (int i = 1; i < warpSize; ++i) {
-#ifdef __HIP_PLATFORM_AMD__
-      val = __shfl_up(val, 1);
-#else
       val = __shfl_up_sync(0xffffffff, val, 1);
-#endif
       if (i == threadIdx.x) {
         val = math::lse(val + skip_prob, emit);
         out = val;
diff --git a/torchaudio/csrc/rnnt/gpu/gpu_transducer.h b/torchaudio/csrc/rnnt/gpu/gpu_transducer.h
index 32a731bafd..72759b39f4 100644
--- a/torchaudio/csrc/rnnt/gpu/gpu_transducer.h
+++ b/torchaudio/csrc/rnnt/gpu/gpu_transducer.h
@@ -3,13 +3,8 @@
 #ifdef USE_CUDA
 
 #include <torchaudio/csrc/rnnt/workspace.h>
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/gpu_kernel_utils_hip.cuh>
-#include <torchaudio/csrc/rnnt/hip/gpu_kernels_hip.cuh>
-#else
 #include <torchaudio/csrc/rnnt/gpu/gpu_kernel_utils.cuh>
 #include <torchaudio/csrc/rnnt/gpu/gpu_kernels.cuh>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/gpu/kernel_utils.h b/torchaudio/csrc/rnnt/gpu/kernel_utils.h
index 68136fcfa3..3b2989b073 100644
--- a/torchaudio/csrc/rnnt/gpu/kernel_utils.h
+++ b/torchaudio/csrc/rnnt/gpu/kernel_utils.h
@@ -2,11 +2,7 @@
 
 #include <cassert>
 
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/math_hip.cuh>
-#else
 #include <torchaudio/csrc/rnnt/gpu/math.cuh>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/gpu/kernels.h b/torchaudio/csrc/rnnt/gpu/kernels.h
index d22443fecb..db8bb5092b 100644
--- a/torchaudio/csrc/rnnt/gpu/kernels.h
+++ b/torchaudio/csrc/rnnt/gpu/kernels.h
@@ -2,13 +2,8 @@
 
 #include <cassert>
 
-#ifdef __HIP_PLATFORM_AMD__
-#include <torchaudio/csrc/rnnt/hip/kernel_utils.h>
-#include <torchaudio/csrc/rnnt/hip/math_hip.cuh>
-#else
 #include <torchaudio/csrc/rnnt/gpu/kernel_utils.h>
 #include <torchaudio/csrc/rnnt/gpu/math.cuh>
-#endif
 
 namespace torchaudio {
 namespace rnnt {
diff --git a/torchaudio/csrc/rnnt/macros.h b/torchaudio/csrc/rnnt/macros.h
index e569d26241..abcbc39966 100644
--- a/torchaudio/csrc/rnnt/macros.h
+++ b/torchaudio/csrc/rnnt/macros.h
@@ -8,14 +8,6 @@
 #define FORCE_INLINE __forceinline__
 #include <cuda_fp16.h>
 #include <cuda_runtime.h>
-#elif USE_ROCM
-#define WARP_SIZE 32
-#define MAX_THREADS_PER_BLOCK 1024
-#define REDUCE_THREADS 256
-#define HOST_AND_DEVICE __host__ __device__
-#define FORCE_INLINE __forceinline__
-#include <hip/hip_fp16.h>
-#include <hip/hip_runtime.h>
 #else
 #define HOST_AND_DEVICE
 #define FORCE_INLINE inline
diff --git a/torchaudio/csrc/rnnt/options.h b/torchaudio/csrc/rnnt/options.h
index ecf0714a3c..f70a3c8c07 100644
--- a/torchaudio/csrc/rnnt/options.h
+++ b/torchaudio/csrc/rnnt/options.h
@@ -4,12 +4,7 @@
 
 #ifdef USE_CUDA
 #include <cuda_runtime.h>
-typedef cudaStream_t gpuStream_t;
 #endif // USE_CUDA
-#ifdef USE_ROCM
-#include <hip/hip_runtime.h>
-typedef hipStream_t gpuStream_t;
-#endif // USE_ROCM
 
 #include <torchaudio/csrc/rnnt/macros.h>
 #include <torchaudio/csrc/rnnt/types.h>
@@ -20,9 +15,9 @@ namespace rnnt {
 typedef struct Options {
   // the device to compute transducer loss.
   device_t device_;
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#ifdef USE_CUDA
   // the stream to launch kernels in when using GPU.
-  gpuStream_t stream_;
+  cudaStream_t stream_;
 #endif
   // The maximum number of threads that can be used.
   int numThreads_;
diff --git a/torchaudio/csrc/rnnt/workspace.h b/torchaudio/csrc/rnnt/workspace.h
index 14ae0047ba..e833ef1cdf 100644
--- a/torchaudio/csrc/rnnt/workspace.h
+++ b/torchaudio/csrc/rnnt/workspace.h
@@ -131,22 +131,10 @@ class IntWorkspace {
           ComputeSizeForBetaCounters(options_) * sizeof(int));
     }
 #endif // USE_CUDA
-#ifdef USE_ROCM
-    if (data_ != nullptr && options_.device_ == GPU) {
-      hipMemset(
-          GetPointerToAlphaCounters(),
-          0,
-          ComputeSizeForAlphaCounters(options_) * sizeof(int));
-      hipMemset(
-          GetPointerToBetaCounters(),
-          0,
-          ComputeSizeForBetaCounters(options_) * sizeof(int));
-    }
-#endif // USE_ROCM
   }
 
   static int ComputeSizeForAlphaCounters(const Options& options) { // B * U
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#ifdef USE_CUDA
     if (options.device_ == GPU) {
       return options.BU();
     } else {
@@ -157,7 +145,7 @@ class IntWorkspace {
 #endif // USE_CUDA
   }
   static int ComputeSizeForBetaCounters(const Options& options) { // B * U
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#ifdef USE_CUDA
     if (options.device_ == GPU) {
       return options.BU();
     } else {

From af79867623c61b647d101139ede26b9f4398fcf8 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Fri, 1 Sep 2023 16:03:43 -0700
Subject: [PATCH 06/13] Automate Release-Only Changes (#3590)

Summary:
First Pass for automating release-only changes.

Tested this in audio to ensure it works.

Pull Request resolved: https://github.com/pytorch/audio/pull/3590

Reviewed By: huydhn

Differential Revision: D48921925

Pulled By: osalpekar

fbshipit-source-id: 2920e4d984ffb0c1957dae6fb6bab10344e65ff7
---
 packaging/cut_release.sh | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 packaging/cut_release.sh

diff --git a/packaging/cut_release.sh b/packaging/cut_release.sh
new file mode 100644
index 0000000000..91e0e5ff15
--- /dev/null
+++ b/packaging/cut_release.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Usage (run from root of project):
+# TEST_INFRA_BRANCH=release/2.1 RELEASE_BRANCH=release/2.1 RELEASE_VERSION=2.1.0 packaging/cut_release.sh
+#
+# TEST_INFRA_BRANCH: The release branch of test-infra that houses all reusable
+# workflows
+#
+# RELEASE_BRANCH: The name of the release branch for this repo
+#
+# RELEASE_VERSION: Version of this current release
+
+set -eou pipefail
+
+# Create and Check out to Release Branch
+git checkout -b "${RELEASE_BRANCH}"
+
+# Change all GitHub Actions to reference the test-infra release branch
+# as opposed to main.
+for i in .github/workflows/*.yml; do 
+  if [[ "$OSTYPE" == "darwin"* ]]; then
+    sed -i '' -e s#@main#@"${TEST_INFRA_BRANCH}"# $i;
+    sed -i '' -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i;
+  else
+    sed -i -e s#@main#@"${TEST_INFRA_BRANCH}"# $i;
+    sed -i -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i;
+  fi
+done
+
+# Update the Release Version in version.txt
+echo "${RELEASE_VERSION}" >version.txt
+
+# Optional
+# git add .github/workflows/*.yml version.txt
+# git commit -m "[RELEASE-ONLY CHANGES] Branch Cut for Release ${RELEASE_VERSION}"
+# git push origin "${RELEASE_BRANCH}"

From e057d7d144e2716588b80255f0a143662fd5c10d Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Fri, 1 Sep 2023 17:39:15 -0700
Subject: [PATCH 07/13] Moving to More Efficient Windows GPU Runner (#3587)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3587

Reviewed By: huydhn

Differential Revision: D48847224

Pulled By: osalpekar

fbshipit-source-id: 15ccae56f0e5e8e366b5feb66fdb42bb5cba97e1
---
 .github/workflows/unittest-windows-gpu.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittest-windows-gpu.yml b/.github/workflows/unittest-windows-gpu.yml
index bc36124638..62f8640d79 100644
--- a/.github/workflows/unittest-windows-gpu.yml
+++ b/.github/workflows/unittest-windows-gpu.yml
@@ -14,7 +14,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
     with:
       repository: pytorch/audio
-      runner: windows.8xlarge.nvidia.gpu
+      runner: windows.g5.4xlarge.nvidia.gpu
       timeout: 360
       script: |
         # Mark Build Directory Safe

From 3e7e696cc6b49f85e08d5f79db022f66d1c1a2fa Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Mon, 4 Sep 2023 06:34:52 -0700
Subject: [PATCH 08/13] Fix doc link (#3593)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3593

Reviewed By: nateanl

Differential Revision: D48933041

Pulled By: mthrok

fbshipit-source-id: cd05d3cf5006206ba441fdc05548bcd922ce0598
---
 docs/source/installation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 44d64a8d38..d6fccbce3b 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -30,7 +30,7 @@ Dependencies
 Optional Dependencies
 ~~~~~~~~~~~~~~~~~~~~~
 
-.. _ffmpeg:
+.. _ffmpeg_dependency:
 
 * `FFmpeg <https://ffmpeg.org>`__
 

From 6fbc1e6869f339ebe50d94ea7b352eee9706eee7 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Mon, 4 Sep 2023 07:15:08 -0700
Subject: [PATCH 09/13] Add incremental decoding support to CTC decoder (#3594)

Summary:
Add incremental decoding support to CTC decoder.

Resolves https://github.com/pytorch/audio/issues/3574

Pull Request resolved: https://github.com/pytorch/audio/pull/3594

Reviewed By: nateanl

Differential Revision: D48940584

Pulled By: mthrok

fbshipit-source-id: 31871614008cf197cf3900f7183ec6cff34d2905
---
 ...asr_inference_with_ctc_decoder_tutorial.py |  41 +++++++
 torchaudio/models/decoder/_ctc_decoder.py     | 114 +++++++++++++++---
 2 files changed, 139 insertions(+), 16 deletions(-)

diff --git a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
index 955dc3c029..624cd8066a 100644
--- a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
+++ b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
@@ -387,6 +387,47 @@ def forward(self, emission: torch.Tensor) -> List[str]:
 # and “shoktd”.
 #
 
+######################################################################
+# Incremental decoding
+# ~~~~~~~~~~~~~~~~~~~~
+#
+# If the input speech is long, one can decode the emission in an
+# incremental manner.
+#
+# You need to first initialize the internal state of the decoder with
+# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_begin`.
+
+beam_search_decoder.decode_begin()
+
+######################################################################
+# Then, you can pass emissions to
+# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_step`.
+# Here we use the same emission but pass it to the decoder one frame
+# at a time.
+
+for t in range(emission.size(1)):
+    beam_search_decoder.decode_step(emission[0, t:t + 1, :])
+
+######################################################################
+# Finally, finalize the internal state of the decoder, and retrieve the
+# result.
+
+beam_search_decoder.decode_end()
+beam_search_result_inc = beam_search_decoder.get_final_hypothesis()
+
+######################################################################
+# The result of incremental decoding is identical to batch decoding.
+#
+beam_search_transcript_inc = " ".join(beam_search_result_inc[0].words).strip()
+beam_search_wer_inc = torchaudio.functional.edit_distance(
+    actual_transcript, beam_search_result_inc[0].words) / len(actual_transcript)
+
+print(f"Transcript: {beam_search_transcript_inc}")
+print(f"WER: {beam_search_wer_inc}")
+
+assert beam_search_result[0][0].words == beam_search_result_inc[0].words
+assert beam_search_result[0][0].score == beam_search_result_inc[0].score
+torch.testing.assert_close(beam_search_result[0][0].timesteps, beam_search_result_inc[0].timesteps)
 
 ######################################################################
 # Timestep Alignments
diff --git a/torchaudio/models/decoder/_ctc_decoder.py b/torchaudio/models/decoder/_ctc_decoder.py
index 7fbb8769ab..4d45f12f52 100644
--- a/torchaudio/models/decoder/_ctc_decoder.py
+++ b/torchaudio/models/decoder/_ctc_decoder.py
@@ -261,10 +261,102 @@ def _get_timesteps(self, idxs: torch.IntTensor) -> torch.IntTensor:
                 timesteps.append(i)
         return torch.IntTensor(timesteps)
 
+    def decode_begin(self):
+        """Initialize the internal state of the decoder.
+
+        See :py:meth:`decode_step` for the usage.
+
+        .. note::
+
+           This method is required only when performing online decoding.
+           It is not necessary when performing batch decoding with :py:meth:`__call__`.
+        """
+        self.decoder.decode_begin()
+
+    def decode_end(self):
+        """Finalize the internal state of the decoder.
+
+        See :py:meth:`decode_step` for the usage.
+
+        .. note::
+
+           This method is required only when performing online decoding.
+           It is not necessary when performing batch decoding with :py:meth:`__call__`.
+        """
+        self.decoder.decode_end()
+
+    def decode_step(self, emissions: torch.FloatTensor):
+        """Perform incremental decoding on top of the current internal state.
+
+        .. note::
+
+           This method is required only when performing online decoding.
+           It is not necessary when performing batch decoding with :py:meth:`__call__`.
+
+        Args:
+            emissions (torch.FloatTensor): CPU tensor of shape `(frame, num_tokens)` storing sequences of
+                probability distribution over labels; output of acoustic model.
+
+        Example:
+            >>> decoder = torchaudio.models.decoder.ctc_decoder(...)
+            >>> decoder.decode_begin()
+            >>> decoder.decode_step(emission1)
+            >>> decoder.decode_step(emission2)
+            >>> decoder.decode_end()
+            >>> result = decoder.get_final_hypothesis()
+        """
+        if emissions.dtype != torch.float32:
+            raise ValueError("emissions must be float32.")
+
+        if not emissions.is_cpu:
+            raise RuntimeError("emissions must be a CPU tensor.")
+
+        if not emissions.is_contiguous():
+            raise RuntimeError("emissions must be contiguous.")
+
+        if emissions.ndim != 2:
+            raise RuntimeError(f"emissions must be 2D. Found {emissions.shape}")
+
+        T, N = emissions.size()
+        self.decoder.decode_step(emissions.data_ptr(), T, N)
+
+    def _to_hypo(self, results) -> List[CTCHypothesis]:
+        return [
+            CTCHypothesis(
+                tokens=self._get_tokens(result.tokens),
+                words=[self.word_dict.get_entry(x) for x in result.words if x >= 0],
+                score=result.score,
+                timesteps=self._get_timesteps(result.tokens),
+            )
+            for result in results
+        ]
+
+    def get_final_hypothesis(self) -> List[CTCHypothesis]:
+        """Get the final hypothesis
+
+        Returns:
+            List[CTCHypothesis]:
+                List of sorted best hypotheses.
+
+        .. note::
+
+           This method is required only when performing online decoding.
+           It is not necessary when performing batch decoding with :py:meth:`__call__`.
+        """
+        results = self.decoder.get_all_final_hypothesis()
+        return self._to_hypo(results[: self.nbest])
+
     def __call__(
         self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None
     ) -> List[List[CTCHypothesis]]:
         """
+        Performs batched offline decoding.
+
+        .. note::
+
+           This method performs offline decoding in one go. To perform incremental decoding,
+           please refer to :py:meth:`decode_step`.
+
         Args:
             emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                 probability distribution over labels; output of acoustic model.
@@ -279,13 +371,16 @@ def __call__(
         if emissions.dtype != torch.float32:
             raise ValueError("emissions must be float32.")
 
-        if emissions.is_cuda:
+        if not emissions.is_cpu:
             raise RuntimeError("emissions must be a CPU tensor.")
 
         if not emissions.is_contiguous():
             raise RuntimeError("emissions must be contiguous.")
 
-        if lengths is not None and lengths.is_cuda:
+        if emissions.ndim != 3:
+            raise RuntimeError(f"emissions must be 3D. Found {emissions.shape}")
+
+        if lengths is not None and not lengths.is_cpu:
             raise RuntimeError("lengths must be a CPU tensor.")
 
         B, T, N = emissions.size()
@@ -298,20 +393,7 @@ def __call__(
         for b in range(B):
             emissions_ptr = emissions.data_ptr() + float_bytes * b * emissions.stride(0)
             results = self.decoder.decode(emissions_ptr, lengths[b], N)
-
-            nbest_results = results[: self.nbest]
-            hypos.append(
-                [
-                    CTCHypothesis(
-                        tokens=self._get_tokens(result.tokens),
-                        words=[self.word_dict.get_entry(x) for x in result.words if x >= 0],
-                        score=result.score,
-                        timesteps=self._get_timesteps(result.tokens),
-                    )
-                    for result in nbest_results
-                ]
-            )
-
+            hypos.append(self._to_hypo(results[: self.nbest]))
         return hypos
 
     def idxs_to_tokens(self, idxs: torch.LongTensor) -> List:

From 7d37f69c5244dda1a2ce3bc2df8788fcc1b65947 Mon Sep 17 00:00:00 2001
From: hwangjeff <jeffhwang@meta.com>
Date: Mon, 4 Sep 2023 11:50:49 -0700
Subject: [PATCH 10/13] Fix decoder call in Device ASR/AVSR tutorials (#3572)

Summary:
Fixes decoder calls and related code in Device ASR/AVSR tutorials to account for changes to RNN-T decoder introduced in https://github.com/pytorch/audio/issues/3295.

Pull Request resolved: https://github.com/pytorch/audio/pull/3572

Reviewed By: mthrok

Differential Revision: D48629428

Pulled By: hwangjeff

fbshipit-source-id: 63ede307fb4412aa28f88972d56dca8405607b7a
---
 examples/tutorials/device_asr.py  | 11 +++++------
 examples/tutorials/device_avsr.py |  9 ++++-----
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/examples/tutorials/device_asr.py b/examples/tutorials/device_asr.py
index 281b596d8f..c78cfcfc61 100644
--- a/examples/tutorials/device_asr.py
+++ b/examples/tutorials/device_asr.py
@@ -206,16 +206,15 @@ def __init__(self, bundle: torchaudio.pipelines.RNNTBundle, beam_width: int = 10
         self.beam_width = beam_width
 
         self.state = None
-        self.hypothesis = None
+        self.hypotheses = None
 
     def infer(self, segment: torch.Tensor) -> str:
         """Perform streaming inference"""
         features, length = self.feature_extractor(segment)
-        hypos, self.state = self.decoder.infer(
-            features, length, self.beam_width, state=self.state, hypothesis=self.hypothesis
+        self.hypotheses, self.state = self.decoder.infer(
+            features, length, self.beam_width, state=self.state, hypothesis=self.hypotheses
         )
-        self.hypothesis = hypos[0]
-        transcript = self.token_processor(self.hypothesis[0], lstrip=False)
+        transcript = self.token_processor(self.hypotheses[0][0], lstrip=False)
         return transcript
 
 
@@ -291,7 +290,7 @@ def infer():
             chunk = q.get()
             segment = cacher(chunk[:, 0])
             transcript = pipeline.infer(segment)
-            print(transcript, end="", flush=True)
+            print(transcript, end="\r", flush=True)
 
     import torch.multiprocessing as mp
 
diff --git a/examples/tutorials/device_avsr.py b/examples/tutorials/device_avsr.py
index 0bb7a5792d..cd013731a9 100644
--- a/examples/tutorials/device_avsr.py
+++ b/examples/tutorials/device_avsr.py
@@ -258,15 +258,14 @@ def __init__(self, preprocessor, model, decoder, token_processor):
         self.token_processor = token_processor
 
         self.state = None
-        self.hypothesis = None
+        self.hypotheses = None
 
     def forward(self, audio, video):
         audio, video = self.preprocessor(audio, video)
         feats = self.model(audio.unsqueeze(0), video.unsqueeze(0))
         length = torch.tensor([feats.size(1)], device=audio.device)
-        hypos, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypothesis)
-        self.hypothesis = hypos[0]
-        transcript = self.token_processor(self.hypothesis[0], lstrip=False)
+        self.hypotheses, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypotheses)
+        transcript = self.token_processor(self.hypotheses[0][0], lstrip=False)
         return transcript
 
 
@@ -370,7 +369,7 @@ def infer():
             video, audio = cacher(video, audio)
             pipeline.state, pipeline.hypothesis = None, None
             transcript = pipeline(audio, video.float())
-            print(transcript, end="", flush=True)
+            print(transcript, end="\r", flush=True)
             num_video_frames = 0
             video_chunks = []
             audio_chunks = []

From 454418d2a6da6ebd5ad85e9d4b1c09ea69531ed7 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Mon, 4 Sep 2023 12:16:20 -0700
Subject: [PATCH 11/13] [BC-Breaking] Remove legacy global backend switch
 (#3559)

Summary:
This PR removes the legacy backend switch mechanism.
The implementation itself is still available.

Merge after v2.1 release

Pull Request resolved: https://github.com/pytorch/audio/pull/3559

Reviewed By: nateanl

Differential Revision: D48353764

Pulled By: mthrok

fbshipit-source-id: 4d3924dbe6f334ecebe2b12fcd4591c61c4aa656
---
 docs/source/torchaudio.rst                    | 27 ++----
 .../torchaudio_unittest/backend/utils_test.py | 36 --------
 torchaudio/__init__.py                        | 30 +++----
 torchaudio/_backend/__init__.py               | 23 +++--
 torchaudio/backend/__init__.py                |  5 --
 torchaudio/backend/_common.py                 |  2 +-
 torchaudio/backend/utils.py                   | 84 -------------------
 7 files changed, 31 insertions(+), 176 deletions(-)
 delete mode 100644 test/torchaudio_unittest/backend/utils_test.py
 delete mode 100644 torchaudio/backend/utils.py

diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst
index 9eeaae5c92..87abbc07ef 100644
--- a/docs/source/torchaudio.rst
+++ b/docs/source/torchaudio.rst
@@ -17,6 +17,7 @@ it easy to handle audio data.
    info
    load
    save
+   list_audio_backends
 
 .. _backend:
 
@@ -91,30 +92,12 @@ please refer to https://github.com/pytorch/audio/issues/2950
 * In 2.0, audio I/O backend dispatcher was introduced.
   Users can opt-in to using dispatcher by setting the environment variable
   ``TORCHAUDIO_USE_BACKEND_DISPATCHER=1``.
-* In 2.1, the disptcher becomes the default mechanism for I/O.
-  Those who need to keep using the previous mechanism (global backend) can do
-  so by setting ``TORCHAUDIO_USE_BACKEND_DISPATCHER=0``.
-* In 2.2, the legacy global backend mechanism will be removed.
+* In 2.1, the dispatcher became the default mechanism for I/O.
+* In 2.2, the legacy global backend mechanism is removed.
   Utility functions :py:func:`get_audio_backend` and :py:func:`set_audio_backend`
-  become no-op.
+  became no-ops.
 
-Furthermore, we are removing file-like object support from libsox backend, as this
+Furthermore, we removed file-like object support from libsox backend, as this
 is better supported by FFmpeg backend and makes the build process simpler.
 Therefore, beginning with 2.1, FFmpeg and Soundfile are the sole backends that support
 file-like objects.
-
-Backend Utilities
------------------
-
-The following functions are effective only when backend dispatcher is disabled.
-
-Note that the changes in 2.1 marks :py:func:`get_audio_backend` and
-:py:func:`set_audio_backend` deprecated.
-
-.. autosummary::
-   :toctree: generated
-   :nosignatures:
-
-   list_audio_backends
-   get_audio_backend
-   set_audio_backend
diff --git a/test/torchaudio_unittest/backend/utils_test.py b/test/torchaudio_unittest/backend/utils_test.py
deleted file mode 100644
index ea5416cf1f..0000000000
--- a/test/torchaudio_unittest/backend/utils_test.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import torchaudio
-from torchaudio_unittest import common_utils
-
-
-class BackendSwitchMixin:
-    """Test set/get_audio_backend works"""
-
-    backend = None
-    backend_module = None
-
-    def test_switch(self):
-        torchaudio.backend.utils.set_audio_backend(self.backend)
-        if self.backend is None:
-            assert torchaudio.backend.utils.get_audio_backend() is None
-        else:
-            assert torchaudio.backend.utils.get_audio_backend() == self.backend
-        assert torchaudio.load == self.backend_module.load
-        assert torchaudio.save == self.backend_module.save
-        assert torchaudio.info == self.backend_module.info
-
-
-class TestBackendSwitch_NoBackend(BackendSwitchMixin, common_utils.TorchaudioTestCase):
-    backend = None
-    backend_module = torchaudio.backend.no_backend
-
-
-@common_utils.skipIfNoSox
-class TestBackendSwitch_SoXIO(BackendSwitchMixin, common_utils.TorchaudioTestCase):
-    backend = "sox_io"
-    backend_module = torchaudio.backend.sox_io_backend
-
-
-@common_utils.skipIfNoModule("soundfile")
-class TestBackendSwitch_soundfile(BackendSwitchMixin, common_utils.TorchaudioTestCase):
-    backend = "soundfile"
-    backend_module = torchaudio.backend.soundfile_backend
diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
index 19cc69f6b6..a1ca98ac42 100644
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
@@ -1,5 +1,10 @@
-from . import (  # noqa: F401
+# Initialize extension and backend first
+from . import (  # noqa  # usort: skip
     _extension,
+    _backend,
+)
+from . import (  # noqa: F401
+    backend,  # For BC
     compliance,
     datasets,
     functional,
@@ -11,7 +16,7 @@
     transforms,
     utils,
 )
-from ._backend.common import AudioMetaData  # noqa
+from ._backend import AudioMetaData, get_audio_backend, info, list_audio_backends, load, save, set_audio_backend
 
 try:
     from .version import __version__, git_version  # noqa: F401
@@ -19,26 +24,11 @@
     pass
 
 
-def _is_backend_dispatcher_enabled():
-    import os
-
-    return os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER", default="1") == "1"
-
-
-if _is_backend_dispatcher_enabled():
-    from ._backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend
-else:
-    from .backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend
-
-
-_init_backend()
-
-# for backward compatibility. This has to happen after _backend is imported.
-from . import backend  # noqa: F401
-
-
 __all__ = [
     "AudioMetaData",
+    "load",
+    "info",
+    "save",
     "io",
     "compliance",
     "datasets",
diff --git a/torchaudio/_backend/__init__.py b/torchaudio/_backend/__init__.py
index e4855c3bbb..2872a81320 100644
--- a/torchaudio/_backend/__init__.py
+++ b/torchaudio/_backend/__init__.py
@@ -1,16 +1,24 @@
 from typing import List, Optional
 
-import torchaudio
 from torchaudio._internal.module_utils import deprecated
 
+from . import utils
+from .common import AudioMetaData
 
-# TODO: Once legacy global backend is removed, move this to torchaudio.__init__
-def _init_backend():
-    from . import utils
+__all__ = [
+    "AudioMetaData",
+    "load",
+    "info",
+    "save",
+    "list_audio_backends",
+    "get_audio_backend",
+    "set_audio_backend",
+]
 
-    torchaudio.info = utils.get_info_func()
-    torchaudio.load = utils.get_load_func()
-    torchaudio.save = utils.get_save_func()
+
+info = utils.get_info_func()
+load = utils.get_load_func()
+save = utils.get_save_func()
 
 
 def list_audio_backends() -> List[str]:
@@ -24,7 +32,6 @@ def list_audio_backends() -> List[str]:
         - Dispatcher mode: ``"ffmpeg"``, ``"sox"`` and ``"soundfile"``.
         - Legacy backend mode: ``"sox_io"``, ``"soundfile"``.
     """
-    from . import utils
 
     return list(utils.get_available_backends().keys())
 
diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py
index 8fa244f15b..9501a0f13b 100644
--- a/torchaudio/backend/__init__.py
+++ b/torchaudio/backend/__init__.py
@@ -3,11 +3,6 @@
 # New things should be added to `torchaudio._backend`.
 # Only things related to backward compatibility should be placed here.
 
-from .utils import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend
-
-
-__all__ = ["_init_backend", "get_audio_backend", "list_audio_backends", "set_audio_backend"]
-
 
 def __getattr__(name: str):
     if name == "common":
diff --git a/torchaudio/backend/_common.py b/torchaudio/backend/_common.py
index 09a79d991c..72f9b875dd 100644
--- a/torchaudio/backend/_common.py
+++ b/torchaudio/backend/_common.py
@@ -7,7 +7,7 @@ def __getattr__(name: str):
             "`torchaudio.AudioMetaData`. Please update the import path.",
             stacklevel=2,
         )
-        from torchaudio._backend.common import AudioMetaData
+        from torchaudio import AudioMetaData
 
         return AudioMetaData
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py
deleted file mode 100644
index 2b94db483f..0000000000
--- a/torchaudio/backend/utils.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""Defines utilities for switching audio backends"""
-import warnings
-from typing import List, Optional
-
-import torchaudio
-from torchaudio._backend import soundfile_backend
-from torchaudio._internal import module_utils as _mod_utils
-
-from . import _no_backend as no_backend, _sox_io_backend as sox_io_backend
-
-__all__ = [
-    "list_audio_backends",
-    "get_audio_backend",
-    "set_audio_backend",
-]
-
-
-def list_audio_backends() -> List[str]:
-    """List available backends
-
-    Returns:
-        List[str]: The list of available backends.
-    """
-    backends = []
-    if _mod_utils.is_module_available("soundfile"):
-        backends.append("soundfile")
-    if torchaudio._extension._SOX_INITIALIZED:
-        backends.append("sox_io")
-    return backends
-
-
-def set_audio_backend(backend: Optional[str]):
-    """Set the backend for I/O operation
-
-    Args:
-        backend (str or None): Name of the backend.
-            One of ``"sox_io"`` or ``"soundfile"`` based on availability
-            of the system. If ``None`` is provided the  current backend is unassigned.
-    """
-    if backend is not None and backend not in list_audio_backends():
-        raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.")
-
-    if backend is None:
-        module = no_backend
-    elif backend == "sox_io":
-        module = sox_io_backend
-    elif backend == "soundfile":
-        module = soundfile_backend
-    else:
-        raise NotImplementedError(f'Unexpected backend "{backend}"')
-
-    for func in ["save", "load", "info"]:
-        setattr(torchaudio, func, getattr(module, func))
-
-
-def _init_backend():
-    warnings.warn(
-        "TorchAudio's global backend is now deprecated. "
-        "Please enable distpatcher by setting `TORCHAUDIO_USE_BACKEND_DISPATCHER=1`, "
-        "and specify backend when calling load/info/save function.",
-        stacklevel=3,
-    )
-    backends = list_audio_backends()
-    if "sox_io" in backends:
-        set_audio_backend("sox_io")
-    elif "soundfile" in backends:
-        set_audio_backend("soundfile")
-    else:
-        set_audio_backend(None)
-
-
-def get_audio_backend() -> Optional[str]:
-    """Get the name of the current backend
-
-    Returns:
-        Optional[str]: The name of the current backend or ``None`` if no backend is assigned.
-    """
-    if torchaudio.load == no_backend.load:
-        return None
-    if torchaudio.load == sox_io_backend.load:
-        return "sox_io"
-    if torchaudio.load == soundfile_backend.load:
-        return "soundfile"
-    raise ValueError("Unknown backend.")

From 931598c1c9e5007291a8f4097825c4a20be47f33 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Tue, 5 Sep 2023 07:50:32 -0700
Subject: [PATCH 12/13] Fix backward compatibility layer in backend module
 (#3595)

Summary:
The PR https://github.com/pytorch/audio/issues/3549 re-organized the backend implementations and deprecated the direct access to torchaudio.backend.

The change was supposed to be BC-compatible while issuing a warning to users, but the implementation of module-level `__getattr__` was not quite right.

See the issue reported in https://github.com/pyannote/pyannote-audio/pull/1456.

This commit fixes it so that the following imports work:

```python
from torchaudio.backend.common import AudioMetaData

from torchaudio.backend import sox_io_backend
from torchaudio.backend.sox_io_backend import save, load, info

from torchaudio.backend import no_backend
from torchaudio.backend.no_backend import save, load, info

from torchaudio.backend import soundfile_backend
from torchaudio.backend.soundfile_backend import save, load, info
```

Pull Request resolved: https://github.com/pytorch/audio/pull/3595

Reviewed By: nateanl

Differential Revision: D48957446

Pulled By: mthrok

fbshipit-source-id: ebb256461dd3032025fd27d0455ce980888f7778
---
 .../backend/soundfile/info_test.py            |  1 -
 torchaudio/__init__.py                        | 18 +++++++---
 torchaudio/backend/__init__.py                | 33 ++-----------------
 torchaudio/backend/_no_backend.py             |  3 +-
 torchaudio/backend/{_common.py => common.py}  |  4 +--
 torchaudio/backend/no_backend.py              | 14 ++++++++
 torchaudio/backend/soundfile_backend.py       | 14 ++++++++
 torchaudio/backend/sox_io_backend.py          | 14 ++++++++
 8 files changed, 61 insertions(+), 40 deletions(-)
 rename torchaudio/backend/{_common.py => common.py} (94%)
 create mode 100644 torchaudio/backend/no_backend.py
 create mode 100644 torchaudio/backend/soundfile_backend.py
 create mode 100644 torchaudio/backend/sox_io_backend.py

diff --git a/test/torchaudio_unittest/backend/soundfile/info_test.py b/test/torchaudio_unittest/backend/soundfile/info_test.py
index a7c1708ac2..a9acec6f05 100644
--- a/test/torchaudio_unittest/backend/soundfile/info_test.py
+++ b/test/torchaudio_unittest/backend/soundfile/info_test.py
@@ -117,7 +117,6 @@ class MockSoundFileInfo:
         with patch("soundfile.info", _mock_info_func):
             with warnings.catch_warnings(record=True) as w:
                 info = soundfile_backend.info("foo")
-                assert len(w) == 1
                 assert "UNSEEN_SUBTYPE subtype is unknown to TorchAudio" in str(w[-1].message)
                 assert info.bits_per_sample == 0
 
diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
index a1ca98ac42..90b411eae1 100644
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
@@ -1,10 +1,16 @@
 # Initialize extension and backend first
-from . import (  # noqa  # usort: skip
-    _extension,
-    _backend,
+from . import _extension  # noqa  # usort: skip
+from ._backend import (  # noqa  # usort: skip
+    AudioMetaData,
+    get_audio_backend,
+    info,
+    list_audio_backends,
+    load,
+    save,
+    set_audio_backend,
 )
+
 from . import (  # noqa: F401
-    backend,  # For BC
     compliance,
     datasets,
     functional,
@@ -16,7 +22,9 @@
     transforms,
     utils,
 )
-from ._backend import AudioMetaData, get_audio_backend, info, list_audio_backends, load, save, set_audio_backend
+
+# For BC
+from . import backend  # noqa # usort: skip
 
 try:
     from .version import __version__, git_version  # noqa: F401
diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py
index 9501a0f13b..84df7e7d69 100644
--- a/torchaudio/backend/__init__.py
+++ b/torchaudio/backend/__init__.py
@@ -3,35 +3,6 @@
 # New things should be added to `torchaudio._backend`.
 # Only things related to backward compatibility should be placed here.
 
+from . import common, no_backend, soundfile_backend, sox_io_backend  # noqa
 
-def __getattr__(name: str):
-    if name == "common":
-        from . import _common
-
-        return _common
-
-    if name in ["no_backend", "sox_io_backend", "soundfile_backend"]:
-        import warnings
-
-        warnings.warn(
-            "Torchaudio's I/O functions now support par-call bakcend dispatch. "
-            "Importing backend implementation directly is no longer guaranteed to work. "
-            "Please use `backend` keyword with load/save/info function, instead of "
-            "calling the udnerlying implementation directly.",
-            stacklevel=2,
-        )
-
-        if name == "sox_io_backend":
-            from . import _sox_io_backend
-
-            return _sox_io_backend
-        if name == "soundfile_backend":
-            from torchaudio._backend import soundfile_backend
-
-            return soundfile_backend
-
-        if name == "no_backend":
-            from . import _no_backend
-
-            return _no_backend
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+__all__ = []
diff --git a/torchaudio/backend/_no_backend.py b/torchaudio/backend/_no_backend.py
index 5ebeb38708..fcbb2ad84a 100644
--- a/torchaudio/backend/_no_backend.py
+++ b/torchaudio/backend/_no_backend.py
@@ -2,6 +2,7 @@
 from typing import Callable, Optional, Tuple, Union
 
 from torch import Tensor
+from torchaudio import AudioMetaData
 
 
 def load(
@@ -20,5 +21,5 @@ def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, chan
     raise RuntimeError("No audio I/O backend is available.")
 
 
-def info(filepath: str) -> None:
+def info(filepath: str) -> AudioMetaData:
     raise RuntimeError("No audio I/O backend is available.")
diff --git a/torchaudio/backend/_common.py b/torchaudio/backend/common.py
similarity index 94%
rename from torchaudio/backend/_common.py
rename to torchaudio/backend/common.py
index 72f9b875dd..3f736bf401 100644
--- a/torchaudio/backend/_common.py
+++ b/torchaudio/backend/common.py
@@ -1,7 +1,7 @@
 def __getattr__(name: str):
-    import warnings
-
     if name == "AudioMetaData":
+        import warnings
+
         warnings.warn(
             "`torchaudio.backend.common.AudioMetaData` has been moved to "
             "`torchaudio.AudioMetaData`. Please update the import path.",
diff --git a/torchaudio/backend/no_backend.py b/torchaudio/backend/no_backend.py
new file mode 100644
index 0000000000..2645a86bc8
--- /dev/null
+++ b/torchaudio/backend/no_backend.py
@@ -0,0 +1,14 @@
+def __getattr__(name: str):
+    import warnings
+
+    warnings.warn(
+        "Torchaudio's I/O functions now support per-call backend dispatch. "
+        "Importing backend implementation directly is no longer guaranteed to work. "
+        "Please use `backend` keyword with load/save/info function, instead of "
+        "calling the underlying implementation directly.",
+        stacklevel=2,
+    )
+
+    from . import _no_backend
+
+    return getattr(_no_backend, name)
diff --git a/torchaudio/backend/soundfile_backend.py b/torchaudio/backend/soundfile_backend.py
new file mode 100644
index 0000000000..5e81db372a
--- /dev/null
+++ b/torchaudio/backend/soundfile_backend.py
@@ -0,0 +1,14 @@
+def __getattr__(name: str):
+    import warnings
+
+    warnings.warn(
+        "Torchaudio's I/O functions now support per-call backend dispatch. "
+        "Importing backend implementation directly is no longer guaranteed to work. "
+        "Please use `backend` keyword with load/save/info function, instead of "
+        "calling the underlying implementation directly.",
+        stacklevel=2,
+    )
+
+    from torchaudio._backend import soundfile_backend
+
+    return getattr(soundfile_backend, name)
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
new file mode 100644
index 0000000000..a361ab87a5
--- /dev/null
+++ b/torchaudio/backend/sox_io_backend.py
@@ -0,0 +1,14 @@
+def __getattr__(name: str):
+    import warnings
+
+    warnings.warn(
+        "Torchaudio's I/O functions now support per-call backend dispatch. "
+        "Importing backend implementation directly is no longer guaranteed to work. "
+        "Please use `backend` keyword with load/save/info function, instead of "
+        "calling the underlying implementation directly.",
+        stacklevel=2,
+    )
+
+    from . import _sox_io_backend
+
+    return getattr(_sox_io_backend, name)

From ede4309a85733a78c4a9a270916fbac7fae957d8 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:29:12 -0700
Subject: [PATCH 13/13] Update FFmpeg arm64 CI (#3578)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3578

Reviewed By: nateanl

Differential Revision: D48971591

Pulled By: mthrok

fbshipit-source-id: a76cf8f1287c076d7b6919d901eda0ce832bdb7d
---
 .github/workflows/ffmpeg.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ffmpeg.yml b/.github/workflows/ffmpeg.yml
index 956ce4ef55..6ec4237c63 100644
--- a/.github/workflows/ffmpeg.yml
+++ b/.github/workflows/ffmpeg.yml
@@ -44,8 +44,7 @@ jobs:
       job-name: Build
       upload-artifact: ffmpeg-lgpl
       repository: pytorch/audio
-      runner: "linux.t4g.2xlarge"
-      no-sudo: true
+      runner: linux.arm64.2xlarge
       docker-image: pytorch/manylinuxaarch64-builder:cpu-aarch64
       script: |
         export FFMPEG_VERSION="${{ matrix.ffmpeg_version }}"