
Commit

Merge branch 'main' into codeowner
mthrok authored Sep 5, 2023
2 parents cbd058c + ede4309 commit 3926054
Showing 42 changed files with 285 additions and 402 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-conda-m1.yml
@@ -50,3 +50,4 @@ jobs:
trigger-event: ${{ github.event_name }}
secrets:
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
1 change: 1 addition & 0 deletions .github/workflows/build-conda-windows.yml
@@ -45,3 +45,4 @@ jobs:
trigger-event: ${{ github.event_name }}
secrets:
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
1 change: 1 addition & 0 deletions .github/workflows/build_conda_linux.yml
@@ -49,3 +49,4 @@ jobs:
trigger-event: ${{ github.event_name }}
secrets:
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
1 change: 1 addition & 0 deletions .github/workflows/build_conda_macos.yml
@@ -50,3 +50,4 @@ jobs:
trigger-event: ${{ github.event_name }}
secrets:
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
3 changes: 1 addition & 2 deletions .github/workflows/ffmpeg.yml
@@ -44,8 +44,7 @@ jobs:
job-name: Build
upload-artifact: ffmpeg-lgpl
repository: pytorch/audio
runner: "linux.t4g.2xlarge"
no-sudo: true
runner: linux.arm64.2xlarge
docker-image: pytorch/manylinuxaarch64-builder:cpu-aarch64
script: |
export FFMPEG_VERSION="${{ matrix.ffmpeg_version }}"
2 changes: 1 addition & 1 deletion .github/workflows/unittest-windows-gpu.yml
@@ -14,7 +14,7 @@ jobs:
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
with:
repository: pytorch/audio
runner: windows.8xlarge.nvidia.gpu
runner: windows.g5.4xlarge.nvidia.gpu
timeout: 360
script: |
# Mark Build Directory Safe
3 changes: 0 additions & 3 deletions .gitmodules
@@ -1,3 +0,0 @@
[submodule "third_party/hipify_torch"]
path = third_party/hipify_torch
url = https://github.com/ROCmSoftwarePlatform/hipify_torch
10 changes: 0 additions & 10 deletions CMakeLists.txt
@@ -76,11 +76,6 @@ if(USE_ROCM)
if(NOT PYTORCH_FOUND_HIP)
set(USE_ROCM OFF)
endif()

if(CMAKE_VERSION VERSION_LESS 3.21.0)
message("Need at least CMake 3.21.0 to compile ROCm support.")
set(USE_ROCM OFF)
endif()
endif()

if(USE_CUDA)
@@ -95,11 +90,6 @@ if(USE_CUDA)
)
endif()

if(USE_ROCM)
enable_language(HIP)
endif()

find_package(Torch REQUIRED)
include(cmake/TorchAudioHelper.cmake)

# https://github.com/pytorch/pytorch/issues/54174
2 changes: 1 addition & 1 deletion docs/source/installation.rst
@@ -30,7 +30,7 @@ Dependencies
Optional Dependencies
~~~~~~~~~~~~~~~~~~~~~

.. _ffmpeg:
.. _ffmpeg_dependency:

* `FFmpeg <https://ffmpeg.org>`__

27 changes: 5 additions & 22 deletions docs/source/torchaudio.rst
@@ -17,6 +17,7 @@ it easy to handle audio data.
info
load
save
list_audio_backends

.. _backend:

@@ -91,30 +92,12 @@ please refer to https://github.com/pytorch/audio/issues/2950
* In 2.0, audio I/O backend dispatcher was introduced.
Users can opt-in to using dispatcher by setting the environment variable
``TORCHAUDIO_USE_BACKEND_DISPATCHER=1``.
* In 2.1, the dispatcher becomes the default mechanism for I/O.
Those who need to keep using the previous mechanism (global backend) can do
so by setting ``TORCHAUDIO_USE_BACKEND_DISPATCHER=0``.
* In 2.2, the legacy global backend mechanism will be removed.
* In 2.1, the dispatcher became the default mechanism for I/O.
* In 2.2, the legacy global backend mechanism is removed.
Utility functions :py:func:`get_audio_backend` and :py:func:`set_audio_backend`
become no-op.
became no-op.

Furthermore, we are removing file-like object support from libsox backend, as this
Furthermore, we removed file-like object support from libsox backend, as this
is better supported by FFmpeg backend and makes the build process simpler.
Therefore, beginning with 2.1, FFmpeg and Soundfile are the sole backends that support
file-like objects.
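
The per-call dispatch mechanism described above can be sketched in plain Python. This is an illustrative sketch only — the registry, `register_backend`, and the `load`/`list_audio_backends` bodies here are hypothetical stand-ins, not the actual torchaudio internals:

```python
# Hypothetical sketch of a backend dispatcher: a registry of named
# backends, with per-call selection via an optional `backend` argument.
_BACKENDS = {}

def register_backend(name, load_fn):
    """Register a backend implementation under a name (illustrative)."""
    _BACKENDS[name] = load_fn

def list_audio_backends():
    """Return the names of the currently available backends."""
    return sorted(_BACKENDS)

def load(uri, backend=None):
    # An explicitly requested backend wins; otherwise fall back to the
    # first available backend in sorted order.
    if backend is not None:
        return _BACKENDS[backend](uri)
    for name in list_audio_backends():
        return _BACKENDS[name](uri)
    raise RuntimeError("No audio backend is available.")

register_backend("ffmpeg", lambda uri: ("ffmpeg", uri))
register_backend("soundfile", lambda uri: ("soundfile", uri))

print(list_audio_backends())                   # ['ffmpeg', 'soundfile']
print(load("test.wav", backend="soundfile"))   # ('soundfile', 'test.wav')
```

The point of the design is that backend choice moves from process-global state (`set_audio_backend`) to an argument of each I/O call, which is why the global getter/setter could become no-op.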

Backend Utilities
-----------------

The following functions are effective only when backend dispatcher is disabled.

Note that the changes in 2.1 mark :py:func:`get_audio_backend` and
:py:func:`set_audio_backend` as deprecated.

.. autosummary::
:toctree: generated
:nosignatures:

list_audio_backends
get_audio_backend
set_audio_backend
41 changes: 41 additions & 0 deletions examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
@@ -387,6 +387,47 @@ def forward(self, emission: torch.Tensor) -> List[str]:
# and “shoktd”.
#

######################################################################
# Incremental decoding
# ~~~~~~~~~~~~~~~~~~~~
#
# If the input speech is long, one can decode the emission in an
# incremental manner.
#
# You need to first initialize the internal state of the decoder with
# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_begin`.

beam_search_decoder.decode_begin()

######################################################################
# Then, you can pass emissions to
# :py:meth:`~torchaudio.models.decoder.CTCDecoder.decode_step`.
# Here we use the same emission but pass it to the decoder one frame
# at a time.

for t in range(emission.size(1)):
beam_search_decoder.decode_step(emission[0, t:t + 1, :])

######################################################################
# Finally, finalize the internal state of the decoder, and retrieve the
# result.

beam_search_decoder.decode_end()
beam_search_result_inc = beam_search_decoder.get_final_hypothesis()

######################################################################
# The result of incremental decoding is identical to batch decoding.
#
beam_search_transcript_inc = " ".join(beam_search_result_inc[0].words).strip()
beam_search_wer_inc = torchaudio.functional.edit_distance(
actual_transcript, beam_search_result_inc[0].words) / len(actual_transcript)

print(f"Transcript: {beam_search_transcript_inc}")
print(f"WER: {beam_search_wer_inc}")

assert beam_search_result[0][0].words == beam_search_result_inc[0].words
assert beam_search_result[0][0].score == beam_search_result_inc[0].score
torch.testing.assert_close(beam_search_result[0][0].timesteps, beam_search_result_inc[0].timesteps)

######################################################################
# Timestep Alignments
11 changes: 5 additions & 6 deletions examples/tutorials/device_asr.py
@@ -206,16 +206,15 @@ def __init__(self, bundle: torchaudio.pipelines.RNNTBundle, beam_width: int = 10
self.beam_width = beam_width

self.state = None
self.hypothesis = None
self.hypotheses = None

def infer(self, segment: torch.Tensor) -> str:
"""Perform streaming inference"""
features, length = self.feature_extractor(segment)
hypos, self.state = self.decoder.infer(
features, length, self.beam_width, state=self.state, hypothesis=self.hypothesis
self.hypotheses, self.state = self.decoder.infer(
features, length, self.beam_width, state=self.state, hypothesis=self.hypotheses
)
self.hypothesis = hypos[0]
transcript = self.token_processor(self.hypothesis[0], lstrip=False)
transcript = self.token_processor(self.hypotheses[0][0], lstrip=False)
return transcript


@@ -291,7 +290,7 @@ def infer():
chunk = q.get()
segment = cacher(chunk[:, 0])
transcript = pipeline.infer(segment)
print(transcript, end="", flush=True)
print(transcript, end="\r", flush=True)

import torch.multiprocessing as mp

9 changes: 4 additions & 5 deletions examples/tutorials/device_avsr.py
@@ -258,15 +258,14 @@ def __init__(self, preprocessor, model, decoder, token_processor):
self.token_processor = token_processor

self.state = None
self.hypothesis = None
self.hypotheses = None

def forward(self, audio, video):
audio, video = self.preprocessor(audio, video)
feats = self.model(audio.unsqueeze(0), video.unsqueeze(0))
length = torch.tensor([feats.size(1)], device=audio.device)
hypos, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypothesis)
self.hypothesis = hypos[0]
transcript = self.token_processor(self.hypothesis[0], lstrip=False)
self.hypotheses, self.state = self.decoder.infer(feats, length, 10, state=self.state, hypothesis=self.hypotheses)
transcript = self.token_processor(self.hypotheses[0][0], lstrip=False)
return transcript


@@ -370,7 +369,7 @@ def infer():
video, audio = cacher(video, audio)
pipeline.state, pipeline.hypothesis = None, None
transcript = pipeline(audio, video.float())
print(transcript, end="", flush=True)
print(transcript, end="\r", flush=True)
num_video_frames = 0
video_chunks = []
audio_chunks = []
36 changes: 36 additions & 0 deletions packaging/cut_release.sh
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Usage (run from root of project):
# TEST_INFRA_BRANCH=release/2.1 RELEASE_BRANCH=release/2.1 RELEASE_VERSION=2.1.0 packaging/cut_release.sh
#
# TEST_INFRA_BRANCH: The release branch of test-infra that houses all reusable
# workflows
#
# RELEASE_BRANCH: The name of the release branch for this repo
#
# RELEASE_VERSION: Version of this current release

set -eou pipefail

# Create and Check out to Release Branch
git checkout -b "${RELEASE_BRANCH}"

# Change all GitHub Actions to reference the test-infra release branch
# as opposed to main.
for i in .github/workflows/*.yml; do
if [[ "$OSTYPE" == "darwin"* ]]; then
sed -i '' -e s#@main#@"${TEST_INFRA_BRANCH}"# $i;
sed -i '' -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i;
else
sed -i -e s#@main#@"${TEST_INFRA_BRANCH}"# $i;
sed -i -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i;
fi
done

# Update the Release Version in version.txt
echo "${RELEASE_VERSION}" >version.txt

# Optional
# git add ./github/workflows/*.yml version.txt
# git commit -m "[RELEASE-ONLY CHANGES] Branch Cut for Release ${RELEASE_VERSION}"
# git push origin "${RELEASE_BRANCH}"
2 changes: 1 addition & 1 deletion packaging/windows/internal/cuda_install.bat
@@ -57,7 +57,7 @@ goto cuda_common

:cuda121

set CUDA_INSTALL_EXE=cuda_12.1.0_531.14_windows.exe
set CUDA_INSTALL_EXE=cuda_12.1.1_531.14_windows.exe
if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
if errorlevel 1 exit /b 1
1 change: 0 additions & 1 deletion test/torchaudio_unittest/backend/soundfile/info_test.py
@@ -117,7 +117,6 @@ class MockSoundFileInfo:
with patch("soundfile.info", _mock_info_func):
with warnings.catch_warnings(record=True) as w:
info = soundfile_backend.info("foo")
assert len(w) == 1
assert "UNSEEN_SUBTYPE subtype is unknown to TorchAudio" in str(w[-1].message)
assert info.bits_per_sample == 0

36 changes: 0 additions & 36 deletions test/torchaudio_unittest/backend/utils_test.py

This file was deleted.

1 change: 0 additions & 1 deletion third_party/hipify_torch
Submodule hipify_torch deleted from 083ff9
38 changes: 18 additions & 20 deletions torchaudio/__init__.py
@@ -1,5 +1,16 @@
# Initialize extension and backend first
from . import _extension # noqa # usort: skip
from ._backend import ( # noqa # usort: skip
AudioMetaData,
get_audio_backend,
info,
list_audio_backends,
load,
save,
set_audio_backend,
)

from . import ( # noqa: F401
_extension,
compliance,
datasets,
functional,
@@ -11,34 +22,21 @@
transforms,
utils,
)
from ._backend.common import AudioMetaData # noqa

# For BC
from . import backend # noqa # usort: skip

try:
from .version import __version__, git_version # noqa: F401
except ImportError:
pass


def _is_backend_dispatcher_enabled():
import os

return os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER", default="1") == "1"


if _is_backend_dispatcher_enabled():
from ._backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend
else:
from .backend import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend


_init_backend()

# for backward compatibility. This has to happen after _backend is imported.
from . import backend # noqa: F401


__all__ = [
"AudioMetaData",
"load",
"info",
"save",
"io",
"compliance",
"datasets",