Fix typos #3981

Merged 1 commit on Jan 27, 2025
2 changes: 1 addition & 1 deletion audio/paddleaudio/utils/time.py
@@ -21,7 +21,7 @@


class Timer(object):
'''Calculate runing speed and estimated time of arrival(ETA)'''
'''Calculate running speed and estimated time of arrival(ETA)'''

def __init__(self, total_step: int):
self.total_step = total_step
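For readers unfamiliar with the class, here is a minimal standalone sketch of the running-speed/ETA idea the corrected docstring describes (an illustration only, not the paddleaudio implementation; the SimpleETATimer name and count() method are made up):

import time

# Standalone illustration of the ETA idea described in the Timer docstring above;
# this is not the paddleaudio Timer implementation.
class SimpleETATimer:
    def __init__(self, total_step: int):
        self.total_step = total_step
        self.start = time.time()
        self.step = 0

    def count(self) -> float:
        """Advance one step and return the estimated seconds remaining."""
        self.step += 1
        elapsed = time.time() - self.start
        speed = self.step / elapsed if elapsed > 0 else 0.0  # steps per second
        remaining = self.total_step - self.step
        return remaining / speed if speed > 0 else float("inf")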
2 changes: 1 addition & 1 deletion audio/tests/backends/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
urllib.request.urlretrieve(url, os.path.basename(url))
self.files.append(os.path.basename(url))

def initParmas(self):
def initParams(self):
raise NotImplementedError
2 changes: 1 addition & 1 deletion audio/tests/backends/soundfile/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
urllib.request.urlretrieve(url, os.path.basename(url))
self.files.append(os.path.basename(url))

def initParmas(self):
def initParams(self):
raise NotImplementedError
4 changes: 2 additions & 2 deletions audio/tests/backends/soundfile/save_test.py
@@ -103,7 +103,7 @@ def assert_non_wav(
encoding=encoding,
bits_per_sample=bits_per_sample, )

# on +Py3.8 call_args.kwargs is more descreptive
# on +Py3.8 call_args.kwargs is more descriptive
args = mocked_write.call_args[1]
assert args["file"] == filepath
assert args["samplerate"] == sample_rate
@@ -191,7 +191,7 @@ def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
"""`soundfile_backend.save` can save non-wav format.

Due to precision missmatch, and the lack of alternative way to decode the
Due to precision mismatch, and the lack of alternative way to decode the
resulting files without using soundfile, only meta data are validated.
"""
num_frames = sample_rate * 3
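As a side note on the corrected comment above: on Python 3.8+ call_args exposes .kwargs, which reads better than the positional index. A small self-contained illustration (not part of the test file):

from unittest import mock

m = mock.Mock()
m("file.flac", samplerate=16000)
# call_args[1] and call_args.kwargs (Python 3.8+) both hold the keyword arguments.
assert m.call_args[1] == {"samplerate": 16000}
assert m.call_args.kwargs == {"samplerate": 16000}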
2 changes: 1 addition & 1 deletion audio/tests/common_utils/data_utils.py
@@ -81,7 +81,7 @@ def convert_tensor_encoding(
#dtype = getattr(paddle, dtype)
#if dtype not in [paddle.float64, paddle.float32, paddle.int32, paddle.int16, paddle.uint8]:
#raise NotImplementedError(f"dtype {dtype} is not supported.")
## According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices,
## According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices,
## so we only fork on CPU, generate values and move the data to the given device
#with paddle.random.fork_rng([]):
#paddle.random.manual_seed(seed)
43 changes: 27 additions & 16 deletions audio/tests/common_utils/sox_utils.py
@@ -24,20 +24,21 @@ def get_bit_depth(dtype):


def gen_audio_file(
path,
sample_rate,
num_channels,
*,
encoding=None,
bit_depth=None,
compression=None,
attenuation=None,
duration=1,
comment_file=None,
):
path,
sample_rate,
num_channels,
*,
encoding=None,
bit_depth=None,
compression=None,
attenuation=None,
duration=1,
comment_file=None, ):
"""Generate synthetic audio file with `sox` command."""
if path.endswith(".wav"):
warnings.warn("Use get_wav_data and save_wav to generate wav file for accurate result.")
warnings.warn(
"Use get_wav_data and save_wav to generate wav file for accurate result."
)
command = [
"sox",
"-V3", # verbose
@@ -81,7 +82,12 @@ def gen_audio_file(
subprocess.run(command, check=True)


def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, compression=None):
def convert_audio_file(src_path,
dst_path,
*,
encoding=None,
bit_depth=None,
compression=None):
"""Convert audio file with `sox` command."""
command = ["sox", "-V3", "--no-dither", "-R", str(src_path)]
if encoding is not None:
@@ -95,17 +101,22 @@ def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, com
subprocess.run(command, check=True)


def _flattern(effects):
def _flatten(effects):
if not effects:
return effects
if isinstance(effects[0], str):
return effects
return [item for sublist in effects for item in sublist]


def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None):
def run_sox_effect(input_file,
output_file,
effect,
*,
output_sample_rate=None,
output_bitdepth=None):
"""Run sox effects"""
effect = _flattern(effect)
effect = _flatten(effect)
command = ["sox", "-V", "--no-dither", input_file]
if output_bitdepth:
command += ["--bits", str(output_bitdepth)]
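For orientation, a short usage sketch of the helpers touched in this file, assuming sox is installed and on PATH (the file paths and parameter values below are illustrative, not taken from the tests):

# Illustrative use of the helpers above; paths and values are made up,
# and `sox` must be available on PATH.
gen_audio_file(
    "/tmp/test.flac",        # non-wav target, so the sox path is exercised
    sample_rate=16000,
    num_channels=2,
    bit_depth=16,
    duration=1, )

convert_audio_file("/tmp/test.flac", "/tmp/test.ogg")

# run_sox_effect accepts a flat effect or a list of effects;
# _flatten normalizes the nested form before the sox command is built.
run_sox_effect(
    "/tmp/test.ogg",
    "/tmp/out.wav",
    [["rate", "8000"], ["channels", "1"]],
    output_bitdepth=16)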
4 changes: 2 additions & 2 deletions audio/tests/features/base.py
@@ -24,7 +24,7 @@

class FeatTest(unittest.TestCase):
def setUp(self):
self.initParmas()
self.initParams()
self.initWavInput()
self.setUpDevice()

@@ -44,5 +44,5 @@ def initWavInput(self, url=wav_url):
if dim == 1:
self.waveform = np.expand_dims(self.waveform, 0)

def initParmas(self):
def initParams(self):
raise NotImplementedError
2 changes: 1 addition & 1 deletion audio/tests/features/test_istft.py
@@ -23,7 +23,7 @@


class TestIstft(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.window_str = 'hann'
3 changes: 1 addition & 2 deletions audio/tests/features/test_kaldi.py
@@ -18,12 +18,11 @@
import paddleaudio
import torch
import torchaudio

from base import FeatTest


class TestKaldi(FeatTest):
def initParmas(self):
def initParams(self):
self.window_size = 1024
self.dtype = 'float32'

5 changes: 2 additions & 3 deletions audio/tests/features/test_librosa.py
@@ -17,13 +17,12 @@
import numpy as np
import paddle
import paddleaudio
from paddleaudio.functional.window import get_window

from base import FeatTest
from paddleaudio.functional.window import get_window


class TestLibrosa(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.n_mels = 40
2 changes: 1 addition & 1 deletion audio/tests/features/test_log_melspectrogram.py
@@ -22,7 +22,7 @@


class TestLogMelSpectrogram(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.n_mels = 40
2 changes: 1 addition & 1 deletion audio/tests/features/test_spectrogram.py
@@ -22,7 +22,7 @@


class TestSpectrogram(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128

4 changes: 2 additions & 2 deletions audio/tests/features/test_stft.py
@@ -22,15 +22,15 @@


class TestStft(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.window_str = 'hann'

def test_stft(self):
ps_stft = Stft(self.n_fft, self.hop_length)
ps_res = ps_stft(
self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frmaes)
self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frames)

x = paddle.to_tensor(self.waveform)
window = get_window(self.window_str, self.n_fft, dtype=x.dtype)
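For a quick sanity check of the shape noted in the corrected comment, a small sketch (values are illustrative; center-padded framing is assumed):

# Sketch of the spectrogram shape noted above, assuming center padding.
n_fft, hop_length, num_samples = 512, 128, 16000   # one second at 16 kHz
freq_bins = n_fft // 2 + 1                          # 257 one-sided frequency bins
n_frames = 1 + num_samples // hop_length            # 126 frames
print((freq_bins, n_frames))                        # (257, 126)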
2 changes: 1 addition & 1 deletion dataset/librispeech/librispeech.py
@@ -132,7 +132,7 @@ def create_manifest(data_dir, manifest_path):


def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""Download, unpack and create summmary manifest file.
"""Download, unpack and create summary manifest file.
"""
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download
2 changes: 1 addition & 1 deletion dataset/ted_en_zh/ted_en_zh.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""Prepare Ted-En-Zh speech translation dataset

Create manifest files from splited datased.
Create manifest files from splited dataset.
dev set: tst2010, test set: tst2015
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
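To make the manifest description above concrete, a hedged sketch of what one JSON line might look like (field names follow the thchs30 entry later in this diff and are assumptions for Ted-En-Zh, not the exact schema):

import json

# Illustrative manifest entry; one JSON object per line. Field names are
# assumptions, not the repository's exact schema for this dataset.
entry = {
    "feat": "/data/ted_en_zh/wav/ted_0001.wav",  # audio filepath
    "feat_shape": [12.3],                        # duration in seconds
    "text": "example transcript",
}
print(json.dumps(entry))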
4 changes: 2 additions & 2 deletions dataset/thchs30/thchs30.py
@@ -71,7 +71,7 @@ def read_trn(filepath):
with open(filepath, 'r') as f:
lines = f.read().strip().split('\n')
assert len(lines) == 3, lines
# charactor text, remove withespace
# character text, remove whitespace
texts.append(''.join(lines[0].split()))
texts.extend(lines[1:])
return texts
@@ -127,7 +127,7 @@ def create_manifest(data_dir, manifest_path_prefix):
'utt2spk': spk,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': word_text, # charactor
'text': word_text, # character
'syllable': syllable_text,
'phone': phone_text,
},
2 changes: 1 addition & 1 deletion dataset/timit/timit.py
@@ -123,7 +123,7 @@ def read_algin(filepath: str) -> str:
filepath (str): [description]

Returns:
str: token sepearte by <space>
str: token separate by <space>
"""
aligns = [] # (start, end, token)
with open(filepath, 'r') as f:
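The alignment file format itself is not shown in this hunk; a hedged sketch of the behavior the corrected docstring describes, assuming three whitespace-separated columns (start, end, token) per line:

# Illustrative sketch only; the (start, end, token) column layout is an
# assumption about the alignment file, not taken from this diff.
def read_align_sketch(filepath: str) -> str:
    tokens = []
    with open(filepath, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            _start, _end, token = line.strip().split()
            tokens.append(token)
    return ' '.join(tokens)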
2 changes: 1 addition & 1 deletion dataset/timit/timit_kaldi_standard_split.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""Prepare TIMIT dataset (Standard split from Kaldi)

Create manifest files from splited datased.
Create manifest files from splited dataset.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
2 changes: 1 addition & 1 deletion dataset/voxceleb/voxceleb1.py
@@ -167,7 +167,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,

# check the target zip file md5sum
if not check_md5sum(target_name, target_md5sum):
raise RuntimeError("{} MD5 checkssum failed".format(target_name))
raise RuntimeError("{} MD5 checksum failed".format(target_name))
else:
print("Check {} md5sum successfully".format(target_name))

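The check_md5sum helper referenced in this hunk is not shown in the diff; a minimal sketch of what such a check typically does (an assumption, not the repository's implementation):

import hashlib

# Sketch of an MD5 verification like the one referenced above;
# not the repository's check_md5sum.
def md5_matches(path: str, expected: str, chunk_size: int = 1 << 20) -> bool:
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest() == expected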
4 changes: 2 additions & 2 deletions dataset/voxceleb/voxceleb2.py
@@ -179,15 +179,15 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset):

# check the target zip file md5sum
if not check_md5sum(target_name, target_md5sum):
raise RuntimeError("{} MD5 checkssum failed".format(target_name))
raise RuntimeError("{} MD5 checksum failed".format(target_name))
else:
print("Check {} md5sum successfully".format(target_name))

if dataset == "test":
# we need make the test directory
unzip(target_name, os.path.join(target_dir, "test"))
else:
# upzip dev zip pacakge and will create the dev directory
# unzip dev zip package and will create the dev directory
unzip(target_name, target_dir)


2 changes: 1 addition & 1 deletion demos/audio_content_search/README.md
@@ -14,7 +14,7 @@ Now, the search word in demo is:
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from meduim and hard to install paddlespeech.
You can choose one way from medium and hard to install paddlespeech.

The dependency refers to the requirements.txt, and install the dependency as follows:

2 changes: 1 addition & 1 deletion demos/audio_searching/README.md
@@ -19,7 +19,7 @@ Note:this demo uses the [CN-Celeb](http://openslr.org/82/) dataset of at least
### 1. Prepare PaddleSpeech
Audio vector extraction requires PaddleSpeech training model, so please make sure that PaddleSpeech has been installed before running. Specific installation steps: See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare MySQL and Milvus services by docker-compose
The audio similarity search system requires Milvus, MySQL services. We can start these containers with one click through [docker-compose.yaml](./docker-compose.yaml), so please make sure you have [installed Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) before running. then
2 changes: 1 addition & 1 deletion demos/audio_tagging/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to tag an audio file with 527 [AudioSet](https://
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`).
2 changes: 1 addition & 1 deletion demos/automatic_video_subtitiles/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to automatic video subtitles from a video file. I
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
Get a video file with the speech of the specific language:
2 changes: 1 addition & 1 deletion demos/keyword_spotting/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize keyword from a specific audio file.
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/punctuation_restoration/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to restore punctuation from raw text. It can be d
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
The input of this demo should be a text of the specific language that can be passed via argument.
2 changes: 1 addition & 1 deletion demos/speaker_verification/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to extract speaker embedding from a specific audi
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this cli demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_recognition/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text from a specific audio file. It
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_server/README.md
@@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc

It is recommended to use **paddlepaddle 2.4rc** or above.

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

**If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**

2 changes: 1 addition & 1 deletion demos/speech_ssl/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text or produce the acoustic represe
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_translation/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to recognize text from a specific audio file and
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.


### 2. Prepare Input File