diff --git a/audio/paddleaudio/utils/time.py b/audio/paddleaudio/utils/time.py
index 105208f9106..4ea41328200 100644
--- a/audio/paddleaudio/utils/time.py
+++ b/audio/paddleaudio/utils/time.py
@@ -21,7 +21,7 @@
 
 
 class Timer(object):
-    '''Calculate runing speed and estimated time of arrival(ETA)'''
+    '''Calculate running speed and estimated time of arrival (ETA)'''
 
     def __init__(self, total_step: int):
         self.total_step = total_step
diff --git a/audio/tests/backends/base.py b/audio/tests/backends/base.py
index a67191887ff..c2d53d209a6 100644
--- a/audio/tests/backends/base.py
+++ b/audio/tests/backends/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
             urllib.request.urlretrieve(url, os.path.basename(url))
             self.files.append(os.path.basename(url))
 
-    def initParmas(self):
+    def initParams(self):
         raise NotImplementedError
diff --git a/audio/tests/backends/soundfile/base.py b/audio/tests/backends/soundfile/base.py
index a67191887ff..c2d53d209a6 100644
--- a/audio/tests/backends/soundfile/base.py
+++ b/audio/tests/backends/soundfile/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
             urllib.request.urlretrieve(url, os.path.basename(url))
             self.files.append(os.path.basename(url))
 
-    def initParmas(self):
+    def initParams(self):
         raise NotImplementedError
diff --git a/audio/tests/backends/soundfile/save_test.py b/audio/tests/backends/soundfile/save_test.py
index 4f3df6e4804..4b5facd0823 100644
--- a/audio/tests/backends/soundfile/save_test.py
+++ b/audio/tests/backends/soundfile/save_test.py
@@ -103,7 +103,7 @@ def assert_non_wav(
             encoding=encoding,
             bits_per_sample=bits_per_sample,
         )
-        # on +Py3.8 call_args.kwargs is more descreptive
+        # on Py3.8+ call_args.kwargs is more descriptive
        args = mocked_write.call_args[1]
         assert args["file"] == filepath
         assert args["samplerate"] == sample_rate
@@ -191,7 +191,7 @@ def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
     def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
         """`soundfile_backend.save` can save non-wav format.
 
-        Due to precision missmatch, and the lack of alternative way to decode the
+        Due to precision mismatch and the lack of an alternative way to decode the
         resulting files without using soundfile, only meta data are validated.
""" num_frames = sample_rate * 3 diff --git a/audio/tests/common_utils/data_utils.py b/audio/tests/common_utils/data_utils.py index b5618618ca4..16f575701da 100644 --- a/audio/tests/common_utils/data_utils.py +++ b/audio/tests/common_utils/data_utils.py @@ -81,7 +81,7 @@ def convert_tensor_encoding( #dtype = getattr(paddle, dtype) #if dtype not in [paddle.float64, paddle.float32, paddle.int32, paddle.int16, paddle.uint8]: #raise NotImplementedError(f"dtype {dtype} is not supported.") -## According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices, +## According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices, ## so we only fork on CPU, generate values and move the data to the given device #with paddle.random.fork_rng([]): #paddle.random.manual_seed(seed) diff --git a/audio/tests/common_utils/sox_utils.py b/audio/tests/common_utils/sox_utils.py index 6ceae081e42..4c0866ed975 100644 --- a/audio/tests/common_utils/sox_utils.py +++ b/audio/tests/common_utils/sox_utils.py @@ -24,20 +24,21 @@ def get_bit_depth(dtype): def gen_audio_file( - path, - sample_rate, - num_channels, - *, - encoding=None, - bit_depth=None, - compression=None, - attenuation=None, - duration=1, - comment_file=None, -): + path, + sample_rate, + num_channels, + *, + encoding=None, + bit_depth=None, + compression=None, + attenuation=None, + duration=1, + comment_file=None, ): """Generate synthetic audio file with `sox` command.""" if path.endswith(".wav"): - warnings.warn("Use get_wav_data and save_wav to generate wav file for accurate result.") + warnings.warn( + "Use get_wav_data and save_wav to generate wav file for accurate result." + ) command = [ "sox", "-V3", # verbose @@ -81,7 +82,12 @@ def gen_audio_file( subprocess.run(command, check=True) -def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, compression=None): +def convert_audio_file(src_path, + dst_path, + *, + encoding=None, + bit_depth=None, + compression=None): """Convert audio file with `sox` command.""" command = ["sox", "-V3", "--no-dither", "-R", str(src_path)] if encoding is not None: @@ -95,7 +101,7 @@ def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, com subprocess.run(command, check=True) -def _flattern(effects): +def _flatten(effects): if not effects: return effects if isinstance(effects[0], str): @@ -103,9 +109,14 @@ def _flattern(effects): return [item for sublist in effects for item in sublist] -def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None): +def run_sox_effect(input_file, + output_file, + effect, + *, + output_sample_rate=None, + output_bitdepth=None): """Run sox effects""" - effect = _flattern(effect) + effect = _flatten(effect) command = ["sox", "-V", "--no-dither", input_file] if output_bitdepth: command += ["--bits", str(output_bitdepth)] diff --git a/audio/tests/features/base.py b/audio/tests/features/base.py index 3bb1d1dde51..4a44e04bba9 100644 --- a/audio/tests/features/base.py +++ b/audio/tests/features/base.py @@ -24,7 +24,7 @@ class FeatTest(unittest.TestCase): def setUp(self): - self.initParmas() + self.initParams() self.initWavInput() self.setUpDevice() @@ -44,5 +44,5 @@ def initWavInput(self, url=wav_url): if dim == 1: self.waveform = np.expand_dims(self.waveform, 0) - def initParmas(self): + def initParams(self): raise NotImplementedError diff --git a/audio/tests/features/test_istft.py b/audio/tests/features/test_istft.py index ea1ee5cb63e..862a1d753b2 
--- a/audio/tests/features/test_istft.py
+++ b/audio/tests/features/test_istft.py
@@ -23,7 +23,7 @@ class TestIstft(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.n_fft = 512
         self.hop_length = 128
         self.window_str = 'hann'
diff --git a/audio/tests/features/test_kaldi.py b/audio/tests/features/test_kaldi.py
index 2bd5dc7343f..50e2571ca04 100644
--- a/audio/tests/features/test_kaldi.py
+++ b/audio/tests/features/test_kaldi.py
@@ -18,12 +18,11 @@
 import paddleaudio
 import torch
 import torchaudio
-
 from base import FeatTest
 
 
 class TestKaldi(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.window_size = 1024
         self.dtype = 'float32'
diff --git a/audio/tests/features/test_librosa.py b/audio/tests/features/test_librosa.py
index 8cda25b19e5..07b117cb02c 100644
--- a/audio/tests/features/test_librosa.py
+++ b/audio/tests/features/test_librosa.py
@@ -17,13 +17,12 @@
 import numpy as np
 import paddle
 import paddleaudio
-from paddleaudio.functional.window import get_window
-
 from base import FeatTest
+from paddleaudio.functional.window import get_window
 
 
 class TestLibrosa(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.n_fft = 512
         self.hop_length = 128
         self.n_mels = 40
diff --git a/audio/tests/features/test_log_melspectrogram.py b/audio/tests/features/test_log_melspectrogram.py
index b2765d3bef6..6152d6ff2cc 100644
--- a/audio/tests/features/test_log_melspectrogram.py
+++ b/audio/tests/features/test_log_melspectrogram.py
@@ -22,7 +22,7 @@ class TestLogMelSpectrogram(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.n_fft = 512
         self.hop_length = 128
         self.n_mels = 40
diff --git a/audio/tests/features/test_spectrogram.py b/audio/tests/features/test_spectrogram.py
index 6f4609632fb..c2dced2e776 100644
--- a/audio/tests/features/test_spectrogram.py
+++ b/audio/tests/features/test_spectrogram.py
@@ -22,7 +22,7 @@ class TestSpectrogram(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.n_fft = 512
         self.hop_length = 128
diff --git a/audio/tests/features/test_stft.py b/audio/tests/features/test_stft.py
index 9511a292694..5bab170be9f 100644
--- a/audio/tests/features/test_stft.py
+++ b/audio/tests/features/test_stft.py
@@ -22,7 +22,7 @@ class TestStft(FeatTest):
-    def initParmas(self):
+    def initParams(self):
         self.n_fft = 512
         self.hop_length = 128
         self.window_str = 'hann'
@@ -30,7 +30,7 @@ def initParmas(self):
     def test_stft(self):
         ps_stft = Stft(self.n_fft, self.hop_length)
         ps_res = ps_stft(
-            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frmaes)
+            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frames)
 
         x = paddle.to_tensor(self.waveform)
         window = get_window(self.window_str, self.n_fft, dtype=x.dtype)
diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py
index 2f5f9016cb1..ccf8d4b494f 100644
--- a/dataset/librispeech/librispeech.py
+++ b/dataset/librispeech/librispeech.py
@@ -132,7 +132,7 @@ def create_manifest(data_dir, manifest_path):
 
 
 def prepare_dataset(url, md5sum, target_dir, manifest_path):
-    """Download, unpack and create summmary manifest file.
+    """Download, unpack and create summary manifest file.
     """
     if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
         # download
diff --git a/dataset/ted_en_zh/ted_en_zh.py b/dataset/ted_en_zh/ted_en_zh.py
index 2d1fc67100e..66810c85e9f 100644
--- a/dataset/ted_en_zh/ted_en_zh.py
+++ b/dataset/ted_en_zh/ted_en_zh.py
@@ -13,7 +13,7 @@
 # limitations under the License.
"""Prepare Ted-En-Zh speech translation dataset -Create manifest files from splited datased. +Create manifest files from splited dataset. dev set: tst2010, test set: tst2015 Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py index c5c3eb7a8a7..fc8338984bd 100644 --- a/dataset/thchs30/thchs30.py +++ b/dataset/thchs30/thchs30.py @@ -71,7 +71,7 @@ def read_trn(filepath): with open(filepath, 'r') as f: lines = f.read().strip().split('\n') assert len(lines) == 3, lines - # charactor text, remove withespace + # character text, remove whitespace texts.append(''.join(lines[0].split())) texts.extend(lines[1:]) return texts @@ -127,7 +127,7 @@ def create_manifest(data_dir, manifest_path_prefix): 'utt2spk': spk, 'feat': audio_path, 'feat_shape': (duration, ), # second - 'text': word_text, # charactor + 'text': word_text, # character 'syllable': syllable_text, 'phone': phone_text, }, diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py index f3889d1767e..2943ff5488f 100644 --- a/dataset/timit/timit.py +++ b/dataset/timit/timit.py @@ -123,7 +123,7 @@ def read_algin(filepath: str) -> str: filepath (str): [description] Returns: - str: token sepearte by + str: token separate by """ aligns = [] # (start, end, token) with open(filepath, 'r') as f: diff --git a/dataset/timit/timit_kaldi_standard_split.py b/dataset/timit/timit_kaldi_standard_split.py index 473fc856f4f..59ce2e64adc 100644 --- a/dataset/timit/timit_kaldi_standard_split.py +++ b/dataset/timit/timit_kaldi_standard_split.py @@ -13,7 +13,7 @@ # limitations under the License. """Prepare TIMIT dataset (Standard split from Kaldi) -Create manifest files from splited datased. +Create manifest files from splited dataset. Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. 
diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py
index 8d410067850..49a2a6baa40 100644
--- a/dataset/voxceleb/voxceleb1.py
+++ b/dataset/voxceleb/voxceleb1.py
@@ -167,7 +167,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
 
     # check the target zip file md5sum
     if not check_md5sum(target_name, target_md5sum):
-        raise RuntimeError("{} MD5 checkssum failed".format(target_name))
+        raise RuntimeError("{} MD5 checksum failed".format(target_name))
     else:
         print("Check {} md5sum successfully".format(target_name))
diff --git a/dataset/voxceleb/voxceleb2.py b/dataset/voxceleb/voxceleb2.py
index 6df6d1f3807..faa3b99bc86 100644
--- a/dataset/voxceleb/voxceleb2.py
+++ b/dataset/voxceleb/voxceleb2.py
@@ -179,7 +179,7 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset):
 
     # check the target zip file md5sum
     if not check_md5sum(target_name, target_md5sum):
-        raise RuntimeError("{} MD5 checkssum failed".format(target_name))
+        raise RuntimeError("{} MD5 checksum failed".format(target_name))
     else:
         print("Check {} md5sum successfully".format(target_name))
@@ -187,7 +187,7 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset):
         # we need make the test directory
         unzip(target_name, os.path.join(target_dir, "test"))
     else:
-        # upzip dev zip pacakge and will create the dev directory
+        # unzip the dev zip package, which will create the dev directory
         unzip(target_name, target_dir)
diff --git a/demos/audio_content_search/README.md b/demos/audio_content_search/README.md
index f04ac447e71..89b1c0d896d 100644
--- a/demos/audio_content_search/README.md
+++ b/demos/audio_content_search/README.md
@@ -14,7 +14,7 @@ Now, the search word in demo is:
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from meduim and hard to install paddlespeech.
+You can choose one way from medium and hard to install paddlespeech.
 
 The dependency refers to the requirements.txt, and install the dependency as follows:
diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md
index 0fc901432bf..528fce9e8a0 100644
--- a/demos/audio_searching/README.md
+++ b/demos/audio_searching/README.md
@@ -19,7 +19,7 @@ Note: this demo uses the [CN-Celeb](http://openslr.org/82/) dataset of at least
 
 ### 1. Prepare PaddleSpeech
 Audio vector extraction requires PaddleSpeech training model, so please make sure that PaddleSpeech has been installed before running. Specific installation steps: See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare MySQL and Milvus services by docker-compose
 The audio similarity search system requires Milvus, MySQL services. We can start these containers with one click through [docker-compose.yaml](./docker-compose.yaml), so please make sure you have [installed Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) before running. then
diff --git a/demos/audio_tagging/README.md b/demos/audio_tagging/README.md
index fc4a334ea05..b602c60220f 100644
--- a/demos/audio_tagging/README.md
+++ b/demos/audio_tagging/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to tag an audio file with 527 [AudioSet](https://
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`).
diff --git a/demos/automatic_video_subtitiles/README.md b/demos/automatic_video_subtitiles/README.md
index b815425ec42..89d8c73c975 100644
--- a/demos/automatic_video_subtitiles/README.md
+++ b/demos/automatic_video_subtitiles/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to automatic video subtitles from a video file. I
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input
 Get a video file with the speech of the specific language:
diff --git a/demos/keyword_spotting/README.md b/demos/keyword_spotting/README.md
index 6544cf71e03..b55c711243c 100644
--- a/demos/keyword_spotting/README.md
+++ b/demos/keyword_spotting/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize keyword from a specific audio file.
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
diff --git a/demos/punctuation_restoration/README.md b/demos/punctuation_restoration/README.md
index 458ab92f9b2..3544a20602b 100644
--- a/demos/punctuation_restoration/README.md
+++ b/demos/punctuation_restoration/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to restore punctuation from raw text. It can be d
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input
 The input of this demo should be a text of the specific language that can be passed via argument.
diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md
index 55f9a7360be..37c6bf3b9d2 100644
--- a/demos/speaker_verification/README.md
+++ b/demos/speaker_verification/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to extract speaker embedding from a specific audi
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this cli demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md
index ee2acd6fd06..e406590d286 100644
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text from a specific audio file. It
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md
index 116f1fd7b20..08788a89ebc 100644
--- a/demos/speech_server/README.md
+++ b/demos/speech_server/README.md
@@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc
 
 It is recommended to use **paddlepaddle 2.4rc** or above.
 
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**
diff --git a/demos/speech_ssl/README.md b/demos/speech_ssl/README.md
index ef9b2237d38..8677ebc5782 100644
--- a/demos/speech_ssl/README.md
+++ b/demos/speech_ssl/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text or produce the acoustic represe
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
diff --git a/demos/speech_translation/README.md b/demos/speech_translation/README.md
index 00a9c79324a..4866336c03c 100644
--- a/demos/speech_translation/README.md
+++ b/demos/speech_translation/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to recognize text from a specific audio file and
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
index 136863b96a0..423485466a1 100644
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@@ -18,7 +18,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc
 
 It is recommended to use **paddlepaddle 2.4rc** or above.
 
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to
diff --git a/demos/streaming_tts_server/README.md b/demos/streaming_tts_server/README.md
index ca5d6f1f824..ad87bebdcc5 100644
--- a/demos/streaming_tts_server/README.md
+++ b/demos/streaming_tts_server/README.md
@@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc
 
 It is recommended to use **paddlepaddle 2.4rc** or above.
 
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**
diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md
index d7bb8ca1cfc..b58777defb0 100644
--- a/demos/text_to_speech/README.md
+++ b/demos/text_to_speech/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to generate audio from the given text. It can be
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
-You can choose one way from easy, meduim and hard to install paddlespeech.
+You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input
 The input of this demo should be a text of the specific language that can be passed via argument.
diff --git a/demos/whisper/README.md b/demos/whisper/README.md
index 9b12554e6e1..6e1b8011f79 100644
--- a/demos/whisper/README.md
+++ b/demos/whisper/README.md
@@ -9,7 +9,7 @@ Whisper model trained by OpenAI whisper https://github.com/openai/whisper
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
- You can choose one way from easy, meduim and hard to install paddlespeech.
+ You can choose one way from easy, medium and hard to install paddlespeech.
 
 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
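For context on the initParmas -> initParams rename that recurs through the test files above: FeatTest.setUp() calls this hook, and each subclass overrides it to set its feature parameters before the input waveform is prepared. The following is a minimal, self-contained sketch of that fixture pattern; the synthetic sine-wave input and the trivial assertion are illustrative assumptions, not code from the repository.

    import unittest

    import numpy as np


    class FeatTest(unittest.TestCase):
        def setUp(self):
            # Subclasses supply their feature parameters via the renamed hook.
            self.initParams()
            self.initWavInput()

        def initWavInput(self):
            # Illustrative stand-in for the real WAV download: 1 s of a
            # 440 Hz sine at 16 kHz, shaped (channels, samples).
            sr = 16000
            t = np.linspace(0, 1, sr, endpoint=False)
            self.waveform = np.sin(2 * np.pi * 440 * t)[np.newaxis, :]

        def initParams(self):
            raise NotImplementedError


    class TestStft(FeatTest):
        def initParams(self):
            self.n_fft = 512
            self.hop_length = 128
            self.window_str = 'hann'

        def test_params(self):
            # Hypothetical check, only to show the fixture wiring.
            self.assertEqual(self.waveform.shape[0], 1)


    if __name__ == '__main__':
        unittest.main()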