Fix typos #3981

Merged 1 commit on Jan 27, 2025
2 changes: 1 addition & 1 deletion audio/paddleaudio/utils/time.py
@@ -21,7 +21,7 @@


class Timer(object):
'''Calculate runing speed and estimated time of arrival(ETA)'''
'''Calculate running speed and estimated time of arrival(ETA)'''

def __init__(self, total_step: int):
self.total_step = total_step
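For readers unfamiliar with the class, here is a minimal standalone sketch of the running-speed/ETA idea the corrected docstring describes (an illustration only, not the paddleaudio implementation; the SimpleETATimer name and count() method are made up):

import time

# Standalone illustration of the ETA idea described in the Timer docstring above;
# this is not the paddleaudio Timer implementation.
class SimpleETATimer:
    def __init__(self, total_step: int):
        self.total_step = total_step
        self.start = time.time()
        self.step = 0

    def count(self) -> float:
        """Advance one step and return the estimated seconds remaining."""
        self.step += 1
        elapsed = time.time() - self.start
        speed = self.step / elapsed if elapsed > 0 else 0.0  # steps per second
        remaining = self.total_step - self.step
        return remaining / speed if speed > 0 else float("inf")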
2 changes: 1 addition & 1 deletion audio/tests/backends/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
urllib.request.urlretrieve(url, os.path.basename(url))
self.files.append(os.path.basename(url))

def initParmas(self):
def initParams(self):
raise NotImplementedError
2 changes: 1 addition & 1 deletion audio/tests/backends/soundfile/base.py
@@ -30,5 +30,5 @@ def initWavInput(self):
urllib.request.urlretrieve(url, os.path.basename(url))
self.files.append(os.path.basename(url))

def initParmas(self):
def initParams(self):
raise NotImplementedError
4 changes: 2 additions & 2 deletions audio/tests/backends/soundfile/save_test.py
@@ -103,7 +103,7 @@ def assert_non_wav(
encoding=encoding,
bits_per_sample=bits_per_sample, )

# on +Py3.8 call_args.kwargs is more descreptive
# on +Py3.8 call_args.kwargs is more descriptive
args = mocked_write.call_args[1]
assert args["file"] == filepath
assert args["samplerate"] == sample_rate
@@ -191,7 +191,7 @@ def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
"""`soundfile_backend.save` can save non-wav format.

Due to precision missmatch, and the lack of alternative way to decode the
Due to precision mismatch, and the lack of alternative way to decode the
resulting files without using soundfile, only meta data are validated.
"""
num_frames = sample_rate * 3
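As a side note on the corrected comment above: on Python 3.8+ call_args exposes .kwargs, which reads better than the positional index. A small self-contained illustration (not part of the test file):

from unittest import mock

m = mock.Mock()
m("file.flac", samplerate=16000)
# call_args[1] and call_args.kwargs (Python 3.8+) both hold the keyword arguments.
assert m.call_args[1] == {"samplerate": 16000}
assert m.call_args.kwargs == {"samplerate": 16000}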
2 changes: 1 addition & 1 deletion audio/tests/common_utils/data_utils.py
@@ -81,7 +81,7 @@ def convert_tensor_encoding(
#dtype = getattr(paddle, dtype)
#if dtype not in [paddle.float64, paddle.float32, paddle.int32, paddle.int16, paddle.uint8]:
#raise NotImplementedError(f"dtype {dtype} is not supported.")
## According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices,
## According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices,
## so we only fork on CPU, generate values and move the data to the given device
#with paddle.random.fork_rng([]):
#paddle.random.manual_seed(seed)
43 changes: 27 additions & 16 deletions audio/tests/common_utils/sox_utils.py
@@ -24,20 +24,21 @@ def get_bit_depth(dtype):


def gen_audio_file(
path,
sample_rate,
num_channels,
*,
encoding=None,
bit_depth=None,
compression=None,
attenuation=None,
duration=1,
comment_file=None,
):
path,
sample_rate,
num_channels,
*,
encoding=None,
bit_depth=None,
compression=None,
attenuation=None,
duration=1,
comment_file=None, ):
"""Generate synthetic audio file with `sox` command."""
if path.endswith(".wav"):
warnings.warn("Use get_wav_data and save_wav to generate wav file for accurate result.")
warnings.warn(
"Use get_wav_data and save_wav to generate wav file for accurate result."
)
command = [
"sox",
"-V3", # verbose
@@ -81,7 +82,12 @@ def gen_audio_file(
subprocess.run(command, check=True)


def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, compression=None):
def convert_audio_file(src_path,
dst_path,
*,
encoding=None,
bit_depth=None,
compression=None):
"""Convert audio file with `sox` command."""
command = ["sox", "-V3", "--no-dither", "-R", str(src_path)]
if encoding is not None:
@@ -95,17 +101,22 @@ def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, com
subprocess.run(command, check=True)


def _flattern(effects):
def _flatten(effects):
if not effects:
return effects
if isinstance(effects[0], str):
return effects
return [item for sublist in effects for item in sublist]


def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None):
def run_sox_effect(input_file,
output_file,
effect,
*,
output_sample_rate=None,
output_bitdepth=None):
"""Run sox effects"""
effect = _flattern(effect)
effect = _flatten(effect)
command = ["sox", "-V", "--no-dither", input_file]
if output_bitdepth:
command += ["--bits", str(output_bitdepth)]
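For orientation, a short usage sketch of the helpers touched in this file, assuming sox is installed and on PATH (the file paths and parameter values below are illustrative, not taken from the tests):

# Illustrative use of the helpers above; paths and values are made up,
# and `sox` must be available on PATH.
gen_audio_file(
    "/tmp/test.flac",        # non-wav target, so the sox path is exercised
    sample_rate=16000,
    num_channels=2,
    bit_depth=16,
    duration=1, )

convert_audio_file("/tmp/test.flac", "/tmp/test.ogg")

# run_sox_effect accepts a flat effect or a list of effects;
# _flatten normalizes the nested form before the sox command is built.
run_sox_effect(
    "/tmp/test.ogg",
    "/tmp/out.wav",
    [["rate", "8000"], ["channels", "1"]],
    output_bitdepth=16)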
4 changes: 2 additions & 2 deletions audio/tests/features/base.py
@@ -24,7 +24,7 @@

class FeatTest(unittest.TestCase):
def setUp(self):
self.initParmas()
self.initParams()
self.initWavInput()
self.setUpDevice()

@@ -44,5 +44,5 @@ def initWavInput(self, url=wav_url):
if dim == 1:
self.waveform = np.expand_dims(self.waveform, 0)

def initParmas(self):
def initParams(self):
raise NotImplementedError
2 changes: 1 addition & 1 deletion audio/tests/features/test_istft.py
@@ -23,7 +23,7 @@


class TestIstft(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.window_str = 'hann'
3 changes: 1 addition & 2 deletions audio/tests/features/test_kaldi.py
@@ -18,12 +18,11 @@
import paddleaudio
import torch
import torchaudio

from base import FeatTest


class TestKaldi(FeatTest):
def initParmas(self):
def initParams(self):
self.window_size = 1024
self.dtype = 'float32'

5 changes: 2 additions & 3 deletions audio/tests/features/test_librosa.py
@@ -17,13 +17,12 @@
import numpy as np
import paddle
import paddleaudio
from paddleaudio.functional.window import get_window

from base import FeatTest
from paddleaudio.functional.window import get_window


class TestLibrosa(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.n_mels = 40
2 changes: 1 addition & 1 deletion audio/tests/features/test_log_melspectrogram.py
@@ -22,7 +22,7 @@


class TestLogMelSpectrogram(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.n_mels = 40
2 changes: 1 addition & 1 deletion audio/tests/features/test_spectrogram.py
@@ -22,7 +22,7 @@


class TestSpectrogram(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128

4 changes: 2 additions & 2 deletions audio/tests/features/test_stft.py
@@ -22,15 +22,15 @@


class TestStft(FeatTest):
def initParmas(self):
def initParams(self):
self.n_fft = 512
self.hop_length = 128
self.window_str = 'hann'

def test_stft(self):
ps_stft = Stft(self.n_fft, self.hop_length)
ps_res = ps_stft(
self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frmaes)
self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frames)

x = paddle.to_tensor(self.waveform)
window = get_window(self.window_str, self.n_fft, dtype=x.dtype)
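For a quick sanity check of the shape noted in the corrected comment, a small sketch (values are illustrative; center-padded framing is assumed):

# Sketch of the spectrogram shape noted above, assuming center padding.
n_fft, hop_length, num_samples = 512, 128, 16000   # one second at 16 kHz
freq_bins = n_fft // 2 + 1                          # 257 one-sided frequency bins
n_frames = 1 + num_samples // hop_length            # 126 frames
print((freq_bins, n_frames))                        # (257, 126)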
2 changes: 1 addition & 1 deletion dataset/librispeech/librispeech.py
@@ -132,7 +132,7 @@ def create_manifest(data_dir, manifest_path):


def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""Download, unpack and create summmary manifest file.
"""Download, unpack and create summary manifest file.
"""
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download
2 changes: 1 addition & 1 deletion dataset/ted_en_zh/ted_en_zh.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""Prepare Ted-En-Zh speech translation dataset

Create manifest files from splited datased.
Create manifest files from splited dataset.
dev set: tst2010, test set: tst2015
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
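To make the manifest description above concrete, a hedged sketch of what one JSON line might look like (field names follow the thchs30 entry later in this diff and are assumptions for Ted-En-Zh, not the exact schema):

import json

# Illustrative manifest entry; one JSON object per line. Field names are
# assumptions, not the repository's exact schema for this dataset.
entry = {
    "feat": "/data/ted_en_zh/wav/ted_0001.wav",  # audio filepath
    "feat_shape": [12.3],                        # duration in seconds
    "text": "example transcript",
}
print(json.dumps(entry))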
4 changes: 2 additions & 2 deletions dataset/thchs30/thchs30.py
@@ -71,7 +71,7 @@ def read_trn(filepath):
with open(filepath, 'r') as f:
lines = f.read().strip().split('\n')
assert len(lines) == 3, lines
# charactor text, remove withespace
# character text, remove whitespace
texts.append(''.join(lines[0].split()))
texts.extend(lines[1:])
return texts
@@ -127,7 +127,7 @@ def create_manifest(data_dir, manifest_path_prefix):
'utt2spk': spk,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': word_text, # charactor
'text': word_text, # character
'syllable': syllable_text,
'phone': phone_text,
},
2 changes: 1 addition & 1 deletion dataset/timit/timit.py
@@ -123,7 +123,7 @@ def read_algin(filepath: str) -> str:
filepath (str): [description]

Returns:
str: token sepearte by <space>
str: token separate by <space>
"""
aligns = [] # (start, end, token)
with open(filepath, 'r') as f:
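The alignment file format itself is not shown in this hunk; a hedged sketch of the behavior the corrected docstring describes, assuming three whitespace-separated columns (start, end, token) per line:

# Illustrative sketch only; the (start, end, token) column layout is an
# assumption about the alignment file, not taken from this diff.
def read_align_sketch(filepath: str) -> str:
    tokens = []
    with open(filepath, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            _start, _end, token = line.strip().split()
            tokens.append(token)
    return ' '.join(tokens)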
2 changes: 1 addition & 1 deletion dataset/timit/timit_kaldi_standard_split.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""Prepare TIMIT dataset (Standard split from Kaldi)

Create manifest files from splited datased.
Create manifest files from splited dataset.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
2 changes: 1 addition & 1 deletion dataset/voxceleb/voxceleb1.py
@@ -167,7 +167,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,

# check the target zip file md5sum
if not check_md5sum(target_name, target_md5sum):
raise RuntimeError("{} MD5 checkssum failed".format(target_name))
raise RuntimeError("{} MD5 checksum failed".format(target_name))
else:
print("Check {} md5sum successfully".format(target_name))

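The check_md5sum helper referenced in this hunk is not shown in the diff; a minimal sketch of what such a check typically does (an assumption, not the repository's implementation):

import hashlib

# Sketch of an MD5 verification like the one referenced above;
# not the repository's check_md5sum.
def md5_matches(path: str, expected: str, chunk_size: int = 1 << 20) -> bool:
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest() == expected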
4 changes: 2 additions & 2 deletions dataset/voxceleb/voxceleb2.py
@@ -179,15 +179,15 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset):

# check the target zip file md5sum
if not check_md5sum(target_name, target_md5sum):
raise RuntimeError("{} MD5 checkssum failed".format(target_name))
raise RuntimeError("{} MD5 checksum failed".format(target_name))
else:
print("Check {} md5sum successfully".format(target_name))

if dataset == "test":
# we need make the test directory
unzip(target_name, os.path.join(target_dir, "test"))
else:
# upzip dev zip pacakge and will create the dev directory
# unzip dev zip package and will create the dev directory
unzip(target_name, target_dir)


2 changes: 1 addition & 1 deletion demos/audio_content_search/README.md
@@ -14,7 +14,7 @@ Now, the search word in demo is:
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from meduim and hard to install paddlespeech.
You can choose one way from medium and hard to install paddlespeech.

The dependency refers to the requirements.txt, and install the dependency as follows:

2 changes: 1 addition & 1 deletion demos/audio_searching/README.md
@@ -19,7 +19,7 @@ Note:this demo uses the [CN-Celeb](http://openslr.org/82/) dataset of at least
### 1. Prepare PaddleSpeech
Audio vector extraction requires PaddleSpeech training model, so please make sure that PaddleSpeech has been installed before running. Specific installation steps: See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare MySQL and Milvus services by docker-compose
The audio similarity search system requires Milvus, MySQL services. We can start these containers with one click through [docker-compose.yaml](./docker-compose.yaml), so please make sure you have [installed Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) before running. then
2 changes: 1 addition & 1 deletion demos/audio_tagging/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to tag an audio file with 527 [AudioSet](https://
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`).
2 changes: 1 addition & 1 deletion demos/automatic_video_subtitiles/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to automatic video subtitles from a video file. I
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
Get a video file with the speech of the specific language:
2 changes: 1 addition & 1 deletion demos/keyword_spotting/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize keyword from a specific audio file.
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/punctuation_restoration/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to restore punctuation from raw text. It can be d
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
The input of this demo should be a text of the specific language that can be passed via argument.
2 changes: 1 addition & 1 deletion demos/speaker_verification/README.md
@@ -11,7 +11,7 @@ This demo is an implementation to extract speaker embedding from a specific audi
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this cli demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_recognition/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text from a specific audio file. It
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_server/README.md
@@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc

It is recommended to use **paddlepaddle 2.4rc** or above.

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

**If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**

2 changes: 1 addition & 1 deletion demos/speech_ssl/README.md
@@ -10,7 +10,7 @@ This demo is an implementation to recognize text or produce the acoustic represe
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
2 changes: 1 addition & 1 deletion demos/speech_translation/README.md
@@ -9,7 +9,7 @@ This demo is an implementation to recognize text from a specific audio file and
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, meduim and hard to install paddlespeech.
You can choose one way from easy, medium and hard to install paddlespeech.


### 2. Prepare Input File