Skip to content

Commit

Permalink
Merge branch 'main' into t5_lm_adaptation
Browse files Browse the repository at this point in the history
  • Loading branch information
MaximumEntropy authored Feb 16, 2022
2 parents 69acc37 + b5012d0 commit 7d1626f
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 16 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"

# TODO: Try to remove once 21.07 container is the base container
# TODO: Update to newer numba 0.56.0RC1 for 22.02 container
# install pinned numba version
RUN conda install -c conda-forge numba=0.54.1
# RUN conda install -c conda-forge numba==0.54.1

# copy scripts/examples/tests into container for end user
WORKDIR /workspace/nemo
Expand Down
7 changes: 4 additions & 3 deletions nemo/collections/asr/parts/preprocessing/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ def inverse(self, magnitude, phase):

if self.window is not None:
window_sum = librosa.filters.window_sumsquare(
self.window,
magnitude.size(-1),
window=self.window,
n_frames=magnitude.size(-1),
hop_length=self.hop_length,
win_length=self.win_length,
n_fft=self.filter_length,
Expand Down Expand Up @@ -302,7 +302,8 @@ def __init__(
highfreq = highfreq or sample_rate / 2

filterbanks = torch.tensor(
librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq), dtype=torch.float
librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq),
dtype=torch.float,
).unsqueeze(0)
self.register_buffer("fb", filterbanks)

Expand Down
4 changes: 3 additions & 1 deletion nemo/collections/asr/parts/preprocessing/perturb.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def perturb(self, data):
return

new_sr = int(self._sr * speed_rate)
data._samples = librosa.core.resample(data._samples, self._sr, new_sr, res_type=self._res_type)
data._samples = librosa.core.resample(
data._samples, orig_sr=self._sr, target_sr=new_sr, res_type=self._res_type
)


class TimeStretchPerturbation(Perturbation):
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/asr/parts/preprocessing/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ def __init__(self, samples, sample_rate, target_sr=None, trim=False, trim_db=60,
"""
samples = self._convert_samples_to_float32(samples)
if target_sr is not None and target_sr != sample_rate:
samples = librosa.core.resample(samples, sample_rate, target_sr)
samples = librosa.core.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
sample_rate = target_sr
if trim:
samples, _ = librosa.effects.trim(samples, trim_db)
samples, _ = librosa.effects.trim(samples, top_db=trim_db)
self._samples = samples
self._sample_rate = sample_rate
if self._samples.ndim >= 2:
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/tts/data/datalayers.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc):
for line in list_file_pbar:
audio_file = line.split('|')[0]
speech = sf.read(audio_file)[0].astype(np.float32)
spec_clean = np.ascontiguousarray(librosa.stft(speech, **kwargs_stft))
spec_clean = np.ascontiguousarray(librosa.stft(y=speech, **kwargs_stft))
mag_clean = np.ascontiguousarray(np.abs(spec_clean)[..., np.newaxis])
signal_power = np.mean(np.abs(speech) ** 2)

Expand All @@ -472,7 +472,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc):
snr = librosa.db_to_power(snr_db)
noise_power = signal_power / snr
noisy = speech + np.sqrt(noise_power) * np.random.randn(len(speech))
spec_noisy = librosa.stft(noisy, **kwargs_stft)
spec_noisy = librosa.stft(y=noisy, **kwargs_stft)
spec_noisy = np.ascontiguousarray(spec_noisy)
T_x = spec_noisy.shape[1]
x = spec_noisy.view(dtype=np.float32).reshape((*spec_noisy.shape, 2))
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/tts/models/degli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def reconstruct_wave(*args: ndarray, kwargs_istft, n_sample=-1) -> ndarray:
if spec is None:
spec = mag * np.exp(1j * phase)

wave = librosa.istft(spec, **kwargs_istft, **kwarg_len)
wave = librosa.istft(stft_matrix=spec, **kwargs_istft, **kwarg_len)
return wave


Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/tts/torch/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def __init__(
self.hop_len = self.hop_length or self.n_fft // 4
self.fb = torch.tensor(
librosa.filters.mel(
self.sample_rate, self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq
sr=self.sample_rate, n_fft=self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq
),
dtype=torch.float,
).unsqueeze(0)
Expand Down
4 changes: 2 additions & 2 deletions scripts/dataset_processing/process_vad_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def write_manifest(

try:
x, _sr = librosa.load(file, sr=sr)
duration = librosa.get_duration(x, sr=sr)
duration = librosa.get_duration(y=x, sr=sr)

except Exception:
continue
Expand Down Expand Up @@ -312,7 +312,7 @@ def generate_variety_noise(data_dir, filename, prefix):
files = allfile.read().splitlines()

for file in files:
y, sr = librosa.load(file, sr=sampling_rate)
y, sr = librosa.load(path=file, sr=sampling_rate)

for i in range(
0, len(y) - sampling_rate, silence_stride * 100
Expand Down
2 changes: 1 addition & 1 deletion scripts/freesound_download_resample/freesound_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def download_song(basepath, id, name, download_url):
# Delete and then re-download
if os.path.exists(fp):
try:
_ = librosa.load(fp)
_ = librosa.load(path=fp)
except Exception:
# File is corrupted, delete and re-download.
os.remove(fp)
Expand Down
2 changes: 1 addition & 1 deletion scripts/freesound_download_resample/freesound_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def resample_file(resampled_dir, filepath, ext, sample_rate):

try:
# Check if the file is readable
librosa.load(filepath)
librosa.load(path=filepath)

# if it is, force input format and try again
transform.set_input_format(file_type=ext)
Expand Down

0 comments on commit 7d1626f

Please sign in to comment.