Skip to content

Commit

Permalink
Remove librosa (#112)
Browse files Browse the repository at this point in the history
* replaced amplitude_to_db

* replaced stft, istft

* remove from setup.py, env

* fixed a few bugs in the conversion to scipy

* remove padding from stft

---------

Co-authored-by: zorea <[email protected]>
  • Loading branch information
timsainb and nuniz authored Jul 8, 2024
1 parent 313a3ff commit f33f3db
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 90 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,8 @@ y : np.ndarray [shape=(# frames,) or (# channels, # frames)], real-valued
length of the windowed signal after padding with zeros.
The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``.
The default value, ``n_fft=2048`` samples, corresponds to a physical
duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the
default sample rate in librosa. This value is well adapted for music
signals. However, in speech processing, the recommended value is 512,
duration of 93 milliseconds at a sample rate of 22050 Hz.
This value is well adapted for music signals. However, in speech processing, the recommended value is 512,
corresponding to 23 milliseconds at a sample rate of 22050 Hz.
In any case, we recommend setting ``n_fft`` to a power of two for
optimizing the speed of the fast Fourier transform (FFT) algorithm. By default 1024.
Expand Down
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ dependencies:
- notebook>5.2
- scipy
- matplotlib
- librosa
- numpy
- tqdm
- pip:
Expand Down
5 changes: 2 additions & 3 deletions noisereduce/noisereduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,8 @@ def reduce_noise(
length of the windowed signal after padding with zeros.
The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``.
The default value, ``n_fft=2048`` samples, corresponds to a physical
duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the
default sample rate in librosa. This value is well adapted for music
signals. However, in speech processing, the recommended value is 512,
duration of 93 milliseconds at a sample rate of 22050 Hz.
This value is well adapted for music signals. However, in speech processing, the recommended value is 512,
corresponding to 23 milliseconds at a sample rate of 22050 Hz.
In any case, we recommend setting ``n_fft`` to a power of two for
optimizing the speed of the fast Fourier transform (FFT) algorithm. By default 1024.
Expand Down
62 changes: 31 additions & 31 deletions noisereduce/spectralgate/nonstationary.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,28 @@
from noisereduce.spectralgate.base import SpectralGate
import numpy as np
from librosa import stft, istft
from scipy.signal import filtfilt, fftconvolve
import tempfile
from scipy.signal import filtfilt, fftconvolve, stft, istft
from .utils import sigmoid


class SpectralGateNonStationary(SpectralGate):
def __init__(
self,
y,
sr,
chunk_size,
padding,
n_fft,
win_length,
hop_length,
time_constant_s,
freq_mask_smooth_hz,
time_mask_smooth_ms,
thresh_n_mult_nonstationary,
sigmoid_slope_nonstationary,
tmp_folder,
prop_decrease,
use_tqdm,
n_jobs,
self,
y,
sr,
chunk_size,
padding,
n_fft,
win_length,
hop_length,
time_constant_s,
freq_mask_smooth_hz,
time_mask_smooth_ms,
thresh_n_mult_nonstationary,
sigmoid_slope_nonstationary,
tmp_folder,
prop_decrease,
use_tqdm,
n_jobs,
):
self._thresh_n_mult_nonstationary = thresh_n_mult_nonstationary
self._sigmoid_slope_nonstationary = sigmoid_slope_nonstationary
Expand All @@ -50,11 +48,12 @@ def spectral_gating_nonstationary(self, chunk):
"""non-stationary version of spectral gating"""
denoised_channels = np.zeros(chunk.shape, chunk.dtype)
for ci, channel in enumerate(chunk):
sig_stft = stft(
(channel),
n_fft=self._n_fft,
hop_length=self._hop_length,
win_length=self._win_length,
_, _, sig_stft = stft(
channel,
nfft=self._n_fft,
noverlap=self._win_length - self._hop_length,
nperseg=self._win_length,
padded=False
)
# get abs of signal stft
abs_sig_stft = np.abs(sig_stft)
Expand All @@ -81,17 +80,18 @@ def spectral_gating_nonstationary(self, chunk):
sig_mask = fftconvolve(sig_mask, self._smoothing_filter, mode="same")

sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * (
1.0 - self._prop_decrease
1.0 - self._prop_decrease
)

# multiply signal with mask
sig_stft_denoised = sig_stft * sig_mask

# invert/recover the signal
denoised_signal = istft(
_, denoised_signal = istft(
sig_stft_denoised,
hop_length=self._hop_length,
win_length=self._win_length,
nfft=self._n_fft,
noverlap=self._win_length - self._hop_length,
nperseg=self._win_length
)
denoised_channels[ci, : len(denoised_signal)] = denoised_signal
return denoised_channels
Expand All @@ -104,12 +104,12 @@ def _do_filter(self, chunk):


def get_time_smoothed_representation(
spectral, samplerate, hop_length, time_constant_s=0.001
spectral, samplerate, hop_length, time_constant_s=0.001
):
t_frames = time_constant_s * samplerate / float(hop_length)
# By default, this solves the equation for b:
# b**2 + (1 - b) / t_frames - 2 = 0
# which approximates the full-width half-max of the
# squared frequency response of the IIR low-pass filter
b = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
b = (np.sqrt(1 + 4 * t_frames ** 2) - 1) / (2 * t_frames ** 2)
return filtfilt([b], [1, b - 1], spectral, axis=-1, padtype=None)
79 changes: 40 additions & 39 deletions noisereduce/spectralgate/stationary.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
from noisereduce.spectralgate.base import SpectralGate
import numpy as np
from librosa import stft, istft
from scipy.signal import fftconvolve
from scipy.signal import fftconvolve, stft, istft
from .utils import _amp_to_db


class SpectralGateStationary(SpectralGate):
def __init__(
self,
y,
sr,
y_noise,
n_std_thresh_stationary,
chunk_size,
clip_noise_stationary,
padding,
n_fft,
win_length,
hop_length,
time_constant_s,
freq_mask_smooth_hz,
time_mask_smooth_ms,
tmp_folder,
prop_decrease,
use_tqdm,
n_jobs,
self,
y,
sr,
y_noise,
n_std_thresh_stationary,
chunk_size,
clip_noise_stationary,
padding,
n_fft,
win_length,
hop_length,
time_constant_s,
freq_mask_smooth_hz,
time_mask_smooth_ms,
tmp_folder,
prop_decrease,
use_tqdm,
n_jobs,
):
super().__init__(
y=y,
Expand Down Expand Up @@ -65,35 +64,36 @@ def __init__(
self.y_noise = self.y_noise[:chunk_size]

# calculate statistics over y_noise
abs_noise_stft = np.abs(
stft(
(self.y_noise),
n_fft=self._n_fft,
hop_length=self._hop_length,
win_length=self._win_length,
)
_, _, noise_stft = stft(
self.y_noise,
nfft=self._n_fft,
noverlap=self._win_length - self._hop_length,
nperseg=self._win_length,
padded=False
)
noise_stft_db = _amp_to_db(abs_noise_stft)

noise_stft_db = _amp_to_db(noise_stft)
self.mean_freq_noise = np.mean(noise_stft_db, axis=1)
self.std_freq_noise = np.std(noise_stft_db, axis=1)

self.noise_thresh = (
self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary
self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary
)

def spectral_gating_stationary(self, chunk):
"""stationary version of spectral gating"""
denoised_channels = np.zeros(chunk.shape, chunk.dtype)
for ci, channel in enumerate(chunk):
sig_stft = stft(
(channel),
n_fft=self._n_fft,
hop_length=self._hop_length,
win_length=self._win_length,
_, _, sig_stft = stft(
channel,
nfft=self._n_fft,
noverlap=self._win_length - self._hop_length,
nperseg=self._win_length,
padded=False
)

# spectrogram of signal in dB
sig_stft_db = _amp_to_db(np.abs(sig_stft))
sig_stft_db = _amp_to_db(sig_stft)

# calculate the threshold for each frequency/time bin
db_thresh = np.repeat(
Expand All @@ -106,7 +106,7 @@ def spectral_gating_stationary(self, chunk):
sig_mask = sig_stft_db > db_thresh

sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * (
1.0 - self._prop_decrease
1.0 - self._prop_decrease
)

if self.smooth_mask:
Expand All @@ -117,10 +117,11 @@ def spectral_gating_stationary(self, chunk):
sig_stft_denoised = sig_stft * sig_mask

# invert/recover the signal
denoised_signal = istft(
_, denoised_signal = istft(
sig_stft_denoised,
hop_length=self._hop_length,
win_length=self._win_length,
nfft=self._n_fft,
noverlap=self._win_length - self._hop_length,
nperseg=self._win_length
)
denoised_channels[ci, : len(denoised_signal)] = denoised_signal
return denoised_channels
Expand Down
13 changes: 3 additions & 10 deletions noisereduce/spectralgate/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
from librosa.core import amplitude_to_db, db_to_amplitude


def sigmoid(x, shift, mult):
Expand All @@ -9,15 +8,9 @@ def sigmoid(x, shift, mult):
return 1 / (1 + np.exp(-(x + shift) * mult))


def _amp_to_db(x):
def _amp_to_db(x, top_db=80.0, eps=np.finfo(np.float64).eps):
"""
Convert the input tensor from amplitude to decibel scale.
"""
return amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)


def _db_to_amp(x, ):
"""
Convert the input tensor from decibel scale to amplitude.
"""
return db_to_amplitude(x, ref=1.0)
x_db = 20 * np.log10(np.abs(x) + eps)
return np.maximum(x_db, np.max(x_db, axis=-1, keepdims=True) - top_db)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
-e .

matplotlib
librosa
numpy
scipy
tqdm
torch
joblib


# for testing
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
setup(
name="noisereduce",
packages=find_packages(),
version="3.0.2",
version="3.0.3",
description="Noise reduction using Spectral Gating in Python",
author="Tim Sainburg",
license="MIT",
Expand All @@ -21,7 +21,7 @@
"Topic :: Education",
"Topic :: Scientific/Engineering",
],
install_requires=["scipy", "matplotlib", "librosa", "numpy", "tqdm"],
install_requires=["scipy", "matplotlib", "numpy", "tqdm", "joblib"],
extras_require={
"PyTorch": ["torch>=1.9.0"],
},
Expand Down

0 comments on commit f33f3db

Please sign in to comment.