Remove librosa (#112)

* replaced amplitude_to_db
* replaced stft, istft
* remove from setup.py, env
* fixed a few bugs in the conversion to scipy
* remove padding from stft

---------

Co-authored-by: zorea <[email protected]>
timsainb · Jul 8, 2024 · f33f3db · f33f3db
1 parent 313a3ff
commit f33f3db
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 90 deletions.
diff --git a/README.md b/README.md
@@ -118,9 +118,8 @@ y : np.ndarray [shape=(# frames,) or (# channels, # frames)], real-valued
       length of the windowed signal after padding with zeros.
       The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``.
       The default value, ``n_fft=2048`` samples, corresponds to a physical
-      duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the
-      default sample rate in librosa. This value is well adapted for music
-      signals. However, in speech processing, the recommended value is 512,
+      duration of 93 milliseconds at a sample rate of 22050 Hz. 
+      This value is well adapted for music signals. However, in speech processing, the recommended value is 512,
       corresponding to 23 milliseconds at a sample rate of 22050 Hz.
       In any case, we recommend setting ``n_fft`` to a power of two for
       optimizing the speed of the fast Fourier transform (FFT) algorithm., by default 1024

diff --git a/environment.yml b/environment.yml
@@ -7,7 +7,6 @@ dependencies:
   - notebook>5.2
   - scipy
   - matplotlib
-  - librosa
   - numpy
   - tqdm
   - pip:

diff --git a/noisereduce/noisereduce.py b/noisereduce/noisereduce.py
@@ -77,9 +77,8 @@ def reduce_noise(
         length of the windowed signal after padding with zeros.
         The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``.
         The default value, ``n_fft=2048`` samples, corresponds to a physical
-        duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the
-        default sample rate in librosa. This value is well adapted for music
-        signals. However, in speech processing, the recommended value is 512,
+        duration of 93 milliseconds at a sample rate of 22050 Hz.
+        This value is well adapted for music signals. However, in speech processing, the recommended value is 512,
         corresponding to 23 milliseconds at a sample rate of 22050 Hz.
         In any case, we recommend setting ``n_fft`` to a power of two for
         optimizing the speed of the fast Fourier transform (FFT) algorithm., by default 1024

diff --git a/noisereduce/spectralgate/nonstationary.py b/noisereduce/spectralgate/nonstationary.py
@@ -1,30 +1,28 @@
 from noisereduce.spectralgate.base import SpectralGate
 import numpy as np
-from librosa import stft, istft
-from scipy.signal import filtfilt, fftconvolve
-import tempfile
+from scipy.signal import filtfilt, fftconvolve, stft, istft
 from .utils import sigmoid
 
 
 class SpectralGateNonStationary(SpectralGate):
     def __init__(
-        self,
-        y,
-        sr,
-        chunk_size,
-        padding,
-        n_fft,
-        win_length,
-        hop_length,
-        time_constant_s,
-        freq_mask_smooth_hz,
-        time_mask_smooth_ms,
-        thresh_n_mult_nonstationary,
-        sigmoid_slope_nonstationary,
-        tmp_folder,
-        prop_decrease,
-        use_tqdm,
-        n_jobs,
+            self,
+            y,
+            sr,
+            chunk_size,
+            padding,
+            n_fft,
+            win_length,
+            hop_length,
+            time_constant_s,
+            freq_mask_smooth_hz,
+            time_mask_smooth_ms,
+            thresh_n_mult_nonstationary,
+            sigmoid_slope_nonstationary,
+            tmp_folder,
+            prop_decrease,
+            use_tqdm,
+            n_jobs,
     ):
         self._thresh_n_mult_nonstationary = thresh_n_mult_nonstationary
         self._sigmoid_slope_nonstationary = sigmoid_slope_nonstationary
@@ -50,11 +48,12 @@ def spectral_gating_nonstationary(self, chunk):
         """non-stationary version of spectral gating"""
         denoised_channels = np.zeros(chunk.shape, chunk.dtype)
         for ci, channel in enumerate(chunk):
-            sig_stft = stft(
-                (channel),
-                n_fft=self._n_fft,
-                hop_length=self._hop_length,
-                win_length=self._win_length,
+            _, _, sig_stft = stft(
+                channel,
+                nfft=self._n_fft,
+                noverlap=self._win_length - self._hop_length,
+                nperseg=self._win_length,
+                padded=False
             )
             # get abs of signal stft
             abs_sig_stft = np.abs(sig_stft)
@@ -81,17 +80,18 @@ def spectral_gating_nonstationary(self, chunk):
                 sig_mask = fftconvolve(sig_mask, self._smoothing_filter, mode="same")
 
             sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * (
-                1.0 - self._prop_decrease
+                    1.0 - self._prop_decrease
             )
 
             # multiply signal with mask
             sig_stft_denoised = sig_stft * sig_mask
 
             # invert/recover the signal
-            denoised_signal = istft(
+            _, denoised_signal = istft(
                 sig_stft_denoised,
-                hop_length=self._hop_length,
-                win_length=self._win_length,
+                nfft=self._n_fft,
+                noverlap=self._win_length - self._hop_length,
+                nperseg=self._win_length
             )
             denoised_channels[ci, : len(denoised_signal)] = denoised_signal
         return denoised_channels
@@ -104,12 +104,12 @@ def _do_filter(self, chunk):
 
 
 def get_time_smoothed_representation(
-    spectral, samplerate, hop_length, time_constant_s=0.001
+        spectral, samplerate, hop_length, time_constant_s=0.001
 ):
     t_frames = time_constant_s * samplerate / float(hop_length)
     # By default, this solves the equation for b:
     #   b**2  + (1 - b) / t_frames  - 2 = 0
     # which approximates the full-width half-max of the
     # squared frequency response of the IIR low-pass filt
-    b = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
+    b = (np.sqrt(1 + 4 * t_frames ** 2) - 1) / (2 * t_frames ** 2)
     return filtfilt([b], [1, b - 1], spectral, axis=-1, padtype=None)
diff --git a/noisereduce/spectralgate/stationary.py b/noisereduce/spectralgate/stationary.py
@@ -1,30 +1,29 @@
 from noisereduce.spectralgate.base import SpectralGate
 import numpy as np
-from librosa import stft, istft
-from scipy.signal import fftconvolve
+from scipy.signal import fftconvolve, stft, istft
 from .utils import _amp_to_db
 
 
 class SpectralGateStationary(SpectralGate):
     def __init__(
-        self,
-        y,
-        sr,
-        y_noise,
-        n_std_thresh_stationary,
-        chunk_size,
-        clip_noise_stationary,
-        padding,
-        n_fft,
-        win_length,
-        hop_length,
-        time_constant_s,
-        freq_mask_smooth_hz,
-        time_mask_smooth_ms,
-        tmp_folder,
-        prop_decrease,
-        use_tqdm,
-        n_jobs,
+            self,
+            y,
+            sr,
+            y_noise,
+            n_std_thresh_stationary,
+            chunk_size,
+            clip_noise_stationary,
+            padding,
+            n_fft,
+            win_length,
+            hop_length,
+            time_constant_s,
+            freq_mask_smooth_hz,
+            time_mask_smooth_ms,
+            tmp_folder,
+            prop_decrease,
+            use_tqdm,
+            n_jobs,
     ):
         super().__init__(
             y=y,
@@ -65,35 +64,36 @@ def __init__(
             self.y_noise = self.y_noise[:chunk_size]
 
         # calculate statistics over y_noise
-        abs_noise_stft = np.abs(
-            stft(
-                (self.y_noise),
-                n_fft=self._n_fft,
-                hop_length=self._hop_length,
-                win_length=self._win_length,
-            )
+        _, _, noise_stft = stft(
+            self.y_noise,
+            nfft=self._n_fft,
+            noverlap=self._win_length - self._hop_length,
+            nperseg=self._win_length,
+            padded=False
         )
-        noise_stft_db = _amp_to_db(abs_noise_stft)
+
+        noise_stft_db = _amp_to_db(noise_stft)
         self.mean_freq_noise = np.mean(noise_stft_db, axis=1)
         self.std_freq_noise = np.std(noise_stft_db, axis=1)
 
         self.noise_thresh = (
-            self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary
+                self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary
         )
 
     def spectral_gating_stationary(self, chunk):
         """non-stationary version of spectral gating"""
         denoised_channels = np.zeros(chunk.shape, chunk.dtype)
         for ci, channel in enumerate(chunk):
-            sig_stft = stft(
-                (channel),
-                n_fft=self._n_fft,
-                hop_length=self._hop_length,
-                win_length=self._win_length,
+            _, _, sig_stft = stft(
+                channel,
+                nfft=self._n_fft,
+                noverlap=self._win_length - self._hop_length,
+                nperseg=self._win_length,
+                padded=False
             )
 
             # spectrogram of signal in dB
-            sig_stft_db = _amp_to_db(np.abs(sig_stft))
+            sig_stft_db = _amp_to_db(sig_stft)
 
             # calculate the threshold for each frequency/time bin
             db_thresh = np.repeat(
@@ -106,7 +106,7 @@ def spectral_gating_stationary(self, chunk):
             sig_mask = sig_stft_db > db_thresh
 
             sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * (
-                1.0 - self._prop_decrease
+                    1.0 - self._prop_decrease
             )
 
             if self.smooth_mask:
@@ -117,10 +117,11 @@ def spectral_gating_stationary(self, chunk):
             sig_stft_denoised = sig_stft * sig_mask
 
             # invert/recover the signal
-            denoised_signal = istft(
+            _, denoised_signal = istft(
                 sig_stft_denoised,
-                hop_length=self._hop_length,
-                win_length=self._win_length,
+                nfft=self._n_fft,
+                noverlap=self._win_length - self._hop_length,
+                nperseg=self._win_length
             )
             denoised_channels[ci, : len(denoised_signal)] = denoised_signal
         return denoised_channels

diff --git a/noisereduce/spectralgate/utils.py b/noisereduce/spectralgate/utils.py
@@ -1,5 +1,4 @@
 import numpy as np
-from librosa.core import amplitude_to_db, db_to_amplitude
 
 
 def sigmoid(x, shift, mult):
@@ -9,15 +8,9 @@ def sigmoid(x, shift, mult):
     return 1 / (1 + np.exp(-(x + shift) * mult))
 
 
-def _amp_to_db(x):
+def _amp_to_db(x, top_db=80.0, eps=np.finfo(np.float64).eps):
     """
     Convert the input tensor from amplitude to decibel scale.
     """
-    return amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)
-
-
-def _db_to_amp(x, ):
-    """
-    Convert the input tensor from decibel scale to amplitude.
-    """
-    return db_to_amplitude(x, ref=1.0)
+    x_db = 20 * np.log10(np.abs(x) + eps)
+    return np.maximum(x_db, np.max(x_db, axis=-1, keepdims=True) - top_db)
diff --git a/requirements.txt b/requirements.txt
@@ -2,11 +2,11 @@
 -e .
 
 matplotlib
-librosa
 numpy
 scipy
 tqdm
 torch
+joblib
 
 
 # for testing

diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 setup(
     name="noisereduce",
     packages=find_packages(),
-    version="3.0.2",
+    version="3.0.3",
     description="Noise reduction using Spectral Gating in Python",
     author="Tim Sainburg",
     license="MIT",
@@ -21,7 +21,7 @@
         "Topic :: Education",
         "Topic :: Scientific/Engineering",
     ],
-    install_requires=["scipy", "matplotlib", "librosa", "numpy", "tqdm"],
+    install_requires=["scipy", "matplotlib", "numpy", "tqdm", "joblib"],
     extras_require={
         "PyTorch": ["torch>=1.9.0"],
     },
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,7 +7,6 @@ dependencies: @@
       - notebook>5.2
       - scipy
       - matplotlib
-      - librosa
       - numpy
       - tqdm
       - pip:
@@ Expand Down @@