forked from albincorreya/ChromaCoverId
-
Notifications
You must be signed in to change notification settings - Fork 0
/
chroma_features.py
225 lines (190 loc) · 9.56 KB
/
chroma_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# -*- coding: utf-8 -*-
"""
Some chroma feature extraction functions for audio cover detection task experiments using various
audio processing libraries. The main aim of this wrapper is to facilitate easy prototyping and
experiments for research purposes.
[TODO] : add more features
------
Albin Andrew Correya
@2017
"""
from essentia import Pool, array
import essentia.standard as estd
import numpy as np
import librosa
class ChromaFeatures:
    """
    Class containing methods to compute various chroma features.

    Methods :
        chroma_stft : Computes chromagram using short-time fourier transform
        chroma_cqt : Computes chromagram from constant-q transform of the audio signal
        chroma_cens : Computes chromagram using the CENS method (librosa.feature.chroma_cens)
        chroma_hpcp : Computes Harmonic pitch class profiles aka HPCP (improved chromagram)
        beat_sync_chroma : Aggregates a chromagram between detected beat positions
        two_dim_fft_magnitudes : Computes 2D fourier transform magnitudes of a feature matrix

    Example use :
        chroma = ChromaFeatures("./data/test_audio.wav")
        #chroma cens with default parameters
        chroma.chroma_cens()
        #chroma stft with default parameters
        chroma.chroma_stft()
    """

    def __init__(self, audio_file, mono=True, sample_rate=44100, normalize_gain=False):
        """
        Load the audio file into `self.audio_vector`.

        Parameters
            audio_file : path of the audio file handed to the essentia loader
            mono : accepted for backward compatibility; this constructor does not read it
            sample_rate : sampling rate [Hz] requested from the loader, stored as `self.fs`
            normalize_gain : if True the file is loaded with essentia EasyLoader using
                replayGain=-9, otherwise with essentia MonoLoader
        """
        self.fs = sample_rate
        if normalize_gain:
            loader = estd.EasyLoader(filename=audio_file, sampleRate=self.fs, replayGain=-9)
        else:
            loader = estd.MonoLoader(filename=audio_file, sampleRate=self.fs)
        self.audio_vector = loader()
        print("== Audio vector of %s loaded with shape %s and sample rate %s ==" % (audio_file, self.audio_vector.shape, self.fs))

    def _frames_first(self, chroma, hop_size, display):
        """
        Swap the two axes of a librosa chromagram, optionally plot it, and return it.

        librosa documents its chroma features as having the chroma bins on the first
        axis; the public methods of this class return the swapped (frames-first) layout.
        display_chroma() swaps the axes again before plotting, so it must be given the
        swapped array rather than the raw librosa output.
        """
        chroma = np.swapaxes(chroma, 0, 1)
        if display:
            display_chroma(chroma, hop_size)
        return chroma

    def chroma_stft(self, frameSize=4096, hopSize=2048, display=False):
        """
        Computes the chromagram from the short-term fourier transform of the input audio signal

        Parameters
            frameSize : FFT size passed to librosa as `n_fft`
            hopSize : hop length passed to librosa as `hop_length`
            display : if True, plot the chromagram with display_chroma()

        Returns
            the librosa chromagram with its two axes swapped
        """
        chroma = librosa.feature.chroma_stft(y=self.audio_vector,
                                             sr=self.fs,
                                             tuning=0,
                                             norm=2,
                                             hop_length=hopSize,
                                             n_fft=frameSize)
        return self._frames_first(chroma, hopSize, display)

    def chroma_cqt(self, hopSize=2048, display=False):
        """
        Computes the chromagram feature from the constant-q transform of the input audio signal

        Parameters
            hopSize : hop length passed to librosa as `hop_length`
            display : if True, plot the chromagram with display_chroma()

        Returns
            the librosa chromagram with its two axes swapped
        """
        chroma = librosa.feature.chroma_cqt(y=self.audio_vector,
                                            sr=self.fs,
                                            hop_length=hopSize)
        return self._frames_first(chroma, hopSize, display)

    def chroma_cens(self, hopSize=2048, display=False):
        """
        Computes CENS chroma vectors for the input audio signal (numpy array)
        Refer https://librosa.github.io/librosa/generated/librosa.feature.chroma_cens.html for more parameters

        Parameters
            hopSize : hop length passed to librosa as `hop_length`
            display : if True, plot the chromagram with display_chroma()

        Returns
            the librosa CENS chromagram with its two axes swapped
        """
        chroma_cens = librosa.feature.chroma_cens(y=self.audio_vector,
                                                  sr=self.fs,
                                                  hop_length=hopSize)
        return self._frames_first(chroma_cens, hopSize, display)

    def chroma_hpcp(self,
                    frameSize=4096,
                    hopSize=2048,
                    windowType='blackmanharris62',
                    harmonicsPerPeak=8,
                    magnitudeThreshold=1e-05,
                    maxPeaks=1000,
                    whitening=True,
                    referenceFrequency=440,
                    minFrequency=40,
                    maxFrequency=5000,
                    nonLinear=False,
                    numBins=12,
                    display=False):
        """
        Compute Harmonic Pitch Class Profiles (HPCP) for the input audio files using essentia standard mode.

        Please refer to the following paper for detailed explanation of the algorithm.
        [1]. Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music Content Processing.
        For full list of parameters of essentia standard mode HPCP please refer to http://essentia.upf.edu/documentation/reference/std_HPCP.html

        Parameters
            frameSize : (default = 4096) frame size given to the essentia FrameGenerator
            hopSize : (default = 2048) hop size given to the essentia FrameGenerator
            windowType : (default = 'blackmanharris62') window type given to essentia Windowing
            harmonicsPerPeak : (default = 8) forwarded to HPCP as `harmonics`
            magnitudeThreshold : (default = 1e-05) accepted but NOT forwarded; SpectralPeaks is
                built with magnitudeThreshold=0.
                NOTE(review): confirm whether this argument was meant to reach SpectralPeaks.
            maxPeaks : (default = 1000) maximum number of peaks returned by SpectralPeaks
            whitening : (default = True) if True, peak magnitudes go through essentia
                SpectralWhitening before being given to HPCP
            referenceFrequency : (default = 440) reference frequency [Hz] forwarded to HPCP
            minFrequency : (default = 40) minimum frequency [Hz] for SpectralPeaks and HPCP
            maxFrequency : (default = 5000) maximum frequency [Hz] for SpectralPeaks,
                SpectralWhitening and HPCP
            nonLinear : (default = False) forwarded to HPCP as `nonLinear`
            numBins : (default = 12) forwarded to HPCP as `size`
            display : (default = False) if True, plot the result with display_chroma()

        Returns
            the value stored in the essentia Pool under 'tonal.hpcp' (one HPCP vector is
            added per frame)
        """
        audio = array(self.audio_vector)

        frameGenerator = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
        window = estd.Windowing(type=windowType)
        spectrum = estd.Spectrum()

        # Refer http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
        # NOTE(review): the threshold is hard-coded to 0 here; the `magnitudeThreshold`
        # argument is ignored. Left unchanged so existing outputs do not shift.
        spectralPeaks = estd.SpectralPeaks(magnitudeThreshold=0,
                                           maxFrequency=maxFrequency,
                                           minFrequency=minFrequency,
                                           maxPeaks=maxPeaks,
                                           orderBy="frequency",
                                           sampleRate=self.fs)

        # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
        spectralWhitening = estd.SpectralWhitening(maxFrequency=maxFrequency,
                                                   sampleRate=self.fs)

        # http://essentia.upf.edu/documentation/reference/std_HPCP.html
        hpcp = estd.HPCP(sampleRate=self.fs,
                         maxFrequency=maxFrequency,
                         minFrequency=minFrequency,
                         referenceFrequency=referenceFrequency,
                         nonLinear=nonLinear,
                         harmonics=harmonicsPerPeak,
                         size=numBins)

        pool = Pool()

        # compute hpcp for each frame and add the results to the pool
        for frame in frameGenerator:
            spectrum_mag = spectrum(window(frame))
            frequencies, magnitudes = spectralPeaks(spectrum_mag)
            if whitening:
                magnitudes = spectralWhitening(spectrum_mag, frequencies, magnitudes)
            pool.add('tonal.hpcp', hpcp(frequencies, magnitudes))

        hpcp_frames = pool['tonal.hpcp']

        if display:
            # display_chroma() swaps the axes itself, so the per-frame matrix is passed
            # as-is together with the hop size (previously np.swapaxes was called with a
            # single argument, which raises TypeError).
            display_chroma(hpcp_frames, hopSize)

        return hpcp_frames

    def beat_sync_chroma(self, chroma, display=False):
        """
        Computes the beat-sync chromagram
        [TODO] : add madmom beat tracker

        Parameters
            chroma : chroma matrix to aggregate between beat positions
            display : if True, plot the result with display_chroma()

        Returns
            the output of librosa.util.sync using np.median as the aggregate

        NOTE(review): librosa.util.sync is called without an `axis` argument and
        librosa.beat.beat_track without `hop_length`, so `chroma` has to match librosa's
        defaults for both - confirm against callers, since the chroma methods of this
        class swap the axes and default to hopSize=2048.
        """
        _, y_percussive = librosa.effects.hpss(self.audio_vector)
        _, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=self.fs)
        beat_chroma = librosa.util.sync(chroma, beat_frames, aggregate=np.median)
        if display:
            display_chroma(beat_chroma)
        return beat_chroma

    def two_dim_fft_magnitudes(self, feature_vector, display=False):
        """
        Computes 2d - fourier transform magnitude coefficients of the input feature vector (numpy array)
        Usually fed by Constant-q transform or chroma feature vectors for cover detection tasks.

        Parameters
            feature_vector : array passed to np.fft.fft2
            display : if True, draw the magnitudes with librosa's specshow

        Returns
            absolute value of the fft-shifted 2D fourier transform of `feature_vector`
        """
        # 2d fourier transform
        ndim_fft = np.fft.fft2(feature_vector)
        ndim_fft_mag = np.abs(np.fft.fftshift(ndim_fft))
        if display:
            # plotting libraries are imported lazily so the numeric path works without them
            import matplotlib.pyplot as plt
            from librosa.display import specshow
            plt.figure(figsize=(8, 6))
            plt.title('2D-Fourier transform magnitude coefficiants')
            specshow(ndim_fft_mag, cmap='jet')
        return ndim_fft_mag
def display_chroma(chroma, hop_size=1024, cmap="jet"):
    """
    Plot the input chroma matrix with matplotlib and librosa's specshow.

    Parameters
        chroma : 2D chroma matrix; its two axes are swapped before plotting, so it is
            presumably expected frames-first (one chroma vector per row) - confirm
            against callers
        hop_size : forwarded to specshow as `hop_length`
        cmap : matplotlib colormap name forwarded to specshow
    """
    from librosa.display import specshow
    import matplotlib.pyplot as plt

    plt.figure(figsize=(16, 8))
    plt.subplot(2, 1, 1)
    plt.title("Chroma")
    # swap the two axes of the input before handing it to specshow
    swapped = np.swapaxes(chroma, 1, 0)
    specshow(swapped,
             x_axis='time',
             y_axis='chroma',
             cmap=cmap,
             hop_length=hop_size)
    plt.show()