forked from danstowell/autoencoder-specgram
-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.py
84 lines (72 loc) · 2.86 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# utility functions
import numpy as np
from numpy import float32
import os
import errno
from scikits.audiolab import Sndfile
from scikits.audiolab import Format
from matplotlib.mlab import specgram
from userconfig import *
########################################################
def standard_specgram(signal):
    """Return the spectrogram matrix of `signal`, made using the audio-layer config.

    The framing (frame length, stride) and the band of frequency bins kept
    (`specbinnum` rows starting at row `specbinlow`) all come from the
    user configuration. The result is a float32 array.
    """
    overlap = audioframe_len - audioframe_stride
    hamming_window = np.hamming(audioframe_len)
    spec_result = specgram(
        signal,
        NFFT=audioframe_len,
        noverlap=overlap,
        window=hamming_window,
    )
    # The first element of specgram's result is the spectrum matrix; its rows
    # are sliced here to keep only the configured band of bins.
    spectrum = spec_result[0]
    band = spectrum[specbinlow:specbinlow + specbinnum, :]
    return np.array(band, dtype=float32)
def load_soundfile(inwavpath, startpossecs, maxdursecs=None):
    """Load audio data, optionally limiting to a start position and duration.

    The file must be SINGLE-CHANNEL and its sample rate must equal
    ``srate * wavdownsample``. Audio is read in fixed-size frames and each
    frame is decimated by keeping every ``wavdownsample``-th sample.

    Parameters:
        inwavpath: path of the sound file to open.
        startpossecs: start position in seconds; a seek is performed only
            when this is greater than zero.
        maxdursecs: maximum duration in seconds to read. ``None`` is treated
            as 9999 seconds. Reading stops after the first frame that brings
            the number of samples read up to this limit, so the result can
            overshoot it by up to one frame.

    Returns:
        1-D float32 numpy array of the (downsampled) samples. A trailing
        frame that comes back shorter than expected is discarded.

    Raises:
        ValueError: if the frame length is not a multiple of
            ``wavdownsample``, the file is not mono, or the sample rate
            does not match.
    """
    # Samples requested from the file per read, before downsampling. Frames
    # do not overlap (the hop equals the frame length).
    framelen = 4096
    if (framelen % wavdownsample) != 0:
        raise ValueError("framelen needs to be a multiple of wavdownsample: %i, %i" % (
            framelen, wavdownsample))
    if maxdursecs is None:
        maxdursecs = 9999
    # Integer division: exact because of the multiple-of check above.
    chunklen = framelen // wavdownsample

    sf = Sndfile(inwavpath, "r")
    try:
        if sf.channels != 1:
            raise ValueError(
                "Sound file %s has multiple channels (%i) - mono required." % (inwavpath, sf.channels))
        timemax_spls = int(maxdursecs * sf.samplerate)
        # The file is expected at the pre-downsampling rate; report that same
        # rate in the error so the message matches the actual comparison.
        expected_rate = srate * wavdownsample
        if sf.samplerate != expected_rate:
            raise ValueError(
                "Sample rate mismatch: we expect %g, file has %g" % (expected_rate, sf.samplerate))
        if startpossecs > 0:
            # note (from the original author): IOError if beyond the end.
            # Cast to int so a fractional start time gives a whole frame index.
            sf.seek(int(startpossecs * sf.samplerate))

        chunks = []  # collected and joined once at the end (avoids quadratic copying)
        splsread = 0
        framesread = 0
        while True:
            try:
                chunk = sf.read_frames(framelen)[::wavdownsample]
            except RuntimeError:
                # Treated as "no more data" - presumably what the reader
                # raises when a full frame is not available; TODO confirm.
                break
            splsread += framelen
            framesread += 1
            if framesread % 25000 == 0:
                print("Read %i frames" % framesread)
            if len(chunk) != chunklen:
                print("Not read sufficient samples - returning")
                break
            chunks.append(np.array(chunk, dtype=np.float32))
            if splsread >= timemax_spls:
                break
    finally:
        # Always release the file handle, including on validation errors.
        sf.close()

    if not chunks:
        return np.array([], dtype=np.float32)
    return np.hstack(chunks)
def mkdir_p(path):
    """Create directory `path` (and any missing parents), like ``mkdir -p``.

    An already-existing directory at `path` is not an error; any other
    OSError from the creation attempt is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise