-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathutils.py
122 lines (103 loc) · 3.67 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
import Levenshtein as Lev
IGNORE_ID = -1
def pad_list(xs, pad_value):
# From: espnet/src/nets/e2e_asr_th.py: pad_list()
n_batch = len(xs)
max_len = max(x.size(0) for x in xs)
pad = xs[0].new(n_batch, max_len, * xs[0].size()[1:]).fill_(pad_value)
for i in range(n_batch):
pad[i, :xs[i].size(0)] = xs[i]
return pad
def calculate_wer(s1, s2, word_sep=' '):
"""
Computes the Word Error Rate, defined as the edit distance between the
two provided sentences after tokenizing to words.
Arguments:
s1 (string): space-separated sentence
s2 (string): space-separated sentence
"""
# build mapping of words to integers
s1words = []
for l in s1.splitlines():
s1words.extend(l.split(word_sep))
s2words = []
for l in s2.splitlines():
s2words.extend(l.split(word_sep))
b = set(s1words + s2words)
word2char = dict(zip(b, range(len(b))))
# map the words to a char array (Levenshtein packages only accepts
# strings)
w1 = [chr(word2char[w]) for w in s1words]
w2 = [chr(word2char[w]) for w in s2words]
return Lev.distance(''.join(w1), ''.join(w2)), len(s1words), len(s2words)
def calculate_cer(s1, s2, word_sep=' '):
"""
Computes the Character Error Rate, defined as the edit distance.
Arguments:
s1 (string): space-separated sentence
s2 (string): space-separated sentence
"""
s1, s2 = s1.replace(word_sep, '\n'), s2.replace(word_sep, '\n')
s1, s2 = "".join(s1.splitlines()), "".join(s2.splitlines())
return Lev.distance(s1, s2), len(s1), len(s2)
def calculate_ler(s1, s2):
"""
Computes the Label Error Rate, defined as the edit distance.
Arguments:
s1 (string): sentence 1
s2 (string): sentence 2
"""
return Lev.distance(s1, s2), len(s1), len(s2)
def load_model(path):
package = torch.load(path, map_location=lambda storage, loc: storage)
if package['name'] == 'deepspeech':
from deepspeech.model import DeepSpeech
model = DeepSpeech(package['model_conf'], package['audio_conf'], package['labels'])
else:
raise NotImplementedError
model.load_state_dict(package['state_dict'])
return model, package
def save_model(model, path, optimizer=None, epoch=None, train_results=None,
val_results=None, avg_loss=None, meta=None):
package = {
'name' : model.name,
'version': model.version,
'model_conf': model.model_conf,
'audio_conf': model.audio_conf,
'labels': model.labels,
'state_dict': model.state_dict(),
}
if optimizer is not None:
package['optim_dict'] = optimizer.state_dict()
if avg_loss is not None:
package['avg_loss'] = avg_loss
if epoch is not None:
package['epoch'] = epoch
if train_results is not None:
package['train_results'] = train_results
package['val_results'] = val_results
if meta is not None:
package['meta'] = meta
torch.save(package, path)
return
class LabelDecoder(object):
def __init__(self, labels):
self.labels_map_inv = dict([(i, c) for (i, c) in enumerate(labels)])
def decode(self, tokens):
return "".join(list(filter(None, [self.labels_map_inv.get(t) for t in tokens])))
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.value = 0
self.reset()
def reset(self):
self.value = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, value, n=1):
self.value = value
self.sum += value * n
self.count += n
self.avg = self.sum / self.count