-
Notifications
You must be signed in to change notification settings - Fork 0
/
np_model_base.py
300 lines (242 loc) · 7.95 KB
/
np_model_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
import numpy as np
import random
from utils import randvec, randmatrix, progress_bar, d_tanh
# Module metadata: author credit and the course/version tag for this file.
__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2021"
class NNModelBase(object):
    """Base class for NumPy neural network classifiers trained with
    per-example stochastic gradient descent.

    This class supplies the training loop (`fit`), prediction helpers,
    parameter-initialization helpers, label encoding, and the
    sklearn-style `get_params`/`set_params` interface. Subclasses must
    implement `initialize_parameters`, `update_parameters`,
    `forward_propagation`, and `backward_propagation`.

    Parameters
    ----------
    hidden_dim : int
        Stored as `self.hidden_dim`; presumably the hidden-layer
        dimensionality used by subclasses.
    hidden_activation : callable
        Stored as `self.hidden_activation` for use by subclasses.
    d_hidden_activation : callable
        Stored as `self.d_hidden_activation`; presumably the
        derivative of `hidden_activation`.
    eta : float
        Stored as `self.eta`; presumably the learning rate applied in
        the subclass `update_parameters`.
    max_iter : int
        Maximum number of training epochs run by `fit`.
    tol : float
        `fit` stops early once the mean per-example error of an epoch
        is less than or equal to this value.
    display_progress : bool
        Whether `fit` reports progress via `progress_bar`.

    """

    def __init__(self,
                 hidden_dim=50,
                 hidden_activation=np.tanh,
                 d_hidden_activation=d_tanh,
                 eta=0.01,
                 max_iter=100,
                 tol=1e-6,
                 display_progress=True):
        self.hidden_dim = hidden_dim
        self.hidden_activation = hidden_activation
        self.d_hidden_activation = d_hidden_activation
        self.eta = eta
        self.max_iter = max_iter
        self.tol = tol
        self.display_progress = display_progress
        # Names of the hyperparameters reported by `get_params` and
        # `__repr__`.
        self.params = ['hidden_dim', 'eta', 'max_iter']

    def initialize_parameters(self):
        """Create the model's trainable parameters. Called by `fit`
        once, before training starts. Subclasses must override.
        """
        raise NotImplementedError

    def update_parameters(self, gradients):
        """Apply `gradients` (the return value of
        `backward_propagation`) to the model's parameters. Subclasses
        must override.
        """
        raise NotImplementedError

    def forward_propagation(self):
        """Forward pass for a single example. Subclasses must
        override.

        `fit` and `predict_one_proba` call the override with one
        positional argument (the example) and expect a pair
        `(hidden_states, predictions)` back.
        """
        raise NotImplementedError

    def backward_propagation(self):
        """Backward pass for a single example. Subclasses must
        override.

        `fit` calls the override as
        `backward_propagation(hidden_states, predictions, ex, labels)`
        and passes the return value to `update_parameters`.
        """
        raise NotImplementedError

    def fit(self, X, y):
        """Train the network.

        Parameters
        ----------
        X : list of lists
            Each element should be a list of elements in `self.vocab`.
        y : list
            One label per example in `X`. The labels are one-hot
            encoded internally via `prepare_output_data`.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If there are no training examples.

        """
        y = self.prepare_output_data(y)

        # Unified view for training:
        training_data = list(zip(X, y))
        if not training_data:
            # Fail early with a clear message rather than hitting a
            # division by zero when averaging the epoch error.
            raise ValueError("Cannot fit a model on empty training data.")
        n_examples = len(training_data)

        self.initialize_parameters()

        # SGD:
        for iteration in range(1, self.max_iter + 1):
            error = 0.0
            random.shuffle(training_data)
            for ex, labels in training_data:
                hidden_states, predictions = self.forward_propagation(ex)
                error += self.get_error(predictions, labels)
                # Back-prop:
                gradients = self.backward_propagation(
                    hidden_states, predictions, ex, labels)
                self.update_parameters(gradients)
            # Mean per-example error for this epoch:
            error /= n_examples
            if error <= self.tol:
                if self.display_progress:
                    progress_bar(
                        "Converged on iteration {} with error {}".format(
                            iteration, error))
                break
            if self.display_progress:
                progress_bar(
                    "Finished epoch {} of {}; error is {}".format(
                        iteration, self.max_iter, error))
        return self

    @staticmethod
    def get_error(predictions, labels):
        """Cross-entropy error: -log(prediction-for-correct-label).

        Parameters
        ----------
        predictions : np.array
            Predicted probabilities for each class.
        labels : np.array
            One-hot encoded vector.

        Returns
        -------
        float

        """
        return -np.log(predictions[np.argmax(labels)])

    @staticmethod
    def _define_embedding_matrix(vocab_size, embed_dim):
        """Random embedding matrix with entries drawn uniformly from
        [-1.0, 1.0).

        Parameters
        ----------
        vocab_size : int
        embed_dim : int

        Returns
        -------
        np.array, shape `(vocab_size, embed_dim)`

        """
        return np.random.uniform(
            low=-1.0, high=1.0, size=(vocab_size, embed_dim))

    def predict_one_proba(self, seq):
        """Softmax predictions for a single example.

        Parameters
        ----------
        seq : list
            Variable length sequence of elements in the vocabulary.

        Returns
        -------
        np.array

        """
        # Only the predictions are needed; hidden states are discarded.
        _, predictions = self.forward_propagation(seq)
        return predictions

    def predict_proba(self, X):
        """Softmax predictions for a list of examples.

        Parameters
        ----------
        X : list of lists
            List of examples.

        Returns
        -------
        list of np.array

        """
        return [self.predict_one_proba(seq) for seq in X]

    def predict(self, X):
        """Predictions for a list of examples.

        Parameters
        ----------
        X : list of lists
            List of examples.

        Returns
        -------
        list
            One predicted class label per example.

        """
        return [self.predict_one(ex) for ex in X]

    def predict_one(self, x):
        """Prediction for a single example.

        Parameters
        ----------
        x : list
            Variable length sequence of elements in the vocabulary.

        Returns
        -------
        object
            The member of `self.classes` with the highest predicted
            probability according to the model.

        """
        probs = self.predict_one_proba(x)
        return self.classes[np.argmax(probs)]

    def get_word_rep(self, w):
        """For getting the input representation of word `w` from
        `self.embedding`. Words missing from `self.vocab_lookup` are
        mapped to the entry for '$UNK'.

        Parameters
        ----------
        w : str

        Returns
        -------
        np.array, dimension `self.embed_dim`

        """
        if w in self.vocab_lookup:
            word_index = self.vocab_lookup[w]
        else:
            word_index = self.vocab_lookup['$UNK']
        return self.embedding[word_index]

    @staticmethod
    def weight_init(m, n):
        """Initialize the weights of an `m` by `n` matrix via
        `randmatrix`, with bounds `-sqrt(1/n)` and `sqrt(1/n)`.

        Note that this is not Xavier Glorot initialization, which
        would use the bound `sqrt(6/(m+n))`; the bound here is the
        same one used by `bias_init`.

        Parameters
        ----------
        m : int
            Row dimension
        n : int
            Column dimension

        Returns
        -------
        np.array, shape `(m, n)`

        """
        x = np.sqrt(1.0 / n)
        return randmatrix(m, n, lower=-x, upper=x)

    @staticmethod
    def bias_init(n):
        """Uses the current PyTorch default `nn.Linear`: a vector of
        dimension `n` created via `randvec` with bounds `-sqrt(1/n)`
        and `sqrt(1/n)`.

        Parameters
        ----------
        n : int

        Returns
        -------
        np.array, dimension `n`

        """
        x = np.sqrt(1.0 / n)
        return randvec(n, lower=-x, upper=x)

    def prepare_output_data(self, y):
        """Format `y` into a vector of one-hot encoded vectors. As a
        side effect, sets `self.classes` (the sorted distinct labels)
        and `self.output_dim` (the number of classes).

        Parameters
        ----------
        y : list

        Returns
        -------
        np.array with length the same as y and each row the
        length of the number of classes

        """
        self.classes = sorted(set(y))
        self.output_dim = len(self.classes)
        return self._onehot_encode(y)

    def _onehot_encode(self, y):
        """Maps each label in `y` to a one-hot encoding with 1.0 in
        the position of that label in `self.classes` and 0.0 for all
        other classes. Requires `self.classes` and `self.output_dim`
        to be set, as done by `prepare_output_data`.

        Parameters
        ----------
        y : list
            Labels, typically str, int, or bool values. Each must be
            a member of `self.classes`.

        Returns
        -------
        np.array, shape `(len(y), self.output_dim)`

        """
        classmap = dict(zip(self.classes, range(self.output_dim)))
        y_ = np.zeros((len(y), self.output_dim))
        for i, cls in enumerate(y):
            y_[i][classmap[cls]] = 1.0
        return y_

    def get_params(self, deep=True):
        """Gets the hyperparameters for the model, as given by the
        `self.params` attribute. This is called `get_params` for
        compatibility with sklearn.

        Parameters
        ----------
        deep : bool
            Accepted for compatibility with the sklearn signature;
            not used.

        Returns
        -------
        dict
            Map from attribute names to their values.

        """
        params = self.params.copy()
        # Obligatorily add `vocab` so that sklearn passes it in when
        # creating new model instances during cross-validation:
        if hasattr(self, 'vocab'):
            params += ['vocab']
        return {p: getattr(self, p) for p in params}

    def set_params(self, **params):
        """Sets each keyword argument as an attribute on the model.

        Returns
        -------
        self

        """
        for key, val in params.items():
            setattr(self, key, val)
        return self

    def __repr__(self):
        """Class name followed by the `self.params` hyperparameters
        and their current values, one per line.
        """
        param_str = ["{}={}".format(a, getattr(self, a)) for a in self.params]
        param_str = ",\n\t".join(param_str)
        return "{}(\n\t{})".format(self.__class__.__name__, param_str)