-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval_ptb_sgmgt.py
executable file
·292 lines (223 loc) · 10.5 KB
/
eval_ptb_sgmgt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
'''
Build an RNN Generative Model
Yizhe Zhang, Zhe Gan yz196,[email protected],
Feb., 16, 2016
Duke University, ECE
'''
import sys
import time
import logging
#import cPickle
import numpy as np
import theano
import theano.tensor as tensor
from collections import OrderedDict
from datasets import *
from model.language_model import init_params, init_tparams, build_model
from model.optimizers import SGLD, SGMGNHT_1, SGMGNHT_2
from model.utils import get_minibatches_idx
def zipp(params, tparams):
"""
When we reload the model. Needed for the GPU stuff.
"""
for kk, vv in params.iteritems():
tparams[kk].set_value(vv)
def unzip(zipped):
"""
When we pickle the model. Needed for the GPU stuff.
"""
new_params = OrderedDict()
for kk, vv in zipped.iteritems():
new_params[kk] = vv.get_value()
return new_params
def calu_negll(f_cost, data_x, data_y, iterator):
total_negll = 0.
for _, valid_index in iterator:
x = data_x[valid_index].T
y = data_y[valid_index].T
negll = f_cost(x, y) * x.shape[0] * x.shape[1]
total_negll += negll
return total_negll / data_x.shape[0] / data_x.shape[1]
def calu_pred_prob(f_pred_prob, data_x, data_y, iterator):
pred_prob = np.array([])
for _, valid_index in iterator:
x = data_x[valid_index].T
y = data_y[valid_index].T
pred_prob = np.concatenate((pred_prob, f_pred_prob(x, y)))
return pred_prob
""" Training the model. """
def train_model(train_x, train_y, valid_x, valid_y, test_x, test_y, a = '1', n_words=10000,
n_x=300, n_h=200, max_epochs=20, collect_epoch = 4, lrate=5e-4, anneal_lr_epoch = 1,
anneal_lr_factor = 1.0, dropout_val = 0.0, batch_size=32, valid_batch_size=64, dispFreq=10,
validFreq=400, saveFreq=1000, saveto = 'ptb_result_sgmgt.npz'):
""" n_words : vocabulary size
n_x : word embedding dimension
n_h : LSTM/GRU number of hidden units
n_z : latent embedding sapce for a sentence
patience : Number of epoch to wait before early stop if no progress
max_epochs : The maximum number of epoch to run
lrate : learning rate
optimizer : methods to do optimization
batch_size : batch size during training
valid_batch_size : The batch size used for validation/test set
use_dropout : whether use dropout or not
dispFreq : Display to stdout the training progress every N updates
validFreq : Compute the validation error after this number of update.
test_size : If >0, we keep only this number of test example.
"""
options = {}
options['n_words'] = n_words
options['n_x'] = n_x
options['n_h'] = n_h
options['max_epochs'] = max_epochs
options['lrate'] = lrate
options['batch_size'] = batch_size
options['valid_batch_size'] = valid_batch_size
options['dispFreq'] = dispFreq
options['validFreq'] = validFreq
options['saveFreq'] = saveFreq
options['a'] = a
logger.info('Model options {}'.format(options))
logger.info('Building model...')
params = init_params(options)
tparams = init_tparams(params)
use_noise, x, y, f_pred_prob, cost = build_model(tparams,options)
f_cost = theano.function([x, y], cost, name='f_cost')
lr_theano = tensor.scalar(name='lr')
ntrain_theano = tensor.scalar(name='ntrain')
it_theano = tensor.scalar(name='ntrain')
if options['a'] == '1':
f_grad_shared, f_update = SGMGNHT_1(tparams, cost, [x,y], ntrain_theano, lr_theano, it_theano)
elif options['a'] == '2':
f_grad_shared, f_update = SGMGNHT_2(tparams, cost, [x,y], ntrain_theano, lr_theano, it_theano)
#f_grad_shared, f_update = SGMGNHT(tparams, cost, [x, y], ntrain_theano, lr_theano, it_theano)
logger.info('Training model...')
#logger.info('a {} m {} c {} s_p{} s_xi{} g_xi{}'.format(a, m, c, sigma_p, sigma_xi, gamma_xi))
kf_valid = get_minibatches_idx(valid_x.shape[0], valid_batch_size)
kf_test = get_minibatches_idx(test_x.shape[0], valid_batch_size)
estop = False # early stop
history_negll = []
best_p = None
best_valid_negll, best_test_negll = 0., 0.
bad_counter = 0
uidx = 0 # the number of update done
start_time = time.time()
# statistics of data
train_num_words = train_x.shape[0] * train_x.shape[1]
valid_num_words = valid_x.shape[0] * valid_x.shape[1]
test_num_words = test_x.shape[0] * test_x.shape[1]
n_average = 0
valid_probs = np.zeros((valid_num_words,))
test_probs = np.zeros((test_num_words,))
try:
for eidx in xrange(max_epochs):
n_samples = 0
kf = get_minibatches_idx(train_x.shape[0], batch_size, shuffle=True)
if eidx >= anneal_lr_epoch:
#annealing learning rate
lrate = lrate/anneal_lr_factor
for _, train_index in kf:
uidx += 1
use_noise.set_value(dropout_val)
x = train_x[train_index].T
y = train_y[train_index].T
n_samples += x.shape[1]
cost = f_grad_shared(x, y)
[p,mom,xi] = f_update(lrate,train_num_words, uidx)
if np.isnan(cost) or np.isinf(cost):
print 'NaN'
logger.info('NaN detected')
return 1., 1.
if np.mod(uidx, dispFreq) == 0:
logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, np.exp(cost)))
print np.array(p[0]),np.array(mom[0]),np.array(xi[0]),np.exp(cost)
if np.mod(uidx, saveFreq) == 0:
logger.info('Saving ...')
if best_p is not None:
params = best_p
else:
params = unzip(tparams)
np.savez(saveto, history_negll=history_negll, **params)
logger.info('Done ...')
if np.mod(uidx, validFreq) == 0:
use_noise.set_value(0.)
if eidx < collect_epoch:
valid_negll = calu_negll(f_cost, valid_x, valid_y, kf_valid)
test_negll = calu_negll(f_cost, test_x, test_y, kf_test)
history_negll.append([valid_negll, test_negll])
else:
valid_probs_curr = calu_pred_prob(f_pred_prob, valid_x, valid_y, kf_valid)
test_probs_curr = calu_pred_prob(f_pred_prob, test_x, test_y, kf_test)
valid_probs = (n_average * valid_probs + valid_probs_curr)/(n_average+1)
test_probs = (n_average * test_probs + test_probs_curr)/(n_average+1)
n_average += 1
valid_negll = -np.log(valid_probs + 1e-6).sum() / valid_num_words
test_negll = -np.log(test_probs + 1e-6).sum() / test_num_words
history_negll.append([valid_negll, test_negll])
if (uidx == 0 or
valid_negll <= np.array(history_negll)[:,0].min()):
best_p = unzip(tparams)
best_valid_negll = valid_negll
best_test_negll = test_negll
bad_counter = 0
logger.info('Perp: Valid {} Test {}'.format(np.exp(valid_negll),
np.exp(test_negll)))
if (len(history_negll) > 10 and
valid_negll >= np.array(history_negll)[:-10,0].min()):
bad_counter += 1
if bad_counter > 10:
logger.info('Early Stop!')
estop = True
break
logger.info('Seen {} samples'.format(n_samples))
if estop:
break
except KeyboardInterrupt:
logger.info('Training interupted')
end_time = time.time()
if best_p is not None:
zipp(best_p, tparams)
else:
best_p = unzip(tparams)
# use_noise.set_value(0.)
# kf_train_sorted = get_minibatches_idx(len(train), batch_size)
# train_negll = calu_negll(f_cost, prepare_data, train, kf_train_sorted)
# valid_negll = calu_negll(f_cost, prepare_data, valid, kf_valid)
# test_negll = calu_negll(f_cost, prepare_data, test, kf_test)
logger.info('Perp: Valid {} Test {}'.format(np.exp(best_valid_negll), np.exp(best_test_negll)))
np.savez(saveto, history_negll=history_negll, **best_p)
logger.info('The code run for {} epochs, with {} sec/epochs'.format(eidx + 1,
(end_time - start_time) / (1. * (eidx + 1))))
return best_valid_negll, best_test_negll
if __name__ == '__main__':
# create logger with 'eval_ptb'
# https://docs.python.org/2/howto/logging-cookbook.html
a = sys.argv[1]
logger = logging.getLogger('eval_ptb_sgmgt')
logger.setLevel(logging.INFO)
fh = logging.FileHandler('eval_ptb_sgmgt.log')
fh.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
logger.addHandler(fh)
ptb_train_path = 'data/ptb.train.txt.gz'
ptb_valid_path = 'data/ptb.valid.txt.gz'
ptb_test_path = 'data/ptb.test.txt.gz'
# data params
time_steps = 20
batch_size = 32
# data
train_set = Text(ptb_train_path, batch_size, time_steps)
valid_set = Text(ptb_valid_path, batch_size, time_steps,
vocab_map=train_set.vocab_map, vocab_index=train_set.vocab_idx)
test_set = Text(ptb_test_path, batch_size, time_steps,
vocab_map=train_set.vocab_map, vocab_index=train_set.vocab_idx)
vocab_size = len(train_set.vocab_map)
train_x, train_y = train_set.X, train_set.y
valid_x, valid_y = valid_set.X, valid_set.y
test_x, test_y = test_set.X, test_set.y
print a
[va_negll, te_negll] = train_model(train_x, train_y, valid_x, valid_y, test_x, test_y, a = a, n_words=vocab_size)