Training_network.py
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 16 18:33:20 2017
@author: Anthony
"""
import numpy as np
class Training_network(object):
    """ The trainer class performs SGD with momentum on a cost function """

    def __init__(self):
        self.step_cache = {}  # for storing velocities in momentum update

    def train(self, X, y, X_val, y_val,
              model, loss_function,
              reg=0.0,
              learning_rate=1e-2, momentum=0, learning_rate_decay=0.95,
              update='rmsprop', sample_batches=True,
              num_epochs=30, batch_size=100, acc_frequency=None,
              verbose=False):
"""
Optimize the parameters of a model to minimize a loss function. We use
training data X and y to compute the loss and gradients, and periodically
check the accuracy on the validation set.
Inputs:
- X: Array of training data; each X[i] is a training sample.
- y: Vector of training labels; y[i] gives the label for X[i].
- X_val: Array of validation data
- y_val: Vector of validation labels
- model: Dictionary that maps parameter names to parameter values. Each
parameter value is a numpy array.
- loss_function: A function that can be called in the following ways:
scores = loss_function(X, model, reg=reg)
loss, grads = loss_function(X, model, y, reg=reg)
- reg: Regularization strength. This will be passed to the loss function.
- learning_rate: Initial learning rate to use.
- momentum: Parameter to use for momentum updates.
- learning_rate_decay: The learning rate is multiplied by this after each
epoch.
- update: The update rule to use. One of 'sgd', 'momentum', or 'rmsprop'.
- sample_batches: If True, use a minibatch of data for each parameter update
(stochastic gradient descent); if False, use the entire training set for
each parameter update (gradient descent).
- num_epochs: The number of epochs to take over the training data.
- batch_size: The number of training samples to use at each iteration.
- acc_frequency: If set to an integer, we compute the training and
validation set error after every acc_frequency iterations.
- verbose: If True, print status after each epoch.
Returns a tuple of:
- best_model: The model that got the highest validation accuracy during
training.
- loss_history: List containing the value of the loss function at each
iteration.
- train_acc_history: List storing the training set accuracy at each epoch.
- val_acc_history: List storing the validation set accuracy at each epoch.
"""
        N = X.shape[0]
        if sample_batches:
            iterations_per_epoch = N // batch_size  # using SGD
        else:
            iterations_per_epoch = 1  # using GD
        num_iters = num_epochs * iterations_per_epoch
        epoch = 0
        best_val_acc = 0.0
        best_model = {}
        loss_history = []
        train_acc_history = []
        val_acc_history = []
        for it in range(num_iters):
            if it % 500 == 0:
                print('starting iteration ', it)

            # get batch of data
            if sample_batches:
                batch_mask = np.random.choice(N, batch_size)
                X_batch = X[batch_mask]
                y_batch = y[batch_mask]
            else:
                # no SGD used, full gradient descent
                X_batch = X
                y_batch = y

            # evaluate cost and gradient
            cost, grads = loss_function(X_batch, model, y_batch, reg)
            loss_history.append(cost)

            # perform a parameter update
            for p in model:
                # compute the parameter step
                if update == 'sgd':
                    dx = -learning_rate * grads[p]
                elif update == 'momentum':
                    if p not in self.step_cache:
                        self.step_cache[p] = np.zeros(grads[p].shape)
                    # momentum update: accumulate a velocity in step_cache[p]
                    # (scaled by the momentum strength) and step along it
                    self.step_cache[p] = momentum * self.step_cache[p] - learning_rate * grads[p]
                    dx = self.step_cache[p]
                elif update == 'rmsprop':
                    decay_rate = 0.99  # you could also make this an option
                    if p not in self.step_cache:
                        self.step_cache[p] = np.zeros(grads[p].shape)
                    # RMSProp update: keep a moving average of the squared gradients
                    # in step_cache[p] and scale the step by its root (smoothing 1e-8)
                    self.step_cache[p] = decay_rate * self.step_cache[p] + (1 - decay_rate) * (grads[p] ** 2)
                    dx = -learning_rate * grads[p] / np.sqrt(self.step_cache[p] + 1e-8)
                else:
                    raise ValueError('Unrecognized update type "%s"' % update)
                # update the parameters
                model[p] += dx

            # every epoch perform an evaluation on the validation set
            first_it = (it == 0)
            epoch_end = (it + 1) % iterations_per_epoch == 0
            acc_check = (acc_frequency is not None and it % acc_frequency == 0)
            if first_it or epoch_end or acc_check:
                if it > 0 and epoch_end:
                    # decay the learning rate
                    learning_rate *= learning_rate_decay
                    epoch += 1

                # evaluate train accuracy on a subsample to keep this cheap
                if N > 1000:
                    train_mask = np.random.choice(N, 1000)
                    X_train_subset = X[train_mask]
                    y_train_subset = y[train_mask]
                else:
                    X_train_subset = X
                    y_train_subset = y
                scores_train = loss_function(X_train_subset, model)
                y_pred_train = np.argmax(scores_train, axis=1)
                train_acc = np.mean(y_pred_train == y_train_subset)
                train_acc_history.append(train_acc)

                # evaluate val accuracy
                scores_val = loss_function(X_val, model)
                y_pred_val = np.argmax(scores_val, axis=1)
                val_acc = np.mean(y_pred_val == y_val)
                val_acc_history.append(val_acc)

                # keep track of the best model based on validation accuracy
                if val_acc > best_val_acc:
                    # make a copy of the model
                    best_val_acc = val_acc
                    best_model = {}
                    for p in model:
                        best_model[p] = model[p].copy()

                # print progress if needed
                if verbose:
                    print('Finished epoch %d / %d: cost %f, train: %f, val: %f, lr %e'
                          % (epoch, num_epochs, cost, train_acc, val_acc, learning_rate))

        if verbose:
            print('finished optimization. best validation accuracy: %f' % best_val_acc)
        # return the best model and the training history statistics
        return best_model, loss_history, train_acc_history, val_acc_history
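# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original trainer):
# `softmax_loss` below is a hypothetical toy linear softmax classifier written
# purely to show the two-call interface that train() expects -- scores only
# when y is omitted, (loss, grads) when y is given. Any real model and loss
# function from this repo would be passed in its place, and the synthetic data
# exists only so the example runs end to end.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    def softmax_loss(X, model, y=None, reg=0.0):
        """Toy linear softmax classifier matching the trainer's interface."""
        W = model['W']                       # (D, C) weight matrix
        scores = X.dot(W)                    # (N, C) class scores
        if y is None:
            return scores                    # scoring mode
        # numerically stable softmax cross-entropy loss with L2 regularization
        shifted = scores - scores.max(axis=1, keepdims=True)
        probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
        N = X.shape[0]
        loss = -np.log(probs[np.arange(N), y]).mean() + 0.5 * reg * np.sum(W * W)
        dscores = probs.copy()
        dscores[np.arange(N), y] -= 1
        grads = {'W': X.T.dot(dscores) / N + reg * W}
        return loss, grads

    # small synthetic problem: 500 training / 100 validation samples,
    # 20 features, 3 classes
    rng = np.random.RandomState(0)
    X_train = rng.randn(500, 20)
    y_train = rng.randint(0, 3, size=500)
    X_val = rng.randn(100, 20)
    y_val = rng.randint(0, 3, size=100)
    model = {'W': 0.01 * rng.randn(20, 3)}

    trainer = Training_network()
    best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train, y_train, X_val, y_val, model, softmax_loss,
        reg=1e-4, learning_rate=1e-2, update='rmsprop',
        num_epochs=5, batch_size=50, verbose=True)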