""" Adaptep from Colin Raffel's git repo https://github.com/craffel/
Shared utility functions for downsampled hash sequence experiments.
"""
import os
import numpy as np
import lasagne
import deepdish
import traceback
import functools
import glob
import sys
import simple_spearmint
import neural_networks
def run_trial(data, nnet_params, hyperparameter_space, train_function):
"""Train a network given the task and hyperparameters and return the result.
Parameters
----------
    data : np.ndarray
        Dataset from which the training and validation sets are built with a
        random 70/30 split. The last column must be the target variable.
    nnet_params : dict
        Hyperparameter values that are not optimized but parametrize the
        neural network.
    hyperparameter_space : dict
        Dictionary of model hyperparameter values to evaluate in this trial.
train_function : callable
This function will be called with the constructed network, training
data, and hyperparameters to create a model.
Returns
-------
best_objective : float
Lowest objective value achieved.
best_epoch : dict
Statistics about the epoch during which the lowest objective value was
achieved.
best_params : dict
Parameters of the model for the best-objective epoch.
"""
    # Work on a copy so the caller's hyperparameter dict is not modified
hyperparameter_space = dict(hyperparameter_space)
print ',\n'.join(['\t{} : {}'.format(k, v)
for k, v in hyperparameter_space.items()])
# data is standardized during preprocessing step
# Get training set statistics for standardization
# input_mean = np.mean(np.concatenate(data[:, :-1], axis=1), axis=1)
# input_std = np.std(np.concatenate(data[:, :-1], axis=1), axis=1)
    # Randomly assign ~70% of the rows to training and the rest to validation
train_ids = np.random.binomial(1, .7, len(data)).astype(bool)
data = {'train': data[train_ids], 'validate': data[~train_ids]}
# Choose network structure based on network param
if hyperparameter_space['network'] == 'general_network':
build_network_layers = neural_networks.build_general_network
else:
raise ValueError('Unknown network {}'.format(
hyperparameter_space['network']))
layers = build_network_layers(
(nnet_params['batch_size'], data['train'].shape[1]-1), # last is target
nnet_params['n_layers'],
nnet_params['widths'],
nnet_params['non_linearities'],
drop_out=hyperparameter_space['dropout'])
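    # NOTE (assumption): nnet_params['update_func'] is expected to accept a
    # 'rho' keyword (as e.g. lasagne.updates.rmsprop and adadelta do), so the
    # sampled 'momentum' hyperparameter is forwarded to it as rho below.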
# Generate updates-creating function
updates_function = functools.partial(
nnet_params['update_func'],
learning_rate=hyperparameter_space['learning_rate'],
rho=hyperparameter_space['momentum'])
# Create a list of epochs
epochs = []
# Keep track of lowest objective found so far
best_objective = np.inf
try:
for epoch in train_function(data, layers, updates_function,
nnet_params['batch_size'],
nnet_params['epoch_size']):
# Stop training if a nan training cost is encountered
if not np.isfinite(epoch['train_cost']):
break
epochs.append(epoch)
if epoch['validate_objective'] < best_objective:
best_objective = epoch['validate_objective']
best_epoch = epoch
best_model = lasagne.layers.get_all_param_values(layers)
print "{}: {}, ".format(epoch['iteration'],
epoch['validate_objective']),
sys.stdout.flush()
    # If there was an error while training, report it and return NaN
except Exception:
print "ERROR: "
print traceback.format_exc()
return np.nan, {}, {}
print
# Check that all training costs were not NaN; return NaN if any were.
success = np.all([np.isfinite(e['train_cost']) for e in epochs])
if np.isinf(best_objective) or len(epochs) == 0 or not success:
print ' Failed to converge.'
print
return np.nan, {}, {}
else:
for k, v in best_epoch.items():
print "\t{:>35} | {}".format(k, v)
print
return best_objective, best_epoch, best_model
def parameter_search(data, nnet_params, hyperparameter_space, trial_directory,
model_directory, train_function, model_name='best_model',
n_models=10):
"""Run parameter optimization given some train function, writing out results
Parameters
----------
    data : np.ndarray
        Matrix where rows are observations and columns are feature values.
        The last column must be the target value. The data will be used to
        create randomized training and validation sets.
    nnet_params : dict
        Hyperparameter values that are not optimized but parametrize the
        neural network.
hyperparameter_space : dict
Hyperparameter space (in the format used by `simple_spearmint`) to
optimize over.
trial_directory : str
Directory where parameter optimization trial results will be written.
    model_directory : str
        Directory where the best-performing models will be written.
train_function : callable
This function will be called with the constructed network, training
data, and hyperparameters to create a model.
    model_name : str
        Base name to use when saving models to file.
    n_models : int
        Number of best models to keep on disk.
"""
# Create parameter trials directory if it doesn't exist
if not os.path.exists(trial_directory):
os.makedirs(trial_directory)
# Create model directory if it doesn't exist
if not os.path.exists(model_directory):
os.makedirs(model_directory)
# Create SimpleSpearmint suggester instance
ss = simple_spearmint.SimpleSpearmint(hyperparameter_space)
# Load in previous results for "warm start"
for trial_file in glob.glob(os.path.join(trial_directory, '*.h5')):
trial = deepdish.io.load(trial_file)
ss.update(trial['hyperparameters'], trial['best_objective'])
# Run parameter optimization forever
best_scores = np.empty((n_models,))
best_scores[:] = np.inf
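    # best_scores[i] holds the objective of the model currently saved as
    # '<model_name>_i.h5'; initializing to inf means the first n_models
    # successful trials will always be written out.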
while True:
# Get a new suggestion
suggestion = ss.suggest()
# Train a network with these hyperparameters
best_objective, best_epoch, best_model = run_trial(
data, nnet_params, suggestion, train_function)
# Update spearmint on the result
ss.update(suggestion, best_objective)
# Write out a result file
trial_filename = ','.join('{}={}'.format(k, v)
for k, v in suggestion.items()) + '.h5'
deepdish.io.save(os.path.join(trial_directory, trial_filename),
{'hyperparameters': suggestion,
'best_objective': best_objective,
'best_epoch': best_epoch})
        # Keep the n_models best models on disk: find the slot holding the
        # current worst score and overwrite it if this trial did better
idx_max = np.argmax(best_scores)
if (not np.isnan(best_objective) and (best_objective <
best_scores[idx_max])):
best_scores[idx_max] = best_objective
deepdish.io.save(os.path.join(model_directory,
"{}_{}.h5".format(model_name,
idx_max)),
best_model)
"""
# Also write out the entire model when the objective is the smallest
# We don't want to write all models; they are > 100MB each
if (not np.isnan(best_objective) and
best_objective == np.nanmin(ss.objective_values)):
deepdish.io.save(
os.path.join(model_directory, model_name+'.h5'), best_model)
"""