forked from optuna/optuna-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtensorflow_eager_simple.py
151 lines (115 loc) · 5.05 KB
/
tensorflow_eager_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
Optuna example that optimizes multi-layer perceptrons using TensorFlow (eager execution).

In this example, we optimize the validation accuracy of handwritten digit recognition using
TensorFlow and MNIST. We optimize the neural network architecture as well as the optimizer
configuration.
"""
import urllib
import urllib.request

import optuna
from packaging import version
import tensorflow as tf
from tensorflow.keras.datasets import mnist
# TODO(crcrpar): Remove the below three lines once everything is ok.
# Register a global custom opener to avoid HTTP Error 403: Forbidden when downloading MNIST.
# NOTE: this relies on the ``urllib.request`` submodule being imported explicitly at the top
# of the file; a bare ``import urllib`` does not guarantee the attribute exists.
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)

# Fail fast on TensorFlow 1.x: the rest of this example is written against the 2.x API.
if version.parse(tf.__version__) < version.parse("2.0.0"):
    raise RuntimeError("tensorflow>=2.0.0 is required for this example.")
# Size limit applied to the training split built by get_mnist().
N_TRAIN_EXAMPLES = 3000
# Size limit applied to the validation split built by get_mnist().
N_VALID_EXAMPLES = 1000
# Batch size used when batching both datasets.
BATCHSIZE = 128
# Width of the model's output layer (one logit per class).
CLASSES = 10
# Number of train-then-evaluate rounds run per trial in objective().
EPOCHS = 1
def create_model(trial):
    """Assemble an MLP classifier whose architecture is sampled from ``trial``.

    The number of hidden layers, the width of each hidden layer, and the L2 coefficient
    attached to every dense kernel are all suggested by the trial.
    """
    depth = trial.suggest_int("n_layers", 1, 3)
    l2_coeff = trial.suggest_float("weight_decay", 1e-10, 1e-3, log=True)

    # Input flattening first, then the sampled stack of ReLU hidden layers.
    stack = [tf.keras.layers.Flatten()]
    for layer_idx in range(depth):
        width = trial.suggest_int("n_units_l{}".format(layer_idx), 4, 128, log=True)
        hidden = tf.keras.layers.Dense(
            width,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.l2(l2_coeff),
        )
        stack.append(hidden)

    # Final layer emits one raw logit per class (no activation).
    head = tf.keras.layers.Dense(CLASSES, kernel_regularizer=tf.keras.regularizers.l2(l2_coeff))
    stack.append(head)

    return tf.keras.Sequential(stack)
def create_optimizer(trial):
    """Sample an optimizer type and its hyperparameters from ``trial`` and instantiate it.

    Args:
        trial: Optuna trial used to suggest the optimizer name and its settings.

    Returns:
        An instance of the selected class looked up on ``tf.optimizers``.
    """
    # We optimize the choice of optimizers as well as their parameters.
    kwargs = {}
    optimizer_options = ["RMSprop", "Adam", "SGD"]
    optimizer_selected = trial.suggest_categorical("optimizer", optimizer_options)
    if optimizer_selected == "RMSprop":
        kwargs["learning_rate"] = trial.suggest_float(
            "rmsprop_learning_rate", 1e-5, 1e-1, log=True
        )
        # The 0.85-0.99 range is the discounting factor of the squared-gradient moving
        # average, which TF2 names ``rho``. The ``decay`` keyword is the legacy
        # learning-rate decay and is rejected by newer Keras optimizers. The trial
        # parameter keeps its historical name so existing studies stay comparable.
        kwargs["rho"] = trial.suggest_float("rmsprop_decay", 0.85, 0.99)
        kwargs["momentum"] = trial.suggest_float("rmsprop_momentum", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "Adam":
        kwargs["learning_rate"] = trial.suggest_float("adam_learning_rate", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "SGD":
        kwargs["learning_rate"] = trial.suggest_float(
            "sgd_opt_learning_rate", 1e-5, 1e-1, log=True
        )
        kwargs["momentum"] = trial.suggest_float("sgd_opt_momentum", 1e-5, 1e-1, log=True)

    # The sampled name doubles as the attribute name of the optimizer class.
    return getattr(tf.optimizers, optimizer_selected)(**kwargs)
def learn(model, optimizer, dataset, mode="eval"):
    """Run one pass over ``dataset``, either measuring accuracy or updating ``model``.

    Args:
        model: Keras model called on each image batch to produce class logits.
        optimizer: Optimizer whose ``apply_gradients`` is used when not evaluating.
        dataset: Iterable yielding ``(images, labels)`` batches.
        mode: ``"eval"`` accumulates accuracy without touching the weights; any other
            value performs gradient updates. The model is called with ``training=True``
            only when ``mode == "train"``.

    Returns:
        The accumulated accuracy metric in eval mode, otherwise ``None``.
    """
    evaluating = mode == "eval"
    accuracy = tf.metrics.Accuracy("accuracy", dtype=tf.float32)

    for images, labels in dataset:
        if evaluating:
            # No loss or gradient tape is needed just to score predictions.
            logits = model(images, training=(mode == "train"))
            accuracy(tf.argmax(logits, axis=1, output_type=tf.int64), tf.cast(labels, tf.int64))
            continue

        with tf.GradientTape() as tape:
            logits = model(images, training=(mode == "train"))
            loss_value = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            )
            # A custom loop must add the layers' regularization penalties itself;
            # without this the kernel regularizers have no effect on training.
            loss_value += sum(model.losses)

        # Differentiate only with respect to trainable weights so that any
        # non-trainable variables never receive ``None`` gradients.
        trainable = model.trainable_variables
        grads = tape.gradient(loss_value, trainable)
        optimizer.apply_gradients(zip(grads, trainable))

    return accuracy if evaluating else None
def _make_dataset(images, labels, n_examples):
    """Build a shuffled, batched dataset from the first ``n_examples`` samples."""
    images = images[:n_examples].astype("float32") / 255
    labels = labels[:n_examples].astype("int32")
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    # The shuffle buffer covers the whole subset so every pass is fully shuffled.
    return dataset.shuffle(len(images)).batch(BATCHSIZE)


def get_mnist():
    """Load MNIST and return ``(train_ds, valid_ds)`` as batched ``tf.data`` datasets.

    Pixel values are scaled to ``[0, 1]`` float32 and labels are cast to int32. The
    training split is limited to ``N_TRAIN_EXAMPLES`` samples and the validation split to
    ``N_VALID_EXAMPLES`` samples. The limit is applied to the raw arrays before batching,
    because ``Dataset.take`` after ``batch`` would count batches rather than examples and
    leave the full dataset in use.

    Returns:
        A ``(train_ds, valid_ds)`` tuple of datasets yielding ``(images, labels)`` batches
        of at most ``BATCHSIZE`` samples.
    """
    (x_train, y_train), (x_valid, y_valid) = mnist.load_data()
    train_ds = _make_dataset(x_train, y_train, N_TRAIN_EXAMPLES)
    valid_ds = _make_dataset(x_valid, y_valid, N_VALID_EXAMPLES)
    return train_ds, valid_ds
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def objective(trial):
    """Train and score one sampled configuration; return its last validation accuracy."""
    # Data first, then the trial-dependent model and optimizer.
    training_data, validation_data = get_mnist()
    net = create_model(trial)
    opt = create_optimizer(trial)

    # Each round is one training pass followed by one evaluation pass, pinned to the CPU.
    with tf.device("/cpu:0"):
        for _epoch in range(EPOCHS):
            learn(net, opt, training_data, mode="train")
            val_accuracy = learn(net, opt, validation_data, mode="eval")

    # Only the metric from the final round is reported to Optuna.
    return val_accuracy.result()
if __name__ == "__main__":
    # Maximize validation accuracy over 100 trials.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)

    print("Number of finished trials: ", len(study.trials))

    # Summarize the best-scoring trial and the hyperparameters it used.
    print("Best trial:")
    best = study.best_trial
    print(" Value: ", best.value)
    print(" Params: ")
    for item in best.params.items():
        print(" {}: {}".format(*item))