# HW5.py
# coding: utf-8
# # Deep Learning for Computer Vision: Assignment 5
# ## Computer Science: COMS W 4995 004
# ## Due: April 6, 2017
# ### Problem: Telling Cats from Dogs using VGG16
# This assignment is based on the blog post
# "Building powerful image classification models using very little data"
# from blog.keras.io. Here you will build a classifier that can distinguish between pictures of dogs and cats. You will use a ConvNet (VGG16) that was pre-trained on ImageNet. Your task will be to re-architect the network to solve your problem. To do this you will:
# 0. Make a training dataset, using images from the link below, with 10,000 images of cats and 10,000 images of dogs. Use 1,000 images of each category for your validation set. The data should be organized into folders named ./data/train/cats/ + ./data/train/dogs/ + ./data/validation/cats/ + ./data/validation/dogs/. (No need to worry about a test set for this assignment.)
# 1. take VGG16 network architecture
# 2. load in the pre-trained weights from the link below for all layers except the last layers
# 3. add a fully connected layer followed by a final sigmoid layer to replace the 1000 category softmax layer that was used when the network was trained on ImageNet
# 4. freeze all layers except the last two that you added
# 5. fine-tune the network on your cats vs. dogs image data
# 6. evaluate the accuracy
# 7. unfreeze all layers
# 8. continue fine-tuning the network on your cats vs. dogs image data
# 9. evaluate the accuracy
# 10. comment your code and make sure to include accuracy, a few sample mistakes, and anything else you would like to add
#
# Downloads:
# 1. You can get your image data from:
# https://www.kaggle.com/c/dogs-vs-cats/data.
# 2. You can get your VGG16 pre-trained network weights from
# https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3
#
# (Note this assignment deviates from blog.keras.io in that it uses more data AND performs the fine-tuning in two steps: first freezing the lower layers and then un-freezing them for a final run of fine-tuning. The resulting ConvNet gets more than 97% accuracy in telling pictures of cats and dogs apart.)
#
# A bunch of code and the network definition have been included to get you started. This is not meant to be a difficult assignment, as you have your final projects to work on! Good luck and have fun!
# Here we import necessary libraries.
# In[1]:
import os
import h5py,pdb
import matplotlib.pyplot as plt
import time, pickle, pandas
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend
from keras import optimizers
# Two-way classification problem; class_name maps a label index to a
# human-readable name.
# NOTE(review): presumably these indices match the label order produced by
# the directory generators (cats -> 0, dogs -> 1) -- confirm.
nb_classes = 2
class_name = dict(enumerate(('cat', 'dog')))

# Image size used both for the network input shape and as the generators'
# target size.
img_width = 150
img_height = 150

# Root folders of the training and validation images.
train_data_dir = './data/train'
validation_data_dir = './data/validation'

# Number of samples consumed per training epoch / per validation pass.
nb_train_samples = 20000
nb_validation_samples = 2000
def build_vgg16(framework='tf'):
    """Build the convolutional part of VGG16 as an uncompiled Sequential model.

    The network consists of five stages of zero-padded 3x3 ReLU convolutions
    (2, 2, 3, 3 and 3 convolutions with 64, 128, 256, 512 and 512 filters),
    each stage followed by a 2x2 max-pooling layer. No fully-connected
    layers are added here.

    Side effect: switches the global Keras image dimension ordering to the
    requested framework before any layer is created.

    Args:
        framework: 'th' selects Theano ordering with a channels-first input
            of shape (3, img_width, img_height). Any other value is treated
            as TensorFlow ordering with input (img_width, img_height, 3).

    Returns:
        The Sequential model holding the padding/convolution/pooling stack.
    """
    theano_ordering = framework == 'th'
    backend.set_image_dim_ordering('th' if theano_ordering else 'tf')

    if theano_ordering:
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # (number of filters, number of convolutions) for each of the 5 stages.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    # Only the very first padding layer carries the input shape.
    padding_kwargs = {'input_shape': input_shape}
    for stage_no, (nb_filters, nb_convs) in enumerate(stages, 1):
        for conv_no in range(1, nb_convs + 1):
            model.add(ZeroPadding2D((1, 1), **padding_kwargs))
            padding_kwargs = {}
            layer_name = 'conv{}_{}'.format(stage_no, conv_no)
            model.add(Convolution2D(nb_filters, 3, 3, activation='relu',
                                    name=layer_name))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    return model
# Load the pre-trained VGG16 weights into a Theano-ordered copy of the
# convolutional base.
weights_path = 'vgg16_weights.h5'

# Fail fast with a real exception: an `assert` is stripped under `python -O`,
# and there is no point building the network if the weights are missing.
if not os.path.exists(weights_path):
    raise IOError('Model weights not found (see "weights_path" variable in script).')

th_model = build_vgg16('th')

# Open read-only, and let the context manager close the file even if a
# layer fails to load.
with h5py.File(weights_path, 'r') as f:
    # The savefile also contains the final fully-connected layers; only the
    # first len(th_model.layers) entries are loaded into this model.
    nb_loadable = min(f.attrs['nb_layers'], len(th_model.layers))
    for k in range(nb_loadable):
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        th_model.layers[k].set_weights(weights)
print('Model loaded.')
# Build the same architecture in TensorFlow ordering and copy the
# pre-trained convolution weights over from the Theano-ordered model.
tf_model = build_vgg16('tf')

for th_layer, tf_layer in zip(th_model.layers, tf_model.layers):
    # Only convolution layers are transferred. The previous fallback branch
    # merely wrote each remaining layer's own weights back to itself, which
    # changed nothing, so it is dropped.
    if not isinstance(th_layer, Convolution2D):
        continue
    kernel, bias = th_layer.get_weights()
    # Reorder the kernel axes (0, 1, 2, 3) -> (2, 3, 1, 0); presumably this
    # maps Theano's (out, in, rows, cols) layout to TensorFlow's
    # (rows, cols, in, out).
    # NOTE(review): Theano kernels may additionally need a spatial flip
    # (see keras.utils.np_utils.convert_kernel) -- confirm for these weights.
    kernel = np.transpose(kernel, (2, 3, 1, 0))
    tf_layer.set_weights([kernel, bias])
# Next we make the last layer or layers. We flatten the output from the last convolutional layer, and add a fully connected layer with 256 hidden units. Finally, we add the output layer, which has a scalar output as we have a binary classifier.
# In[11]:
# Remember how many layers belong to the convolutional base so they can be
# frozen / unfrozen as a group later on.
num_layers_before_top = len(tf_model.layers)

# Build a classifier model to put on top of the convolutional model:
# flatten -> 256-unit ReLU layer -> dropout -> single sigmoid output.
top_model = Sequential()
top_model.add(Flatten(input_shape=tf_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

# The leftover debug `print Flatten(...)` statement was removed: it was a
# Python 2-only print statement that displayed a throwaway layer object.
# summary() prints the table itself and returns None, so it is called
# directly instead of being wrapped in print().
tf_model.summary()
top_model.summary()
# We add this model to the top of our VGG16 network, freeze all the weights except the top, and compile.
# In[12]:
# Stack the classifier head onto the convolutional base.
tf_model.add(top_model)

# Freeze every layer of the base so that only the new head is trained.
for base_layer in tf_model.layers[:num_layers_before_top]:
    base_layer.trainable = False

# Binary classification: sigmoid output with binary cross-entropy, trained
# by SGD with a small learning rate and momentum.
sgd = optimizers.SGD(lr=1e-4, momentum=0.9)
tf_model.compile(optimizer=sgd,
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
# In[ ]:
# Pixel values are rescaled by 1/255 for both training and validation.
pixel_scale = 1. / 255

# Training images are additionally augmented with random shear, zoom and
# horizontal flips; validation images are only rescaled.
train_datagen = ImageDataGenerator(rescale=pixel_scale,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Both generators read images from class sub-folders, resize them to the
# network input size and yield batches of 32 with binary labels.
flow_options = dict(target_size=(img_width, img_height),
                    batch_size=32,
                    class_mode='binary')
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        **flow_options)
# Now we train for 5 epochs to get the weights for the top close to where we need them. Essentially, we want the network to be doing the right thing before we unfreeze the lower weights.
# In[ ]:
# First training run: 5 epochs over the training generator, validating on
# the validation generator after each epoch.
nb_epoch = 5
# NOTE(review): batch_size is not passed to fit_generator below; the
# generators were created with their own batch size.
batch_size = 16
fit_options = dict(
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
)
hist_little_convet = tf_model.fit_generator(train_generator, **fit_options)
# Running this, we see that it gets 91% accuracy on the validation set, so we have almost halved the errors from before.
# In[57]:
# In[58]:
# Now we can unfreeze the lower layers.
# In[59]:
# Make the convolutional base trainable again for end-to-end fine-tuning.
for layer in tf_model.layers[:num_layers_before_top]:
    layer.trainable = True

# Changes to `trainable` only take effect once the model is compiled again;
# without this the already-built training function would keep updating the
# classifier head only. The settings are the same as in the initial compile.
tf_model.compile(loss='binary_crossentropy',
                 optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
                 metrics=['accuracy'])
# In[ ]:
# In[61]:
# We will let this train for 10 epochs.
# In[ ]:
# Second training run: 10 more epochs with the same generators.
nb_epoch = 10
# NOTE(review): batch_size is not passed to fit_generator below; the
# generators were created with their own batch size.
batch_size = 16
fit_options = dict(
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
)
# The history of the previous run is overwritten by this assignment.
hist_little_convet = tf_model.fit_generator(train_generator, **fit_options)
# In[12]:
# We get to 96% accuracy! But it looks like we stopped it a bit early...
# In[14]:
# In[ ]:
# In[ ]:
# In[26]:
# We let it go one last time and see that it pushes up just a bit higher to 97%. Also note that it looks like it is beginning to overfit as the training loss is coming way down and the training accuracy is going well beyond the validation accuracy.
# In[ ]:
# Final training run: a single additional epoch with the same generators.
nb_epoch = 1
# NOTE(review): batch_size is not passed to fit_generator below; the
# generators were created with their own batch size.
batch_size = 16
fit_options = dict(
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
)
# The history of the previous run is overwritten by this assignment.
hist_little_convet = tf_model.fit_generator(train_generator, **fit_options)
# Wow! 97% accuracy! And we are done...
# In[28]:
# In[ ]:
# Drop into the interactive debugger as the last statement of the script.
# NOTE(review): presumably left in so the trained model and the last history
# object can be inspected by hand; this will block a non-interactive run --
# confirm it is still wanted.
pdb.set_trace()