import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torchvision
import torchvision.transforms as transforms


def weights_init(m):
    # Initialize linear layers with small Gaussian weights and zero biases
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1e-3)
        m.bias.data.fill_(0.)


def update_lr(optimizer, lr):
    # Set the learning rate of every parameter group to the given value
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
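# Note: update_lr is applied once per epoch in the training loop below, so the
# effective schedule is lr_e = learning_rate * learning_rate_decay ** e. For
# example, with learning_rate = 1e-2 and decay 0.95, the rate after 50 epochs
# is about 1e-2 * 0.95 ** 50 ≈ 7.7e-4.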
# --------------------------------
# Device configuration
# --------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: %s' % device)
# --------------------------------
# Hyper-parameters
# --------------------------------
input_size = 32 * 32 * 3
hidden_size = [180, 180, 180, 180]
num_classes = 10
num_epochs = 50
batch_size = 500
learning_rate = 1e-2
learning_rate_decay = 0.95
reg = 0.001
num_training = 49000
num_validation = 1000
train = True
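# With these settings the input is a flattened 32x32x3 CIFAR-10 image (3072
# values) fed through four hidden layers of width 180. Set train = False to
# skip training and evaluate a previously saved checkpoint instead.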
# -------------------------------------------------
# Load the CIFAR-10 dataset
# -------------------------------------------------
norm_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5),
                                                          (0.5, 0.5, 0.5))])
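# ToTensor scales pixel values to [0, 1]; Normalize with per-channel mean 0.5
# and std 0.5 then maps them to [-1, 1] via (x - 0.5) / 0.5.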
cifar_dataset = torchvision.datasets.CIFAR10(root='datasets/',
                                             train=True,
                                             transform=norm_transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='datasets/',
                                            train=False,
                                            transform=norm_transform)
# -------------------------------------------------
# Prepare the training and validation splits
# -------------------------------------------------
mask = list(range(num_training))
train_dataset = torch.utils.data.Subset(cifar_dataset, mask)
mask = list(range(num_training, num_training + num_validation))
val_dataset = torch.utils.data.Subset(cifar_dataset, mask)
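# The CIFAR-10 train split has 50,000 images: the first 49,000 are used for
# training and the following 1,000 for validation.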
# -------------------------------------------------
# Data loader
# -------------------------------------------------
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
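# With batch_size = 500 and 49,000 training images, each epoch consists of
# 49,000 / 500 = 98 mini-batch steps.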
# ======================================================================================
# Q4: Implementing a multi-layer perceptron in PyTorch
# ======================================================================================
# So far we have implemented a two-layer network using numpy by explicitly
# writing down the forward computation and deriving and implementing the
# equations for the backward computation. This process can be tedious to extend
# to large network architectures.
#
# Popular deep-learning libraries like PyTorch and TensorFlow allow us to
# quickly implement complicated neural network architectures. They provide
# pre-defined layers which can be used as building blocks to define our
# network. They also support automatic differentiation, which allows us to
# define only the forward pass and let the library perform back-propagation
# for us.
#
# In this question we will implement a multi-layer perceptron using the PyTorch
# library. Please complete the code for the MultiLayerPerceptron, and for
# training and evaluating the model. Once you can train the two-layer model,
# experiment with adding more layers and report your observations.
# --------------------------------------------------------------------------------------
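# A minimal autograd sketch (illustration only, not part of the exercise):
# PyTorch records the operations of the forward pass and derives gradients
# automatically when .backward() is called.
#
#   x = torch.tensor([1.0, 2.0], requires_grad=True)
#   y = (x ** 2).sum()  # forward pass only
#   y.backward()        # autograd computes dy/dx = 2 * x
#   print(x.grad)       # tensor([2., 4.])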
# -------------------------------------------------
# Fully connected neural network with a configurable number of hidden layers
# -------------------------------------------------
class MultiLayerPerceptron(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes):
        super(MultiLayerPerceptron, self).__init__()
        #################################################################################
        # TODO: Initialize the modules required to implement the mlp with the layer    #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... --> #
        # hidden_layers[-1] --> num_classes                                            #
        # Make use of linear and relu layers from the torch.nn module                  #
        #################################################################################
        layers = []  # Use the layers list to store a variable number of layers
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # Input block: input_size --> hidden_layers[0]
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        layers.append(nn.BatchNorm1d(num_features=hidden_layers[0]))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(p=0.4))
        # Hidden blocks: hidden_layers[i] --> hidden_layers[i + 1]
        for in_size, out_size in zip(hidden_layers[:-1], hidden_layers[1:]):
            layers.append(nn.Linear(in_size, out_size))
            layers.append(nn.BatchNorm1d(num_features=out_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=0.4))
        # Output layer: hidden_layers[-1] --> num_classes (raw logits, no softmax)
        layers.append(nn.Linear(hidden_layers[-1], num_classes))
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # Enter the layers into nn.Sequential, so the model may "see" them
        # Note the use of * in front of layers
        self.layers = nn.Sequential(*layers)
    def forward(self, x):
        #################################################################################
        # TODO: Implement the forward pass computations                                 #
        # Note that you do not need to use the softmax operation at the end.            #
        # Softmax is only required for the loss computation and the criterion used below#
        # nn.CrossEntropyLoss() already integrates the softmax and the log loss together#
        #################################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        out = self.layers(x)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return out
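# With the default hyper-parameters above, the network is
# 3072 --> 180 --> 180 --> 180 --> 180 --> 10, with BatchNorm, ReLU and
# Dropout after every hidden Linear layer.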
model = MultiLayerPerceptron(input_size, hidden_size, num_classes).to(device)

# Print model's state_dict
'''
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
'''
if train:
    model.apply(weights_init)
    model.train()  # set dropout and batch normalization layers to training mode

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            #################################################################################
            # TODO: Implement the training code                                             #
            # 1. Pass the images to the model                                               #
            # 2. Compute the loss using the output and the labels.                          #
            # 3. Compute gradients and update the model using the optimizer                 #
            # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
            #################################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            x = images.view(-1, input_size)  # flatten each image to a 3072-vector
            y_pred = model(x)
            loss = criterion(y_pred, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # With batch_size = 500 there are only 98 steps per epoch, so log
            # every 50 steps rather than every 100 (which would never trigger).
            if (i + 1) % 50 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)

        # Evaluate on the validation set; switch to eval mode so that dropout
        # and batch normalization behave deterministically.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                ####################################################
                # TODO: Implement the evaluation code              #
                # 1. Pass the images to the model                  #
                # 2. Get the most confident predicted class        #
                ####################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                x = images.view(-1, input_size)
                out = model(x)
                _, predicted = torch.max(out, 1)
                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Validation accuracy is: {} %'.format(100 * correct / total))
        model.train()  # back to training mode for the next epoch

    ##################################################################################
    # TODO: Now that you can train a simple two-layer MLP using the above code, you  #
    # can easily experiment with adding more layers and different layer              #
    # configurations and let the pytorch library handle computing the gradients.     #
    #                                                                                #
    # Experiment with different numbers of layers (at least from 2 to 5 layers) and  #
    # record the final validation accuracies. Report your observations on how adding #
    # more layers to the MLP affects its behavior. Try to improve the model          #
    # configuration using the validation performance as guidance. You can            #
    # experiment with different activation layers available in torch.nn, or with     #
    # adding dropout layers, if you are interested. Use the best model on the        #
    # validation set to evaluate the performance on the test set once and report it. #
    ##################################################################################

    # Save the model checkpoint (parameters and buffers only, not the class)
    torch.save(model.state_dict(), 'model.ckpt')
else:
    # Run the test code once you have your best model: set the train flag to
    # False so that the saved checkpoint is loaded instead of retraining.
    best_model = torch.load('model.ckpt')  # the saved state_dict
    model.load_state_dict(best_model)
# Test the model
model.eval()  # set dropout and batch normalization layers to evaluation mode

# In the test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        ####################################################
        # TODO: Implement the evaluation code              #
        # 1. Pass the images to the model                  #
        # 2. Get the most confident predicted class        #
        ####################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        x = images.view(-1, input_size)
        out = model(x)
        _, predicted = torch.max(out, 1)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # Only evaluate on the first 1000 test images
        if total == 1000:
            break

    print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))