# 04_01_minimal.py
# Input that may vary
batch_size = 160
# Libraries
import datetime
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils import data
# The custom-defined model
from QOL_Library.ConvTimeLSTM1 import ConvTime_LSTM1
# Function to separate whole sequences into prior and current scenes
from QOL_Library.Separate_X_Y import sep_x_y
# Formal PyTorch Dataset classes that increase hardware utilization
from QOL_Library.Dataset_Classes import train_Dataset, validation_Dataset
import nvidia_smi
# Marking the start time
print(datetime.datetime.now())
print("importing data")
# Import Moving MNIST
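# mnist_test_seq.npy is expected to have shape (frames, sequences, height, width) = (20, 10000, 64, 64)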
Moving_MNIST = np.load('data/mnist_test_seq.npy')
Moving_MNIST = Moving_MNIST / 255
print(Moving_MNIST.shape)
# Give PyTorch the data
# Making into PyTorch tensor
Moving_MNIST_tensor = torch.from_numpy(Moving_MNIST)
# Putting the existing dimensions into appropriate order
Moving_MNIST_tensor = Moving_MNIST_tensor.permute(1, 0, 2, 3)
# Add a channel dimension to acknowledge that this is 1 spectral band
Moving_MNIST_tensor = Moving_MNIST_tensor.unsqueeze(2)
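# The tensor should now be (sequence, time, channel, height, width) = (10000, 20, 1, 64, 64)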
# Checking shape
print(Moving_MNIST_tensor.shape)
print("processing data")
# Train/validation split
train_indices = np.random.choice(range(10000), size = 8000, replace = False)
OutofSample_indices = [index for index in range(10000) if index not in train_indices.tolist()]
validation_indices = np.random.choice(OutofSample_indices, size = 1000, replace = False)
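# Optional sanity check: the training and validation index sets should be disjoint by construction
assert set(train_indices.tolist()).isdisjoint(validation_indices.tolist())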
# Separate x (previous 10 in seq) and y (next in seq)
x, y = sep_x_y(Moving_MNIST_tensor[train_indices])
x_validation, y_validation = sep_x_y(Moving_MNIST_tensor[validation_indices])
print("setting up the model")
# Read the data dimensions from the training tensor to set the model parameters
channels = x.shape[2]
height = x.shape[3]
width = x.shape[4]
# Set model hyperparameters
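# Four stacked ConvLSTM-style layers with 128, 64, 64, and 1 hidden channels and 5 x 5 kernels;
# the final single channel presumably lets the last layer's output be read as a one-band predicted frame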
conv_time_lstm = ConvTime_LSTM1(input_size = (height,
                                              width),
                                input_dim = channels,
                                hidden_dim = [128, 64, 64, 1],
                                kernel_size = (5, 5),
                                num_layers = 4,
                                batch_first = True,
                                bias = True,
                                return_all_layers = False,
                                GPU = True)
# Give it to the GPU
conv_time_lstm.cuda()
# Training
# Optimization methods
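# MSE compares predicted and true frames pixel-wise; Adam is left at PyTorch's default learning rate (1e-3)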
loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(conv_time_lstm.parameters())
print("facilitating parallel operations")
# Pass our data to those Dataset classes
training_set = train_Dataset(x,
                             y,
                             data_indices=range(y.shape[0]))
validation_set = validation_Dataset(x_validation,
                                    y_validation,
                                    data_indices=range(y_validation.shape[0]))
train_loader = torch.utils.data.DataLoader(dataset = training_set,
                                           batch_size = batch_size,
                                           shuffle = True)
validation_loader = torch.utils.data.DataLoader(dataset = validation_set,
                                                batch_size = batch_size,
                                                shuffle = True)
# Create a formal device object, preferring the GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Allow parallelization
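# DataParallel scatters each batch along dimension 0 across the visible GPUs; with the 4 GPUs
# assumed by the nvidia_smi loop below, each device would see 40 of the 160 sequences per step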
conv_time_lstm = torch.nn.DataParallel(conv_time_lstm)
print("training loop")
print(datetime.datetime.now())
# Training loop
loss_list = []
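# Aim for roughly 7e5 training-example presentations in total, hence epochs = ceil(700000 / number of training examples)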
epochs = int(np.ceil((7*10**5) / x.shape[0]))
for i in range(epochs):
    for batch in train_loader:
        # unpack the mini-batch from the data loader
        batch_x, batch_y = batch
        # move to GPU
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        # run model and get the prediction
        batch_y_hat = conv_time_lstm(batch_x,
                                     torch.ones_like(batch_x))
        # the model presumably returns (layer outputs, layer states); keep the last layer's
        # output and a single time step as the predicted frame
        batch_y_hat = batch_y_hat[0][0][:, -2:-1, :, :, :]
        # calculate and store the loss
        batch_loss = loss(batch_y, batch_y_hat)
        loss_list.append(batch_loss.item())
        # update parameters
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
    print('Epoch: ', i, '\n\tBatch loss: ', batch_loss.item(), '\n')
# Printing gpu perf
nvidia_smi.nvmlInit()
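# Note: the loop below assumes exactly 4 visible GPUs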
for gpu_core in range(4):
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_core)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
# Marking the end time
print("end of training loop")
print(datetime.datetime.now())
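# loss_list is accumulated above and matplotlib is imported but not otherwise used here; a minimal
# sketch of saving the per-batch loss curve (the output file name is illustrative)
plt.figure()
plt.plot(loss_list)
plt.xlabel('batch')
plt.ylabel('MSE loss')
plt.savefig('04_01_minimal_loss.png')

# validation_loader is likewise built but not used here; a minimal sketch of a held-out MSE check,
# assuming the same forward call and output indexing as the training loop
with torch.no_grad():
    validation_losses = []
    for batch in validation_loader:
        batch_x, batch_y = batch
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        batch_y_hat = conv_time_lstm(batch_x,
                                     torch.ones_like(batch_x))
        batch_y_hat = batch_y_hat[0][0][:, -2:-1, :, :, :]
        validation_losses.append(loss(batch_y, batch_y_hat).item())
    print('Validation MSE: ', np.mean(validation_losses))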