prepare.py
import torch.utils.data as utils
import numpy as np
import torch
def PrepareDataset(speed_matrix, BATCH_SIZE=40, seq_len=10, pred_len=1, train_propotion=0.7, valid_propotion=0.1):
    """Prepare training, validation, and testing datasets and dataloaders.

    Converts a speed/volume/occupancy matrix into training, validation, and
    testing datasets. The vertical axis of speed_matrix is the time axis and
    the horizontal axis is the spatial axis.

    Args:
        speed_matrix: a matrix containing spatial-temporal speed data for a network
        seq_len: length of the input sequence
        pred_len: length of the predicted sequence
    Returns:
        Training, validation, and testing dataloaders, plus the maximum speed
        used for normalization.
    """
    time_len = speed_matrix.shape[0]

    # Normalize speeds to [0, 1] by the global maximum.
    max_speed = speed_matrix.max().max()
    speed_matrix = speed_matrix / max_speed

    # Slide a window over the time axis to build (input sequence, label) pairs.
    speed_sequences, speed_labels = [], []
    for i in range(time_len - seq_len - pred_len):
        speed_sequences.append(speed_matrix.iloc[i:i + seq_len].values)
        speed_labels.append(speed_matrix.iloc[i + seq_len:i + seq_len + pred_len].values)
    speed_sequences, speed_labels = np.asarray(speed_sequences), np.asarray(speed_labels)

    # Shuffle and split the dataset into training, validation, and testing sets.
    sample_size = speed_sequences.shape[0]
    index = np.arange(sample_size, dtype=int)
    np.random.shuffle(index)
    # Apply the shuffled index so the split is actually random (the original code
    # computed the permutation but never used it).
    speed_sequences, speed_labels = speed_sequences[index], speed_labels[index]
    train_index = int(np.floor(sample_size * train_propotion))
    valid_index = int(np.floor(sample_size * (train_propotion + valid_propotion)))

    train_data, train_label = speed_sequences[:train_index], speed_labels[:train_index]
    valid_data, valid_label = speed_sequences[train_index:valid_index], speed_labels[train_index:valid_index]
    test_data, test_label = speed_sequences[valid_index:], speed_labels[valid_index:]

    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)

    train_dataloader = utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

    return train_dataloader, valid_dataloader, test_dataloader, max_speed
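

# Example usage (a minimal sketch, not part of the original module): it assumes the
# speed matrix is a pandas DataFrame with one row per timestamp and one column per
# sensor, which matches the .iloc indexing above. The file name 'speed_matrix.csv'
# is a placeholder, not a path from this repository.
if __name__ == '__main__':
    import pandas as pd

    # Hypothetical input file: rows are timestamps, columns are sensors/links.
    speed_matrix = pd.read_csv('speed_matrix.csv', index_col=0)

    train_loader, valid_loader, test_loader, max_speed = PrepareDataset(
        speed_matrix, BATCH_SIZE=40, seq_len=10, pred_len=1)

    # Each batch is (inputs, labels): inputs have shape (BATCH_SIZE, seq_len, num_sensors)
    # and labels have shape (BATCH_SIZE, pred_len, num_sensors), both normalized by max_speed.
    inputs, labels = next(iter(train_loader))
    print(inputs.shape, labels.shape, max_speed)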