-
Notifications
You must be signed in to change notification settings - Fork 1
/
prepareData.py
107 lines (89 loc) · 3.88 KB
/
prepareData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import numpy as np
import argparse
import configparser
def search_data(sequence_length, num_of_depend, label_start_idx,
                num_for_predict, units, points_per_hour):
    '''
    Locate the history windows that precede a prediction target.

    Window ``i`` (``i = 1 .. num_of_depend``) starts
    ``points_per_hour * units * i`` points before ``label_start_idx`` and
    covers ``num_for_predict`` points.

    Parameters
    ----------
    sequence_length: int, length of all history data
    num_of_depend: int, number of history windows to locate
    label_start_idx: int, the first index of predicting target
    num_for_predict: int, the number of points covered by each window
    units: int, week: 7 * 24, day: 24, recent(hour): 1
    points_per_hour: int, number of points per hour, depends on data

    Returns
    ----------
    list[(start_idx, end_idx)] ordered from the oldest window to the most
    recent one, or None when ``label_start_idx + num_for_predict`` exceeds
    ``sequence_length``, when any window would start before index 0, or
    when ``num_of_depend`` is negative.

    Raises
    ----------
    ValueError
        If ``points_per_hour`` is not strictly positive.
    '''
    # A zero stride would make every "history" window start at
    # label_start_idx itself, so zero is rejected together with negatives.
    if points_per_hour <= 0:
        raise ValueError("points_per_hour should be greater than 0!")

    if label_start_idx + num_for_predict > sequence_length:
        return None

    stride = points_per_hour * units
    windows = []
    # Walk from the farthest window to the nearest so the result is already
    # in chronological order.
    for i in range(num_of_depend, 0, -1):
        start_idx = label_start_idx - stride * i
        if start_idx < 0:
            return None
        windows.append((start_idx, start_idx + num_for_predict))

    # Only differs when num_of_depend is negative (the loop never runs).
    if len(windows) != num_of_depend:
        return None

    return windows
def get_sample_indices(data_sequence, num_of_weeks, num_of_days, num_of_hours,
                       label_start_idx, num_for_predict, num_predict,
                       points_per_hour=12):
    '''
    Build the weekly, daily and recent history samples plus the target
    for the prediction that starts at ``label_start_idx``.

    Parameters
    ----------
    data_sequence: np.ndarray
                   shape is (sequence_length, num_of_vertices, num_of_features)
    num_of_weeks, num_of_days, num_of_hours: int
        number of weekly / daily / hourly history windows to collect;
        a value <= 0 disables that component (its sample is None)
    label_start_idx: int, the first index of predicting target, i.e. the
                     point where the predicted values begin
    num_for_predict: int,
                     the number of points taken for each history window
    num_predict: int,
                 the number of points in the returned target
    points_per_hour: int, default 12, number of points per hour

    Returns
    ----------
    week_sample: np.ndarray or None
                 shape is (num_of_weeks * num_for_predict,
                           num_of_vertices, num_of_features)
    day_sample: np.ndarray or None
                 shape is (num_of_days * num_for_predict,
                           num_of_vertices, num_of_features)
    hour_sample: np.ndarray or None
                 shape is (num_of_hours * num_for_predict,
                           num_of_vertices, num_of_features)
    target: np.ndarray or None
            shape is (num_predict, num_of_vertices, num_of_features)

    All four values are None when the history windows or the target do not
    fit inside ``data_sequence``.
    '''
    sequence_length = data_sequence.shape[0]
    no_sample = (None, None, None, None)

    # The target spans num_predict points while the history windows span
    # num_for_predict points; both must fit, otherwise slicing would
    # silently hand back a truncated target.
    if label_start_idx + max(num_for_predict, num_predict) > sequence_length:
        return no_sample

    samples = []
    # (number of windows, hours between consecutive windows) per component.
    for num_of_depend, units in ((num_of_weeks, 7 * 24),
                                 (num_of_days, 24),
                                 (num_of_hours, 1)):
        if num_of_depend <= 0:
            samples.append(None)
            continue

        indices = search_data(sequence_length, num_of_depend,
                              label_start_idx, num_for_predict,
                              units, points_per_hour)
        if not indices:
            return no_sample

        samples.append(np.concatenate([data_sequence[i: j]
                                       for i, j in indices], axis=0))

    week_sample, day_sample, hour_sample = samples
    target = data_sequence[label_start_idx: label_start_idx + num_predict]

    return week_sample, day_sample, hour_sample, target