-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_utils.py
135 lines (104 loc) · 2.79 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import pickle
import numpy as np
def load_cfar10_batch(path):
    """ Loads a batch of the CIFAR-10 dataset.

    Parameters
    ----------
    path : str
        Path to the pickled data batch.

    Returns
    -------
    features : numpy.ndarray
        Shape is (number of data points, 32, 32, 3).
        For instance: (10000, 32, 32, 3)
        Each flat row of ``batch['data']`` is read as three consecutive
        32x32 planes and the channel axis is then moved last. With the
        standard CIFAR-10 layout (planes stored row by row) the axes are
        (image, row / height, column / width, channel).
    labels : numpy.ndarray
        Shape is (number of data points, ).
        For instance: (10000, ).
        The class indices stored under ``batch['labels']`` (0-9 for CIFAR-10).

    Notes
    -----
    Based on: https://towardsdatascience.com/cifar-10-image-classification-in-tensorflow-5b501f7dc77c

    The function name keeps its existing spelling (``cfar10``) so that
    current callers continue to work.

    Security: the file is read with ``pickle``, which can execute arbitrary
    code while loading. Only pass paths to batch files from a trusted source.
    """
    with open(path, mode='rb') as file:
        # SECURITY: pickle.load runs code embedded in the file; `path` must
        # point to a trusted batch file.
        # encoding='latin1' is what lets Python 3 unpickle NumPy arrays that
        # were pickled by Python 2 (presumably how the batches were written).
        batch = pickle.load(file, encoding='latin1')

    # np.asarray is a no-op for the usual ndarray payload and additionally
    # accepts a plain nested sequence of rows.
    data = np.asarray(batch['data'])

    # (N, 3072) -> (N, channel, row, col) -> (N, row, col, channel)
    features = data.reshape((len(data), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = np.array(batch['labels'])
    return features, labels
def load_label_names():
    """ Returns the class names used by the CIFAR-10 dataset.

    Parameters
    ----------
    None

    Returns
    -------
    list
        Ten strings; the string at position ``i`` names label ``i`` (0-9).
        A new list is built on every call.

    Notes
    -----
    None
    """
    names_by_label = (
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    )
    return list(names_by_label)
def generate_linear_regression_dataset(m, b, std, n, seed=None, *, x_max=50):
    """ Generate one-dimensional data with linear trend.

    Computes ``y = m * x + b + e`` where ``x`` is uniform on
    ``[0, x_max)`` and ``e`` is Gaussian noise scaled by ``std``.

    Parameters
    ----------
    m : float
        Slope of line.
    b : float
        Y-intercept of line.
    std : float
        Standard deviation of random error.
    n : int
        The number of data points.
    seed : int, optional
        Random seed. When None (default) the global NumPy random state is
        used as it is and is not reseeded.
    x_max : float, optional
        Upper bound (exclusive) of the sampled x values. Keyword-only.
        Defaults to 50.

    Returns
    -------
    tuple
        Tuple of numpy.ndarrays of x and y, each of shape (n, 1).

    Notes
    -----
    When `seed` is given, the global NumPy random state is reseeded with it
    twice: once before drawing x and once before drawing the noise. The
    noise is therefore generated from the same pseudo-random stream as x,
    not from a continuation of it. This is kept so that seeded calls keep
    returning the same values.
    """
    if seed is not None:
        np.random.seed(seed)
    x = np.random.random_sample(n) * x_max

    # Deliberate second reseed: restarts the stream so the noise values for a
    # given seed do not change (see Notes).
    if seed is not None:
        np.random.seed(seed)
    e = np.random.randn(n) * std

    y = m * x + b + e

    # Column vectors: one sample per row.
    return x.reshape(-1, 1), y.reshape(-1, 1)
def generate_non_linear_regression_dataset(b, std, n, seed=None, *, x_max=10):
    """ Generate one-dimensional data with quadratic trend.

    Computes ``y = x ** 2 + b + e`` where ``x`` is uniform on
    ``[0, x_max)`` and ``e`` is Gaussian noise scaled by ``std``.

    Parameters
    ----------
    b : float
        Y-intercept of curve.
    std : float
        Standard deviation of random error.
    n : int
        The number of data points.
    seed : int, optional
        Random seed. When None (default) the global NumPy random state is
        used as it is and is not reseeded.
    x_max : float, optional
        Upper bound (exclusive) of the sampled x values. Keyword-only.
        Defaults to 10.

    Returns
    -------
    tuple
        Tuple of numpy.ndarrays of x and y, each of shape (n, 1).

    Notes
    -----
    When `seed` is given, the global NumPy random state is reseeded with it
    twice: once before drawing x and once before drawing the noise. The
    noise is therefore generated from the same pseudo-random stream as x,
    not from a continuation of it. This is kept so that seeded calls keep
    returning the same values.
    """
    if seed is not None:
        np.random.seed(seed)
    x = np.random.random_sample(n) * x_max

    # Deliberate second reseed: restarts the stream so the noise values for a
    # given seed do not change (see Notes).
    if seed is not None:
        np.random.seed(seed)
    e = np.random.randn(n) * std

    y = x ** 2 + b + e

    # Column vectors: one sample per row.
    return x.reshape(-1, 1), y.reshape(-1, 1)