linear_regression.py
import tensorflow as tf
import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
from helper import general as general
# support functions ------------------------------------------------------------
def load_data():
    """for ex5
    d['X'] shape = (12, 1)
    pandas has trouble taking this 2d ndarray to construct a dataframe, so I ravel
    the results
    """
    d = sio.loadmat('ex5data1.mat')
    return map(np.ravel, [d['X'], d['y'], d['Xval'], d['yval'], d['Xtest'], d['ytest']])
def poly_features(x, power, as_ndarray=False):
    data = {'f{}'.format(i): np.power(x, i) for i in range(1, power + 1)}
    df = pd.DataFrame(data)

    # .values instead of the removed DataFrame.as_matrix()
    return df.values if as_ndarray else df
def prepare_poly_data(*args, power):
    """
    args: keep feeding in X, Xval, or Xtest
        will return in the same order
    """
    def prepare(x):
        # expand features
        df = poly_features(x, power=power)

        # normalization
        ndarr = general.normalize_feature(df).values

        # add intercept term
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)

    return [prepare(x) for x in args]
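# Hedged usage sketch for the support functions above; it assumes ex5data1.mat
# sits next to this file, which is what load_data() expects. The _demo/_check
# helpers below are illustrative only, not part of the exercise.
def _demo_prepare_data(power=8):
    # load the six raveled arrays in the documented order
    X, y, Xval, yval, Xtest, ytest = load_data()
    # expand, normalize and add the intercept column for all three sets at once
    X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=power)
    print(X_poly.shape)  # (12, power + 1), per the shape noted in load_data()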
def plot_learning_curve(X, y, Xval, yval, l=0):
    training_cost, cv_cost = [], []
    m = X.shape[0]

    for i in range(1, m + 1):
        # regularization applies here for fitting parameters
        res = linear_regression_np(X[:i, :], y[:i], l=l)

        # remember, when you compute the cost here, you are computing
        # non-regularized cost. Regularization is used to fit parameters only
        tc = cost(res.x, X[:i, :], y[:i])
        cv = cost(res.x, Xval, yval)

        training_cost.append(tc)
        cv_cost.append(cv)

    plt.plot(np.arange(1, m + 1), training_cost, label='training cost')
    plt.plot(np.arange(1, m + 1), cv_cost, label='cv cost')
    plt.legend(loc=1)
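# Sketch of driving the learning-curve helper with the prepared polynomial
# features (same ex5data1.mat assumption as above).
def _demo_learning_curve(power=8, l=0):
    X, y, Xval, yval, Xtest, ytest = load_data()
    X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=power)
    plot_learning_curve(X_poly, y, Xval_poly, yval, l=l)
    plt.show()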
# linear regression functions --------------------------------------------------
def cost(theta, X, y):
    """
    X: R(m*n), m records, n features
    y: R(m)
    theta: R(n), linear regression parameters
    """
    m = X.shape[0]

    inner = X @ theta - y  # R(m*1)

    # 1*m @ m*1 = 1*1 in matrix multiplication, but numpy does not transpose a
    # 1d array, so this is simply the inner product of the residual vector with
    # itself
    square_sum = inner.T @ inner
    cost = square_sum / (2 * m)

    return cost
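# Quick numeric sanity check of the cost above (illustrative, not part of ex5):
# with X = [[1, 1], [1, 2]], theta = [0, 0], y = [1, 2], the residuals are
# [-1, -2], so J = (1 + 4) / (2 * 2) = 1.25.
def _check_cost():
    X_toy = np.array([[1.0, 1.0], [1.0, 2.0]])
    y_toy = np.array([1.0, 2.0])
    theta_toy = np.zeros(2)
    assert np.isclose(cost(theta_toy, X_toy, y_toy), 1.25)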
def regularized_cost(theta, X, y, l=1):
    m = X.shape[0]

    regularized_term = (l / (2 * m)) * np.power(theta[1:], 2).sum()

    return cost(theta, X, y) + regularized_term


def gradient(theta, X, y):
    m = X.shape[0]

    inner = X.T @ (X @ theta - y)  # (m,n).T @ (m, 1) -> (n, 1)

    return inner / m


def regularized_gradient(theta, X, y, l=1):
    m = X.shape[0]

    regularized_term = theta.copy()  # same shape as theta
    regularized_term[0] = 0  # don't regularize intercept theta
    regularized_term = (l / m) * regularized_term

    return gradient(theta, X, y) + regularized_term
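# Optional finite-difference check of the analytic gradient (illustrative):
# perturb each component of theta and compare against regularized_gradient.
def _check_gradient(l=1, eps=1e-4):
    X_toy = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 5.0]])
    y_toy = np.array([2.0, 3.0, 4.0])
    theta_toy = np.array([0.5, -0.5])

    numeric = np.zeros_like(theta_toy)
    for j in range(theta_toy.size):
        step = np.zeros_like(theta_toy)
        step[j] = eps
        numeric[j] = (regularized_cost(theta_toy + step, X_toy, y_toy, l)
                      - regularized_cost(theta_toy - step, X_toy, y_toy, l)) / (2 * eps)

    assert np.allclose(numeric, regularized_gradient(theta_toy, X_toy, y_toy, l), atol=1e-6)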
def batch_gradient_decent(theta, X, y, epoch, alpha=0.01):
    """fit the linear regression, return the parameters and cost history
    epoch: how many passes to run through the whole batch
    """
    cost_data = [cost(theta, X, y)]
    _theta = theta.copy()  # don't want to mess up the original theta

    for _ in range(epoch):
        _theta = _theta - alpha * gradient(_theta, X, y)
        cost_data.append(cost(_theta, X, y))

    return _theta, cost_data
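# Sketch of running the batch gradient descent above on a tiny synthetic
# problem (y = 1 + 2x with an intercept column), just to watch the cost drop.
def _demo_batch_gd(epoch=500, alpha=0.01):
    x = np.linspace(0, 1, 20)
    X_toy = np.column_stack([np.ones_like(x), x])
    y_toy = 1 + 2 * x

    final_theta, cost_data = batch_gradient_decent(np.zeros(2), X_toy, y_toy, epoch, alpha=alpha)
    print(final_theta, cost_data[0], cost_data[-1])  # cost shrinks towards 0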
def linear_regression_np(X, y, l=1):
    """linear regression
    args:
        X: feature matrix, (m, n+1)  # with intercept x0=1
        y: target vector, (m, )
        l: lambda constant for regularization
    return: trained parameters
    """
    # init theta
    theta = np.ones(X.shape[1])

    # train it
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})
    return res
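# Sketch of the scipy-based fit on the raw (non-polynomial) ex5 data: add the
# intercept column by hand and inspect the optimized parameters (assumes
# ex5data1.mat is present).
def _demo_linear_regression_np(l=1):
    X, y, Xval, yval, Xtest, ytest = load_data()
    X_with_intercept = np.insert(X.reshape(-1, 1), 0, 1, axis=1)  # m*2
    res = linear_regression_np(X_with_intercept, y, l=l)
    print(res.x)  # fitted theta, intercept first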
def linear_regression(X_data, y_data, alpha, epoch, optimizer=tf.train.GradientDescentOptimizer):
    """tensorflow implementation (TF 1.x graph mode)
    X_data: (m, n) design matrix
    y_data: (m, 1) target column vector, so that y_pred - y stays m*1
    """
    # placeholder for graph input
    X = tf.placeholder(tf.float32, shape=X_data.shape)
    y = tf.placeholder(tf.float32, shape=y_data.shape)

    # construct the graph
    with tf.variable_scope('linear-regression'):
        W = tf.get_variable("weights",
                            (X_data.shape[1], 1),
                            initializer=tf.constant_initializer())  # n*1

        y_pred = tf.matmul(X, W)  # m*n @ n*1 -> m*1

        loss = 1 / (2 * len(X_data)) * tf.matmul((y_pred - y), (y_pred - y), transpose_a=True)  # (m*1).T @ m*1 -> 1*1

    opt = optimizer(learning_rate=alpha)
    opt_operation = opt.minimize(loss)

    # run the session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialize_all_variables() is long deprecated
        loss_data = []

        for i in range(epoch):
            _, loss_val, W_val = sess.run([opt_operation, loss, W], feed_dict={X: X_data, y: y_data})
            loss_data.append(loss_val[0, 0])  # every loss_val is a 1*1 ndarray

            if len(loss_data) > 1 and np.abs(loss_data[-1] - loss_data[-2]) < 10 ** -9:  # early break once converged
                # print('Converged at epoch {}'.format(i))
                break

    # clear the graph
    tf.reset_default_graph()

    return {'loss': loss_data, 'parameters': W_val}  # W_val comes back as an n*1 ndarray
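# Hedged end-to-end demo: run the toy checks, the scipy fit, and the TensorFlow
# fit on the normalized polynomial features. The graph code above targets the
# TF 1.x API (placeholders / Session), so this will not run under TensorFlow 2
# without tf.compat.v1 and eager execution disabled. Assumes ex5data1.mat is
# present next to this file.
if __name__ == '__main__':
    _check_cost()
    _check_gradient()

    X, y, Xval, yval, Xtest, ytest = load_data()

    # scipy fit on the raw data with a hand-added intercept column
    X_mat = np.insert(X.reshape(-1, 1), 0, 1, axis=1)
    print(linear_regression_np(X_mat, y, l=0).x)

    # TensorFlow fit on the normalized polynomial features; y is reshaped to an
    # m*1 column so y_pred - y stays m*1 inside the graph
    X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=8)
    tf_res = linear_regression(X_poly, y.reshape(-1, 1), alpha=0.01, epoch=5000)
    print(tf_res['parameters'].ravel(), tf_res['loss'][-1])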