-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMF3.py
119 lines (99 loc) · 3.7 KB
/
MF3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
"""
params:
data : matrix to be factorized, dimension N x M
W : initial matrix of dimension N x K
H : initial matrix of dimension M x K
K : number of latent features
steps : maximum step
beta : learning rate
lamda : regularization parameter
return:
the final matrices H and W
"""
def _masked_means(values, mask, axis):
    """Return the mean of ``values`` over ``mask`` along ``axis`` (0.0 where nothing is selected)."""
    counts = mask.sum(axis=axis)
    totals = np.where(mask, values, 0).sum(axis=axis, dtype=float)
    means = np.zeros(counts.shape, dtype=float)
    # ``where`` skips rows/columns without any selected entry, leaving them at 0.
    np.divide(totals, counts, out=means, where=counts > 0)
    return means


def matrix_factorization(data, K, steps=5000, beta=0.0002, lamda=0.02):
    """Factorize ``data`` as ``b + b_u + b_i + W . H^T`` using SGD.

    Entries equal to 0 are treated as missing. The global bias and the
    per-row / per-column biases start from the mean of the non-zero entries
    (overall, per row, per column); only strictly positive entries are used
    as training samples.

    Parameters
    ----------
    data : array-like, shape (N, M)
        Matrix to factorize.
    K : int
        Number of latent features.
    steps : int
        Maximum number of passes over the positive entries.
    beta : float
        Learning rate.
    lamda : float
        Regularization parameter.

    Returns
    -------
    W : ndarray, shape (N, K)
    H : ndarray, shape (M, K)
    b_u : ndarray, shape (N,)
        Row biases.
    b_i : ndarray, shape (M,)
        Column biases.
    b : float
        Global bias.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D matrix")
    n_rows, n_cols = data.shape

    # W is drawn before H so the random stream is consumed in the same order
    # as before; H is kept transposed (K x M) while training.
    W = np.random.rand(n_rows, K)
    H = np.random.rand(n_cols, K).T

    nonzero = data != 0
    # Guard against a matrix without any entries: the mean of an empty
    # selection would be NaN and would poison every later update.
    b = data[nonzero].mean() if nonzero.any() else 0.0
    b_u = _masked_means(data, nonzero, axis=1)
    b_i = _masked_means(data, nonzero, axis=0)

    # Training samples, in row-major order, computed once instead of
    # rescanning the whole matrix on every step.
    rows, cols = np.nonzero(data > 0)
    ratings = data[rows, cols]

    for _ in range(steps):
        for i, j, rating in zip(rows, cols, ratings):
            eij = rating - (b + b_u[i] + b_i[j] + np.dot(W[i, :], H[:, j]))
            b = b + beta * eij
            b_u[i] += beta * (eij - lamda * b_u[i])
            b_i[j] += beta * (eij - lamda * b_i[j])
            # The H update intentionally sees the freshly updated W row,
            # matching the original sequential update order.
            W[i, :] += beta * (2 * eij * H[:, j] - lamda * W[i, :])
            H[:, j] += beta * (2 * eij * W[i, :] - lamda * H[:, j])

        # Regularized squared error of the full biased model. The bias terms
        # must be part of the prediction here, otherwise the stopping test
        # measures a different model than the one being trained.
        predicted = b + b_u[rows] + b_i[cols] + np.sum(W[rows] * H[:, cols].T, axis=1)
        penalty = (lamda / 2) * (np.sum(W[rows] ** 2) + np.sum(H[:, cols] ** 2))
        e = np.sum((ratings - predicted) ** 2) + penalty
        if e < 0.001:
            print("Epsilon's exit")
            break
    return W, H.T, b_u, b_i, b
def readFile(fileString):
    """Read a space-delimited table of integers from ``fileString`` as nested Python lists."""
    table = np.loadtxt(fileString, dtype="int", delimiter=" ")
    return table.tolist()
if __name__ == "__main__":
    import sys

    # The original input file stays the default; pass a path as the first
    # command-line argument to factorize a different matrix.
    default_path = r'F:\Tut\Sem6\Information System\testData\test1.txt'
    fileString = sys.argv[1] if len(sys.argv) > 1 else default_path
    data = np.array(readFile(fileString))
    print(data)
    K = 2
    W, H, b_u, b_i, b = matrix_factorization(data, K)
    print("Matrix Factorization with Bias")
    # Rebuild the full prediction matrix: global bias + latent factors
    # + row biases (broadcast down columns) + column biases (across rows).
    fitted = b + W.dot(H.T) + b_u[:, np.newaxis] + b_i[np.newaxis, :]
    for row in fitted:
        print(np.around(row, decimals=2))