-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import numpy as np | ||
|
||
|
||
class my_GMM:
    """Gaussian mixture model fitted with the EM algorithm.

    ``fit`` fills in ``n`` (sample count), ``p`` (sample dimensionality) and
    ``params``, a dict holding the mixing coefficients ``"pi"`` (k,), the
    means ``"mu"`` (k, p), the covariances ``"cov"`` (k, p, p) and the
    posterior responsibilities ``"pji"`` (n, k).
    """

    def __init__(self, k=2):
        """Create an unfitted model with ``k`` mixture components."""
        self.k = k  # number of clusters, default 2
        self.p = None  # sample dimensionality
        self.n = None  # number of samples
        # Placeholders; the real arrays are created by init_params().
        self.params = {
            "pi": None,  # mixing coefficients, 1*k
            "mu": None,  # means, k*p
            "cov": None,  # covariances, k*p*p
            "pji": None,  # posterior distribution, n*k
        }

    def init_params(self, init_mu):
        """Reset all parameters, starting the means from ``init_mu`` (k*p).

        Requires ``self.n`` and ``self.p`` to be set already (``fit`` does so).
        """
        pi = np.ones(self.k) / self.k
        # Copy as float: the M-step writes into this array in place, which
        # must neither modify the caller's array nor truncate to integers.
        mu = np.array(init_mu, dtype=float)
        # Identity covariances. An all-ones matrix is singular for p > 1 and
        # cannot be inverted; for p == 1 the identity equals the old value.
        cov = np.tile(np.eye(self.p), (self.k, 1, 1))
        pji = np.zeros((self.n, self.k))
        self.params = {
            "pi": pi,  # mixing coefficients, 1*k
            "mu": mu,  # means, k*p
            "cov": cov,  # covariances, k*p*p
            "pji": pji,  # posterior distribution, n*k
        }

    def gaussian_function(self, x_j, mu_k, cov_k):
        """Return the normal density of sample ``x_j`` under N(mu_k, cov_k).

        The three log-density terms are summed first and exponentiated once.
        """
        diff = x_j - mu_k
        one = -(diff @ np.linalg.inv(cov_k) @ diff.T) / 2
        two = -self.p * np.log(2 * np.pi) / 2
        three = -np.log(np.linalg.det(cov_k)) / 2
        return np.exp(one + two + three)

    def E_step(self, x):
        """Recompute the responsibilities ``params["pji"]`` for samples ``x``."""
        pi = self.params["pi"]
        mu = self.params["mu"]
        cov = self.params["cov"]
        for j, x_j in enumerate(x):
            weighted = np.array([
                pi_k * self.gaussian_function(x_j, mu_k, cov_k)
                for pi_k, mu_k, cov_k in zip(pi, mu, cov)
            ])
            # Normalise so that each sample's responsibilities sum to one.
            self.params["pji"][j, :] = weighted / weighted.sum()

    def M_step(self, x):
        """Re-estimate means, covariances and mixing coefficients from ``x``.

        Prints the first coordinate of every mean, the first diagonal
        covariance entry of every component and the mixing coefficients.
        """
        pji = self.params["pji"]
        for i in range(self.k):
            resp = pji[:, i]  # n
            resp_total = resp.sum()
            mu_k = resp @ x / resp_total  # p
            # Covariance is taken about the freshly updated mean and uses the
            # outer product of the deviations, so off-diagonal terms are
            # correct for p > 1 (an inner product would collapse to a scalar).
            diff = x - mu_k  # n*p
            cov_k = (resp[:, np.newaxis] * diff).T @ diff / resp_total  # p*p
            self.params["mu"][i] = mu_k
            self.params["cov"][i] = cov_k
            self.params["pi"][i] = resp_total / self.n
        print("均值为:", self.params["mu"].T[0], end=" ")
        print("方差为:", self.params["cov"].T[0][0], end=" ")
        print("混合系数为:", self.params["pi"])

    def fit(self, x, mu, max_iter=10):
        """Run ``max_iter`` EM iterations and return a component index per sample.

        Args:
            x: samples, n*p.
            mu: initial means, k*p. The array is copied, never modified.
            max_iter: number of E-step/M-step rounds.

        Returns:
            1-D integer array of length n with the most responsible component.
        """
        x = np.array(x, dtype=float)
        self.n, self.p = x.shape
        self.init_params(mu)

        for i in range(max_iter):
            print("第{}次迭代".format(i))
            self.E_step(x)
            self.M_step(x)
        return np.argmax(np.array(self.params["pji"]), axis=1)
|
||
|
||
# Demo: split eleven one-dimensional samples into two Gaussian components.
X = np.array(
    [1.0, 1.3, 2.2, 2.6, 2.8, 5.0, 7.3, 7.4, 7.5, 7.7, 7.9]
).reshape(-1, 1)  # one sample per row
mu = np.array([6, 7.5]).reshape(-1, 1)  # initial mean of each component
my_model = my_GMM(k=2)
result = my_model.fit(X, mu, max_iter=8)
print(result)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import numpy as np | ||
import time | ||
|
||
|
||
def mean_value(X):
    """Return the column-wise mean of the 2-D sample array ``X``.

    Rows are added one after another, starting from 0, and the total is then
    divided by the number of rows.
    """
    n_rows = len(X)
    column_totals = sum(X[row, :] for row in range(n_rows))
    return column_totals / n_rows
|
||
|
||
def covariance(X, X_mean):
    """Return the p*p covariance of the rows of ``X`` about ``X_mean``.

    The sum of per-row outer products of the deviations is divided by the
    number of rows n (not n - 1).
    """
    n, p = X.shape
    scatter = np.zeros((p, p))
    for row in X:
        deviation = row[np.newaxis, :] - X_mean  # kept 2-D so .T gives a column
        scatter += deviation.T @ deviation
    return scatter / n
|
||
|
||
def gaussian_probability(X, x):
    """Evaluate at ``x`` the multivariate normal density fitted to the rows of ``X``.

    The mean and covariance are estimated from ``X`` with ``mean_value`` and
    ``covariance`` on every call.
    """
    _, dim = X.shape
    center = mean_value(X)
    sigma = covariance(X, center)
    offset = x - center
    # Normalising constant: 1 / ((2*pi)^(dim/2) * sqrt(det(sigma))).
    scale = (1 / ((2 * np.pi) ** (dim / 2))) * (1 / (np.linalg.det(sigma) ** (1 / 2)))
    mahalanobis_sq = offset @ np.linalg.inv(sigma) @ offset.T
    return scale * np.exp((-1 / 2) * mahalanobis_sq)
|
||
|
||
def decision():
    """Classify the module-level sample ``x`` and print the verdict.

    Reads the module-level names ``X_good``, ``X_bad``, ``p_good``, ``p_bad``
    and ``x``. Each class score is its prior times the Gaussian likelihood of
    ``x`` under that class; a tie is reported as a good melon.
    """
    X_good_gaussian = gaussian_probability(X_good, x)
    X_bad_gaussian = gaussian_probability(X_bad, x)
    good = p_good * X_good_gaussian
    # The bad class must be weighted by its own prior; using p_good here
    # removed the priors from the comparison altogether.
    bad = p_bad * X_bad_gaussian
    if good >= bad:
        print("密度为{}, 含糖量为{}的瓜,高斯贝叶斯预测为好瓜".format(x[0], x[1]))
    else:
        print("密度为{}, 含糖量为{}的瓜,高斯贝叶斯预测为坏瓜".format(x[0], x[1]))
|
||
# Time the whole run: data preparation plus the classification call.
start = time.time()
# Each row is [density, sugar content, label]; label 1 = good melon, 0 = bad.
data = np.array([
    [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
    [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
    [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
    [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
    [0.719, 0.103, 0],
])
# Split the two feature columns by class label.
X_good = data[data[:, 2] == 1][:, 0:2]
X_bad = data[data[:, 2] == 0][:, 0:2]
p_good = (len(X_good) + 1) / (len(data) + 2)  # Laplace correction
p_bad = (len(X_bad) + 1) / (len(data) + 2)  # Laplace correction
x = np.array([0.5, 0.3])  # sample to classify: [density, sugar content]

decision()
end = time.time()
print("高斯贝叶斯运行时间的一千倍为:", 1e3 * (end - start))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import numpy as np | ||
import time | ||
|
||
|
||
def variance(x, mean_value):
    """Return the square root of the mean squared deviation of ``x``.

    Despite the name, the result is a standard deviation: the squared
    deviations from ``mean_value`` are averaged over ``len(x)`` and the
    square root of that average is returned.
    """
    squared_deviation_sum = sum((value - mean_value) ** 2 for value in x)
    return np.sqrt(squared_deviation_sum / len(x))
|
||
|
||
def _gaussian_pdf(value, mean, std):
    """Return the univariate normal density of ``value`` for the given mean and standard deviation."""
    return (1 / (np.sqrt(2 * np.pi) * std)) * np.exp(-(value - mean) ** 2 / (2 * std ** 2))


def conditional_probability():
    """Return the per-feature Gaussian likelihoods of the module-level sample ``x``.

    Reads the module-level ``x`` plus the ``mean_*`` and ``var_*`` values.
    The ``var_*`` names hold standard deviations, because ``variance()``
    returns a square root.

    Returns:
        Tuple ``(p_density_good, p_density_bad, p_sugar_good, p_sugar_bad)``.
    """
    density, sugar = x[0], x[1]
    p_density_good = _gaussian_pdf(density, mean_density_good, var_density_good)
    p_density_bad = _gaussian_pdf(density, mean_density_bad, var_density_bad)
    # Sugar likelihoods use the sugar feature x[1]; they previously reused
    # the density value x[0]. The exponent also now divides by 2*sigma**2.
    p_sugar_good = _gaussian_pdf(sugar, mean_sugar_good, var_sugar_good)
    p_sugar_bad = _gaussian_pdf(sugar, mean_sugar_bad, var_sugar_bad)
    return p_density_good, p_density_bad, p_sugar_good, p_sugar_bad
|
||
|
||
def decision():
    """Print the naive Gaussian Bayes verdict for the module-level sample ``x``.

    Each class score is the product of its two feature likelihoods from
    ``conditional_probability()`` and its prior (``p_good`` / ``p_bad``);
    a tie is reported as a good melon.
    """
    density_good, density_bad, sugar_good, sugar_bad = conditional_probability()
    score_good = density_good * sugar_good * p_good
    score_bad = density_bad * sugar_bad * p_bad
    if score_good >= score_bad:
        template = "密度为{}, 含糖量为{}的瓜,朴素高斯贝叶斯预测为好瓜"
    else:
        template = "密度为{}, 含糖量为{}的瓜,朴素高斯贝叶斯预测为坏瓜"
    print(template.format(x[0], x[1]))
|
||
|
||
# Time the whole run: parameter estimation plus the classification call.
start = time.time()
# Each row is [density, sugar content, label]; label 1 = good melon, 0 = bad.
data = np.array([[0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
                 [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
                 [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
                 [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
                 [0.719, 0.103, 0]])
X_good = np.array([i[0:2] for i in data if i[2] == 1])
X_bad = np.array([i[0:2] for i in data if i[2] == 0])
p_good = (len(X_good) + 1) / (len(data) + 2)  # Laplace correction
p_bad = (len(X_bad) + 1) / (len(data) + 2)  # Laplace correction
x = np.array([0.5, 0.3])  # sample to classify: [density, sugar content]
# Per-class, per-feature means.
mean_density_good = np.mean(X_good[:, 0])
mean_density_bad = np.mean(X_bad[:, 0])
mean_sugar_good = np.mean(X_good[:, 1])
mean_sugar_bad = np.mean(X_bad[:, 1])
# variance() returns a square root, so these hold standard deviations.
var_density_good = variance(X_good[:, 0], mean_density_good)
var_density_bad = variance(X_bad[:, 0], mean_density_bad)
var_sugar_good = variance(X_good[:, 1], mean_sugar_good)
var_sugar_bad = variance(X_bad[:, 1], mean_sugar_bad)
decision()
# Stop the clock only after classification, so the reported time covers the
# same work as the full Gaussian Bayes script it is compared against.
end = time.time()
print("朴素高斯贝叶斯运行时间的一千倍为:", (end - start) * 1e3)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import numpy as np | ||
|
||
# Transition matrix: row i holds the next-step probabilities from state i.
A = np.array([[0.8, 0.2], [0.5, 0.5]])

# Start from the distribution in row 1 and advance three more transitions,
# multiplying left to right exactly as A[1] @ A @ A @ A does.
res = A[1]
for _ in range(3):
    res = res @ A

print("射中的概率为%.4f, 射不中的概率为%.4f" % (res[0], res[1]))
|