Skip to content

Commit

Permalink
代码
Browse files Browse the repository at this point in the history
  • Loading branch information
gq-max committed Dec 29, 2020
1 parent 48fec1a commit 6aef19f
Show file tree
Hide file tree
Showing 12 changed files with 285 additions and 0 deletions.
8 changes: 8 additions & 0 deletions 代码/.idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions 代码/.idea/deployment.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions 代码/.idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions 代码/.idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions 代码/.idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions 代码/.idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions 代码/.idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions 代码/.idea/代码.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions 代码/GMM.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import numpy as np


class my_GMM:
    """Gaussian mixture model fitted with the EM algorithm.

    After ``fit`` the learned quantities live in ``self.params``:
    ``"pi"`` mixing coefficients of shape (k,), ``"mu"`` means of shape
    (k, p), ``"cov"`` covariance matrices of shape (k, p, p) and ``"pji"``
    posterior responsibilities of shape (n, k).
    """

    def __init__(self, k=2):
        """Create an unfitted model with ``k`` mixture components."""
        self.k = k  # number of clusters, defaults to 2
        self.p = None  # sample dimension, set by fit()
        self.n = None  # number of samples, set by fit()
        # Placeholders; real arrays are created in init_params().
        self.params = {
            "pi": None,  # mixing coefficients, (k,)
            "mu": None,  # means, (k, p)
            "cov": None,  # covariances, (k, p, p)
            "pji": None,  # posterior responsibilities, (n, k)
        }

    def init_params(self, init_mu):
        """Initialise the parameters from the starting means ``init_mu``.

        ``init_mu`` is copied into a float array of shape (k, p), so the
        caller's array is never modified and integer-typed starting means do
        not truncate later updates. Every covariance starts as the identity
        matrix (an all-ones matrix would be singular for p > 1).
        Requires ``self.n`` and ``self.p`` to be set already.
        """
        mu = np.array(init_mu, dtype=float).reshape(self.k, self.p)
        self.params = {
            "pi": np.ones(self.k) / self.k,
            "mu": mu,
            "cov": np.array([np.eye(self.p) for _ in range(self.k)]),
            "pji": np.zeros((self.n, self.k)),
        }

    def _log_density(self, x, mu_k, cov_k):
        """Return the log Gaussian density of ``x`` under one component.

        ``x`` may be a single sample of shape (p,) or a batch of shape (n, p);
        the result is a scalar or an array of shape (n,) respectively.
        """
        diff = x - mu_k
        # Row-wise quadratic form diff^T * inv(cov) * diff.
        mahalanobis = np.sum((diff @ np.linalg.inv(cov_k)) * diff, axis=-1)
        log_norm = self.p * np.log(2 * np.pi) + np.log(np.linalg.det(cov_k))
        return -(mahalanobis + log_norm) / 2

    def gaussian_function(self, x_j, mu_k, cov_k):
        """Return the Gaussian density of sample ``x_j`` for mean ``mu_k`` and covariance ``cov_k``."""
        return np.exp(self._log_density(x_j, mu_k, cov_k))

    def E_step(self, x):
        """Update the responsibilities ``params["pji"]`` for the samples ``x``.

        The weighted densities are normalised in log space (subtracting the
        per-sample maximum first) so that samples far from every component do
        not underflow to 0 / 0.
        """
        x = np.asarray(x, dtype=float)
        pi = self.params["pi"]
        mu = self.params["mu"]
        cov = self.params["cov"]
        log_weighted = np.empty((self.n, self.k))
        for i in range(self.k):
            log_weighted[:, i] = np.log(pi[i]) + self._log_density(x, mu[i], cov[i])
        log_weighted -= log_weighted.max(axis=1, keepdims=True)
        weighted = np.exp(log_weighted)
        self.params["pji"][:] = weighted / weighted.sum(axis=1, keepdims=True)

    def M_step(self, x):
        """Re-estimate means, covariances and mixing coefficients from ``params["pji"]``.

        Prints the first coordinate of every mean, the first covariance entry
        of every component and the mixing coefficients.
        """
        x = np.asarray(x, dtype=float)
        pji = self.params["pji"]
        for i in range(self.k):
            resp = pji[:, i]  # responsibilities of component i, (n,)
            total = resp.sum()
            mean = resp @ x / total
            diff = x - mean  # deviations from the updated mean, (n, p)
            self.params["mu"][i] = mean
            # Responsibility-weighted sum of outer products diff_j diff_j^T.
            self.params["cov"][i] = (resp[:, None] * diff).T @ diff / total
            self.params["pi"][i] = total / self.n
        print("均值为:", self.params["mu"].T[0], end=" ")
        print("方差为:", self.params["cov"].T[0][0], end=" ")
        print("混合系数为:", self.params["pi"])

    def fit(self, x, mu, max_iter=10):
        """Run ``max_iter`` EM iterations on ``x`` starting from the means ``mu``.

        Args:
            x: samples, array-like of shape (n, p).
            mu: initial means, array-like with k * p entries.
            max_iter: number of EM iterations to run.

        Returns:
            Array of shape (n,) holding, for each sample, the index of the
            component with the largest responsibility.
        """
        x = np.array(x, dtype=float)
        self.n, self.p = x.shape
        self.init_params(mu)

        for i in range(max_iter):
            print("第{}次迭代".format(i))
            self.E_step(x)
            self.M_step(x)
        return np.argmax(np.array(self.params["pji"]), axis=1)


# Demo: cluster eleven one-dimensional samples into two Gaussian components.
X = np.array([1.0, 1.3, 2.2, 2.6, 2.8, 5.0, 7.3, 7.4, 7.5, 7.7, 7.9]).reshape(-1, 1)
# Starting means, one row per component.
mu = np.array([6, 7.5]).reshape(-1, 1)
my_model = my_GMM(k=2)
result = my_model.fit(X, mu=mu, max_iter=8)
print(result)
59 changes: 59 additions & 0 deletions 代码/gaussian_bayes_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import time


def mean_value(X):
    """Return the mean of the rows of the 2-D array ``X``.

    Rows are added one after another starting from 0 and the total is
    divided by ``len(X)``.
    """
    row_total = sum((X[idx, :] for idx in range(len(X))), 0)
    return row_total / len(X)


def covariance(X, X_mean):
    """Return the covariance matrix of the rows of ``X`` around ``X_mean``.

    The sum of the outer products of the row deviations is divided by the
    number of rows n (not n - 1).
    """
    n_samples, n_features = X.shape
    scatter = np.zeros((n_features, n_features))
    for row in X:
        deviation = row - X_mean
        scatter += np.outer(deviation, deviation)
    return scatter / n_samples


def gaussian_probability(X, x):
    """Return the multivariate Gaussian density at ``x`` fitted to the rows of ``X``.

    The mean comes from ``mean_value`` and the covariance from
    ``covariance``; the density is the product of the (2*pi)^(-p/2) factor,
    the det^(-1/2) factor and the exponential of the quadratic form.
    """
    _, dim = X.shape
    center = mean_value(X)
    sigma = covariance(X, center)
    sigma_det = np.linalg.det(sigma)
    sigma_inv = np.linalg.inv(sigma)
    offset = x - center
    pi_factor = 1 / ((2 * np.pi) ** (dim / 2))
    det_factor = 1 / (sigma_det ** (1 / 2))
    kernel = np.exp((-1 / 2) * offset @ sigma_inv @ offset.T)
    return pi_factor * det_factor * kernel


def decision():
    """Classify the module-level sample ``x`` and print the verdict.

    Each class score is the class prior times the class-conditional Gaussian
    density of ``x``: ``p_good`` with the density fitted to ``X_good`` and
    ``p_bad`` with the density fitted to ``X_bad``. Ties go to the good class.
    """
    X_good_gaussian = gaussian_probability(X_good, x)
    X_bad_gaussian = gaussian_probability(X_bad, x)
    good = p_good * X_good_gaussian
    # The bad-class score must be weighted by the bad-class prior.
    bad = p_bad * X_bad_gaussian
    if good >= bad:
        print("密度为{}, 含糖量为{}的瓜,高斯贝叶斯预测为好瓜".format(x[0], x[1]))
    else:
        print("密度为{}, 含糖量为{}的瓜,高斯贝叶斯预测为坏瓜".format(x[0], x[1]))

start = time.time()
# Each row is (density, sugar content, label); label 1 marks a good melon, 0 a bad one.
data = np.array([
    [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
    [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
    [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
    [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
    [0.719, 0.103, 0],
])
labels = data[:, 2]
X_good = data[labels == 1, 0:2]
X_bad = data[labels == 0, 0:2]
# Class priors with Laplace correction.
prior_denominator = len(data) + 2
p_good = (len(X_good) + 1) / prior_denominator
p_bad = (len(X_bad) + 1) / prior_denominator
# Sample to classify: (density, sugar content).
x = np.array([0.5, 0.3])

decision()
end = time.time()
print("高斯贝叶斯运行时间的一千倍为:", (end - start) * 1e3)
57 changes: 57 additions & 0 deletions 代码/gaussian_naive_bayes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import numpy as np
import time


def variance(x, mean_value):
    """Return the square root of the mean squared deviation of ``x`` from ``mean_value``.

    Despite the name, the returned value is the standard deviation: the
    squared deviations are summed, divided by ``len(x)`` and square-rooted.
    """
    squared_total = sum((item - mean_value) ** 2 for item in x)
    return np.sqrt(squared_total / len(x))


def conditional_probability():
    """Return the per-feature Gaussian likelihoods of the module-level sample ``x``.

    ``x[0]`` is evaluated against the density statistics and ``x[1]`` against
    the sugar statistics. The ``var_*`` globals hold standard deviations
    (``variance`` returns a square root), so each likelihood is
    exp(-(v - mean)^2 / (2 * std^2)) / (sqrt(2*pi) * std).

    Returns:
        Tuple ``(p_density_good, p_density_bad, p_sugar_good, p_sugar_bad)``.
    """
    c = np.sqrt(2 * np.pi)

    def gaussian_pdf(value, mean, std):
        # One-dimensional normal density; note the factor 2 in the exponent.
        return (1 / (c * std)) * np.exp(-(value - mean) ** 2 / (2 * std ** 2))

    density, sugar = x[0], x[1]
    p_density_good = gaussian_pdf(density, mean_density_good, var_density_good)
    p_density_bad = gaussian_pdf(density, mean_density_bad, var_density_bad)
    # Sugar likelihoods use the sugar feature x[1], not the density x[0].
    p_sugar_good = gaussian_pdf(sugar, mean_sugar_good, var_sugar_good)
    p_sugar_bad = gaussian_pdf(sugar, mean_sugar_bad, var_sugar_bad)
    return p_density_good, p_density_bad, p_sugar_good, p_sugar_bad


def decision():
    """Classify the module-level sample ``x`` with naive Bayes and print the verdict.

    Each class score multiplies the density likelihood, the sugar likelihood
    and the class prior; ties go to the good class.
    """
    likelihoods = conditional_probability()
    p_density_good, p_density_bad, p_sugar_good, p_sugar_bad = likelihoods
    good = p_density_good * p_sugar_good * p_good
    bad = p_density_bad * p_sugar_bad * p_bad
    template = (
        "密度为{}, 含糖量为{}的瓜,朴素高斯贝叶斯预测为好瓜"
        if good >= bad
        else "密度为{}, 含糖量为{}的瓜,朴素高斯贝叶斯预测为坏瓜"
    )
    print(template.format(x[0], x[1]))


start = time.time()
# Each row is (density, sugar content, label); label 1 marks a good melon, 0 a bad one.
data = np.array([[0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
                 [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
                 [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
                 [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
                 [0.719, 0.103, 0]])
X_good = np.array([row[0:2] for row in data if row[2] == 1])
X_bad = np.array([row[0:2] for row in data if row[2] == 0])
p_good = (len(X_good) + 1) / (len(data) + 2)  # Laplace correction
p_bad = (len(X_bad) + 1) / (len(data) + 2)  # Laplace correction
# Sample to classify: (density, sugar content).
x = np.array([0.5, 0.3])
# Per-class, per-feature means.
mean_density_good = np.mean(X_good[:, 0])
mean_density_bad = np.mean(X_bad[:, 0])
mean_sugar_good = np.mean(X_good[:, 1])
mean_sugar_bad = np.mean(X_bad[:, 1])
# Per-class, per-feature spreads as returned by variance().
var_density_good = variance(X_good[:, 0], mean_density_good)
var_density_bad = variance(X_bad[:, 0], mean_density_bad)
var_sugar_good = variance(X_good[:, 1], mean_sugar_good)
var_sugar_bad = variance(X_bad[:, 1], mean_sugar_bad)
decision()
# Stop the clock after classification so the reported time covers the whole run.
end = time.time()
print("朴素高斯贝叶斯运行时间的一千倍为:", (end - start) * 1e3)
9 changes: 9 additions & 0 deletions 代码/markov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np

# Transition matrix (each row sums to 1).
A = np.array([[0.8, 0.2], [0.5, 0.5]])

# Start from row 1 of the matrix and apply the transition matrix three times.
res = A[1]
for _ in range(3):
    res = res @ A

print("射中的概率为%.4f, 射不中的概率为%.4f" % (res[0], res[1]))

0 comments on commit 6aef19f

Please sign in to comment.