-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGMMs.py
105 lines (98 loc) · 4.17 KB
/
GMMs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import itertools
import matplotlib as mpl
from sklearn import mixture
# Calculate the best number-of-Gaussians GMM for each class using BIC.
def calcaulteGMMForEachClass(X, start=1, end=7, classIndex=0) -> mixture.GaussianMixture:
    """Select the best-fitting Gaussian mixture for one class via BIC.

    Fits a ``GaussianMixture`` with EM for every combination of component
    count in ``range(start, end)`` (note: ``end`` is exclusive) and
    covariance type ('spherical', 'tied', 'diag', 'full'), and returns the
    fitted model with the lowest Bayesian Information Criterion.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training samples belonging to this class.
    start : int
        Smallest number of components to try (inclusive).
    end : int
        Largest number of components to try, exclusive.
    classIndex : int
        Class label, used only in the progress print-out.

    Returns
    -------
    mixture.GaussianMixture
        The fitted model with the lowest BIC over all tried configurations.
        (``None`` only if the search space is empty, e.g. ``start >= end``.)
    """
    print("The class %d is started training..." % classIndex)
    # np.infty was removed in NumPy 2.0; np.inf is the portable spelling.
    lowest_bic = np.inf
    best_gmm = None
    best_component = 0
    best_cv_type = ""
    for cv_type in ('spherical', 'tied', 'diag', 'full'):
        for n_components in range(start, end):
            # Fit a Gaussian mixture with EM, then score it with BIC;
            # lower BIC means a better fit/complexity trade-off.
            gmm = mixture.GaussianMixture(n_components=n_components,
                                          covariance_type=cv_type)
            gmm.fit(X)
            bic = gmm.bic(X)
            if bic < lowest_bic:
                lowest_bic = bic
                best_gmm = gmm
                best_component = n_components
                best_cv_type = cv_type
    print("best number of components for this class is : ", best_component)
    print("best covariance type is for this class is : ", best_cv_type)
    return best_gmm
def make_ellipses(gmm, ax):
    """Draw one covariance ellipse per mixture component on *ax*.

    For each of the first three components (one per entry in ``colors``),
    projects the component's covariance onto the first two feature
    dimensions, computes the principal axes via an eigendecomposition, and
    adds a 2-sigma ellipse centered at the component mean to the axes.

    Parameters
    ----------
    gmm : fitted mixture model
        Must expose ``covariance_type``, ``covariances_`` and ``means_``
        (sklearn ``GaussianMixture`` layout is assumed).
    ax : matplotlib axes
        Target axes; ellipses are clipped to ``ax.bbox``.

    Raises
    ------
    ValueError
        If ``gmm.covariance_type`` is not one of the four sklearn types.
    """
    # NOTE(review): only the first len(colors)=3 components are drawn;
    # extend the color list if the model has more components.
    colors = ['navy', 'turquoise', 'darkorange']
    for n, color in enumerate(colors):
        # Reduce the component covariance to a dense 2-D matrix covering
        # the first two feature dimensions, per covariance storage layout.
        if gmm.covariance_type == 'full':
            covariances = gmm.covariances_[n][:2, :2]
        elif gmm.covariance_type == 'tied':
            # 'tied': a single covariance matrix shared by all components.
            covariances = gmm.covariances_[:2, :2]
        elif gmm.covariance_type == 'diag':
            covariances = np.diag(gmm.covariances_[n][:2])
        elif gmm.covariance_type == 'spherical':
            covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]
        else:
            # Previously an unknown type fell through and raised a
            # confusing NameError on the eigh() call below.
            raise ValueError("unknown covariance_type: %r" % gmm.covariance_type)
        # Eigenvectors give the ellipse orientation, eigenvalues its size.
        v, w = np.linalg.eigh(covariances)
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        v = 2. * np.sqrt(2.) * np.sqrt(v)  # 2-sigma axis lengths
        # Pass angle by keyword: the positional form was deprecated in
        # Matplotlib 3.6 and removed in 3.8.
        ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                                  angle=180 + angle, color=color)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)