# fit_anoms.py (forked from microsoft/MLOps_VideoAnomalyDetection)
import os

from settings import *

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns
# check/create path for saving output
dataset = 'UCSDped1'
save_path = os.path.join('./results', dataset)

create_video_of_plot = False

# path for saving movie frames
movie_save_dir = os.path.join(save_path, 'movie_frames')
if not os.path.exists(movie_save_dir):
    os.makedirs(movie_save_dir)
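# per-frame features and labels produced by the earlier pipeline steps; the columns
# referenced below (model_std, model_mse, model_p_*, prev_*, anom) are assumed to be
# present in this dataframe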
df = pd.read_pickle(os.path.join(save_path, 'df.pkl.gz'))
# create feature set
# df.model_std = df.model_std.rolling(10).mean()
# df = df.fillna(0)
X = df.loc[:,['model_std']] #'model_mse', 'model_p_50', 'model_p_75', 'model_p_90', 'model_p_95', 'model_p_99', 'model_std']]
# alternatively, for comparison, you could also use the trivial solution of using the previous frame for predicting the current frame
# X = df.loc[:,['prev_mse', 'prev_p_50', 'prev_p_75', 'prev_p_90', 'prev_p_95', 'prev_p_99', 'prev_std']]
y = df.anom.values.astype('bool')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
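# Hedged suggestion (not in the original script): anomaly labels are typically highly
# imbalanced, so a stratified split keeps the anomaly ratio consistent across splits:
# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123, stratify=y)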
clf = LogisticRegression() #C=1e-1, solver='saga', multi_class='auto', fit_intercept=True, tol=1e-10)
#clf = RandomForestClassifier(n_estimators=50)
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
pipeline = Pipeline([('scaler',scaler), ('classifier', clf)])
pipeline.fit(X_train, y_train)
print("train accuracy: ", pipeline.score(X_train, y_train))
if create_video_of_plot:
    n_frames = 200
    y_prob = pipeline.predict_proba(X)
    for frame in range(n_frames):
        sns.set_context("talk")
        sns.set_style("white")
        fig = plt.figure(figsize=(20, 3))
        plt.plot(y[:n_frames], label="Data", linewidth=5)
        plt.plot(y_prob[:frame, 1], label="Prediction", linewidth=5)
        plt.legend(loc=2, frameon=False, fontsize=24)
        plt.ylabel("Probability", fontsize=24)
        plt.tick_params(
            axis='x',           # changes apply to the x-axis
            which='both',       # both major and minor ticks are affected
            bottom=False,       # ticks along the bottom edge are off
            top=False,          # ticks along the top edge are off
            labelbottom=False)  # labels along the bottom edge are off
        sns.despine()
        plt.savefig(os.path.join(movie_save_dir, 'plot_%03d.png' % (frame)))
        plt.close()
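# The saved frames can then be stitched into a video outside of Python, e.g. with
# ffmpeg (hedged example; framerate and output settings are an assumption):
#   ffmpeg -framerate 10 -i plot_%03d.png -pix_fmt yuv420p anomaly_plot.mp4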
y_pred = pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("test accuracy: ", acc)
roc = roc_auc_score(y_test, y_pred)
print("test ROC: ", roc)
precision = precision_score(y_test, y_pred)
print("test precision: ", precision)
recall = recall_score(y_test, y_pred)
print("test recall: ", recall)
cm = confusion_matrix(y_test, y_pred)
print("comnfusion Matrix:\n", cm)
# Feature importances only exist for tree-ensemble classifiers (e.g. the commented-out
# RandomForestClassifier above); guard this block so the script also runs with
# LogisticRegression.
fitted_forest = pipeline.steps[1][1]
if hasattr(fitted_forest, 'feature_importances_'):
    importances = fitted_forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in fitted_forest.estimators_],
                 axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    ranked_features = []
    for f in range(X_train.shape[1]):
        feature = indices[f]
        feature_name = X_train.columns[feature]
        ranked_features.append(feature_name)
        print("%d. feature %s (%d, %f)" % (f + 1, feature_name, indices[f], importances[indices[f]]))
    print(X_train.columns)

    # Plot the feature importances of the forest
    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(X_train.shape[1]), importances[indices],
            color="grey", yerr=std[indices], align="center")
    plt.xticks(range(X_train.shape[1]), labels=ranked_features)
    plt.xlim([-1, X_train.shape[1]])
    plt.show()
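# Hedged alternative (not in the original) for linear models such as the LogisticRegression
# actually used above: coefficients on the min-max-scaled features play a similar role.
# if hasattr(fitted_forest, 'coef_'):
#     for name, coef in zip(X_train.columns, fitted_forest.coef_[0]):
#         print("%s: %.4f" % (name, coef))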
print("Done")
# print(clf.n_iter_)
# print(clf.coef_)
# print(clf.intercept_)
# plt.scatter(df.mse_prev, df.mse_model)
# plt.show()