-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathmain.py
109 lines (86 loc) · 2.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# ---- Page header ----
st.title('Streamlit Example')
st.write("""
# Explore different classifier and datasets
Which one is the best?
""")

# ---- Sidebar selectors ----
# The option tuples are named so the available choices are visible in one place.
DATASET_CHOICES = ('Iris', 'Breast Cancer', 'Wine')
CLASSIFIER_CHOICES = ('KNN', 'SVM', 'Random Forest')

dataset_name = st.sidebar.selectbox('Select Dataset', DATASET_CHOICES)
# Echo the chosen dataset as a sub-heading on the main page.
st.write(f"## {dataset_name} Dataset")

classifier_name = st.sidebar.selectbox('Select classifier', CLASSIFIER_CHOICES)
def get_dataset(name):
    """Load the scikit-learn bundled dataset selected by *name*.

    ``'Iris'`` and ``'Wine'`` select their respective loaders; any other
    value falls back to the breast-cancer dataset.

    Returns:
        A ``(X, y)`` tuple taken from the loaded object's ``data`` and
        ``target`` attributes.
    """
    # Pick the loader first, then call it exactly once.
    if name == 'Iris':
        loader = datasets.load_iris
    elif name == 'Wine':
        loader = datasets.load_wine
    else:
        loader = datasets.load_breast_cancer

    bunch = loader()
    return bunch.data, bunch.target
# Load the selected dataset and report its basic statistics on the page.
X, y = get_dataset(dataset_name)

dataset_shape = X.shape
class_count = len(np.unique(y))

st.write('Shape of dataset:', dataset_shape)
st.write('number of classes:', class_count)
def add_parameter_ui(clf_name):
    """Render the sidebar sliders for *clf_name* and return their values.

    Args:
        clf_name: ``'SVM'``, ``'KNN'``, or any other value (which gets the
            ``max_depth`` / ``n_estimators`` sliders).

    Returns:
        A dict keyed by parameter name: ``{'C': ...}`` for SVM,
        ``{'K': ...}`` for KNN, otherwise
        ``{'max_depth': ..., 'n_estimators': ...}``.
    """
    sidebar = st.sidebar

    if clf_name == 'SVM':
        return {'C': sidebar.slider('C', 0.01, 10.0)}

    if clf_name == 'KNN':
        return {'K': sidebar.slider('K', 1, 15)}

    # Dict literals evaluate in order, so the sliders are created in the
    # same sequence: max_depth first, then n_estimators.
    return {
        'max_depth': sidebar.slider('max_depth', 2, 15),
        'n_estimators': sidebar.slider('n_estimators', 1, 100),
    }
# Gather the slider values that apply to the classifier picked in the sidebar.
params = add_parameter_ui(clf_name=classifier_name)
def get_classifier(clf_name, params):
    """Build an unfitted scikit-learn classifier for *clf_name*.

    Args:
        clf_name: ``'SVM'``, ``'KNN'``, or any other value (treated as
            Random Forest).
        params: Mapping of hyper-parameters. Reads ``'C'`` for SVM,
            ``'K'`` for KNN, and ``'n_estimators'`` plus ``'max_depth'``
            for Random Forest.

    Returns:
        The configured estimator instance (not yet fitted).

    Raises:
        KeyError: If *params* lacks a key required by the chosen classifier.
    """
    if clf_name == 'SVM':
        return SVC(C=params['C'])

    if clf_name == 'KNN':
        return KNeighborsClassifier(n_neighbors=params['K'])

    # Fixed seed so repeated runs with the same settings build the same forest.
    return RandomForestClassifier(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
        random_state=1234,
    )
clf = get_classifier(classifier_name, params)

#### CLASSIFICATION ####
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# train/test split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

st.write(f'Classifier = {classifier_name}')
# No interpolation is needed here, so this is a plain string literal.
st.write('Accuracy =', acc)
#### PLOT DATASET ####
# Project the data onto the first 2 principal components so the whole
# dataset can be shown in a single 2-D scatter plot.
pca = PCA(2)
X_projected = pca.fit_transform(X)
x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

# Draw on an explicit Figure/Axes pair instead of pyplot's implicit
# "current figure" state, so every call targets this figure only.
fig, ax = plt.subplots()
points = ax.scatter(x1, x2, c=y, alpha=0.8, cmap='viridis')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(points, ax=ax)

st.pyplot(fig)
# Streamlit re-executes this script on every widget interaction; close the
# figure once it has been rendered so pyplot does not keep one per rerun.
plt.close(fig)