forked from manya706/carebot-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_disease.py
92 lines (69 loc) · 2.46 KB
/
predict_disease.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
data = pd.read_csv("Datasets/Training.csv")
features = list(data.columns)
x = data.iloc[:,:-1]
y = data.iloc[:, -1]
data_test = pd.read_csv("Datasets/Testing.csv")
x_test = data_test.drop(columns=["prognosis"])
y_test = data_test[['prognosis']]
label = LabelEncoder()
label.fit(y)
y = label.transform(y)
label.fit(y_test)
y_test = label.transform(y_test)
clf = LogisticRegression(penalty = 'l2', C=0.1, max_iter=10000)
clf.fit(x,y)
y_pred_clf = clf.predict(x_test)
acc_clf = accuracy_score(y_test,y_pred_clf)
dt = DecisionTreeClassifier()
dt.fit(x,y)
y_predict_dt = dt.predict(x_test)
acc_dt = accuracy_score(y_test,y_predict_dt)
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(dt, x, y, cv=kfold)
def random_forest(x,y,x_test,y_test):
dt = RandomForestClassifier(n_estimators=2, random_state=42,max_depth=20,max_features=10)
dt.fit(x,y)
y_pred = dt.predict(x_test)
accuracy = accuracy_score(y_test,y_pred)
return accuracy*100
# print("Accuracy score by decision tree: ", accuracy_score)
rf = RandomForestClassifier()
a=random_forest(x , y, x_test, y_test)
def xgb_model():
model = XGBClassifier()
return model
xgb = xgb_model()
xgb.fit(x,y)
y_pred_xgb = xgb.predict(x_test)
accuracy = accuracy_score(y_test, y_pred_xgb)
symptoms = x.columns.values
symptom_index = {}
for index, value in enumerate(symptoms):
symptom = " ".join([i.capitalize() for i in value.split("_")])
symptom_index[symptom] = index
data_dict = {
"symptom_index":symptom_index,
"predictions_classes":label.classes_
}
def predictDisease(symptoms):
symptoms = symptoms.split(",")
input_data = [0] * len(data_dict["symptom_index"])
for symptom in symptoms:
index = data_dict["symptom_index"][symptom]
input_data[index] = 1
input_data = np.array(input_data).reshape(1,-1)
xgboost_prediction = data_dict["predictions_classes"][xgb.predict(input_data)[0]]
predictions = xgboost_prediction
return predictions
print(predictDisease("Itching"))