-
Notifications
You must be signed in to change notification settings - Fork 3
/
Model4.r
67 lines (47 loc) · 1.85 KB
/
Model4.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Load the breast-cancer data set and take a first look at it.
# NOTE(review): this absolute, machine-specific working directory makes the
# script non-portable; later relative paths (input and output CSV) depend on
# it, so it is kept here — consider launching the script from the data
# directory instead.
setwd("/media/shilpi/New Volume/ML/r/breast_cancer")
data <- read.csv("breastcancer.csv")
data
str(data)
# names() lists the column names (R has no `data.names()` function).
names(data)
summary(data)
# PRE PROCESSING
# Coerce the nine feature columns to numeric and replace missing cells with
# the mean of their own column; Class becomes a factor.
feature_cols <- c(
  "Clump_Thickness",
  "Uniformity_CellSize",
  "Uniformity_CellShape",
  "Marginal_Adhesion",
  "Single_Epithelial_cellSize",
  "BareNuclei",
  "Bland_Chromatin",
  "Normal_Nucleoli",
  "Mitoses"
)

# Convert one column to numeric and mean-impute its missing values.
to_numeric_imputed <- function(x) {
  # Go through as.character() so a factor column is converted by its labels,
  # not by its internal integer codes. Non-numeric entries become NA (with a
  # coercion warning) and are imputed below.
  x <- as.numeric(as.character(x))
  # Index the column vector itself: a single logical index applied to the
  # whole data frame would select columns, not the rows holding NA.
  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
}

data[feature_cols] <- lapply(data[feature_cols], to_numeric_imputed)
data$Class <- as.factor(data$Class)
summary(data)
# Split the rows by class label, fuzzy-cluster one group into k sub-classes,
# relabel both groups, and save the combined table.
set.seed(100)
# NOTE(review): in the usual UCI Wisconsin coding 2 = benign and 4 = malignant,
# which is the opposite of these variable names — confirm how this CSV encodes
# Class before interpreting the clusters.
malignantdata <- subset(data, Class == 2)
benigndata <- subset(data, Class == 4)
str(malignantdata)
# Number of fuzzy clusters; used for the FKM call and for the label below.
k <- 2
library("fclust")
# Fuzzy k-means on columns 3:11 (presumably the nine feature columns — verify
# against the CSV layout); m is the fuzziness parameter.
KMC <- FKM(malignantdata[, 3:11], k = k, m = 2)
# Column 1 of `clus` holds the cluster index each row is assigned to.
KMC$clus[, 1]
malignantdata$Class <- KMC$clus[, 1]
# The other group gets the next free label (3 when k is 2) so it cannot
# collide with the cluster indices 1..k.
benigndata$Class <- k + 1
data <- rbind(malignantdata, benigndata)
write.csv(data, file = "BreastCancer3.csv", row.names = TRUE)
data
str(data)
# Class is numeric after relabelling; turn it back into a factor.
data$Class <- as.factor(data$Class)
str(data)
# Fit a C5.0 decision tree that predicts Class from columns 3:11 and print
# the fitted model's summary.
library(C50)
library(caTools)
treeModel <- C5.0(
  x = data[, 3:11],
  y = data$Class
)
summary(treeModel)