-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbroker_segmentation.r
61 lines (43 loc) · 1.8 KB
/
broker_segmentation.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Apply a clustering method to at least three years of information ending with
# 2018 to cluster brokers into five segments. Describe the characteristic
# properties of each segment. Provide a visualization of your broker
# segmentation using principal component analysis and describe the clusters in
# terms of the components.
library(dplyr)
# Load the broker data ----
# The CSV is read by a relative path, so the working directory is moved to the
# folder holding this script. The folder is resolved without assuming an
# interactive RStudio session: ask RStudio when it is running, otherwise fall
# back to the --file= argument supplied by Rscript. If neither yields an
# existing directory, the working directory is left unchanged.
local({
  script_dir <- NULL
  in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  if (in_rstudio) {
    doc_path <- rstudioapi::getActiveDocumentContext()$path
    # An unsaved document reports an empty path; setwd("") would fail.
    if (nzchar(doc_path)) {
      script_dir <- dirname(doc_path)
    }
  } else {
    file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
    if (length(file_arg) > 0) {
      script_dir <- dirname(sub("^--file=", "", file_arg[1]))
    }
  }
  if (!is.null(script_dir) && dir.exists(script_dir)) {
    setwd(script_dir)
  }
})

# The first column supplies the row names. The first three columns are read as
# character and the remaining 29 as numeric.
broker_data <- read.table(
  "alchemy_broker_data.csv",
  header = TRUE,
  colClasses = c(rep("character", 3), rep("numeric", 29)),
  sep = ",",
  row.names = 1
)
# Feature selection ----
# Keep the 2016-2018 columns of the five metrics used for segmentation, one
# metric per line; every other column is dropped.
broker_data <- dplyr::select(
  broker_data,
  Submissions_2016, Submissions_2017, Submissions_2018,
  QuoteCount_2016, QuoteCount_2017, QuoteCount_2018,
  PolicyCount_2016, PolicyCount_2017, PolicyCount_2018,
  GWP_2016, GWP_2017, GWP_2018,
  AvgTIV_2016, AvgTIV_2017, AvgTIV_2018
)
# Missing values ----
# Discard any row that has more than 10 missing entries among the selected
# columns.
broker_data <- broker_data[!(rowSums(is.na(broker_data)) > 10), ]
summary(broker_data)
# Every missing entry that remains is replaced with 0.
broker_data <- replace(broker_data, is.na(broker_data), 0)
# Remove outliers ----
library(outliers)
# For each feature, outlier() returns the value lying farthest from the column
# mean; collect the row positions holding that value. lapply() always yields a
# list, whereas apply() on a data frame returns a vector, a matrix or a list
# depending on how many rows tie in each column.
outliers <- lapply(broker_data, function(x) {
  # In a constant column every row equals the "most extreme" value, so matching
  # on it would flag the entire data set. Such a column has no outlier.
  if (length(unique(x)) < 2L) {
    return(integer(0))
  }
  which(x == outlier(x))
})
outlier_rows <- unique(unlist(outliers, use.names = FALSE))
# Filter with a logical mask: negative indexing with an empty index vector
# would select zero rows instead of keeping all of them.
is_outlier_row <- seq_len(nrow(broker_data)) %in% outlier_rows
broker_data <- broker_data[!is_outlier_row, , drop = FALSE]
# Scaling ----
# Centre each feature to mean 0 and scale it to standard deviation 1 so that
# no feature dominates the distance computation purely because of its units.
broker_data_scaled <- scale(broker_data)

# Clustering ----
# k-means starts from randomly chosen centres. Fix the RNG seed so the segments
# are reproducible between runs, and let kmeans() keep the best of several
# random starts instead of relying on a single one.
set.seed(2018)
broker_kmeans <- kmeans(broker_data_scaled, centers = 5, nstart = 25)
# summary() on a kmeans object only lists the lengths of its components.
# Print the segment sizes and the segment centres (in standardized units),
# which are what characterise each segment.
print(broker_kmeans$size)
print(broker_kmeans$centers)
# PCA ----
# Principal components of the standardized features; prcomp() returns the
# rotated scores in $x by default.
broker_data_pca <- prcomp(broker_data_scaled)
# Loadings of every input feature on the first two components.
broker_data_pca$rotation[, c("PC1", "PC2")]
# Brokers in the PC1/PC2 plane; plotting colour and symbol both encode the
# k-means segment.
plot(
  broker_data_pca$x[, c("PC1", "PC2")],
  pch = broker_kmeans$cluster,
  col = broker_kmeans$cluster
)