Skip to content

Commit

Permalink
change UB Exam Dataset to Local Exam Dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
luongthanhanhduc committed May 18, 2017
1 parent 4f0342f commit a1a40a7
Show file tree
Hide file tree
Showing 15 changed files with 79 additions and 79 deletions.
30 changes: 15 additions & 15 deletions data/modules.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,45 @@ Regularization,Dataset,Metric,Silhouette,BetaCV,DunnIndex
No Regularization,IIT Bombay Dataset,Aouiche,0.1235358,0.5766875,0.1428571
No Regularization,IIT Bombay Dataset,Aligon,0.3141606,0.5440821,0.1
No Regularization,IIT Bombay Dataset,Makiyama,0.13533,0.664748,0.0741799
No Regularization,UB Exam Dataset,Aouiche,0.07935877,0.9231208,0.2
No Regularization,UB Exam Dataset,Aligon,0.1304497,0.8644934,0.5
No Regularization,UB Exam Dataset,Makiyama,0.09405354,0.8968658,0.2301996
No Regularization,Local Exam Dataset,Aouiche,0.07935877,0.9231208,0.2
No Regularization,Local Exam Dataset,Aligon,0.1304497,0.8644934,0.5
No Regularization,Local Exam Dataset,Makiyama,0.09405354,0.8968658,0.2301996
No Regularization,PhoneLab-Google+,Aouiche,0.03618038,0.8934701,0.5
No Regularization,PhoneLab-Google+,Aligon,0.06388966,0.9024188,0.5
No Regularization,PhoneLab-Google+,Makiyama,0.1167357,0.8795412,0.2462216
Naming,IIT Bombay Dataset,Aouiche,0.1669147,0.4963258,0.1666667
Naming,IIT Bombay Dataset,Aligon,0.3697328,0.4765228,0.1
Naming,IIT Bombay Dataset,Makiyama,0.1712637,0.6056812,0.0513167
Naming,UB Exam Dataset,Aouiche,0.1726122,0.8177304,0.1666667
Naming,UB Exam Dataset,Aligon,0.2543268,0.733504,0.3333333
Naming,UB Exam Dataset,Makiyama,0.2684318,0.6965586,0.08146848
Naming,Local Exam Dataset,Aouiche,0.1726122,0.8177304,0.1666667
Naming,Local Exam Dataset,Aligon,0.2543268,0.733504,0.3333333
Naming,Local Exam Dataset,Makiyama,0.2684318,0.6965586,0.08146848
Naming,PhoneLab-Google+,Aouiche,0.03535551,0.8936538,0.5
Naming,PhoneLab-Google+,Aligon,0.06260814,0.9036594,0.5
Naming,PhoneLab-Google+,Makiyama,0.1127893,0.8855078,0.2462216
Expression Standardization,IIT Bombay Dataset,Aouiche,0.1224014,0.6047874,0.1428571
Expression Standardization,IIT Bombay Dataset,Aligon,0.2993084,0.5636266,0.1
Expression Standardization,IIT Bombay Dataset,Makiyama,0.1363646,0.6599014,0.0741799
Expression Standardization,UB Exam Dataset,Aouiche,0.08419705,0.9225258,0.2
Expression Standardization,UB Exam Dataset,Aligon,0.08806635,0.9113887,0.3888889
Expression Standardization,UB Exam Dataset,Makiyama,0.07635507,0.9195338,0.2600599
Expression Standardization,Local Exam Dataset,Aouiche,0.08419705,0.9225258,0.2
Expression Standardization,Local Exam Dataset,Aligon,0.08806635,0.9113887,0.3888889
Expression Standardization,Local Exam Dataset,Makiyama,0.07635507,0.9195338,0.2600599
Expression Standardization,PhoneLab-Google+,Aouiche,0.1188399,0.8847059,0.8
Expression Standardization,PhoneLab-Google+,Aligon,0.09856893,0.8975172,0.5
Expression Standardization,PhoneLab-Google+,Makiyama,0.1184555,0.8789764,0.3333333
FROM-nested Subquery,IIT Bombay Dataset,Aouiche,0.1092233,0.585477,0.1428571
FROM-nested Subquery,IIT Bombay Dataset,Aligon,0.3040806,0.5455569,0.1
FROM-nested Subquery,IIT Bombay Dataset,Makiyama,0.1645668,0.6473655,0.0741799
FROM-nested Subquery,UB Exam Dataset,Aouiche,0.07851794,0.9240567,0.2
FROM-nested Subquery,UB Exam Dataset,Aligon,0.1323939,0.8655788,0.5
FROM-nested Subquery,UB Exam Dataset,Makiyama,0.1216638,0.8640163,0.1835034
FROM-nested Subquery,Local Exam Dataset,Aouiche,0.07851794,0.9240567,0.2
FROM-nested Subquery,Local Exam Dataset,Aligon,0.1323939,0.8655788,0.5
FROM-nested Subquery,Local Exam Dataset,Makiyama,0.1216638,0.8640163,0.1835034
FROM-nested Subquery,PhoneLab-Google+,Aouiche,0.03618038,0.8934701,0.5
FROM-nested Subquery,PhoneLab-Google+,Aligon,0.06444628,0.9018627,0.5
FROM-nested Subquery,PhoneLab-Google+,Makiyama,0.1155413,0.8813375,0.3333333
UNION Pull-out,IIT Bombay Dataset,Aouiche,0.1255522,0.5773791,0.1428571
UNION Pull-out,IIT Bombay Dataset,Aligon,0.3170179,0.5432966,0.1
UNION Pull-out,IIT Bombay Dataset,Makiyama,0.139807,0.6644235,0.0741799
UNION Pull-out,UB Exam Dataset,Aouiche,0.07935877,0.9231208,0.2
UNION Pull-out,UB Exam Dataset,Aligon,0.1285675,0.8660956,0.5
UNION Pull-out,UB Exam Dataset,Makiyama,0.09405354,0.8968658,0.2301996
UNION Pull-out,Local Exam Dataset,Aouiche,0.07935877,0.9231208,0.2
UNION Pull-out,Local Exam Dataset,Aligon,0.1285675,0.8660956,0.5
UNION Pull-out,Local Exam Dataset,Makiyama,0.09405354,0.8968658,0.2301996
UNION Pull-out,PhoneLab-Google+,Aouiche,0.03618038,0.8934701,0.5
UNION Pull-out,PhoneLab-Google+,Aligon,0.06372295,0.9022875,0.5
UNION Pull-out,PhoneLab-Google+,Makiyama,0.1155413,0.8813375,0.3333333
12 changes: 6 additions & 6 deletions data/result.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ IIT Bombay Dataset,FALSE,Aligon,0.3143479,0.5448168,0.1
IIT Bombay Dataset,TRUE,Aligon,0.3485726,0.4825776,0.1
IIT Bombay Dataset,FALSE,Aouiche,0.1235358,0.5766875,0.1428571
IIT Bombay Dataset,TRUE,Aouiche,0.1273715,0.5236675,0.2
UB Exam Dataset,FALSE,Makiyama,0.09405354,0.8968658,0.2301996
UB Exam Dataset,TRUE,Makiyama,0.3033892,0.6454333,0.08146848
UB Exam Dataset,FALSE,Aligon,0.1310474,0.86316,0.5
UB Exam Dataset,TRUE,Aligon,0.2307616,0.7602108,0.25
UB Exam Dataset,FALSE,Aouiche,0.07935877,0.9231208,0.2
UB Exam Dataset,TRUE,Aouiche,0.2356835,0.7466782,0.1666667
Local Exam Dataset,FALSE,Makiyama,0.09405354,0.8968658,0.2301996
Local Exam Dataset,TRUE,Makiyama,0.3033892,0.6454333,0.08146848
Local Exam Dataset,FALSE,Aligon,0.1310474,0.86316,0.5
Local Exam Dataset,TRUE,Aligon,0.2307616,0.7602108,0.25
Local Exam Dataset,FALSE,Aouiche,0.07935877,0.9231208,0.2
Local Exam Dataset,TRUE,Aouiche,0.2356835,0.7466782,0.1666667
PocketData-Google+,FALSE,Makiyama,0.1167357,0.8795412,0.2462216
PocketData-Google+,TRUE,Makiyama,0.1146658,0.8845571,0.3333333
PocketData-Google+,FALSE,Aligon,0.06302234,0.903197,0.5
Expand Down
Binary file modified figure/compare_betacv.pdf
Binary file not shown.
Binary file modified figure/compare_dunn.pdf
Binary file not shown.
Binary file modified figure/compare_silhouette.pdf
Binary file not shown.
Binary file modified figure/module.pdf
Binary file not shown.
Binary file modified figure/sil_bombay_Aligon.pdf
Binary file not shown.
Binary file modified figure/sil_bombay_Aligon_regularization.pdf
Binary file not shown.
Binary file modified figure/sil_googleplus_Aligon.pdf
Binary file not shown.
Binary file modified figure/sil_googleplus_Aligon_regularization.pdf
Binary file not shown.
Binary file modified figure/sil_ub_Aligon.pdf
Binary file not shown.
Binary file modified figure/sil_ub_Aligon_regularization.pdf
Binary file not shown.
57 changes: 25 additions & 32 deletions script_figure_2.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,29 @@
# set working directory
#setwd("~/Downloads/EttuBench")

# load two files evaluation.R and utils.R
source(file = "./evaluation.R")
source(file = "./utils.R")

# load supporting libraries
library(cluster)
library(factoextra)
library(RColorBrewer)

# One entry per query log: the tab-separated query file (its `label` column
# is passed to silhouettePlot), the Aligon distance matrices without and with
# regularization, and the PDF each silhouette plot is written to.
# readDistMat() and silhouettePlot() are not defined in this script;
# presumably they come from the sourced evaluation.R / utils.R.
silhouette_jobs <- list(
  list(
    queries = "./data/bombay_queries.csv",
    distances = c("./data/bombay_aligon.csv",
                  "./data/bombay_aligon_regularization.csv"),
    figures = c("./figure/sil_bombay_Aligon.pdf",
                "./figure/sil_bombay_Aligon_regularization.pdf")
  ),
  list(
    queries = "./data/ub_queries.csv",
    distances = c("./data/ub_aligon.csv",
                  "./data/ub_aligon_regularization.csv"),
    figures = c("./figure/sil_ub_Aligon.pdf",
                "./figure/sil_ub_Aligon_regularization.pdf")
  ),
  list(
    queries = "./data/googleplus_queries.csv",
    distances = c("./data/googleplus_aligon.csv",
                  "./data/googleplus_aligon_regularization.csv"),
    figures = c("./figure/sil_googleplus_Aligon.pdf",
                "./figure/sil_googleplus_Aligon_regularization.pdf")
  )
)

# Process the jobs in the listed order: load the queries once per dataset,
# then draw one silhouette plot per distance matrix.
for (job in silhouette_jobs) {
  dataset <- read.csv(file = job$queries, header = TRUE, sep = "\t")
  for (i in seq_along(job$distances)) {
    distMat <- readDistMat(job$distances[i])
    silhouettePlot(distMat, dataset$label, job$figures[i])
  }
}

library(ggplot2)

# Load the clustering-quality results and fix the left-to-right facet order.
# The level names must match the dataset names in result.csv exactly; any
# name that is not listed here becomes NA.
comparison <- read.csv(file = "./data/result.csv", header = TRUE)
comparison$dataset <- factor(comparison$dataset,
                             levels = c("IIT Bombay Dataset",
                                        "Local Exam Dataset",
                                        "PocketData-Google+"))

# Each figure is a dodged bar chart of one quality measure per metric, filled
# by regularization and faceted by dataset. The plot is stored in a variable
# and handed to ggsave() explicitly: the `plot + ggsave(...)` shortcut fails
# on ggplot2 >= 3.3.4, where ggsave() returns the file name.
silhouette_plot <- ggplot(data = comparison,
                          aes(x = metric, y = silhouette,
                              fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("Average Silhouette Coefficient") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_silhouette.pdf", plot = silhouette_plot)

# The y-axis label names the measure actually plotted (BetaCV, not silhouette).
betacv_plot <- ggplot(data = comparison,
                      aes(x = metric, y = beta_cv, fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("BetaCV") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_betacv.pdf", plot = betacv_plot)

# The y-axis label names the measure actually plotted (Dunn index).
dunn_plot <- ggplot(data = comparison,
                    aes(x = metric, y = dunn, fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("Dunn Index") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_dunn.pdf", plot = dunn_plot)
57 changes: 32 additions & 25 deletions script_figure_3.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
# set working directory
#setwd("~/Downloads/EttuBench")

# load two files evaluation.R and utils.R
source(file = "./evaluation.R")
source(file = "./utils.R")

# load supporting libraries
library(ggplot2)

# Load the clustering-quality results and fix the left-to-right facet order.
# The level names must match the dataset names in result.csv exactly; any
# name that is not listed here becomes NA.
comparison <- read.csv(file = "./data/result.csv", header = TRUE)
comparison$dataset <- factor(comparison$dataset,
                             levels = c("IIT Bombay Dataset",
                                        "UB Exam Dataset",
                                        "PocketData-Google+"))

# Each figure is a dodged bar chart of one quality measure per metric, filled
# by regularization and faceted by dataset. The plot is stored in a variable
# and handed to ggsave() explicitly: the `plot + ggsave(...)` shortcut fails
# on ggplot2 >= 3.3.4, where ggsave() returns the file name.
silhouette_plot <- ggplot(data = comparison,
                          aes(x = metric, y = silhouette,
                              fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("Average Silhouette Coefficient") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_silhouette.pdf", plot = silhouette_plot)

# The y-axis label names the measure actually plotted (BetaCV, not silhouette).
betacv_plot <- ggplot(data = comparison,
                      aes(x = metric, y = beta_cv, fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("BetaCV") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_betacv.pdf", plot = betacv_plot)

# The y-axis label names the measure actually plotted (Dunn index).
dunn_plot <- ggplot(data = comparison,
                    aes(x = metric, y = dunn, fill = regularization)) +
  geom_bar(position = "dodge", stat = "identity") + facet_grid(~ dataset) +
  ylab("Dunn Index") + xlab("Metric") +
  theme_bw(base_size = 18) + theme(legend.position = "top") +
  scale_fill_grey()
ggsave(filename = "./figure/compare_dunn.pdf", plot = dunn_plot)
library(cluster)
library(factoextra)
library(RColorBrewer)

# One entry per query log: the tab-separated query file (its `label` column
# is passed to silhouettePlot), the Aligon distance matrices without and with
# regularization, and the PDF each silhouette plot is written to.
# readDistMat() and silhouettePlot() are not defined in this script;
# presumably they come from the sourced evaluation.R / utils.R.
silhouette_jobs <- list(
  list(
    queries = "./data/bombay_queries.csv",
    distances = c("./data/bombay_aligon.csv",
                  "./data/bombay_aligon_regularization.csv"),
    figures = c("./figure/sil_bombay_Aligon.pdf",
                "./figure/sil_bombay_Aligon_regularization.pdf")
  ),
  list(
    queries = "./data/ub_queries.csv",
    distances = c("./data/ub_aligon.csv",
                  "./data/ub_aligon_regularization.csv"),
    figures = c("./figure/sil_ub_Aligon.pdf",
                "./figure/sil_ub_Aligon_regularization.pdf")
  ),
  list(
    queries = "./data/googleplus_queries.csv",
    distances = c("./data/googleplus_aligon.csv",
                  "./data/googleplus_aligon_regularization.csv"),
    figures = c("./figure/sil_googleplus_Aligon.pdf",
                "./figure/sil_googleplus_Aligon_regularization.pdf")
  )
)

# Process the jobs in the listed order: load the queries once per dataset,
# then draw one silhouette plot per distance matrix.
for (job in silhouette_jobs) {
  dataset <- read.csv(file = job$queries, header = TRUE, sep = "\t")
  for (i in seq_along(job$distances)) {
    distMat <- readDistMat(job$distances[i])
    silhouettePlot(distMat, dataset$label, job$figures[i])
  }
}

2 changes: 1 addition & 1 deletion script_figure_4.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ comparison$Regularization <- factor(comparison$Regularization,
"FROM-nested Subquery",
"UNION Pull-out"))
comparison$Dataset <- factor(comparison$Dataset,
levels = c("IIT Bombay Dataset", "UB Exam Dataset",
levels = c("IIT Bombay Dataset", "Local Exam Dataset",
"PhoneLab-Google+"))

# individual module analysis
Expand Down

0 comments on commit a1a40a7

Please sign in to comment.