From 416d1d18739152abfd52bf3bb03ac1acaf834b55 Mon Sep 17 00:00:00 2001
From: Maximilian Zenk
Date: Thu, 1 Sep 2022 13:51:29 +0200
Subject: [PATCH] Update ranking script

---
 Task_2/ranking/compute_ranking.R | 262 +++++++++++++------------------
 Task_2/ranking/example_data.yaml | 106 ++++++-------
 2 files changed, 155 insertions(+), 213 deletions(-)

diff --git a/Task_2/ranking/compute_ranking.R b/Task_2/ranking/compute_ranking.R
index c524b18..ca17b77 100644
--- a/Task_2/ranking/compute_ranking.R
+++ b/Task_2/ranking/compute_ranking.R
@@ -13,9 +13,9 @@ library(magrittr)
 #' Calculate a single ranking
 #'
 #' @param data The underlying dataset used to calculate the ranking (data.frame)
-#' @param metric_variant Either "Dice" or "Hausdorff95" (str) 
+#' @param metric_variant Either "Dice" or "Hausdorff95" (str)
 #' @param institution_name Name of the institution (used in title) (str)
-#' @param ranking_method Ranking method, choose from 
+#' @param ranking_method Ranking method, choose from
 #' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased)
 #' @param title_name_ending Ending of the title string (str)
 #' @param file_name_ending Ending of the file name string (str)
@@ -24,56 +24,56 @@ library(magrittr)
 calculate_sub_ranking <- function(data, metric_variant, institution_name, ranking_method, title_name_ending, file_name_ending, report_dir = NULL) {
-  
+
   smallBetter_order <- FALSE
   isna <- 0
   if(metric_variant == "Hausdorff95") {
     smallBetter_order <- TRUE
     isna <- 1000
   }
-  
+
   if(sum(is.na(data$metric_value))>0) {
-    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", 
+    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value",
                               smallBetter = smallBetter_order, na.treat = isna)
   } else {
-    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", 
+    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value",
                               smallBetter = smallBetter_order)
   }
-  
+
   if(ranking_method == "rankThenMean") {
     ranking <- challenge%>%rankThenAggregate(FUN = mean, ties.method = "min")
   } else if(ranking_method == "rankThenMedian") {
     ranking <- challenge%>%rankThenAggregate(FUN = median, ties.method = "min")
   } else if(ranking_method == "aggregateThenMean") {
-    ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = isna, ties.method = "min") 
+    ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = isna, ties.method = "min")
   } else if(ranking_method == "aggregateThenMedian") {
-    ranking <- challenge%>%aggregateThenRank(FUN = median, na.treat = isna, ties.method = "min") 
+    ranking <- challenge%>%aggregateThenRank(FUN = median, na.treat = isna, ties.method = "min")
   } else if(ranking_method == "testBased") {
-    ranking <- challenge%>%testThenRank(alpha = 0.05, 
-                                        p.adjust.method = "none", 
-                                        na.treat = isna, ties.method = "min")
+    ranking <- challenge%>%testThenRank(alpha = 0.05,
+                                        p.adjust.method = "none",
+                                        na.treat = isna, ties.method = "min")
   } else {
     warning("Please specify valid ranking scheme")
   }
-  
-  
+
+
   if (!is.null(report_dir)){
     # Bootstrapping analysis
-    registerDoParallel(cores = 8) 
+    registerDoParallel(cores = 8)
     set.seed(1)
     ranking_bootstrapped <- ranking%>%bootstrap(nboot = 1000, parallel = TRUE, progress = "none")
     stopImplicitCluster()
-    
+
     # Ranking report
-    ranking_bootstrapped %>% 
-      report(title = paste(institution_name, title_name_ending, sep=" "), 
-             file = file.path(report_dir, paste(institution_name, file_name_ending, sep="_")), 
sep="_")), - format = "PDF", - latex_engine = "pdflatex", - clean = FALSE + ranking_bootstrapped %>% + report(title = paste(institution_name, title_name_ending, sep=" "), + file = file.path(report_dir, paste(institution_name, file_name_ending, sep="_")), + format = "PDF", + latex_engine = "pdflatex", + clean = FALSE ) } - + return(ranking) } @@ -84,79 +84,73 @@ calculate_sub_ranking <- function(data, metric_variant, institution_name, rankin #' #' @param data The underlying dataset used to calculate the ranking (data.frame) #' @param institution_name Name of the institution (used in title) (str) -#' @param ranking_method Ranking method, choose from +#' @param ranking_method Ranking method, choose from #' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased) #' #' @return A list of the 6 ranking lists calculate_all_rankings_per_institute <- function(data, institution_name, ranking_method, report_dir = NULL) { - + ## Enhancing tumor (ET) ## - data_et <- subset(data, region == "ET") - # Compute ET ranking for the Dice metric print("... calculate ET Dice ranking ...") - - data_et_dice <- subset(data_et, metric == "Dice") - ranking_et_dice <- calculate_sub_ranking(data_et_dice, "Dice", - institution_name, ranking_method, + + data_et_dice <- subset(data, metric == "Dice_ET") + ranking_et_dice <- calculate_sub_ranking(data_et_dice, "Dice", + institution_name, ranking_method, "ET Dice", "ET_Dice", report_dir) - + # Compute ET ranking for the HD95 metric print("... calculate ET HD95 ranking ...") - - data_et_hd95 <- subset(data_et, metric == "Hausdorff95") - ranking_et_hd95 <- calculate_sub_ranking(data_et_hd95, "Hausdorff95", + + data_et_hd95 <- subset(data, metric == "Hausdorff95_ET") + ranking_et_hd95 <- calculate_sub_ranking(data_et_hd95, "Hausdorff95", institution_name, ranking_method, "ET HD95", "ET_HD95", report_dir) - + ## Tumor core (TC) ## - data_tc <- subset(data, region == "TC") - # Compute TC ranking for the Dice metric print("... calculate TC Dice ranking ...") - - data_tc_dice <- subset(data_tc, metric == "Dice") - ranking_tc_dice <- calculate_sub_ranking(data_tc_dice, "Dice", + + data_tc_dice <- subset(data, metric == "Dice_TC") + ranking_tc_dice <- calculate_sub_ranking(data_tc_dice, "Dice", institution_name, ranking_method, "TC Dice", "TC_Dice", report_dir) - + # Compute TC ranking for the HD95 metric print("... calculate TC HD95 ranking ...") - - data_tc_hd95 <- subset(data_tc, metric == "Hausdorff95") - ranking_tc_hd95 <- calculate_sub_ranking(data_tc_hd95, "Hausdorff95", + + data_tc_hd95 <- subset(data, metric == "Hausdorff95_TC") + ranking_tc_hd95 <- calculate_sub_ranking(data_tc_hd95, "Hausdorff95", institution_name, ranking_method, "TC HD95", "TC_HD95", report_dir) - + ## Whole tumor (WT) ## - data_wt <- subset(data, region == "WT") - # Compute WT ranking for the Dice metric print("... calculate WT Dice ranking ...") - data_wt_dice <- subset(data_wt, metric == "Dice") - ranking_wt_dice <- calculate_sub_ranking(data_wt_dice, "Dice", + data_wt_dice <- subset(data, metric == "Dice_WT") + ranking_wt_dice <- calculate_sub_ranking(data_wt_dice, "Dice", institution_name, ranking_method, "WT Dice", "WT_Dice", report_dir) - + # Compute WT ranking for the HD95 metric print("... 
-  
-  data_wt_hd95 <- subset(data_wt, metric == "Hausdorff95")
-  ranking_wt_hd95 <- calculate_sub_ranking(data_wt_hd95, "Hausdorff95", 
+
+  data_wt_hd95 <- subset(data, metric == "Hausdorff95_WT")
+  ranking_wt_hd95 <- calculate_sub_ranking(data_wt_hd95, "Hausdorff95",
                                            institution_name, ranking_method,
                                            "WT HD95", "WT_HD95", report_dir)
-  
+
   # Store all rankings in a list
-  rankings <- list(ranking_et_dice, ranking_et_hd95, ranking_tc_dice, 
+  rankings <- list(ranking_et_dice, ranking_et_hd95, ranking_tc_dice,
                    ranking_tc_hd95, ranking_wt_dice, ranking_wt_hd95)
-  
+
   return(rankings)
 }
@@ -174,7 +168,7 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
   alpha=0.05
   p.adjust.method="holm"
   order=FALSE
-  
+
   signMatrix = NULL
   for (ranking in rankings) {
     currSignMatrix = ranking$data%>%decision.challenge(na.treat=ranking$call[[1]][[1]]$na.treat,
@@ -188,7 +182,7 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
       signMatrix$dummyTask <- signMatrix$dummyTask + currSignMatrix$dummyTask
     }
   }
-  
+
   return(signMatrix)
 }
 
@@ -201,21 +195,21 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
 #' @return data in data.frame format
 
 load_data <- function(path) {
-  
+
   print("... load data from institute ...")
-  
+
   # Load data from yaml file and convert to data frame
   yaml_data <- yaml.load_file(path)
   # need to replace nulls from yaml, as these indicate missing values
   yaml_data <- replace_nulls_in_list(yaml_data)
   yaml_data_df <- data.frame(melt(yaml_data))
-  
-  data <- data.frame(case = yaml_data_df$L1, 
-                     region = yaml_data_df$L3,
+
+  data <- data.frame(case = yaml_data_df$L1,
+                     # region = yaml_data_df$L3, # Included in metric now
                      algorithm = yaml_data_df$L2,
-                     metric = yaml_data_df$L4,
+                     metric = yaml_data_df$L3,
                      metric_value = yaml_data_df$value)
-  
+
   return(data)
 }
 
@@ -238,7 +232,7 @@ replace_nulls_in_list <- function(x) {
 
 # Function to calculate the mean ranks per algorithm for one institution --------
 
-#' Overall function to compute the rankings per institute and calculate the 
+#' Overall function to compute the rankings per institute and calculate the
 #' mean rank per algorithm
 #'
 #' @param data The underlying dataset used to calculate the ranking (data.frame)
@@ -247,103 +241,85 @@ replace_nulls_in_list <- function(x) {
 #' @return Mean ranks for each algorithm (data.frame)
 
 calculate_mean_ranks_one_institute <- function(rankings, data, institution_name, report_dir = NULL) {
-  
+
   ## Bring all ranks together for each algorithm
   print("... compute mean ranks per algorithm ...")
-  
+
   algorithms <- unique(data$algorithm)
   all_ranks_df <- data.frame(matrix(ncol = length(algorithms), nrow = 6))
   counter = 1
-  
+
   for(alg in algorithms) {
     alg_ranks <- c()
-    
+
     # Extract ranks from each of the 6 rankings for each algorithm
     for(ranking in rankings) {
       alg_rank <- ranking[[1]]$dummyTask[c(alg),c("rank")]
      alg_ranks <- rbind(alg_ranks, alg_rank)
    }
-    
+
    # Store ranks for each algorithm in data frame
    all_ranks_df[[counter]] <- alg_ranks
    colnames(all_ranks_df)[counter] <- alg
    counter = counter + 1
  }
-  
+
  # Compute mean rank over the 6 ranks per algorithm for this institution
  mean_rank_df <- data.frame(t(colMeans(all_ranks_df)))
-  
+
  sprintf("... done with %s ...", institution_name)
-  
+
   return(mean_rank_df)
 }
 
-# Function to generate boxplots -------------------------------------------
-
-#' Generates a dot- and boxplot for the raw metric values of one institute,
-#' grouped by the regions ET, TC and WT for one metric
-#'
-#' @param data Raw data used to generate plots (data.frame)
-#' @param metric_variant Either "Dice" or "Hausdorff95" (str)
-#' @param institution_name Name of the institution (used in title) (str)
-#'
-#' @return Ggplot
-
-generate_dot_boxplots_per_institute <- function(data, metric_variant, institution_name) {
-  
-  p <- ggplot(data, aes(x=algorithm, y=metric_value, color=region)) +
-    geom_boxplot(lwd=0.8,outlier.shape=NA) +
-    geom_point(position=position_jitterdodge(), alpha = 0.8, size=1.3) +
-    xlab("Algorithm") + ylab(metric_variant) + labs(color = "Region") +
-    theme_light() + ggtitle(institution_name) +
-    theme(axis.text.x = element_text(size = 12, angle = 90, vjust = 0.5, hjust=1),
-          axis.text.y = element_text(size = 13),
-          legend.text = element_text(size = 13),
-          legend.title = element_text(size= 16),
-          axis.title = element_text(size= 16),
-          strip.text = element_text(size = 16),
-          title = element_text(size=16),
-          legend.position="bottom"
-    )
-  
-  return(p)
-}
-
 # Main script --------------------------------------------------------------
 
 args = commandArgs(trailingOnly = TRUE)
 if (length(args) == 0) {
-  stop("Please specify these arguments: data_path [, report_save_dir].")
-} else if (length(args) == 1) {
-  report_dir = NULL
-} else if (length(args) == 2) {
-  report_dir = args[2]
+  stop("Please specify these arguments: data_path [, ranking_method, --make_reports].")
 }
+make_reports = FALSE
 data_path <- args[1]
+ranking_method <- "rankThenMean"
+all_ranking_methods = list("rankThenMean", "rankThenMedian", "aggregateThenMean", "aggregateThenMedian", "testBased")
+
+if (length(args) == 2) {
+  if (args[2] == "--make_reports") {
+    make_reports = TRUE
+  } else {
+    ranking_method <- args[2]
+  }
+} else if (length(args) == 3) {
+  ranking_method <- args[2]
+  if (!(ranking_method %in% all_ranking_methods)) {
+    stop(paste("Ranking method must be one of", all_ranking_methods))
+  }
+  if (args[3] == "--make_reports") {
+    make_reports = TRUE
+  } else {
+    stop(paste("Unrecognized argument.", args[3]))
+  }
+}
+
 output_dir <- "ranking_output"
-# output_dir <- "ranking_output_C22_excluded"
 if (!dir.exists(output_dir)) {
   dir.create(output_dir)
 }
+if (make_reports) {
+  report_dir <- paste(output_dir, paste("reports",ranking_method, sep = "_"), sep = "/")
+} else {
+  report_dir <- NULL
+}
 
 # get list of all institution files
 data_files <- list.files(data_path, pattern = '.*\\.(yaml|yml)$', full.names = TRUE)
 
-# select ranking method
-ranking_method <- "rankThenMean"
-# ranking_method <- "rankThenMedian"
-# ranking_method <- "aggregateThenMean"
-# ranking_method <- "aggregateThenMedian"
-# ranking_method <- "testBased"
-
 mean_ranks_all_institutions <- NULL
 all_institution_names <- NULL
 all_data <- list()
-
-dataSignCounts <- data.frame("91263" = numeric(), "91061" = numeric(), "91425" = numeric())
 dataSignMatrices <- list()
-names(dataSignCounts) <- sub("^X", "", names(dataSignCounts))
 
 for (path in data_files) {
   # Institution i ----------------------------------------------------------
@@ -355,32 +331,20 @@ for (path in data_files) {
   #   print("skipping")
   # }
   data_fets_inst <- load_data(path)
-  data_fets_inst <- subset(data_fets_inst, algorithm != "baseline_nnunet2020") # not ranked
-  
-  # plot dots- and boxplots
-  p_dice <- generate_dot_boxplots_per_institute(subset(data_fets_inst, metric=="Dice"), "Dice",
-                                                institution_name)
-  p_hd95 <- generate_dot_boxplots_per_institute(subset(data_fets_inst, metric=="Hausdorff95"), "HD95",
-                                                institution_name)
-  
-  dice_name <- paste("boxplot", institution_name, "Dice", sep="_")
-  hd95_name <- paste("boxplot", institution_name, "HD95", sep="_")
-  
-  ggsave(paste(output_dir, paste(dice_name, ".png", sep=""), sep="/"), p_dice)
-  ggsave(paste(output_dir, paste(hd95_name, ".png", sep=""), sep="/"), p_hd95)
+  # data_fets_inst <- subset(data_fets_inst, algorithm != "baseline_nnunet2020") # not ranked
 
   # Calculate the rankings for the ET, TC and WT
   # For each region, the ranking is computed for the Dice and Hausdorff95 metrics
   # Resulting in 6 rankings
   print("... calculate rankings ...")
...") rankings <- calculate_all_rankings_per_institute(data_fets_inst, institution_name, ranking_method, report_dir=report_dir) - + # Compute mean rank per algorithm for each institution -------------------- mean_rank_df <- calculate_mean_ranks_one_institute(rankings, data_fets_inst, institution_name) - + # Make sure that data frames have same ordering mean_rank_df %>% select(sort(names(.))) - + if (is.null(mean_ranks_all_institutions)) { mean_ranks_all_institutions <- mean_rank_df @@ -392,10 +356,9 @@ for (path in data_files) { all_institution_names <- c(all_institution_names, institution_name) } all_data[[institution_name]] <- data_fets_inst - + # Calculate number of significantly superior rankings per algorithm dataSignMatrices[[length(dataSignMatrices) + 1]] <- calculate_significance_one_institute(rankings, dataSignCounts) - dataSignCounts[nrow(dataSignCounts)+1,] <- rowSums(dataSignMatrices[[length(dataSignMatrices)]]$dummyTask[])[names(dataSignCounts)] } rownames(mean_ranks_all_institutions) <- all_institution_names @@ -405,10 +368,10 @@ final_ranks_df <- data.frame(meanRank = colMeans(mean_ranks_all_institutions)) final_ranks_df <- cbind(final_ranks_df, finalRank = rank(final_ranks_df$meanRank)) final_ranks_df <- final_ranks_df[order(final_ranks_df$finalRank),] -final_ranks_df_print <- - hux(final_ranks_df) %>% - add_rownames() %>% - set_bold(row = 1, col = everywhere, value = TRUE) %>% +final_ranks_df_print <- + hux(final_ranks_df) %>% + add_rownames() %>% + set_bold(row = 1, col = everywhere, value = TRUE) %>% set_all_borders(TRUE) print("The final ranking is: ") @@ -418,13 +381,6 @@ file_name_mean_ranks <- paste("per_institute_ranks", ranking_method, sep="_") write.csv(final_ranks_df, file = paste(output_dir, paste(file_name_final_ranks, ".csv",sep=""), sep="/")) write.csv(mean_ranks_all_institutions, file = paste(output_dir, paste(file_name_mean_ranks, ".csv",sep=""), sep="/")) -# Calculate number of significantly superiorities ------------------------ -countSign <- colSums(dataSignCounts) -print("Counting how often algorithms are significantly superior to the others: ") -print(countSign) -file_name_significant_counts <- paste("significant_counts", ranking_method, sep="_") -write.csv(countSign, file = paste(output_dir, paste(file_name_significant_counts, ".csv",sep=""), sep="/")) - # also sum up significance matrices total_sign_matrix <- NULL for (s in dataSignMatrices) { @@ -435,5 +391,9 @@ for (s in dataSignMatrices) { total_sign_matrix <- total_sign_matrix + ordered_s } } +print("Counting how often algorithms are significantly superior to the others (each row shows the no. 
+print(total_sign_matrix)
+print("Sum along rows:")
+print(rowSums(total_sign_matrix))
 file_name <- paste("significant_matrix", ranking_method, sep="_")
 write.csv(total_sign_matrix, file = paste(output_dir, paste(file_name, ".csv",sep=""), sep="/"))
diff --git a/Task_2/ranking/example_data.yaml b/Task_2/ranking/example_data.yaml
index b81a40c..7c3973c 100644
--- a/Task_2/ranking/example_data.yaml
+++ b/Task_2/ranking/example_data.yaml
@@ -1,62 +1,44 @@
-0:
-  deepmedic:
-    WT:
-      Dice: 0.5
-      Hausdorff95: 8
-    TC:
-      Dice: 0.7
-      Hausdorff95: 2
-    ET:
-      Dice: 0.54
-      Hausdorff95: 14
-  deepscan:
-    WT:
-      Dice: 0.45
-      Hausdorff95: 12
-    TC:
-      Dice: 0.85
-      Hausdorff95: 5
-    ET:
-      Dice: 0.2
-      Hausdorff95: 22
-  nnunet:
-    WT:
-      Dice: 0.4
-      Hausdorff95: 9
-    TC:
-      Dice: 0.7
-      Hausdorff95: 6
-    ET:
-      Dice: 0.12
-      Hausdorff95: 16
-1:
-  deepmedic:
-    WT:
-      Dice: 0.98
-      Hausdorff95: 2
-    TC:
-      Dice: 0.31
-      Hausdorff95: 5
-    ET:
-      Dice: 0.45
-      Hausdorff95: 1
-  deepscan:
-    WT:
-      Dice: 0.63
-      Hausdorff95: 6.1
-    TC:
-      Dice: 0.23
-      Hausdorff95: 4.2
-    ET:
-      Dice: 0.65
-      Hausdorff95: 9.1
-  nnunet:
-    WT:
-      Dice: 0.31
-      Hausdorff95: 6
-    TC:
-      Dice: 0.56
-      Hausdorff95: 11
-    ET:
-      Dice: 0.98
-      Hausdorff95: 1
\ No newline at end of file
+0:
+  deepmedic:
+    Dice_WT: 0.5
+    Hausdorff95_WT: 8
+    Dice_TC: 0.7
+    Hausdorff95_TC: 2
+    Dice_ET: 0.54
+    Hausdorff95_ET: 14
+  deepscan:
+    Dice_WT: 0.45
+    Hausdorff95_WT: 12
+    Dice_TC: 0.85
+    Hausdorff95_TC: 5
+    Dice_ET: 0.2
+    Hausdorff95_ET: 22
+  nnunet:
+    Dice_WT: 0.4
+    Hausdorff95_WT: 9
+    Dice_TC: 0.7
+    Hausdorff95_TC: 6
+    Dice_ET: 0.12
+    Hausdorff95_ET: 16
+1:
+  deepmedic:
+    Dice_WT: 0.98
+    Hausdorff95_WT: 2
+    Dice_TC: 0.31
+    Hausdorff95_TC: 5
+    Dice_ET: 0.45
+    Hausdorff95_ET: 1
+  deepscan:
+    Dice_WT: 0.63
+    Hausdorff95_WT: 6.1
+    Dice_TC: 0.23
+    Hausdorff95_TC: 4.2
+    Dice_ET: 0.65
+    Hausdorff95_ET: 9.1
+  nnunet:
+    Dice_WT: 0.31
+    Hausdorff95_WT: 6
+    Dice_TC: 0.56
+    Hausdorff95_TC: 11
+    Dice_ET: 0.98
+    Hausdorff95_ET: 1
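
Usage note (editor's addition, not part of the patch): with the argument handling introduced above, the script takes an optional ranking method and an optional --make_reports flag in place of the old report_save_dir argument. A sketch of the expected invocations; the YAML directory path is a placeholder:

    # Default method (rankThenMean), no PDF reports:
    Rscript compute_ranking.R path/to/institution_yamls

    # Explicit ranking method:
    Rscript compute_ranking.R path/to/institution_yamls aggregateThenMedian

    # Method plus bootstrap reports (written under ranking_output/reports_testBased):
    Rscript compute_ranking.R path/to/institution_yamls testBased --make_reports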
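For reference, a minimal R sketch (also an editor's addition) of how load_data() digests the reworked YAML layout, assuming the yaml and reshape2 packages the script already relies on; the snippet and its values are invented for illustration:

    library(yaml)
    library(reshape2)

    # One case, one algorithm, two metrics in the new flat layout
    # (the region is folded into the metric name):
    yaml_str <- "0:\n  deepmedic:\n    Dice_WT: 0.5\n    Hausdorff95_WT: 8\n"
    yaml_data <- yaml.load(yaml_str)
    yaml_data_df <- data.frame(melt(yaml_data))

    # melt() labels list levels from the outside in; with the region level
    # gone, the columns shift to L1 = case, L2 = algorithm, L3 = metric.
    data <- data.frame(case = yaml_data_df$L1,
                       algorithm = yaml_data_df$L2,
                       metric = yaml_data_df$L3,
                       metric_value = yaml_data_df$value)
    print(data)  # one row per (case, algorithm, metric) combination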
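Likewise, a toy illustration (editor's addition) of the aggregation that replaces the removed dataSignCounts bookkeeping: per-institution significance matrices are aligned by algorithm name and summed, and rowSums() then gives the per-algorithm superiority counts that the script prints. The 2x2 matrices and algorithm names are invented, and the alignment step only mirrors what the ordered_s variable in the script suggests:

    # Entry [i, j] > 0 means algorithm i was significantly superior to
    # algorithm j in at least one of an institution's six rankings.
    m1 <- matrix(c(0, 1,
                   0, 0), nrow = 2, byrow = TRUE,
                 dimnames = list(c("deepmedic", "nnunet"), c("deepmedic", "nnunet")))
    m2 <- matrix(c(0, 0,
                   1, 0), nrow = 2, byrow = TRUE,
                 dimnames = list(c("nnunet", "deepmedic"), c("nnunet", "deepmedic")))

    # Align the second matrix to the first one's row/column order before adding:
    ordered_m2 <- m2[rownames(m1), colnames(m1)]
    total_sign_matrix <- m1 + ordered_m2
    print(rowSums(total_sign_matrix))  # deepmedic: 2, nnunet: 0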