From 416d1d18739152abfd52bf3bb03ac1acaf834b55 Mon Sep 17 00:00:00 2001
From: Maximilian Zenk
Date: Thu, 1 Sep 2022 13:51:29 +0200
Subject: [PATCH] Update ranking script

---
 Task_2/ranking/compute_ranking.R | 262 +++++++++++++------------------
 Task_2/ranking/example_data.yaml | 106 ++++++-------
 2 files changed, 155 insertions(+), 213 deletions(-)

diff --git a/Task_2/ranking/compute_ranking.R b/Task_2/ranking/compute_ranking.R
index c524b18..ca17b77 100644
--- a/Task_2/ranking/compute_ranking.R
+++ b/Task_2/ranking/compute_ranking.R
@@ -13,9 +13,9 @@ library(magrittr)
 #' Calculate a single ranking
 #'
 #' @param data The underlying dataset used to calculate the ranking (data.frame)
-#' @param metric_variant Either "Dice" or "Hausdorff95" (str) 
+#' @param metric_variant Either "Dice" or "Hausdorff95" (str)
 #' @param institution_name Name of the institution (used in title) (str)
-#' @param ranking_method Ranking method, choose from 
+#' @param ranking_method Ranking method, choose from
 #' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased)
 #' @param title_name_ending Ending of the title string (str)
 #' @param file_name_ending Ending of the file name string (str)
@@ -24,56 +24,56 @@ library(magrittr)
 calculate_sub_ranking <- function(data, metric_variant, institution_name, ranking_method, title_name_ending, file_name_ending, report_dir = NULL) {
-  
+
   smallBetter_order <- FALSE
   isna <- 0
   if(metric_variant == "Hausdorff95") {
     smallBetter_order <- TRUE
     isna <- 1000
   }
-  
+
   if(sum(is.na(data$metric_value))>0) {
-    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", 
+    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value",
                               smallBetter = smallBetter_order, na.treat = isna)
   } else {
-    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", 
+    challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value",
                               smallBetter = smallBetter_order)
   }
-  
+
   if(ranking_method == "rankThenMean") {
     ranking <- challenge%>%rankThenAggregate(FUN = mean, ties.method = "min")
   } else if(ranking_method == "rankThenMedian") {
     ranking <- challenge%>%rankThenAggregate(FUN = median, ties.method = "min")
   } else if(ranking_method == "aggregateThenMean") {
-    ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = isna, ties.method = "min") 
+    ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = isna, ties.method = "min")
   } else if(ranking_method == "aggregateThenMedian") {
-    ranking <- challenge%>%aggregateThenRank(FUN = median, na.treat = isna, ties.method = "min") 
+    ranking <- challenge%>%aggregateThenRank(FUN = median, na.treat = isna, ties.method = "min")
   } else if(ranking_method == "testBased") {
-    ranking <- challenge%>%testThenRank(alpha = 0.05, 
-                                        p.adjust.method = "none", 
-                                        na.treat = isna, ties.method = "min")
+    ranking <- challenge%>%testThenRank(alpha = 0.05,
+                                        p.adjust.method = "none",
+                                        na.treat = isna, ties.method = "min")
   } else {
     warning("Please specify valid ranking scheme")
   }
-  
-  
+
+
   if (!is.null(report_dir)){
     # Bootstrapping analysis
-    registerDoParallel(cores = 8) 
+    registerDoParallel(cores = 8)
     set.seed(1)
     ranking_bootstrapped <- ranking%>%bootstrap(nboot = 1000, parallel = TRUE, progress = "none")
     stopImplicitCluster()
-    
+
     # Ranking report
-    ranking_bootstrapped %>% 
-      report(title = paste(institution_name, title_name_ending, sep=" "), 
-             file = file.path(report_dir, paste(institution_name, file_name_ending, sep="_")), 
sep="_")), - format = "PDF", - latex_engine = "pdflatex", - clean = FALSE + ranking_bootstrapped %>% + report(title = paste(institution_name, title_name_ending, sep=" "), + file = file.path(report_dir, paste(institution_name, file_name_ending, sep="_")), + format = "PDF", + latex_engine = "pdflatex", + clean = FALSE ) } - + return(ranking) } @@ -84,79 +84,73 @@ calculate_sub_ranking <- function(data, metric_variant, institution_name, rankin #' #' @param data The underlying dataset used to calculate the ranking (data.frame) #' @param institution_name Name of the institution (used in title) (str) -#' @param ranking_method Ranking method, choose from +#' @param ranking_method Ranking method, choose from #' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased) #' #' @return A list of the 6 ranking lists calculate_all_rankings_per_institute <- function(data, institution_name, ranking_method, report_dir = NULL) { - + ## Enhancing tumor (ET) ## - data_et <- subset(data, region == "ET") - # Compute ET ranking for the Dice metric print("... calculate ET Dice ranking ...") - - data_et_dice <- subset(data_et, metric == "Dice") - ranking_et_dice <- calculate_sub_ranking(data_et_dice, "Dice", - institution_name, ranking_method, + + data_et_dice <- subset(data, metric == "Dice_ET") + ranking_et_dice <- calculate_sub_ranking(data_et_dice, "Dice", + institution_name, ranking_method, "ET Dice", "ET_Dice", report_dir) - + # Compute ET ranking for the HD95 metric print("... calculate ET HD95 ranking ...") - - data_et_hd95 <- subset(data_et, metric == "Hausdorff95") - ranking_et_hd95 <- calculate_sub_ranking(data_et_hd95, "Hausdorff95", + + data_et_hd95 <- subset(data, metric == "Hausdorff95_ET") + ranking_et_hd95 <- calculate_sub_ranking(data_et_hd95, "Hausdorff95", institution_name, ranking_method, "ET HD95", "ET_HD95", report_dir) - + ## Tumor core (TC) ## - data_tc <- subset(data, region == "TC") - # Compute TC ranking for the Dice metric print("... calculate TC Dice ranking ...") - - data_tc_dice <- subset(data_tc, metric == "Dice") - ranking_tc_dice <- calculate_sub_ranking(data_tc_dice, "Dice", + + data_tc_dice <- subset(data, metric == "Dice_TC") + ranking_tc_dice <- calculate_sub_ranking(data_tc_dice, "Dice", institution_name, ranking_method, "TC Dice", "TC_Dice", report_dir) - + # Compute TC ranking for the HD95 metric print("... calculate TC HD95 ranking ...") - - data_tc_hd95 <- subset(data_tc, metric == "Hausdorff95") - ranking_tc_hd95 <- calculate_sub_ranking(data_tc_hd95, "Hausdorff95", + + data_tc_hd95 <- subset(data, metric == "Hausdorff95_TC") + ranking_tc_hd95 <- calculate_sub_ranking(data_tc_hd95, "Hausdorff95", institution_name, ranking_method, "TC HD95", "TC_HD95", report_dir) - + ## Whole tumor (WT) ## - data_wt <- subset(data, region == "WT") - # Compute WT ranking for the Dice metric print("... calculate WT Dice ranking ...") - data_wt_dice <- subset(data_wt, metric == "Dice") - ranking_wt_dice <- calculate_sub_ranking(data_wt_dice, "Dice", + data_wt_dice <- subset(data, metric == "Dice_WT") + ranking_wt_dice <- calculate_sub_ranking(data_wt_dice, "Dice", institution_name, ranking_method, "WT Dice", "WT_Dice", report_dir) - + # Compute WT ranking for the HD95 metric print("... 
-  
-  data_wt_hd95 <- subset(data_wt, metric == "Hausdorff95")
-  ranking_wt_hd95 <- calculate_sub_ranking(data_wt_hd95, "Hausdorff95", 
+
+  data_wt_hd95 <- subset(data, metric == "Hausdorff95_WT")
+  ranking_wt_hd95 <- calculate_sub_ranking(data_wt_hd95, "Hausdorff95",
                                            institution_name, ranking_method,
                                            "WT HD95", "WT_HD95", report_dir)
-  
+
   # Store all rankings in a list
-  rankings <- list(ranking_et_dice, ranking_et_hd95, ranking_tc_dice, 
+  rankings <- list(ranking_et_dice, ranking_et_hd95, ranking_tc_dice,
                    ranking_tc_hd95, ranking_wt_dice, ranking_wt_hd95)
-  
+
   return(rankings)
 }
@@ -174,7 +168,7 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
   alpha=0.05
   p.adjust.method="holm"
   order=FALSE
-  
+
   signMatrix = NULL
   for (ranking in rankings) {
     currSignMatrix = ranking$data%>%decision.challenge(na.treat=ranking$call[[1]][[1]]$na.treat,
@@ -188,7 +182,7 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
       signMatrix$dummyTask <- signMatrix$dummyTask + currSignMatrix$dummyTask
     }
   }
-  
+
   return(signMatrix)
 }
 
@@ -201,21 +195,21 @@ calculate_significance_one_institute <- function(rankings, dataSignCounts) {
 #' @return data in data.frame format
 
 load_data <- function(path) {
-  
+
   print("... load data from institute ...")
-  
+
   # Load data from yaml file and convert to data frame
   yaml_data <- yaml.load_file(path)
   # need to replace nulls from yaml, as these indicate missing values
   yaml_data <- replace_nulls_in_list(yaml_data)
   yaml_data_df <- data.frame(melt(yaml_data))
-  
-  data <- data.frame(case = yaml_data_df$L1, 
-                     region = yaml_data_df$L3,
+
+  data <- data.frame(case = yaml_data_df$L1,
+                     # region = yaml_data_df$L3, # Included in metric now
                      algorithm = yaml_data_df$L2,
-                     metric = yaml_data_df$L4,
+                     metric = yaml_data_df$L3,
                      metric_value = yaml_data_df$value)
-  
+
   return(data)
 }
 
@@ -238,7 +232,7 @@ replace_nulls_in_list <- function(x) {
 
 # Function to calculate the mean ranks per algorithm for one institution --------
 
-#' Overall function to compute the rankings per institute and calculate the 
+#' Overall function to compute the rankings per institute and calculate the
 #' mean rank per algorithm
 #'
 #' @param data The underlying dataset used to calculate the ranking (data.frame)
@@ -247,103 +241,85 @@ replace_nulls_in_list <- function(x) {
 #' @return Mean ranks for each algorithm (data.frame)
 
 calculate_mean_ranks_one_institute <- function(rankings, data, institution_name, report_dir = NULL) {
-  
+
   ## Bring all ranks together for each algorithm
   print("... compute mean ranks per algorithm ...")
-  
+
   algorithms <- unique(data$algorithm)
   all_ranks_df <- data.frame(matrix(ncol = length(algorithms), nrow = 6))
   counter = 1
-  
+
   for(alg in algorithms) {
     alg_ranks <- c()
-    
+
     # Extract ranks from each of the 6 rankings for each algorithm
     for(ranking in rankings) {
       alg_rank <- ranking[[1]]$dummyTask[c(alg),c("rank")]
      alg_ranks <- rbind(alg_ranks, alg_rank)
    }
-    
+
    # Store ranks for each algorithm in data frame
    all_ranks_df[[counter]] <- alg_ranks
    colnames(all_ranks_df)[counter] <- alg
    counter = counter + 1
  }
-  
+
  # Compute mean rank over the 6 ranks per algorithm for this institution
  mean_rank_df <- data.frame(t(colMeans(all_ranks_df)))
-  
+
  sprintf("... done with %s ...", institution_name)
-  
+
   return(mean_rank_df)
 }
 
-# Function to generate boxplots -------------------------------------------
-
-#' Generates a dot- and boxplot for the raw metric values of one institute,
-#' grouped by the regions ET, TC and WT for one metric
-#'
-#' @param data Raw data used to generate plots (data.frame)
-#' @param metric_variant Either "Dice" or "Hausdorff95" (str)
-#' @param institution_name Name of the institution (used in title) (str)
-#'
-#' @return Ggplot
-
-generate_dot_boxplots_per_institute <- function(data, metric_variant, institution_name) {
-  
-  p <- ggplot(data, aes(x=algorithm, y=metric_value, color=region)) +
-    geom_boxplot(lwd=0.8,outlier.shape=NA) +
-    geom_point(position=position_jitterdodge(), alpha = 0.8, size=1.3) +
-    xlab("Algorithm") + ylab(metric_variant) + labs(color = "Region") +
-    theme_light() + ggtitle(institution_name) +
-    theme(axis.text.x = element_text(size = 12, angle = 90, vjust = 0.5, hjust=1),
-          axis.text.y = element_text(size = 13),
-          legend.text = element_text(size = 13),
-          legend.title = element_text(size= 16),
-          axis.title = element_text(size= 16),
-          strip.text = element_text(size = 16),
-          title = element_text(size=16),
-          legend.position="bottom"
-    )
-  
-  return(p)
-}
-
 # Main script --------------------------------------------------------------
 
 args = commandArgs(trailingOnly = TRUE)
 if (length(args) == 0) {
-  stop("Please specify these arguments: data_path [, report_save_dir].")
-} else if (length(args) == 1) {
-  report_dir = NULL
-} else if (length(args) == 2) {
-  report_dir = args[2]
+  stop("Please specify these arguments: data_path [, ranking_method, --make_reports].")
 }
+make_reports = FALSE
 data_path <- args[1]
+ranking_method <- "rankThenMean"
+all_ranking_methods = list("rankThenMean", "rankThenMedian", "aggregateThenMean", "aggregateThenMedian", "testBased")
+
+if (length(args) == 2) {
+  if (args[2] == "--make_reports") {
+    make_reports = TRUE
+  } else {
+    ranking_method <- args[2]
+  }
+} else if (length(args) == 3) {
+  ranking_method <- args[2]
+  if (!(ranking_method %in% all_ranking_methods)) {
+    stop(paste("Ranking method must be one of", all_ranking_methods))
+  }
+  if (args[3] == "--make_reports") {
+    make_reports = TRUE
+  } else {
+    stop(paste("Unrecognized argument.", args[3]))
+  }
+}
+
 output_dir <- "ranking_output"
-# output_dir <- "ranking_output_C22_excluded"
 if (!dir.exists(output_dir)) {
   dir.create(output_dir)
 }
+if (make_reports) {
+  report_dir <- paste(output_dir, paste("reports",ranking_method, sep = "_"), sep = "/")
+} else {
+  report_dir <- NULL
+}
 
 # get list of all institution files
 data_files <- list.files(data_path, pattern = '.*\\.(yaml|yml)$', full.names = TRUE)
 
-# select ranking method
-ranking_method <- "rankThenMean"
-# ranking_method <- "rankThenMedian"
-# ranking_method <- "aggregateThenMean"
-# ranking_method <- "aggregateThenMedian"
-# ranking_method <- "testBased"
-
 mean_ranks_all_institutions <- NULL
 all_institution_names <- NULL
 all_data <- list()
-
-dataSignCounts <- data.frame("91263" = numeric(), "91061" = numeric(), "91425" = numeric())
 dataSignMatrices <- list()
-names(dataSignCounts) <- sub("^X", "", names(dataSignCounts))
 
 for (path in data_files) {
   # Institution i ----------------------------------------------------------
@@ -355,32 +331,20 @@ for (path in data_files) {
   #   print("skipping")
   # }
   data_fets_inst <- load_data(path)
-  data_fets_inst <- subset(data_fets_inst, algorithm != "baseline_nnunet2020") # not ranked
-  
-  # plot dots- and boxplots
-  p_dice <- generate_dot_boxplots_per_institute(subset(data_fets_inst, metric=="Dice"), "Dice",
-                                                institution_name)
-  p_hd95 <- generate_dot_boxplots_per_institute(subset(data_fets_inst, metric=="Hausdorff95"), "HD95",
-                                                institution_name)
-  
-  dice_name <- paste("boxplot", institution_name, "Dice", sep="_")
-  hd95_name <- paste("boxplot", institution_name, "HD95", sep="_")
-  
-  ggsave(paste(output_dir, paste(dice_name, ".png", sep=""), sep="/"), p_dice)
-  ggsave(paste(output_dir, paste(hd95_name, ".png", sep=""), sep="/"), p_hd95)
+  # data_fets_inst <- subset(data_fets_inst, algorithm != "baseline_nnunet2020") # not ranked
 
   # Calculate the rankings for the ET, TC and WT
   # For each region, the ranking is computed for the Dice and Hausdorff95 metrics
   # Resulting in 6 rankings
   print("... calculate rankings ...")
...") rankings <- calculate_all_rankings_per_institute(data_fets_inst, institution_name, ranking_method, report_dir=report_dir) - + # Compute mean rank per algorithm for each institution -------------------- mean_rank_df <- calculate_mean_ranks_one_institute(rankings, data_fets_inst, institution_name) - + # Make sure that data frames have same ordering mean_rank_df %>% select(sort(names(.))) - + if (is.null(mean_ranks_all_institutions)) { mean_ranks_all_institutions <- mean_rank_df @@ -392,10 +356,9 @@ for (path in data_files) { all_institution_names <- c(all_institution_names, institution_name) } all_data[[institution_name]] <- data_fets_inst - + # Calculate number of significantly superior rankings per algorithm dataSignMatrices[[length(dataSignMatrices) + 1]] <- calculate_significance_one_institute(rankings, dataSignCounts) - dataSignCounts[nrow(dataSignCounts)+1,] <- rowSums(dataSignMatrices[[length(dataSignMatrices)]]$dummyTask[])[names(dataSignCounts)] } rownames(mean_ranks_all_institutions) <- all_institution_names @@ -405,10 +368,10 @@ final_ranks_df <- data.frame(meanRank = colMeans(mean_ranks_all_institutions)) final_ranks_df <- cbind(final_ranks_df, finalRank = rank(final_ranks_df$meanRank)) final_ranks_df <- final_ranks_df[order(final_ranks_df$finalRank),] -final_ranks_df_print <- - hux(final_ranks_df) %>% - add_rownames() %>% - set_bold(row = 1, col = everywhere, value = TRUE) %>% +final_ranks_df_print <- + hux(final_ranks_df) %>% + add_rownames() %>% + set_bold(row = 1, col = everywhere, value = TRUE) %>% set_all_borders(TRUE) print("The final ranking is: ") @@ -418,13 +381,6 @@ file_name_mean_ranks <- paste("per_institute_ranks", ranking_method, sep="_") write.csv(final_ranks_df, file = paste(output_dir, paste(file_name_final_ranks, ".csv",sep=""), sep="/")) write.csv(mean_ranks_all_institutions, file = paste(output_dir, paste(file_name_mean_ranks, ".csv",sep=""), sep="/")) -# Calculate number of significantly superiorities ------------------------ -countSign <- colSums(dataSignCounts) -print("Counting how often algorithms are significantly superior to the others: ") -print(countSign) -file_name_significant_counts <- paste("significant_counts", ranking_method, sep="_") -write.csv(countSign, file = paste(output_dir, paste(file_name_significant_counts, ".csv",sep=""), sep="/")) - # also sum up significance matrices total_sign_matrix <- NULL for (s in dataSignMatrices) { @@ -435,5 +391,9 @@ for (s in dataSignMatrices) { total_sign_matrix <- total_sign_matrix + ordered_s } } +print("Counting how often algorithms are significantly superior to the others (each row shows the no. 
+print(total_sign_matrix)
+print("Sum along rows:")
+print(rowSums(total_sign_matrix))
 file_name <- paste("significant_matrix", ranking_method, sep="_")
 write.csv(total_sign_matrix, file = paste(output_dir, paste(file_name, ".csv",sep=""), sep="/"))
diff --git a/Task_2/ranking/example_data.yaml b/Task_2/ranking/example_data.yaml
index b81a40c..7c3973c 100644
--- a/Task_2/ranking/example_data.yaml
+++ b/Task_2/ranking/example_data.yaml
@@ -1,62 +1,44 @@
-0:
-  deepmedic:
-    WT:
-      Dice: 0.5
-      Hausdorff95: 8
-    TC:
-      Dice: 0.7
-      Hausdorff95: 2
-    ET:
-      Dice: 0.54
-      Hausdorff95: 14
-  deepscan:
-    WT:
-      Dice: 0.45
-      Hausdorff95: 12
-    TC:
-      Dice: 0.85
-      Hausdorff95: 5
-    ET:
-      Dice: 0.2
-      Hausdorff95: 22
-  nnunet:
-    WT:
-      Dice: 0.4
-      Hausdorff95: 9
-    TC:
-      Dice: 0.7
-      Hausdorff95: 6
-    ET:
-      Dice: 0.12
-      Hausdorff95: 16
-1:
-  deepmedic:
-    WT:
-      Dice: 0.98
-      Hausdorff95: 2
-    TC:
-      Dice: 0.31
-      Hausdorff95: 5
-    ET:
-      Dice: 0.45
-      Hausdorff95: 1
-  deepscan:
-    WT:
-      Dice: 0.63
-      Hausdorff95: 6.1
-    TC:
-      Dice: 0.23
-      Hausdorff95: 4.2
-    ET:
-      Dice: 0.65
-      Hausdorff95: 9.1
-  nnunet:
-    WT:
-      Dice: 0.31
-      Hausdorff95: 6
-    TC:
-      Dice: 0.56
-      Hausdorff95: 11
-    ET:
-      Dice: 0.98
-      Hausdorff95: 1
\ No newline at end of file
+0:
+  deepmedic:
+    Dice_WT: 0.5
+    Hausdorff95_WT: 8
+    Dice_TC: 0.7
+    Hausdorff95_TC: 2
+    Dice_ET: 0.54
+    Hausdorff95_ET: 14
+  deepscan:
+    Dice_WT: 0.45
+    Hausdorff95_WT: 12
+    Dice_TC: 0.85
+    Hausdorff95_TC: 5
+    Dice_ET: 0.2
+    Hausdorff95_ET: 22
+  nnunet:
+    Dice_WT: 0.4
+    Hausdorff95_WT: 9
+    Dice_TC: 0.7
+    Hausdorff95_TC: 6
+    Dice_ET: 0.12
+    Hausdorff95_ET: 16
+1:
+  deepmedic:
+    Dice_WT: 0.98
+    Hausdorff95_WT: 2
+    Dice_TC: 0.31
+    Hausdorff95_TC: 5
+    Dice_ET: 0.45
+    Hausdorff95_ET: 1
+  deepscan:
+    Dice_WT: 0.63
+    Hausdorff95_WT: 6.1
+    Dice_TC: 0.23
+    Hausdorff95_TC: 4.2
+    Dice_ET: 0.65
+    Hausdorff95_ET: 9.1
+  nnunet:
+    Dice_WT: 0.31
+    Hausdorff95_WT: 6
+    Dice_TC: 0.56
+    Hausdorff95_TC: 11
+    Dice_ET: 0.98
+    Hausdorff95_ET: 1
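
Usage note (editor's addition, not part of the patch): with the argument handling introduced above, the script takes an optional ranking method and an optional --make_reports flag in place of the old report_save_dir argument. A sketch of the expected invocations; the YAML directory path is a placeholder:

    # Default method (rankThenMean), no PDF reports:
    Rscript compute_ranking.R path/to/institution_yamls

    # Explicit ranking method:
    Rscript compute_ranking.R path/to/institution_yamls aggregateThenMedian

    # Method plus bootstrap reports (written under ranking_output/reports_testBased):
    Rscript compute_ranking.R path/to/institution_yamls testBased --make_reports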
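For reference, a minimal R sketch (also an editor's addition) of how load_data() digests the reworked YAML layout, assuming the yaml and reshape2 packages the script already relies on; the snippet and its values are invented for illustration:

    library(yaml)
    library(reshape2)

    # One case, one algorithm, two metrics in the new flat layout
    # (the region is folded into the metric name):
    yaml_str <- "0:\n  deepmedic:\n    Dice_WT: 0.5\n    Hausdorff95_WT: 8\n"
    yaml_data <- yaml.load(yaml_str)
    yaml_data_df <- data.frame(melt(yaml_data))

    # melt() labels list levels from the outside in; with the region level
    # gone, the columns shift to L1 = case, L2 = algorithm, L3 = metric.
    data <- data.frame(case = yaml_data_df$L1,
                       algorithm = yaml_data_df$L2,
                       metric = yaml_data_df$L3,
                       metric_value = yaml_data_df$value)
    print(data)  # one row per (case, algorithm, metric) combination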
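Likewise, a toy illustration (editor's addition) of the aggregation that replaces the removed dataSignCounts bookkeeping: per-institution significance matrices are aligned by algorithm name and summed, and rowSums() then gives the per-algorithm superiority counts that the script prints. The 2x2 matrices and algorithm names are invented, and the alignment step only mirrors what the ordered_s variable in the script suggests:

    # Entry [i, j] > 0 means algorithm i was significantly superior to
    # algorithm j in at least one of an institution's six rankings.
    m1 <- matrix(c(0, 1,
                   0, 0), nrow = 2, byrow = TRUE,
                 dimnames = list(c("deepmedic", "nnunet"), c("deepmedic", "nnunet")))
    m2 <- matrix(c(0, 0,
                   1, 0), nrow = 2, byrow = TRUE,
                 dimnames = list(c("nnunet", "deepmedic"), c("nnunet", "deepmedic")))

    # Align the second matrix to the first one's row/column order before adding:
    ordered_m2 <- m2[rownames(m1), colnames(m1)]
    total_sign_matrix <- m1 + ordered_m2
    print(rowSums(total_sign_matrix))  # deepmedic: 2, nnunet: 0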