From a8c77b4c3e88b793d90fef77508bcae36de28c43 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:21:17 +0000 Subject: [PATCH 01/13] delete previous snapshot --- .../propr/grea/tests/main.nf.test.snap | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 modules/nf-core/propr/grea/tests/main.nf.test.snap diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap deleted file mode 100644 index 2db674fc5ae..00000000000 --- a/modules/nf-core/propr/grea/tests/main.nf.test.snap +++ /dev/null @@ -1,31 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,222a7a8b79b5a2987637279847c609d1" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-29T10:45:07.582509" - }, - "grea chained to propr using default options - enrichedGO": { - "content": [ - [ - [ - { - "id": "test_adj" - }, - "test_adj.go.tsv:md5,904e1fe3eed0f2dded8e5b64321a0269" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-08-03T16:06:25.669444" - } -} \ No newline at end of file From 0c58cb85d552b4a3a8025db0790bf09e93b2b046 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:22:56 +0000 Subject: [PATCH 02/13] [propr/grea] update container and conda environment --- modules/nf-core/propr/grea/environment.yml | 4 +++- modules/nf-core/propr/grea/main.nf | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/propr/grea/environment.yml b/modules/nf-core/propr/grea/environment.yml index 2bb015a1047..9744dab906b 100644 --- a/modules/nf-core/propr/grea/environment.yml +++ b/modules/nf-core/propr/grea/environment.yml @@ -1,5 +1,7 @@ channels: - conda-forge - bioconda + dependencies: - - conda-forge::r-propr=5.0.4 + - bioconda::bioconductor-limma=3.58.1 + - conda-forge::r-propr=5.1.5 diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf index 
d2e1ee6de9a..b0eaae65ad0 100644 --- a/modules/nf-core/propr/grea/main.nf +++ b/modules/nf-core/propr/grea/main.nf @@ -4,8 +4,8 @@ process PROPR_GREA { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-propr:5.0.4': - 'biocontainers/r-propr:5.0.4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b6/b65f7192866fbd9a947df15b104808abb720e7a224bbe3ca8f7f8f680f52c97a/data' : + 'community.wave.seqera.io/library/bioconductor-limma_r-propr:f52f1d4fea746393' }" input: tuple val(meta), path(adj) From c058b28c5c068202a306f0d183cd6454c1c5c92b Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:26:28 +0000 Subject: [PATCH 03/13] [propr/grea] update main.nf output --- modules/nf-core/propr/grea/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf index b0eaae65ad0..5b783476ea9 100644 --- a/modules/nf-core/propr/grea/main.nf +++ b/modules/nf-core/propr/grea/main.nf @@ -1,6 +1,6 @@ process PROPR_GREA { tag "$meta.id" - label 'process_single' + label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -12,9 +12,9 @@ process PROPR_GREA { tuple val(meta2), path(gmt) output: - tuple val(meta), path("*.go.tsv"), emit: enrichedGO - path "versions.yml", emit: versions - path "*.R_sessionInfo.log", emit: session_info + tuple val(meta), path("*.grea.tsv"), emit: results + path "versions.yml", emit: versions + path "*.R_sessionInfo.log", emit: session_info when: task.ext.when == null || task.ext.when From 6d68aa1777782693a353a56a34c76f80e336af96 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:27:20 +0000 Subject: [PATCH 04/13] [propr/grea] copied the updated template from differentialabundance dev-ratio --- modules/nf-core/propr/grea/templates/grea.R | 198 +++++++++++--------- 1 file changed, 112 insertions(+), 86 deletions(-) diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R index 2d568b70330..f5d50374fc3 100644 --- a/modules/nf-core/propr/grea/templates/grea.R +++ b/modules/nf-core/propr/grea/templates/grea.R @@ -51,66 +51,42 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names ) } -#' Converts the .gmt file into a df +#' Loads the .gmt file and converts it into a knowledge database #' -#' @param file_gmt_path path of the .gmt file provided by mygene module. -#' @return output dataframe a Dataframe: 1st column = GOterm, 2nd = Description, 3d to end = genes. 
-process_gmt_file <- function(file_gmt_path) { - - lines <- readLines(file_gmt_path) - data_list <- list() - - for (line in lines) { - fields <- strsplit(line, "\\t")[[1]] # Split the line based on the tab character - go_term <- fields[1] # Extract the GO term - - # Create a data frame with the GO term in the first column - # Fill in missing values with NA to ensure consistent column lengths - data_list[[go_term]] <- data.frame(GOterm = go_term, - Description = fields[2], - GeneIDs = c(fields[3:length(fields)], rep(NA, max(0, 3 - length(fields))))) +#' @param filename path of the .gmt file +#' @param genes vector of gene names. Note that this set should be as complete as possible. +#' So it should not only contain the target genes but also the background genes. +#' @return output a list with: `db` A knowledge database where each row is a graph node (eg. gene) +#' and each column is a concept (eg. GO term, pathway, etc) and `description` A list of descriptions +#' for each concept +load_gmt <- function(filename, nodes) { + + # read gmt file + gmt <- readLines(filename) + gmt <- strsplit(gmt, "\\t") + + # initialize database matrix + db <- matrix(0, nrow = length(nodes), ncol = length(gmt)) + rownames(db) <- nodes + colnames(db) <- sapply(gmt, function(entry) entry[[1]]) + + # description of the concepts + description <- list() + + # for concept in gmt + for (i in 1:length(gmt)) { + + # get concept and description + concept <- gmt[[i]][[1]] + description[[concept]] <- gmt[[i]][[2]] + + # fill 1 if gene is in concept + nodes_in_concept <- gmt[[i]][-c(1, 2)] + nodes_in_concept <- nodes_in_concept[nodes_in_concept %in% nodes] + db[nodes_in_concept, i] <- 1 } - gmt_df <- do.call(rbind, data_list) # Combine all data frames into a single data frame - gmt_df\$GeneIDs <- as.character(gmt_df\$GeneIDs) # Convert gene IDs to character to avoid coercion - - return(gmt_df) -} - -#' Converts the .gmt data frame into a knowledge matrix (contingency table) -#' -#' @param gmt_df .gmt 
df created by process_gmt_file -#' @return output dataframe. A knowledge database where each row is a graph node (gene) -#' and each column is a concept (GO term). -gmt_to_K<- function(gmt_df){ - - summ_df <- as.data.frame(gmt_df\$GeneIDs) - summ_df <- cbind(summ_df, as.data.frame(gmt_df\$GOterm)) - colnames(summ_df)<- c("GeneIDs", "GOterm") - summ_df<- unique(summ_df) - - summ_df\$value <- 1 - - K <- table(summ_df\$GeneIDs, summ_df\$GOterm) - K <- as.data.frame.matrix(K) - - return(K) -} - -#' Expands knowledge matrix with missing genes to ensure same number of rows for A and K -#' -#' @param adjacency_matrix gene x gene correlation or proportionality adjacency matrix (output propr/propd) -#' @return output dataframe. A knowledge database where each row is a graph node (gene) -#' and each column is a concept (GO term). -add_missing <- function(adjacency_matrix, knowledge_matrix){ - - missing_genes <- setdiff(rownames(adjacency_matrix), rownames(knowledge_matrix)) - extra_rows <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(knowledge_matrix))) - rownames(extra_rows) <- missing_genes - colnames(extra_rows) <- colnames(knowledge_matrix) - - knowledge_matrix <- rbind(knowledge_matrix, extra_rows) - return(knowledge_matrix) + return(list(db = db, description = description)) } ################################################ @@ -119,52 +95,65 @@ add_missing <- function(adjacency_matrix, knowledge_matrix){ ################################################ ################################################ +# Set defaults and classes + opt <- list( - adj = '$adj', - gmt = '$gmt', prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), + + # input data + adj = '$adj', # adjacency matrix + gmt = '$gmt', # knowledge database .gmt file + + # parameters for gene sets + set_min = 15, # minimum number of genes in a set + set_max = 500, # maximum number of genes in a set + + # parameters for permutation test permutation = 100, - fixseed = 
TRUE, + + # other options + seed = NA, ncores = as.integer('$task.cpus') ) opt_types <- list( + prefix = 'character', adj = 'character', gmt = 'character', - prefix = 'character', + set_min = 'numeric', + set_max = 'numeric', permutation = 'numeric', - fixseed = 'logical', + seed = 'numeric', ncores = 'numeric' ) # Apply parameter overrides -args_opt <- parse_args('$task.ext.args') +args_opt <- parse_args('$task.ext.args') for ( ao in names(args_opt)){ if (! ao %in% names(opt)){ stop(paste("Invalid option:", ao)) } else { # Preserve classes from defaults where possible - if (! is.null(opt[[ao]])){ - args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]]) - } - # set NA - if (args_opt[[ao]] %in% c('NA', NA, 'null')){ - args_opt[[ao]] <- NA - } + args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]]) + + # handle NA, and avoid errors when NA is provided by user as character + if (args_opt[[ao]] %in% c('NA', NA)) args_opt[[ao]] <- NA + + # replace values opt[[ao]] <- args_opt[[ao]] } } # Check if required parameters have been provided + required_opts <- c('adj', 'gmt') # defines a vector required_opts containing the names of the required parameters. missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ stop(paste("Missing required options:", paste(missing, collapse=', '))) } - # Check file inputs are valid for (file_input in c('adj', 'gmt')){ if (is.null(opt[[file_input]])) { @@ -175,6 +164,9 @@ for (file_input in c('adj', 'gmt')){ } } +# TODO maybe add a function to pretty print the arguments? 
+print(opt) + ################################################ ################################################ ## Finish loading libraries ## @@ -189,20 +181,56 @@ library(propr) ################################################ ################################################ -# Read gene x gene adjacency matrix -A <- read_delim_flexible(opt\$adj, header = TRUE, row.names = 1, check.names = TRUE) +# set seed when required -# Read and process gene x GO term matrix -gmt_df <- process_gmt_file(opt\$gmt) -K <- gmt_to_K(gmt_df) +if (!is.na(opt\$seed)) { + warning('Setting seed ', opt\$seed, ' for reproducibility') + set.seed(opt\$seed) +} -# Ensure same number of rows (genes) -if (nrow(A) != nrow(K)){ - K <- add_missing(A, K) +# load adjacency matrix +# this matrix should have gene x gene dimensions + +adj <- as.matrix(read_delim_flexible( + opt\$adj, + header = TRUE, + row.names = 1, + check.names = TRUE +)) +if (nrow(adj) != ncol(adj)) { + stop('Adjacency matrix is not square') +} +if (!all(rownames(adj) == colnames(adj))) { + stop('Adjacency matrix row names are not equal to column names') } -# Run Graflex -G <- runGraflex(A, K, opt\$permutation, opt\$fixseed) +# load and process knowledge database + +gmt <- load_gmt( + opt\$gmt, + rownames(adj) +) + +# filter gene sets +# gene sets with less than set_min or more than set_max genes are removed + +idx <- which(colSums(gmt\$db) > opt\$set_min & colSums(gmt\$db) < opt\$set_max) +gmt\$db <- gmt\$db[, idx] +gmt\$description <- gmt\$description[idx] + +# run GREA +# Basically, it calculates the odds ratio of the graph being enriched in each concept, +# and the FDR of the odds ratio through permutation tests + +odds <- runGraflex( + adj, + gmt\$db, + p=opt\$permutation, + ncores=opt\$ncores +) +odds\$Description <- sapply(odds\$Concept, function(concept) + gmt\$description[[concept]] +) ################################################ ################################################ @@ -211,10 +239,10 @@ G <- runGraflex(A, 
K, opt\$permutation, opt\$fixseed) ################################################ write.table( - G, - file = paste0(opt\$prefix, '.go.tsv'), + odds, + file = paste0(opt\$prefix, '.grea.tsv'), col.names = TRUE, - row.names = TRUE, + row.names = FALSE, sep = '\\t', quote = FALSE @@ -236,13 +264,11 @@ sink() ################################################ ################################################ -r.version <- strsplit(version[['version.string']], ' ')[[1]][3] propr.version <- as.character(packageVersion('propr')) writeLines( c( '"${task.process}":', - paste(' r-base:', r.version), paste(' r-propr:', propr.version) ), 'versions.yml') From b7fb6355fb4f139efabb23055f82b09ab06e25a0 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:41:37 +0000 Subject: [PATCH 05/13] [propr/grea] added some comments and checks --- modules/nf-core/propr/grea/templates/grea.R | 31 +++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R index f5d50374fc3..4790634ffbd 100644 --- a/modules/nf-core/propr/grea/templates/grea.R +++ b/modules/nf-core/propr/grea/templates/grea.R @@ -54,11 +54,13 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names #' Loads the .gmt file and converts it into a knowledge database #' #' @param filename path of the .gmt file -#' @param genes vector of gene names. Note that this set should be as complete as possible. -#' So it should not only contain the target genes but also the background genes. -#' @return output a list with: `db` A knowledge database where each row is a graph node (eg. gene) -#' and each column is a concept (eg. GO term, pathway, etc) and `description` A list of descriptions -#' for each concept +#' @param nodes vector of node (eg. gene) names. Note that this set should be as +#' complete as possible. 
So it should not only contain the target genes but also +#' the background genes. +#' @return a list with: +#' `db` A knowledge database (matrix) where each row is a graph node (eg. gene) +#' and each column is a concept (eg. GO term, pathway, etc). +#' `description` A list of descriptions for each concept. load_gmt <- function(filename, nodes) { # read gmt file @@ -109,7 +111,7 @@ opt <- list( set_max = 500, # maximum number of genes in a set # parameters for permutation test - permutation = 100, + permutation = 100, # number of permutations to perform # other options seed = NA, @@ -129,7 +131,8 @@ opt_types <- list( # Apply parameter overrides -args_opt <- parse_args('$task.ext.args') +args_ext <- ifelse('$task.ext.args' == 'null', '', '$task.ext.args') +args_opt <- parse_args(args_ext) for ( ao in names(args_opt)){ if (! ao %in% names(opt)){ stop(paste("Invalid option:", ao)) @@ -155,6 +158,7 @@ if (length(missing) > 0){ } # Check file inputs are valid + for (file_input in c('adj', 'gmt')){ if (is.null(opt[[file_input]])) { stop(paste("Please provide", file_input), call. = FALSE) @@ -164,7 +168,12 @@ for (file_input in c('adj', 'gmt')){ } } -# TODO maybe add a function to pretty print the arguments? 
+# check parameters are valid + +if (opt\$permutation < 0) { + stop('permutation should be a positive integer') +} + print(opt) ################################################ @@ -195,10 +204,10 @@ adj <- as.matrix(read_delim_flexible( opt\$adj, header = TRUE, row.names = 1, - check.names = TRUE + check.names = FALSE )) if (nrow(adj) != ncol(adj)) { - stop('Adjacency matrix is not square') + stop('Adjacency matrix should be a squared matrix that reflects the connections between all the nodes') } if (!all(rownames(adj) == colnames(adj))) { stop('Adjacency matrix row names are not equal to column names') @@ -208,7 +217,7 @@ if (!all(rownames(adj) == colnames(adj))) { gmt <- load_gmt( opt\$gmt, - rownames(adj) + rownames(adj) # adj should contain all the nodes (target and background) ) # filter gene sets From a2149dc74c5d89c6af36782b34e7e9d117156dbd Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 10:49:03 +0000 Subject: [PATCH 06/13] [propr/grea] add option to round digits --- modules/nf-core/propr/grea/templates/grea.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R index 4790634ffbd..82afe6541ef 100644 --- a/modules/nf-core/propr/grea/templates/grea.R +++ b/modules/nf-core/propr/grea/templates/grea.R @@ -113,8 +113,9 @@ opt <- list( # parameters for permutation test permutation = 100, # number of permutations to perform - # other options - seed = NA, + # other parameters + seed = NA, # seed for reproducibility + round_digits = NA, # number of digits to round results ncores = as.integer('$task.cpus') ) @@ -126,6 +127,7 @@ opt_types <- list( set_max = 'numeric', permutation = 'numeric', seed = 'numeric', + round_digits = 'numeric', ncores = 'numeric' ) @@ -247,6 +249,12 @@ odds\$Description <- sapply(odds\$Concept, function(concept) ################################################ ################################################ 
+if (!is.na(opt\$round_digits)) { + for (col in c('Odds', 'LogOR', 'FDR.under', 'FDR.over')){ + odds[,col] <- round(odds[,col], opt\$round_digits) + } +} + write.table( odds, file = paste0(opt\$prefix, '.grea.tsv'), From 136e8ba492652b19bb1f04cd49106384072be8bf Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 12:02:18 +0000 Subject: [PATCH 07/13] [propr/grea] updated tests, need to solve some problems with gmt filtering --- modules/nf-core/propr/grea/main.nf | 8 ++-- modules/nf-core/propr/grea/meta.yml | 12 ++--- modules/nf-core/propr/grea/templates/grea.R | 10 +++- .../nf-core/propr/grea/tests/grea_test.config | 9 ++-- modules/nf-core/propr/grea/tests/main.nf.test | 47 ++++++++++--------- 5 files changed, 48 insertions(+), 38 deletions(-) diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf index 5b783476ea9..fd727208d94 100644 --- a/modules/nf-core/propr/grea/main.nf +++ b/modules/nf-core/propr/grea/main.nf @@ -8,13 +8,13 @@ process PROPR_GREA { 'community.wave.seqera.io/library/bioconductor-limma_r-propr:f52f1d4fea746393' }" input: - tuple val(meta), path(adj) + tuple val(meta), path(adjacency) tuple val(meta2), path(gmt) output: - tuple val(meta), path("*.grea.tsv"), emit: results - path "versions.yml", emit: versions - path "*.R_sessionInfo.log", emit: session_info + tuple val(meta), path("*.grea.tsv"), emit: results + path "versions.yml", emit: versions + path "*.R_sessionInfo.log", emit: session_info when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml index 58f73fc4d86..8163d82b01c 100644 --- a/modules/nf-core/propr/grea/meta.yml +++ b/modules/nf-core/propr/grea/meta.yml @@ -2,12 +2,12 @@ name: "propr_grea" description: Perform Gene Ratio Enrichment Analysis keywords: - - logratio - - differential - propr - grea - - enrichment - - expression + - logratio + - differential expression + - functional enrichment + - functional analysis 
tools: - "grea": description: "Gene Ratio Enrichment Analysis" @@ -21,10 +21,10 @@ input: - - meta: type: map description: | - Groovy Map containing sample information. + Groovy Map containing data information. This can be used at the workflow level to pass optional parameters to the module. [id: 'test', ...] - - adj: + - adjacency: type: file description: adjacency matrix for gene ratio proportionality/differential proportionality pattern: "*.{csv,tsv}" diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R index 82afe6541ef..37832f9734c 100644 --- a/modules/nf-core/propr/grea/templates/grea.R +++ b/modules/nf-core/propr/grea/templates/grea.R @@ -103,7 +103,7 @@ opt <- list( prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), # input data - adj = '$adj', # adjacency matrix + adj = '$adjacency', # adjacency matrix gmt = '$gmt', # knowledge database .gmt file # parameters for gene sets @@ -202,6 +202,8 @@ if (!is.na(opt\$seed)) { # load adjacency matrix # this matrix should have gene x gene dimensions +message("Loading input data") + adj <- as.matrix(read_delim_flexible( opt\$adj, header = TRUE, @@ -226,6 +228,9 @@ gmt <- load_gmt( # gene sets with less than set_min or more than set_max genes are removed idx <- which(colSums(gmt\$db) > opt\$set_min & colSums(gmt\$db) < opt\$set_max) +if (length(idx) == 0){ + stop("No gene set pass the filter of set_min=", opt\$set_min, " and set_max=", opt\$set_max) +} gmt\$db <- gmt\$db[, idx] gmt\$description <- gmt\$description[idx] @@ -233,6 +238,8 @@ gmt\$description <- gmt\$description[idx] # Basically, it calculates the odds ratio of the graph being enriched in each concept, # and the FDR of the odds ratio through permutation tests +message("Running GREA") + odds <- runGraflex( adj, gmt\$db, @@ -262,7 +269,6 @@ write.table( row.names = FALSE, sep = '\\t', quote = FALSE - ) ################################################ diff --git 
a/modules/nf-core/propr/grea/tests/grea_test.config b/modules/nf-core/propr/grea/tests/grea_test.config index 8d0d229a76d..194a856ab55 100644 --- a/modules/nf-core/propr/grea/tests/grea_test.config +++ b/modules/nf-core/propr/grea/tests/grea_test.config @@ -1,8 +1,9 @@ process { - withName: "PROPR_PROPR"{ - ext.args = { "--adjacency true --permutation 5 --fixseed true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05"} + cpus = 1 + withName: "PROPR_PROPD"{ + ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} } withName: "PROPR_GREA"{ - ext.args = { "--permutation 5 --fixseed true"} + ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"} } -} \ No newline at end of file +} diff --git a/modules/nf-core/propr/grea/tests/main.nf.test b/modules/nf-core/propr/grea/tests/main.nf.test index dd442b43459..5b32d96523a 100644 --- a/modules/nf-core/propr/grea/tests/main.nf.test +++ b/modules/nf-core/propr/grea/tests/main.nf.test @@ -8,34 +8,34 @@ nextflow_process { tag "modules_nfcore" tag "propr" tag "propr/grea" - tag "mygene" - tag "propr/propr" + tag "propr/propd" - test("grea chained to propr using default options") { + test("test grea chained to propd") { tag "default" config "./grea_test.config" setup { - run("PROPR_PROPR") { - script "../../propr/main.nf" + run("PROPR_PROPD") { + script "../../propd/main.nf" process { """ - input[0] = [ - [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") - ] - """ - } - } - run("MYGENE") { - script "../../../mygene/main.nf" - process { - """ - input[0] = [ - [id : 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv") + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + ch_contrasts = 
Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) ] + + input[0] = ch_contrasts + input[1] = ch_matrix """ } } @@ -44,8 +44,11 @@ nextflow_process { when { process { """ - input[0] = PROPR_PROPR.out.adj.collect{ meta, adj -> adj }.map{ adj -> [[ id: 'test_adj'], adj]} - input[1] = MYGENE.out.gmt.collect{ meta, gmt -> gmt }.map{ gmt -> [[ id: 'test_gmt'], gmt]} + input[0] = PROPR_PROPD.out.adjacency + input[1] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true) + ] """ } } @@ -53,7 +56,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.enrichedGO).match("grea chained to propr using default options - enrichedGO") }, + { assert snapshot(process.out.results).match("test grea chained to propd") }, { assert snapshot(process.out.versions).match("versions") } ) From 397e97f60d50e3ba69ec93e86c38464fcd51ff50 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 13:06:32 +0000 Subject: [PATCH 08/13] [propr/grea] updated snapshots, but need to check single cpu vs multiprocessor discrepancies --- modules/nf-core/propr/grea/tests/main.nf.test | 61 ++++++++++++++++++- .../propr/grea/tests/main.nf.test.snap | 50 +++++++++++++++ .../propr/grea/tests/multiprocessor.config | 9 +++ 3 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/propr/grea/tests/main.nf.test.snap create mode 100644 modules/nf-core/propr/grea/tests/multiprocessor.config diff --git a/modules/nf-core/propr/grea/tests/main.nf.test 
b/modules/nf-core/propr/grea/tests/main.nf.test index 5b32d96523a..8be12a91e51 100644 --- a/modules/nf-core/propr/grea/tests/main.nf.test +++ b/modules/nf-core/propr/grea/tests/main.nf.test @@ -56,10 +56,65 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.results).match("test grea chained to propd") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot( + process.out.results, + process.out.versions + ).match()} + ) + } + } + + test("test grea chained to propd - multiprocessor") { + + tag "default" + config "./multiprocessor.config" + + setup { + run("PROPR_PROPD") { + script "../../propd/main.nf" + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + ] + + input[0] = ch_contrasts + input[1] = ch_matrix + """ + } + } + } + + when { + process { + """ + input[0] = PROPR_PROPD.out.adjacency + input[1] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.results, + process.out.versions + ).match()} ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap new file mode 100644 index 00000000000..5d2fe140505 --- /dev/null +++ 
b/modules/nf-core/propr/grea/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "test grea chained to propd - multiprocessor": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" + }, + "treatment_mCherry_hND6_.grea.tsv:md5,724bbb66b9d85291212c357e866fa58f" + ] + ], + [ + "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T13:00:20.262161746" + }, + "test grea chained to propd": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" + }, + "treatment_mCherry_hND6_.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697" + ] + ], + [ + "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T13:00:02.026244403" + } +} \ No newline at end of file diff --git a/modules/nf-core/propr/grea/tests/multiprocessor.config b/modules/nf-core/propr/grea/tests/multiprocessor.config new file mode 100644 index 00000000000..f0b47cf5c53 --- /dev/null +++ b/modules/nf-core/propr/grea/tests/multiprocessor.config @@ -0,0 +1,9 @@ +process { + cpus = 2 + withName: "PROPR_PROPD"{ + ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} + } + withName: "PROPR_GREA"{ + ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"} + } +} From 72a77777e6282480f236b0adc751141d1ddcfcfa Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 14:09:21 +0000 Subject: [PATCH 09/13] [propr/grea] update snapshots --- .../nf-core/propr/grea/tests/grea_test.config | 5 ++ modules/nf-core/propr/grea/tests/main.nf.test | 54 ------------------- .../propr/grea/tests/main.nf.test.snap | 26 +-------- 3 files changed, 6 insertions(+), 79 deletions(-) diff --git 
a/modules/nf-core/propr/grea/tests/grea_test.config b/modules/nf-core/propr/grea/tests/grea_test.config index 194a856ab55..7d354c013a5 100644 --- a/modules/nf-core/propr/grea/tests/grea_test.config +++ b/modules/nf-core/propr/grea/tests/grea_test.config @@ -1,5 +1,10 @@ process { + // set single core for reproducibility + // NOTE this method relies on parallelization and permutation tests + // The permutations are done within each node, which makes set.seed not working properly when + // different nodes are starting/ending depending on the case cpus = 1 + withName: "PROPR_PROPD"{ ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} } diff --git a/modules/nf-core/propr/grea/tests/main.nf.test b/modules/nf-core/propr/grea/tests/main.nf.test index 8be12a91e51..38a015e4b8c 100644 --- a/modules/nf-core/propr/grea/tests/main.nf.test +++ b/modules/nf-core/propr/grea/tests/main.nf.test @@ -63,58 +63,4 @@ nextflow_process { ) } } - - test("test grea chained to propd - multiprocessor") { - - tag "default" - config "./multiprocessor.config" - - setup { - run("PROPR_PROPD") { - script "../../propd/main.nf" - process { - """ - expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' - - ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) - .splitCsv ( header:true, sep:',' ) - .map{ - tuple(it, it.variable, it.reference, it.target) - } - .first() - ch_matrix = [ - [id: 'test'], - file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), - file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) - ] - - input[0] = ch_contrasts - input[1] = ch_matrix - """ - } - } - } - - when { - process { - """ - input[0] = PROPR_PROPD.out.adjacency - input[1] = [ - [id: 'test'], - file(params.modules_testdata_base_path + 
'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.results, - process.out.versions - ).match()} - ) - } - } } diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap index 5d2fe140505..6c5dd533ed8 100644 --- a/modules/nf-core/propr/grea/tests/main.nf.test.snap +++ b/modules/nf-core/propr/grea/tests/main.nf.test.snap @@ -1,28 +1,4 @@ { - "test grea chained to propd - multiprocessor": { - "content": [ - [ - [ - { - "id": "treatment_mCherry_hND6_", - "variable": "treatment", - "reference": "mCherry", - "target": "hND6", - "blocking": "" - }, - "treatment_mCherry_hND6_.grea.tsv:md5,724bbb66b9d85291212c357e866fa58f" - ] - ], - [ - "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-11T13:00:20.262161746" - }, "test grea chained to propd": { "content": [ [ @@ -47,4 +23,4 @@ }, "timestamp": "2024-12-11T13:00:02.026244403" } -} \ No newline at end of file +} From ec502162eb410d35d8a234546f05132e74934d4e Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 14:17:12 +0000 Subject: [PATCH 10/13] [propr/grea] update meta.yml --- modules/nf-core/propr/grea/meta.yml | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml index 8163d82b01c..dca54ebc8f6 100644 --- a/modules/nf-core/propr/grea/meta.yml +++ b/modules/nf-core/propr/grea/meta.yml @@ -26,29 +26,34 @@ input: [id: 'test', ...] - adjacency: type: file - description: adjacency matrix for gene ratio proportionality/differential proportionality + description: adjacency matrix representing the graph connections (ie. 1 for edges, 0 otherwise). 
+ This can be the adjacency matrix output from gene ratio approaches like propr/propd. pattern: "*.{csv,tsv}" - - meta2: type: map description: | - Groovy map containing study-wide metadata related to the knowledge database + Groovy Map containing data information. + This can be used at the workflow level to pass optional parameters to the module. + [id: 'test', ...] - gmt: type: file - description: relational database containing genes and GO terms (generated by - mygene module) + description: A tab delimited file format that describes gene sets. The first column is the + concept id (eg. GO term, pathway, etc), the second column is the concept description, and the + rest are nodes (eg. genes) that is associated to the given concept. pattern: "*.{gmt}" output: - - enrichedGO: + - results: - meta: - type: map + type: file description: | Groovy Map containing sample information. This can be used at the workflow level to pass optional parameters to the module. [id: 'test', ...] - - "*.go.tsv": + - "*.grea.tsv": type: file - description: File containing GO terms and their enrichment values - pattern: "*.{csv}" + description: Output file containing the information about the tested concepts (ie. gene sets) + and enrichment statistics. 
+ pattern: "*.{tsv}" - versions: - versions.yml: type: file @@ -57,9 +62,11 @@ output: - session_info: - "*.R_sessionInfo.log": type: file - description: R session log + description: dump of R SessionInfo pattern: "*.R_sessionInfo.log" authors: - "@caraiz2001" + - "@suzannejin" maintainers: - "@caraiz2001" + - "@suzannejin" From b87f223337780a8067a75084f511675aab116085 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 14:22:29 +0000 Subject: [PATCH 11/13] [propr/grea] remove unnecessary config --- modules/nf-core/propr/grea/tests/multiprocessor.config | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 modules/nf-core/propr/grea/tests/multiprocessor.config diff --git a/modules/nf-core/propr/grea/tests/multiprocessor.config b/modules/nf-core/propr/grea/tests/multiprocessor.config deleted file mode 100644 index f0b47cf5c53..00000000000 --- a/modules/nf-core/propr/grea/tests/multiprocessor.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - cpus = 2 - withName: "PROPR_PROPD"{ - ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} - } - withName: "PROPR_GREA"{ - ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"} - } -} From c262b2cdc7be854a6f71d2d29d78b3f2cbea0776 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 14:32:51 +0000 Subject: [PATCH 12/13] [propr/grea] solve linting --- modules/nf-core/propr/grea/meta.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml index dca54ebc8f6..f7872465ff5 100644 --- a/modules/nf-core/propr/grea/meta.yml +++ b/modules/nf-core/propr/grea/meta.yml @@ -26,8 +26,9 @@ input: [id: 'test', ...] - adjacency: type: file - description: adjacency matrix representing the graph connections (ie. 1 for edges, 0 otherwise). - This can be the adjacency matrix output from gene ratio approaches like propr/propd.
+ description: | + Adjacency matrix representing the graph connections (ie. 1 for edges, 0 otherwise). + This can be the adjacency matrix output from gene ratio approaches like propr/propd. pattern: "*.{csv,tsv}" - - meta2: type: map @@ -37,9 +38,10 @@ input: [id: 'test', ...] - gmt: type: file - description: A tab delimited file format that describes gene sets. The first column is the - concept id (eg. GO term, pathway, etc), the second column is the concept description, and the - rest are nodes (eg. genes) that is associated to the given concept. + description: | + A tab delimited file format that describes gene sets. The first column is the + concept id (eg. GO term, pathway, etc), the second column is the concept description, and the + rest are nodes (eg. genes) that is associated to the given concept. pattern: "*.{gmt}" output: - results: @@ -51,8 +53,9 @@ output: [id: 'test', ...] - "*.grea.tsv": type: file - description: Output file containing the information about the tested concepts (ie. gene sets) - and enrichment statistics. + description: | + Output file containing the information about the tested concepts (ie. gene sets) + and enrichment statistics. 
+ pattern: "*.{tsv}" - versions: - versions.yml: From a590997dd41489d0f6e9dc267676024e4256fcdd Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Wed, 11 Dec 2024 15:56:12 +0100 Subject: [PATCH 13/13] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Júlia Mir Pedrol --- modules/nf-core/propr/grea/meta.yml | 2 +- modules/nf-core/propr/grea/templates/grea.R | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml index f7872465ff5..cc0613d3dab 100644 --- a/modules/nf-core/propr/grea/meta.yml +++ b/modules/nf-core/propr/grea/meta.yml @@ -56,7 +56,7 @@ output: description: | Output file containing the information about the tested concepts (ie. gene sets) and enrichment statistics. - pattern: "*.{tsv}" + pattern: "*.grea.tsv" - versions: - versions.yml: type: file diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R index 37832f9734c..3b761f89b1c 100644 --- a/modules/nf-core/propr/grea/templates/grea.R +++ b/modules/nf-core/propr/grea/templates/grea.R @@ -176,8 +176,6 @@ if (opt\$permutation < 0) { stop('permutation should be a positive integer') } -print(opt) - ################################################ ################################################ ## Finish loading libraries ##