From a8c77b4c3e88b793d90fef77508bcae36de28c43 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:21:17 +0000
Subject: [PATCH 01/13] delete previous snapshot

---
 .../propr/grea/tests/main.nf.test.snap        | 31 -------------------
 1 file changed, 31 deletions(-)
 delete mode 100644 modules/nf-core/propr/grea/tests/main.nf.test.snap

diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap
deleted file mode 100644
index 2db674fc5ae..00000000000
--- a/modules/nf-core/propr/grea/tests/main.nf.test.snap
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-    "versions": {
-        "content": [
-            [
-                "versions.yml:md5,222a7a8b79b5a2987637279847c609d1"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-04-29T10:45:07.582509"
-    },
-    "grea chained to propr using default options - enrichedGO": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test_adj"
-                    },
-                    "test_adj.go.tsv:md5,904e1fe3eed0f2dded8e5b64321a0269"
-                ]
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2024-08-03T16:06:25.669444"
-    }
-}
\ No newline at end of file

From 0c58cb85d552b4a3a8025db0790bf09e93b2b046 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:22:56 +0000
Subject: [PATCH 02/13] [propr/grea] update container and conda environment

---
 modules/nf-core/propr/grea/environment.yml | 4 +++-
 modules/nf-core/propr/grea/main.nf         | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/modules/nf-core/propr/grea/environment.yml b/modules/nf-core/propr/grea/environment.yml
index 2bb015a1047..9744dab906b 100644
--- a/modules/nf-core/propr/grea/environment.yml
+++ b/modules/nf-core/propr/grea/environment.yml
@@ -1,5 +1,7 @@
 channels:
   - conda-forge
   - bioconda
+
 dependencies:
-  - conda-forge::r-propr=5.0.4
+  - bioconda::bioconductor-limma=3.58.1
+  - conda-forge::r-propr=5.1.5
diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf
index d2e1ee6de9a..b0eaae65ad0 100644
--- a/modules/nf-core/propr/grea/main.nf
+++ b/modules/nf-core/propr/grea/main.nf
@@ -4,8 +4,8 @@ process PROPR_GREA {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/r-propr:5.0.4':
-        'biocontainers/r-propr:5.0.4' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b6/b65f7192866fbd9a947df15b104808abb720e7a224bbe3ca8f7f8f680f52c97a/data' :
+        'community.wave.seqera.io/library/bioconductor-limma_r-propr:f52f1d4fea746393' }"
 
     input:
     tuple val(meta), path(adj)

From c058b28c5c068202a306f0d183cd6454c1c5c92b Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:26:28 +0000
Subject: [PATCH 03/13] [propr/grea] update main.nf output

---
 modules/nf-core/propr/grea/main.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf
index b0eaae65ad0..5b783476ea9 100644
--- a/modules/nf-core/propr/grea/main.nf
+++ b/modules/nf-core/propr/grea/main.nf
@@ -1,6 +1,6 @@
 process PROPR_GREA {
     tag "$meta.id"
-    label 'process_single'
+    label 'process_high'
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -12,9 +12,9 @@ process PROPR_GREA {
     tuple val(meta2), path(gmt)
 
     output:
-    tuple val(meta), path("*.go.tsv"),  emit: enrichedGO
-    path "versions.yml",                emit: versions
-    path "*.R_sessionInfo.log",         emit: session_info
+    tuple val(meta), path("*.grea.tsv"),  emit: results
+    path "versions.yml",                  emit: versions
+    path "*.R_sessionInfo.log",           emit: session_info
 
     when:
     task.ext.when == null || task.ext.when

From 6d68aa1777782693a353a56a34c76f80e336af96 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:27:20 +0000
Subject: [PATCH 04/13] [propr/grea] copied the updated template from
 differentialabundance dev-ratio

---
 modules/nf-core/propr/grea/templates/grea.R | 198 +++++++++++---------
 1 file changed, 112 insertions(+), 86 deletions(-)

diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R
index 2d568b70330..f5d50374fc3 100644
--- a/modules/nf-core/propr/grea/templates/grea.R
+++ b/modules/nf-core/propr/grea/templates/grea.R
@@ -51,66 +51,42 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names
     )
 }
 
-#' Converts the .gmt file into a df
+#' Loads the .gmt file  and converts it into a knowledge database
 #'
-#' @param file_gmt_path path of the .gmt file provided by mygene module.
-#' @return output dataframe a Dataframe: 1st column = GOterm, 2nd = Description, 3d to end = genes.
-process_gmt_file <- function(file_gmt_path) {
-
-    lines <- readLines(file_gmt_path)
-    data_list <- list()
-
-    for (line in lines) {
-        fields <- strsplit(line, "\\t")[[1]] # Split the line based on the tab character
-        go_term <- fields[1] # Extract the GO term
-
-        # Create a data frame with the GO term in the first column
-        # Fill in missing values with NA to ensure consistent column lengths
-        data_list[[go_term]] <- data.frame(GOterm = go_term,
-                                        Description = fields[2],
-                                        GeneIDs = c(fields[3:length(fields)], rep(NA, max(0, 3 - length(fields)))))
+#' @param filename path of the .gmt file
+#' @param genes vector of gene names. Note that this set should be as complete as possible.
+#' So it should not only contain the target genes but also the background genes.
+#' @return output a list with: `db` A knowledge database where each row is a graph node (eg. gene)
+#' and each column is a concept (eg. GO term, pathway, etc) and `description` A list of descriptions
+#' for each concept
+load_gmt <- function(filename, nodes) {
+
+    # read gmt file
+    gmt <- readLines(filename)
+    gmt <- strsplit(gmt[nzchar(trimws(gmt))], "\\t")  # skip blank lines
+
+    # initialize the node x concept membership matrix (0 = not a member)
+    db <- matrix(0, nrow = length(nodes), ncol = length(gmt))
+    rownames(db) <- nodes
+    colnames(db) <- vapply(gmt, function(entry) entry[[1]], character(1))
+
+    # description of the concepts, keyed by concept name
+    description <- list()
+
+    # seq_along() iterates zero times when the gmt file has no entries
+    for (i in seq_along(gmt)) {
+
+        # first field is the concept name, second field its description
+        concept <- gmt[[i]][[1]]
+        description[[concept]] <- gmt[[i]][[2]]
+
+        # remaining fields are the members; flag those present in `nodes`
+        nodes_in_concept <- gmt[[i]][-c(1, 2)]
+        nodes_in_concept <- nodes_in_concept[nodes_in_concept %in% nodes]
+        db[nodes_in_concept, i] <- 1
     }
 
-    gmt_df <- do.call(rbind, data_list) # Combine all data frames into a single data frame
-    gmt_df\$GeneIDs <- as.character(gmt_df\$GeneIDs) # Convert gene IDs to character to avoid coercion
-
-    return(gmt_df)
-}
-
-#' Converts the .gmt data frame into a knowledge matrix (contingency table)
-#'
-#' @param gmt_df .gmt df created by process_gmt_file
-#' @return output dataframe. A knowledge database where each row is a graph node (gene)
-#'  and each column is a concept (GO term).
-gmt_to_K<- function(gmt_df){
-
-    summ_df <- as.data.frame(gmt_df\$GeneIDs)
-    summ_df <- cbind(summ_df, as.data.frame(gmt_df\$GOterm))
-    colnames(summ_df)<- c("GeneIDs", "GOterm")
-    summ_df<- unique(summ_df)
-
-    summ_df\$value <- 1
-
-    K <- table(summ_df\$GeneIDs, summ_df\$GOterm)
-    K <- as.data.frame.matrix(K)
-
-    return(K)
-}
-
-#' Expands knowledge matrix with missing genes to ensure same number of rows for A and K
-#'
-#' @param adjacency_matrix gene x gene correlation or proportionality adjacency matrix (output propr/propd)
-#' @return output dataframe. A knowledge database where each row is a graph node (gene)
-#'  and each column is a concept (GO term).
-add_missing <- function(adjacency_matrix, knowledge_matrix){
-
-    missing_genes <- setdiff(rownames(adjacency_matrix), rownames(knowledge_matrix))
-    extra_rows <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(knowledge_matrix)))
-    rownames(extra_rows) <- missing_genes
-    colnames(extra_rows) <- colnames(knowledge_matrix)
-
-    knowledge_matrix <- rbind(knowledge_matrix, extra_rows)
-    return(knowledge_matrix)
+    return(list(db = db, description = description))
 }
 
 ################################################
@@ -119,52 +95,65 @@ add_missing <- function(adjacency_matrix, knowledge_matrix){
 ################################################
 ################################################
 
+# Set defaults and classes
+
 opt <- list(
-    adj              = '$adj',
-    gmt              = '$gmt',
     prefix           = ifelse('$task.ext.prefix' == 'null', '$meta.id',  '$task.ext.prefix'),
+
+    # input data
+    adj              = '$adj',          # adjacency matrix
+    gmt              = '$gmt',          # knowledge database .gmt file
+
+    # parameters for gene sets
+    set_min          = 15,              # minimum number of genes in a set
+    set_max          = 500,             # maximum number of genes in a set
+
+    # parameters for permutation test
     permutation      = 100,
-    fixseed          = TRUE,
+
+    # other options
+    seed             = NA,
     ncores           = as.integer('$task.cpus')
 )
 
 opt_types <- list(
+    prefix           = 'character',
     adj              = 'character',
     gmt              = 'character',
-    prefix           = 'character',
+    set_min          = 'numeric',
+    set_max          = 'numeric',
     permutation      = 'numeric',
-    fixseed          = 'logical',
+    seed             = 'numeric',
     ncores           = 'numeric'
 )
 
 # Apply parameter overrides
-args_opt <- parse_args('$task.ext.args')
 
+args_opt <- parse_args('$task.ext.args')
 for ( ao in names(args_opt)){
     if (! ao %in% names(opt)){
         stop(paste("Invalid option:", ao))
     } else {
 
         # Preserve classes from defaults where possible
-        if (! is.null(opt[[ao]])){
-            args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]])
-        }
-        # set NA
-        if (args_opt[[ao]] %in% c('NA', NA, 'null')){
-            args_opt[[ao]] <- NA
-        }
+        args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]])
+
+        # handle NA, and avoid errors when NA is provided by user as character
+        if (args_opt[[ao]] %in% c('NA', NA)) args_opt[[ao]] <- NA
+
+        # replace values
         opt[[ao]] <- args_opt[[ao]]
     }
 }
 
 # Check if required parameters have been provided
+
 required_opts <- c('adj', 'gmt') # defines a vector required_opts containing the names of the required parameters.
 missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)]
 if (length(missing) > 0){
     stop(paste("Missing required options:", paste(missing, collapse=', ')))
 }
 
-
 # Check file inputs are valid
 for (file_input in c('adj', 'gmt')){
     if (is.null(opt[[file_input]])) {
@@ -175,6 +164,9 @@ for (file_input in c('adj', 'gmt')){
     }
 }
 
+# TODO maybe add a function to pretty print the arguments?
+print(opt)
+
 ################################################
 ################################################
 ## Finish loading libraries                   ##
@@ -189,20 +181,56 @@ library(propr)
 ################################################
 ################################################
 
-# Read gene x gene adjacency matrix
-A <- read_delim_flexible(opt\$adj, header = TRUE, row.names = 1, check.names = TRUE)
+# set seed when required
 
-# Read and process gene x GO term matrix
-gmt_df <- process_gmt_file(opt\$gmt)
-K <- gmt_to_K(gmt_df)
+if (!is.na(opt\$seed)) {
+    warning('Setting seed ', opt\$seed, ' for reproducibility')
+    set.seed(opt\$seed)
+}
 
-# Ensure same number of rows (genes)
-if (nrow(A) != nrow(K)){
-    K <- add_missing(A, K)
+# load adjacency matrix
+# this matrix should have gene x gene dimensions
+
+adj <- as.matrix(read_delim_flexible(
+    opt\$adj,
+    header = TRUE,
+    row.names = 1,
+    check.names = TRUE
+))
+if (nrow(adj) != ncol(adj)) {
+    stop('Adjacency matrix is not square')
+}
+if (!all(rownames(adj) == colnames(adj))) {
+    stop('Adjacency matrix row names are not equal to column names')
 }
 
-# Run Graflex
-G <- runGraflex(A, K, opt\$permutation, opt\$fixseed)
+# load and process knowledge database
+
+gmt <- load_gmt(
+    opt\$gmt,
+    rownames(adj)
+)
+
+# filter gene sets
+# gene sets with less than set_min or more than set_max genes are removed
+
+idx <- which(colSums(gmt\$db) > opt\$set_min & colSums(gmt\$db) < opt\$set_max)
+gmt\$db <- gmt\$db[, idx]
+gmt\$description <- gmt\$description[idx]
+
+# run GREA
+# Basically, it calculates the odds ratio of the graph being enriched in each concept,
+# and the FDR of the odds ratio through permutation tests
+
+odds <- runGraflex(
+    adj,
+    gmt\$db,
+    p=opt\$permutation,
+    ncores=opt\$ncores
+)
+odds\$Description <- sapply(odds\$Concept, function(concept)
+    gmt\$description[[concept]]
+)
 
 ################################################
 ################################################
@@ -211,10 +239,10 @@ G <- runGraflex(A, K, opt\$permutation, opt\$fixseed)
 ################################################
 
 write.table(
-    G,
-    file      = paste0(opt\$prefix, '.go.tsv'),
+    odds,
+    file      = paste0(opt\$prefix, '.grea.tsv'),
     col.names = TRUE,
-    row.names = TRUE,
+    row.names = FALSE,
     sep       = '\\t',
     quote     = FALSE
 
@@ -236,13 +264,11 @@ sink()
 ################################################
 ################################################
 
-r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
 propr.version <- as.character(packageVersion('propr'))
 
 writeLines(
     c(
         '"${task.process}":',
-        paste('    r-base:', r.version),
         paste('    r-propr:', propr.version)
     ),
 'versions.yml')

From b7fb6355fb4f139efabb23055f82b09ab06e25a0 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:41:37 +0000
Subject: [PATCH 05/13] [propr/grea] added some comments and checks

---
 modules/nf-core/propr/grea/templates/grea.R | 31 +++++++++++++--------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R
index f5d50374fc3..4790634ffbd 100644
--- a/modules/nf-core/propr/grea/templates/grea.R
+++ b/modules/nf-core/propr/grea/templates/grea.R
@@ -54,11 +54,13 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names
 #' Loads the .gmt file  and converts it into a knowledge database
 #'
 #' @param filename path of the .gmt file
-#' @param genes vector of gene names. Note that this set should be as complete as possible.
-#' So it should not only contain the target genes but also the background genes.
-#' @return output a list with: `db` A knowledge database where each row is a graph node (eg. gene)
-#' and each column is a concept (eg. GO term, pathway, etc) and `description` A list of descriptions
-#' for each concept
+#' @param nodes vector of node (eg. gene) names. Note that this set should be as
+#' complete as possible. So it should not only contain the target genes but also
+#' the background genes.
+#' @return a list with:
+#'     `db` A knowledge database (matrix) where each row is a graph node (eg. gene)
+#'      and each column is a concept (eg. GO term, pathway, etc).
+#'     `description` A list of descriptions for each concept.
 load_gmt <- function(filename, nodes) {
 
     # read gmt file
@@ -109,7 +111,7 @@ opt <- list(
     set_max          = 500,             # maximum number of genes in a set
 
     # parameters for permutation test
-    permutation      = 100,
+    permutation      = 100,             # number of permutations to perform
 
     # other options
     seed             = NA,
@@ -129,7 +131,8 @@ opt_types <- list(
 
 # Apply parameter overrides
 
-args_opt <- parse_args('$task.ext.args')
+args_ext <- ifelse('$task.ext.args' == 'null', '', '$task.ext.args')
+args_opt <- parse_args(args_ext)
 for ( ao in names(args_opt)){
     if (! ao %in% names(opt)){
         stop(paste("Invalid option:", ao))
@@ -155,6 +158,7 @@ if (length(missing) > 0){
 }
 
 # Check file inputs are valid
+
 for (file_input in c('adj', 'gmt')){
     if (is.null(opt[[file_input]])) {
         stop(paste("Please provide", file_input), call. = FALSE)
@@ -164,7 +168,12 @@ for (file_input in c('adj', 'gmt')){
     }
 }
 
-# TODO maybe add a function to pretty print the arguments?
+# check parameters are valid
+
+if (is.na(opt\$permutation) || opt\$permutation < 1) {  # reject NA, zero and negative values
+    stop('permutation should be a positive integer')
+}
+
 print(opt)
 
 ################################################
@@ -195,10 +204,10 @@ adj <- as.matrix(read_delim_flexible(
     opt\$adj,
     header = TRUE,
     row.names = 1,
-    check.names = TRUE
+    check.names = FALSE
 ))
 if (nrow(adj) != ncol(adj)) {
-    stop('Adjacency matrix is not square')
+    stop('Adjacency matrix should be a squared matrix that reflects the connections between all the nodes')
 }
 if (!all(rownames(adj) == colnames(adj))) {
     stop('Adjacency matrix row names are not equal to column names')
@@ -208,7 +217,7 @@ if (!all(rownames(adj) == colnames(adj))) {
 
 gmt <- load_gmt(
     opt\$gmt,
-    rownames(adj)
+    rownames(adj)  # adj should contain all the nodes (target and background)
 )
 
 # filter gene sets

From a2149dc74c5d89c6af36782b34e7e9d117156dbd Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 10:49:03 +0000
Subject: [PATCH 06/13] [propr/grea] add option to round digits

---
 modules/nf-core/propr/grea/templates/grea.R | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R
index 4790634ffbd..82afe6541ef 100644
--- a/modules/nf-core/propr/grea/templates/grea.R
+++ b/modules/nf-core/propr/grea/templates/grea.R
@@ -113,8 +113,9 @@ opt <- list(
     # parameters for permutation test
     permutation      = 100,             # number of permutations to perform
 
-    # other options
-    seed             = NA,
+    # other parameters
+    seed             = NA,              # seed for reproducibility
+    round_digits     = NA,              # number of digits to round results
     ncores           = as.integer('$task.cpus')
 )
 
@@ -126,6 +127,7 @@ opt_types <- list(
     set_max          = 'numeric',
     permutation      = 'numeric',
     seed             = 'numeric',
+    round_digits     = 'numeric',
     ncores           = 'numeric'
 )
 
@@ -247,6 +249,12 @@ odds\$Description <- sapply(odds\$Concept, function(concept)
 ################################################
 ################################################
 
+if (!is.na(opt\$round_digits)) {
+    for (col in c('Odds', 'LogOR', 'FDR.under', 'FDR.over')){
+        odds[,col] <- round(odds[,col], opt\$round_digits)
+    }
+}
+
 write.table(
     odds,
     file      = paste0(opt\$prefix, '.grea.tsv'),

From 136e8ba492652b19bb1f04cd49106384072be8bf Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 12:02:18 +0000
Subject: [PATCH 07/13] [propr/grea] updated tests, need to solve some problems
 with gmt filtering

---
 modules/nf-core/propr/grea/main.nf            |  8 ++--
 modules/nf-core/propr/grea/meta.yml           | 12 ++---
 modules/nf-core/propr/grea/templates/grea.R   | 10 +++-
 .../nf-core/propr/grea/tests/grea_test.config |  9 ++--
 modules/nf-core/propr/grea/tests/main.nf.test | 47 ++++++++++---------
 5 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/modules/nf-core/propr/grea/main.nf b/modules/nf-core/propr/grea/main.nf
index 5b783476ea9..fd727208d94 100644
--- a/modules/nf-core/propr/grea/main.nf
+++ b/modules/nf-core/propr/grea/main.nf
@@ -8,13 +8,13 @@ process PROPR_GREA {
         'community.wave.seqera.io/library/bioconductor-limma_r-propr:f52f1d4fea746393' }"
 
     input:
-    tuple val(meta), path(adj)
+    tuple val(meta), path(adjacency)
     tuple val(meta2), path(gmt)
 
     output:
-    tuple val(meta), path("*.grea.tsv"),  emit: results
-    path "versions.yml",                  emit: versions
-    path "*.R_sessionInfo.log",           emit: session_info
+    tuple val(meta), path("*.grea.tsv"), emit: results
+    path "versions.yml",                 emit: versions
+    path "*.R_sessionInfo.log",          emit: session_info
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml
index 58f73fc4d86..8163d82b01c 100644
--- a/modules/nf-core/propr/grea/meta.yml
+++ b/modules/nf-core/propr/grea/meta.yml
@@ -2,12 +2,12 @@
 name: "propr_grea"
 description: Perform Gene Ratio Enrichment Analysis
 keywords:
-  - logratio
-  - differential
   - propr
   - grea
-  - enrichment
-  - expression
+  - logratio
+  - differential expression
+  - functional enrichment
+  - functional analysis
 tools:
   - "grea":
       description: "Gene Ratio Enrichment Analysis"
@@ -21,10 +21,10 @@ input:
   - - meta:
         type: map
         description: |
-          Groovy Map containing sample information.
+          Groovy Map containing data information.
           This can be used at the workflow level to pass optional parameters to the module.
           [id: 'test', ...]
-    - adj:
+    - adjacency:
         type: file
         description: adjacency matrix for gene ratio proportionality/differential proportionality
         pattern: "*.{csv,tsv}"
diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R
index 82afe6541ef..37832f9734c 100644
--- a/modules/nf-core/propr/grea/templates/grea.R
+++ b/modules/nf-core/propr/grea/templates/grea.R
@@ -103,7 +103,7 @@ opt <- list(
     prefix           = ifelse('$task.ext.prefix' == 'null', '$meta.id',  '$task.ext.prefix'),
 
     # input data
-    adj              = '$adj',          # adjacency matrix
+    adj              = '$adjacency',    # adjacency matrix
     gmt              = '$gmt',          # knowledge database .gmt file
 
     # parameters for gene sets
@@ -202,6 +202,8 @@ if (!is.na(opt\$seed)) {
 # load adjacency matrix
 # this matrix should have gene x gene dimensions
 
+message("Loading input data")
+
 adj <- as.matrix(read_delim_flexible(
     opt\$adj,
     header = TRUE,
@@ -226,6 +228,9 @@ gmt <- load_gmt(
 # gene sets with less than set_min or more than set_max genes are removed
 
 idx <- which(colSums(gmt\$db) > opt\$set_min & colSums(gmt\$db) < opt\$set_max)
+if (length(idx) == 0){
+    stop("No gene set pass the filter of set_min=", opt\$set_min, " and set_max=", opt\$set_max)
+}
 gmt\$db <- gmt\$db[, idx]
 gmt\$description <- gmt\$description[idx]
 
@@ -233,6 +238,8 @@ gmt\$description <- gmt\$description[idx]
 # Basically, it calculates the odds ratio of the graph being enriched in each concept,
 # and the FDR of the odds ratio through permutation tests
 
+message("Running GREA")
+
 odds <- runGraflex(
     adj,
     gmt\$db,
@@ -262,7 +269,6 @@ write.table(
     row.names = FALSE,
     sep       = '\\t',
     quote     = FALSE
-
 )
 
 ################################################
diff --git a/modules/nf-core/propr/grea/tests/grea_test.config b/modules/nf-core/propr/grea/tests/grea_test.config
index 8d0d229a76d..194a856ab55 100644
--- a/modules/nf-core/propr/grea/tests/grea_test.config
+++ b/modules/nf-core/propr/grea/tests/grea_test.config
@@ -1,8 +1,9 @@
 process {
-    withName: "PROPR_PROPR"{
-        ext.args = { "--adjacency true --permutation 5 --fixseed true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05"}
+    cpus = 1
+    withName: "PROPR_PROPD"{
+        ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"}
     }
     withName: "PROPR_GREA"{
-        ext.args = { "--permutation 5 --fixseed true"}
+        ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"}
     }
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/propr/grea/tests/main.nf.test b/modules/nf-core/propr/grea/tests/main.nf.test
index dd442b43459..5b32d96523a 100644
--- a/modules/nf-core/propr/grea/tests/main.nf.test
+++ b/modules/nf-core/propr/grea/tests/main.nf.test
@@ -8,34 +8,34 @@ nextflow_process {
     tag "modules_nfcore"
     tag "propr"
     tag "propr/grea"
-    tag "mygene"
-    tag "propr/propr"
+    tag "propr/propd"
 
-    test("grea chained to propr using default options") {
+    test("test grea chained to propd") {
 
         tag "default"
         config "./grea_test.config"
 
         setup {
-            run("PROPR_PROPR") {
-                script "../../propr/main.nf"
+            run("PROPR_PROPD") {
+                script "../../propd/main.nf"
                 process {
                 """
-                input[0] = [
-                    [ id:'test' ],
-                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv")
-                ]
-                """
-                }
-            }
-            run("MYGENE") {
-                script "../../../mygene/main.nf"
-                process {
-                """
-                input[0] = [
-                    [id : 'test'],
-                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv")
+                expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/'
+
+                ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true))
+                    .splitCsv ( header:true, sep:',' )
+                    .map{
+                        tuple(it, it.variable, it.reference, it.target)
+                    }
+                    .first()
+                ch_matrix = [
+                    [id: 'test'],
+                    file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true),
+                    file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true)
                 ]
+
+                input[0] = ch_contrasts
+                input[1] = ch_matrix
                 """
                 }
             }
@@ -44,8 +44,11 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = PROPR_PROPR.out.adj.collect{ meta, adj -> adj }.map{ adj -> [[ id: 'test_adj'], adj]}
-                input[1] = MYGENE.out.gmt.collect{ meta, gmt -> gmt }.map{ gmt -> [[ id: 'test_gmt'], gmt]}
+                input[0] = PROPR_PROPD.out.adjacency
+                input[1] = [
+                    [id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true)
+                ]
                 """
             }
         }
@@ -53,7 +56,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.enrichedGO).match("grea chained to propr using default options - enrichedGO") },
+                { assert snapshot(process.out.results).match("test grea chained to propd") },
                 { assert snapshot(process.out.versions).match("versions") }
 
             )

From 397e97f60d50e3ba69ec93e86c38464fcd51ff50 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 13:06:32 +0000
Subject: [PATCH 08/13] [propr/grea] updated snapshots, but need to check
 single cpu vs multiprocessor discrepancies

---
 modules/nf-core/propr/grea/tests/main.nf.test | 61 ++++++++++++++++++-
 .../propr/grea/tests/main.nf.test.snap        | 50 +++++++++++++++
 .../propr/grea/tests/multiprocessor.config    |  9 +++
 3 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 modules/nf-core/propr/grea/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/propr/grea/tests/multiprocessor.config

diff --git a/modules/nf-core/propr/grea/tests/main.nf.test b/modules/nf-core/propr/grea/tests/main.nf.test
index 5b32d96523a..8be12a91e51 100644
--- a/modules/nf-core/propr/grea/tests/main.nf.test
+++ b/modules/nf-core/propr/grea/tests/main.nf.test
@@ -56,10 +56,65 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.results).match("test grea chained to propd") },
-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(
+                    process.out.results,
+                    process.out.versions
+                ).match()}
+            )
+        }
+    }
+
+    test("test grea chained to propd - multiprocessor") {
+
+        tag "default"
+        config "./multiprocessor.config"
+
+        setup {
+            run("PROPR_PROPD") {
+                script "../../propd/main.nf"
+                process {
+                """
+                expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/'
 
+                ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true))
+                    .splitCsv ( header:true, sep:',' )
+                    .map{
+                        tuple(it, it.variable, it.reference, it.target)
+                    }
+                    .first()
+                ch_matrix = [
+                    [id: 'test'],
+                    file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true),
+                    file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true)
+                ]
+
+                input[0] = ch_contrasts
+                input[1] = ch_matrix
+                """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = PROPR_PROPD.out.adjacency
+                input[1] = [
+                    [id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.results,
+                    process.out.versions
+                ).match()}
             )
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap
new file mode 100644
index 00000000000..5d2fe140505
--- /dev/null
+++ b/modules/nf-core/propr/grea/tests/main.nf.test.snap
@@ -0,0 +1,50 @@
+{
+    "test grea chained to propd - multiprocessor": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "treatment_mCherry_hND6_",
+                        "variable": "treatment",
+                        "reference": "mCherry",
+                        "target": "hND6",
+                        "blocking": ""
+                    },
+                    "treatment_mCherry_hND6_.grea.tsv:md5,724bbb66b9d85291212c357e866fa58f"
+                ]
+            ],
+            [
+                "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-11T13:00:20.262161746"
+    },
+    "test grea chained to propd": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "treatment_mCherry_hND6_",
+                        "variable": "treatment",
+                        "reference": "mCherry",
+                        "target": "hND6",
+                        "blocking": ""
+                    },
+                    "treatment_mCherry_hND6_.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697"
+                ]
+            ],
+            [
+                "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-11T13:00:02.026244403"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/propr/grea/tests/multiprocessor.config b/modules/nf-core/propr/grea/tests/multiprocessor.config
new file mode 100644
index 00000000000..f0b47cf5c53
--- /dev/null
+++ b/modules/nf-core/propr/grea/tests/multiprocessor.config
@@ -0,0 +1,9 @@
+process {
+    cpus = 2
+    withName: "PROPR_PROPD"{
+        ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"}
+    }
+    withName: "PROPR_GREA"{
+        ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"}
+    }
+}

From 72a77777e6282480f236b0adc751141d1ddcfcfa Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 14:09:21 +0000
Subject: [PATCH 09/13] [propr/grea] update snapshots

---
 .../nf-core/propr/grea/tests/grea_test.config |  5 ++
 modules/nf-core/propr/grea/tests/main.nf.test | 54 -------------------
 .../propr/grea/tests/main.nf.test.snap        | 26 +--------
 3 files changed, 6 insertions(+), 79 deletions(-)

diff --git a/modules/nf-core/propr/grea/tests/grea_test.config b/modules/nf-core/propr/grea/tests/grea_test.config
index 194a856ab55..7d354c013a5 100644
--- a/modules/nf-core/propr/grea/tests/grea_test.config
+++ b/modules/nf-core/propr/grea/tests/grea_test.config
@@ -1,5 +1,10 @@
 process {
+    // Use a single core for reproducibility.
+    // NOTE: this method relies on parallelization and permutation tests.
+    // The permutations are run within each node, so set.seed does not give reproducible results
+    // when different nodes start/finish at different times depending on the case.
     cpus = 1
+
     withName: "PROPR_PROPD"{
         ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"}
     }
diff --git a/modules/nf-core/propr/grea/tests/main.nf.test b/modules/nf-core/propr/grea/tests/main.nf.test
index 8be12a91e51..38a015e4b8c 100644
--- a/modules/nf-core/propr/grea/tests/main.nf.test
+++ b/modules/nf-core/propr/grea/tests/main.nf.test
@@ -63,58 +63,4 @@ nextflow_process {
             )
         }
     }
-
-    test("test grea chained to propd - multiprocessor") {
-
-        tag "default"
-        config "./multiprocessor.config"
-
-        setup {
-            run("PROPR_PROPD") {
-                script "../../propd/main.nf"
-                process {
-                """
-                expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/'
-
-                ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true))
-                    .splitCsv ( header:true, sep:',' )
-                    .map{
-                        tuple(it, it.variable, it.reference, it.target)
-                    }
-                    .first()
-                ch_matrix = [
-                    [id: 'test'],
-                    file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true),
-                    file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true)
-                ]
-
-                input[0] = ch_contrasts
-                input[1] = ch_matrix
-                """
-                }
-            }
-        }
-
-        when {
-            process {
-                """
-                input[0] = PROPR_PROPD.out.adjacency
-                input[1] = [
-                    [id: 'test'],
-                    file(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt', checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.results,
-                    process.out.versions
-                ).match()}
-            )
-        }
-    }
 }
diff --git a/modules/nf-core/propr/grea/tests/main.nf.test.snap b/modules/nf-core/propr/grea/tests/main.nf.test.snap
index 5d2fe140505..6c5dd533ed8 100644
--- a/modules/nf-core/propr/grea/tests/main.nf.test.snap
+++ b/modules/nf-core/propr/grea/tests/main.nf.test.snap
@@ -1,28 +1,4 @@
 {
-    "test grea chained to propd - multiprocessor": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "treatment_mCherry_hND6_",
-                        "variable": "treatment",
-                        "reference": "mCherry",
-                        "target": "hND6",
-                        "blocking": ""
-                    },
-                    "treatment_mCherry_hND6_.grea.tsv:md5,724bbb66b9d85291212c357e866fa58f"
-                ]
-            ],
-            [
-                "versions.yml:md5,060fcd8ce4afc482e237fa75686a0aba"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.2"
-        },
-        "timestamp": "2024-12-11T13:00:20.262161746"
-    },
     "test grea chained to propd": {
         "content": [
             [
@@ -47,4 +23,4 @@
         },
         "timestamp": "2024-12-11T13:00:02.026244403"
     }
-}
\ No newline at end of file
+}

From ec502162eb410d35d8a234546f05132e74934d4e Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 14:17:12 +0000
Subject: [PATCH 10/13] [propr/grea] update meta.yml

---
 modules/nf-core/propr/grea/meta.yml | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml
index 8163d82b01c..dca54ebc8f6 100644
--- a/modules/nf-core/propr/grea/meta.yml
+++ b/modules/nf-core/propr/grea/meta.yml
@@ -26,29 +26,34 @@ input:
           [id: 'test', ...]
     - adjacency:
         type: file
-        description: adjacency matrix for gene ratio proportionality/differential proportionality
+        description: adjacency matrix representing the graph connections (ie. 1 for edges, 0 otherwise).
+        This can be the adjacency matrix output from gene ratio approaches like propr/propd.
         pattern: "*.{csv,tsv}"
   - - meta2:
         type: map
         description: |
-          Groovy map containing study-wide metadata related to the knowledge database
+          Groovy Map containing data information.
+          This can be used at the workflow level to pass optional parameters to the module.
+          [id: 'test', ...]
     - gmt:
         type: file
-        description: relational database containing genes and GO terms (generated by
-          mygene module)
+        description: A tab delimited file format that describes gene sets. The first column is the
+        concept id (eg. GO term, pathway, etc), the second column is the concept description, and the
+        rest are nodes (eg. genes) that is associated to the given concept.
         pattern: "*.{gmt}"
 output:
-  - enrichedGO:
+  - results:
       - meta:
-          type: map
+          type: map
           description: |
             Groovy Map containing sample information.
             This can be used at the workflow level to pass optional parameters to the module.
             [id: 'test', ...]
-      - "*.go.tsv":
+      - "*.grea.tsv":
           type: file
-          description: File containing GO terms and their enrichment values
-          pattern: "*.{csv}"
+          description: Output file containing the information about the tested concepts (ie. gene sets)
+          and enrichment statistics.
+          pattern: "*.{tsv}"
   - versions:
       - versions.yml:
           type: file
@@ -57,9 +62,11 @@ output:
   - session_info:
       - "*.R_sessionInfo.log":
           type: file
-          description: R session log
+          description: dump of R SessionInfo
           pattern: "*.R_sessionInfo.log"
 authors:
   - "@caraiz2001"
+  - "@suzannejin"
 maintainers:
   - "@caraiz2001"
+  - "@suzannejin"

From b87f223337780a8067a75084f511675aab116085 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 14:22:29 +0000
Subject: [PATCH 11/13] [propr/grea] remove unnecessary config

---
 modules/nf-core/propr/grea/tests/multiprocessor.config | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 modules/nf-core/propr/grea/tests/multiprocessor.config

diff --git a/modules/nf-core/propr/grea/tests/multiprocessor.config b/modules/nf-core/propr/grea/tests/multiprocessor.config
deleted file mode 100644
index f0b47cf5c53..00000000000
--- a/modules/nf-core/propr/grea/tests/multiprocessor.config
+++ /dev/null
@@ -1,9 +0,0 @@
-process {
-    cpus = 2
-    withName: "PROPR_PROPD"{
-        ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"}
-    }
-    withName: "PROPR_GREA"{
-        ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"}
-    }
-}

From c262b2cdc7be854a6f71d2d29d78b3f2cbea0776 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 14:32:51 +0000
Subject: [PATCH 12/13] [propr/grea] solve linting

---
 modules/nf-core/propr/grea/meta.yml | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml
index dca54ebc8f6..f7872465ff5 100644
--- a/modules/nf-core/propr/grea/meta.yml
+++ b/modules/nf-core/propr/grea/meta.yml
@@ -26,8 +26,9 @@ input:
           [id: 'test', ...]
     - adjacency:
         type: file
-        description: adjacency matrix representing the graph connections (ie. 1 for edges, 0 otherwise).
-        This can be the adjacency matrix output from gene ratio approaches like propr/propd.
+        description: |
+          Adjacency matrix representing the graph connections (i.e. 1 for edges, 0 otherwise).
+          This can be the adjacency matrix output from gene ratio approaches like propr/propd.
         pattern: "*.{csv,tsv}"
   - - meta2:
         type: map
@@ -37,9 +38,10 @@ input:
           [id: 'test', ...]
     - gmt:
         type: file
-        description: A tab delimited file format that describes gene sets. The first column is the
-        concept id (eg. GO term, pathway, etc), the second column is the concept description, and the
-        rest are nodes (eg. genes) that is associated to the given concept.
+        description: |
+          A tab-delimited file format that describes gene sets. The first column is the
+          concept id (e.g. GO term, pathway, etc.), the second column is the concept description, and the
+          rest are the nodes (e.g. genes) that are associated with the given concept.
         pattern: "*.{gmt}"
 output:
   - results:
@@ -51,8 +53,9 @@ output:
             [id: 'test', ...]
       - "*.grea.tsv":
           type: file
-          description: Output file containing the information about the tested concepts (ie. gene sets)
-          and enrichment statistics.
+          description: |
+            Output file containing the information about the tested concepts (ie. gene sets)
+            and enrichment statistics.
           pattern: "*.{tsv}"
   - versions:
       - versions.yml:

From a590997dd41489d0f6e9dc267676024e4256fcdd Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Wed, 11 Dec 2024 15:56:12 +0100
Subject: [PATCH 13/13] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Júlia Mir Pedrol <mirp.julia@gmail.com>
---
 modules/nf-core/propr/grea/meta.yml         | 2 +-
 modules/nf-core/propr/grea/templates/grea.R | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/nf-core/propr/grea/meta.yml b/modules/nf-core/propr/grea/meta.yml
index f7872465ff5..cc0613d3dab 100644
--- a/modules/nf-core/propr/grea/meta.yml
+++ b/modules/nf-core/propr/grea/meta.yml
@@ -56,7 +56,7 @@ output:
           description: |
             Output file containing the information about the tested concepts (ie. gene sets)
             and enrichment statistics.
-          pattern: "*.{tsv}"
+          pattern: "*.grea.tsv"
   - versions:
       - versions.yml:
           type: file
diff --git a/modules/nf-core/propr/grea/templates/grea.R b/modules/nf-core/propr/grea/templates/grea.R
index 37832f9734c..3b761f89b1c 100644
--- a/modules/nf-core/propr/grea/templates/grea.R
+++ b/modules/nf-core/propr/grea/templates/grea.R
@@ -176,8 +176,6 @@ if (opt\$permutation < 0) {
     stop('permutation should be a positive integer')
 }
 
-print(opt)
-
 ################################################
 ################################################
 ## Finish loading libraries                   ##