From 513cce4fa042d3bc684e86c603abbb3bf12f3abc Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 20 Jul 2024 14:05:40 +1000 Subject: [PATCH 01/11] use matrix multiplication --- R/methods_SE.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 89edcddf..8cf96454 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -188,17 +188,18 @@ setMethod("tidybulk", "RangedSummarizedExperiment", .tidybulk_se) my_counts_scaled = list( - assays(.data) %>% - as.list() %>% - .[[1]] %>% - multiply_by( - rep(multiplier, rep(nrow(.),length(multiplier))) - ) + assay(.data) %*% + diag(multiplier) + ) %>% setNames(value_scaled) + colnames(my_counts_scaled[[1]]) = assay(.data) |> colnames() + + as.list() %>% + .[[1]]colnames() # Add the assay - assays(.data) = assays(.data) %>% c(my_counts_scaled) + assays(.data, withDimnames=FALSE) = assays(.data) %>% c(my_counts_scaled) .data %>% @@ -313,7 +314,7 @@ setMethod("scale_abundance", as.matrix() if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm) - + .data_norm = .data_norm |> preprocessCore::normalize.quantiles.use.target( From ddff6cbbd0ed52a8b581d9e69ee42f50502934cf Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 2 Aug 2024 23:48:46 +1000 Subject: [PATCH 02/11] add scaling argument to DE --- R/functions_SE.R | 21 ++++++--- R/methods.R | 52 ++++++++++----------- R/methods_SE.R | 2 - man/quantile_normalise_abundance-methods.Rd | 6 +-- man/test_differential_abundance-methods.Rd | 2 +- 5 files changed, 44 insertions(+), 39 deletions(-) diff --git a/R/functions_SE.R b/R/functions_SE.R index 262d1468..8a1693fc 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -744,9 +744,9 @@ get_differential_transcript_abundance_bulk_SE <- function(.data, # Replace `:` with ___ because it creates error with edgeR if(design |> colnames() |> str_detect(":") |> any()) { message("tidybulk says: the interaction term 
`:` has been replaced with `___` in the design matrix, in order to work with edgeR.") - colnames(design) = design |> colnames() |> str_replace(":", "___") + colnames(design) = design |> colnames() |> str_replace(":", "___") } - + # Print the design column names in case I want contrasts message( sprintf( @@ -1070,6 +1070,7 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data, #' @param .contrasts A character vector. See edgeR makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT) #' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") +#' @param .scaling_factor A tidyeval (column name) for the precalculated TMM scaling #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. #' @param ... 
Additional arguments for glmmSeq #' @@ -1085,6 +1086,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, test_above_log2_fold_change = NULL, scaling_method = "TMM", + .scaling_factor = NULL, omit_contrast_in_colnames = FALSE, prefix = "", .dispersion = NULL, @@ -1092,6 +1094,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, .abundance = enquo(.abundance) .dispersion = enquo(.dispersion) + .scaling_factor = enquo(.scaling_factor) # Check if contrasts are of the same form if( @@ -1145,8 +1148,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, object = .formula |> lme4::nobars(), data = metadata ) - - if(quo_is_symbolic(.dispersion)) + + if(.dispersion |> quo_is_symbolic()) dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe() else dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) @@ -1161,9 +1164,13 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, dispersion = dispersion[rownames(counts)] # Scaling - sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method) - - + if(.scaling_factor |> quo_is_symbolic()) + sizeFactors = .data |> pivot_sample() |> pull(!!.scaling_factor) + else + sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method) + + + glmmSeq_object = glmmSeq( .formula, countdata = counts , diff --git a/R/methods.R b/R/methods.R index 4bf025e5..3283a0c3 100755 --- a/R/methods.R +++ b/R/methods.R @@ -594,14 +594,14 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' @details Tranform the feature abundance across samples so to have the same quantile distribution (using preprocessCore). 
#' #' Underlying method -#' +#' #' If `limma_normalize_quantiles` is chosen -#' +#' #' .data |>limma::normalizeQuantiles() -#' +#' #' If `preprocesscore_normalize_quantiles_use_target` is chosen -#' -#' .data |> +#' +#' .data |> #' preprocessCore::normalize.quantiles.use.target( #' target = preprocessCore::normalize.quantiles.determine.target(.data) #' ) @@ -638,7 +638,7 @@ setGeneric("quantile_normalise_abundance", function(.data, .abundance = NULL, method = "limma_normalize_quantiles", target_distribution = NULL, - + action = "add") { @@ -695,7 +695,7 @@ setGeneric("quantile_normalise_abundance", function(.data, } if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm) - + .data_norm_quant = .data_norm |> preprocessCore::normalize.quantiles.use.target( @@ -1154,14 +1154,14 @@ setGeneric("reduce_dimensions", function(.data, # adjust top for the max number of features I have if(top > .data |> distinct(!!.feature) |> nrow()){ warning(sprintf( - "tidybulk says: the \"top\" argument %s is higher than the number of features %s", - top, + "tidybulk says: the \"top\" argument %s is higher than the number of features %s", + top, .data |> distinct(!!.feature) |> nrow() )) - + top = min(top, .data |> distinct(!!.feature) |> nrow()) } - + # Validate data frame if(do_validate()) { validation(.data, !!.element, !!.feature, !!.abundance) @@ -2604,14 +2604,14 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' @param significance_threshold DEPRECATED - A real between 0 and 1 (usually 0.05). #' @param fill_missing_values DEPRECATED - A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param .contrasts DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. 
The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) -#' @param ... Further arguments passed to some of the internal functions. Currently, it is needed just for internal debug. +#' @param ... Further arguments passed to some of the internal experimental functions. For example for glmmSeq, it is possible to pass .dispersion, and .scaling_factor column tidyeval to skip the calculation of dispersion and scaling and use precalculated values. This is helpful if you want to calculate those quantities on many genes and do DE testing on fewer genes. .scaling_factor is the TMM value that can be obtained with tidybulk::scale_abundance. #' #' #' @details This function provides the option to use edgeR \url{https://doi.org/10.1093/bioinformatics/btp616}, limma-voom \url{https://doi.org/10.1186/gb-2014-15-2-r29}, limma_voom_sample_weights \url{https://doi.org/10.1093/nar/gkv412} or DESeq2 \url{https://doi.org/10.1186/s13059-014-0550-8} to perform the testing. #' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula. 
#' #' Underlying method for edgeR framework: -#' +#' #' .data |> #' #' # Filter @@ -2638,7 +2638,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' #' #' Underlying method for DESeq2 framework: -#' +#' #' keep_abundant( #' factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]), #' minimum_counts = minimum_counts, @@ -2657,16 +2657,16 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' counts = #' .data %>% #' assay(my_assay) -#' +#' #' # Create design matrix for dispersion, removing random effects #' design = #' model.matrix( #' object = .formula |> lme4::nobars(), #' data = metadata #' ) -#' +#' #' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) -#' +#' #' glmmSeq( .formula, #' countdata = counts , #' metadata = metadata |> as.data.frame(), @@ -2674,8 +2674,8 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' progress = TRUE, #' method = method |> str_remove("(?i)^glmmSeq_" ), #' ) -#' -#' +#' +#' #' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate). #' #' @@ -3980,12 +3980,12 @@ setGeneric("test_gene_rank", function(.data, # DEPRECATION OF reference function if (is_present(.sample) & !is.null(.sample)) { - + # Signal the deprecation to the user deprecate_warn("1.13.2", "tidybulk::test_gene_rank(.sample = )", details = "The argument .sample is now deprecated and not needed anymore.") } - + # Get column names .arrange_desc = enquo(.arrange_desc) .entrez = enquo(.entrez) @@ -4023,14 +4023,14 @@ setGeneric("test_gene_rank", function(.data, .data |> select(!!.entrez, !!.arrange_desc) |> - distinct() |> - + distinct() |> + # Select one entrez - NEEDED? 
- with_groups(c(!!.entrez,!!.arrange_desc ), slice, 1) |> + with_groups(c(!!.entrez,!!.arrange_desc ), slice, 1) |> - # arrange + # arrange arrange(desc(!!.arrange_desc)) |> - + # Format deframe() |> entrez_rank_to_gsea(species, gene_collections = gene_sets ) |> diff --git a/R/methods_SE.R b/R/methods_SE.R index 8cf96454..59af5326 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -195,8 +195,6 @@ setMethod("tidybulk", "RangedSummarizedExperiment", .tidybulk_se) setNames(value_scaled) colnames(my_counts_scaled[[1]]) = assay(.data) |> colnames() - as.list() %>% - .[[1]]colnames() # Add the assay assays(.data, withDimnames=FALSE) = assays(.data) %>% c(my_counts_scaled) diff --git a/man/quantile_normalise_abundance-methods.Rd b/man/quantile_normalise_abundance-methods.Rd index 5c527cd8..d7938677 100644 --- a/man/quantile_normalise_abundance-methods.Rd +++ b/man/quantile_normalise_abundance-methods.Rd @@ -111,10 +111,10 @@ Underlying method If `limma_normalize_quantiles` is chosen .data |>limma::normalizeQuantiles() - + If `preprocesscore_normalize_quantiles_use_target` is chosen - -.data |> + +.data |> preprocessCore::normalize.quantiles.use.target( target = preprocessCore::normalize.quantiles.determine.target(.data) ) diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd index 01472dbe..d8beb5cf 100755 --- a/man/test_differential_abundance-methods.Rd +++ b/man/test_differential_abundance-methods.Rd @@ -149,7 +149,7 @@ test_differential_abundance( \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} -\item{...}{Further arguments passed to some of the internal functions. Currently, it is needed just for internal debug.} +\item{...}{Further arguments passed to some of the internal experimental functions. 
For example for glmmSeq, it is possible to pass .dispersion, and .scaling_factor column tidyeval to skip the calculation of dispersion and scaling and use precalculated values. This is helpful if you want to calculate those quantities on many genes and do DE testing on fewer genes. .scaling_factor is the TMM value that can be obtained with tidybulk::scale_abundance.} \item{significance_threshold}{DEPRECATED - A real between 0 and 1 (usually 0.05).} From f39f763b1c1b3e059c7af7ad67c2e72aa8b579d0 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Tue, 17 Sep 2024 22:10:40 +0300 Subject: [PATCH 03/11] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 34c8a527..bad82c8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.17.3 +Version: 1.17.4 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", From a0bdd3ff739b9f05576d6e9154f736b3f95ed127 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Wed, 16 Oct 2024 20:18:57 +1030 Subject: [PATCH 04/11] fix tests --- R/methods_SE.R | 2 ++ .../testthat/test-bulk_methods_SummarizedExperiment.R | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 9a260e2e..d3aeac8d 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -2895,6 +2895,8 @@ setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcr combination_of_factors_of_NON_interest = # Factors se[1,1, drop=FALSE] |> + colData() |> + as_tibble(rownames = ".sample") |> select(...) 
|> suppressWarnings() |> colnames() |> diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 84bc5a8f..3aae6428 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -747,7 +747,7 @@ test_that("gene over representation",{ species="Homo sapiens" ) - expect_equal( ncol(res), 10 ) + expect_equal( ncol(res), 13 ) @@ -854,8 +854,8 @@ test_that("Only reduced dimensions UMAP - no object",{ test_that("resolve_complete_confounders_of_non_interest",{ - library(tidySummarizedExperiment) - library(tidybulk) + #library(tidySummarizedExperiment) + library(SummarizedExperiment) # Sample annotations sample_annotations <- data.frame( @@ -890,7 +890,9 @@ test_that("resolve_complete_confounders_of_non_interest",{ se |> resolve_complete_confounders_of_non_interest(A, B, C) |> - distinct(.sample, A, B, C) |> + colData() |> + _[, c("A", "B", "C")] |> + as_tibble(rownames = ".sample") |> expect_identical(expected_tibble ) From f62606a891b9cac705cd0fd3c810740dd0c81c2b Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Wed, 16 Oct 2024 20:59:19 +1030 Subject: [PATCH 05/11] fix tests for new Bioconductor --- tests/testthat/test-bulk_methods.R | 10 +++++----- .../testthat/test-bulk_methods_SummarizedExperiment.R | 8 +++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 3a32bb11..7343a083 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -84,7 +84,7 @@ test_that("Only scaled counts - no object",{ expect_equal( unique(res$multiplier), c(1.3078113, 1.1929933, 1.9014731, 0.9678922, 1.4771970), - tolerance=1e-3 + tolerance=1e-2 ) expect_equal( @@ -345,7 +345,7 @@ test_that("Only differential trancript abundance - no object",{ ) |> filter(FDR<0.05) |> nrow() |> - expect_equal(169) + expect_equal(171) }) 
@@ -603,7 +603,7 @@ test_that("Voom with treat method",{ res |> filter(`adj.P.Val___cell_typeb_cell-cell_typemonocyte` < 0.05) |> nrow() |> - expect_equal(293) + expect_equal(294) res |> filter(`adj.P.Val___cell_typeb_cell-cell_typet_cell`<0.05) |> @@ -695,7 +695,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( unique(res$log2FoldChange)[1:4], c(3.449740, 2.459516, 2.433466, 1.951263), - tolerance=1e-3 + tolerance=1e-2 ) expect_equal( @@ -2007,7 +2007,7 @@ test_that("filter abundant with design - no object",{ ) |> filter(.abundant) |> nrow() |> - expect_equal(1965) + expect_equal(1970) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 3aae6428..3b9cd8b6 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -361,12 +361,14 @@ test_that("differential trancript abundance - SummarizedExperiment",{ as_tibble() |> filter(FDR<0.05) |> nrow() |> - expect_equal(169) + expect_equal(171) }) test_that("differential trancript abundance - SummarizedExperiment - alternative .abundance",{ + library(SummarizedExperiment) + assays(se_mini) = list(counts = assay(se_mini), bla = assay(se_mini)) @@ -440,7 +442,7 @@ test_that("differential trancript abundance - SummarizedExperiment - alternative as_tibble() |> filter(FDR<0.05) |> nrow() |> - expect_equal(169) + expect_equal(171) }) @@ -494,7 +496,7 @@ test_that("Voom with treat method",{ res |> filter(adj.P.Val___Cell.typeb_cell.Cell.typemonocyte < 0.05) |> nrow() |> - expect_equal(293) + expect_equal(294) res |> filter(adj.P.Val___Cell.typeb_cell.Cell.typet_cell < 0.05) |> From c521054cb2e79bdf255cd4a27238924f9cf167ea Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Wed, 16 Oct 2024 21:03:15 +1030 Subject: [PATCH 06/11] version UP --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION 
b/DESCRIPTION index bad82c8f..c3675dc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.17.4 +Version: 1.17.5 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", From 54efc9fe72e76ca81498d4fb1d3124da7a7b84a0 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 17 Oct 2024 12:24:53 +1030 Subject: [PATCH 07/11] use stop() instead --- DESCRIPTION | 2 +- R/cibersort.R | 9 +- R/functions.R | 223 ++++----------------- R/functions_SE.R | 99 ++------- R/ggplot.R | 6 +- R/glmmSeq.R | 33 +-- R/methods.R | 77 +------ R/methods_SE.R | 132 +++--------- R/utilities.R | 94 ++++----- man/get_reduced_dimensions_UMAP_bulk_SE.Rd | 2 +- man/reduce_dimensions-methods.Rd | 2 +- tests/testthat/test-bulk_methods.R | 7 +- 12 files changed, 162 insertions(+), 524 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c3675dc4..f19ca1a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -95,7 +95,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/cibersort.R b/R/cibersort.R index 4694c04a..ad2b905b 100755 --- a/R/cibersort.R +++ b/R/cibersort.R @@ -255,12 +255,9 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3, exp_transform = FALSE )) Y=Y[,colSums(Y)>0, drop=FALSE] - # Check if package is installed, otherwise install - if (find.package("matrixStats", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing matrixStats needed for cibersort") - install.packages("matrixStats", repos = 
"https://cloud.r-project.org") - } - + # Check if 'matrixStats' package is installed, otherwise stop with instructions + check_and_install_packages("matrixStats") + # Eliminate sd == 0 if(length(which(matrixStats::colSds(Y)==0))>0) warning(sprintf( diff --git a/R/functions.R b/R/functions.R index 9dec0e03..137b7ed0 100755 --- a/R/functions.R +++ b/R/functions.R @@ -242,7 +242,6 @@ add_scaled_counts_bulk.calcNormFactor <- function(.data, #' @importFrom magrittr equals #' @importFrom rlang := #' @importFrom stats median -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .sample The name of the sample column @@ -266,13 +265,9 @@ get_scaled_counts_bulk <- function(.data, .transcript = enquo(.transcript) .abundance = enquo(.abundance) - # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing edgeR needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } + # Check if 'edgeR' package is installed, otherwise stop with instructions + check_and_install_packages("edgeR") + # Reformat input data set df <- @@ -365,7 +360,6 @@ get_scaled_counts_bulk <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom rlang inform #' @importFrom tidyr spread @@ -509,15 +503,8 @@ get_differential_transcript_abundance_bulk <- function(.data, ~ limma::makeContrasts(contrasts = .x, levels = design), ~ NULL) - # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - 
install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } - - + # Check if 'edgeR' package is installed, otherwise stop with instructions + check_and_install_packages("edgeR") edgeR_object = df_for_edgeR %>% @@ -659,7 +646,6 @@ get_differential_transcript_abundance_bulk <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom rlang inform #' @importFrom tidyr spread @@ -736,21 +722,8 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, # ~ limma::makeContrasts(contrasts = .x, levels = design), # ~ NULL) - # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } - - # Check if package is installed, otherwise install - if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("glmmSeq", ask = FALSE) - } + # Check if 'edgeR' package is installed, otherwise stop with instructions + check_and_install_packages(c("edgeR","glmmSeq")) metadata = .data |> @@ -840,7 +813,6 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom rlang inform #' @importFrom dplyr arrange @@ -914,13 +886,8 @@ 
get_differential_transcript_abundance_bulk_voom <- function(.data, ~ limma::makeContrasts(contrasts = .x, levels = design), ~ NULL) - # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing limma needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) - } + # Check if 'limma' package is installed, otherwise stop with instructions + check_and_install_packages("limma") voom_object = df_for_voom %>% @@ -1048,7 +1015,6 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom rlang inform #' @importFrom dplyr mutate_if @@ -1097,13 +1063,8 @@ get_differential_transcript_abundance_deseq2 <- function(.data, omit_contrast_in_colnames = FALSE } - # Check if package is installed, otherwise install - if (find.package("DESeq2", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing DESeq2 needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("DESeq2", ask = FALSE) - } + # Check if 'DESeq2' package is installed, otherwise stop with instructions + check_and_install_packages("DESeq2") if (is.null(test_above_log2_fold_change)) { test_above_log2_fold_change <- 0 @@ -1229,7 +1190,6 @@ get_differential_transcript_abundance_deseq2 <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom purrr map_lgl #' @importFrom stringr 
str_replace @@ -1267,10 +1227,10 @@ test_differential_cellularity_ <- function(.data, .abundance = enquo(.abundance) - if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing broom needed for analyses") - install.packages("broom", repos = "https://cloud.r-project.org") - } + # if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { + # message("tidybulk says: Installing broom needed for analyses") + # install.packages("broom", repos = "https://cloud.r-project.org") + # } deconvoluted = .data %>% @@ -1391,7 +1351,6 @@ test_differential_cellularity_ <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom purrr map_lgl #' @importFrom stringr str_replace @@ -1483,7 +1442,6 @@ test_stratification_cellularity_ <- function(.data, #' @importFrom magrittr set_colnames #' @importFrom purrr map2_dfr #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' #' #' @param .data A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment)) @@ -1563,12 +1521,8 @@ test_gene_enrichment_bulk_EGSEA <- function(.data, ~ NULL) # Check if package is installed, otherwise install - if (find.package("EGSEA", quiet = TRUE) %>% length %>% equals(0)) { - stop(" - EGSEA not installed. Please install it. EGSEA requires manual installation to not overwhelm the user in case it is not needed. - BiocManager::install(\"EGSEA\", ask = FALSE) - ") - } + check_and_install_packages("EGSEA") + if (!"EGSEA" %in% (.packages())) { stop("EGSEA package not loaded. Please run library(\"EGSEA\"). 
With this setup, EGSEA require manual loading, for technical reasons.") } @@ -1797,7 +1751,6 @@ get_clusters_kmeans_bulk <- #' #' @import tibble #' @importFrom rlang := -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -1822,19 +1775,9 @@ get_clusters_SNN_bulk <- .feature = enquo(.feature) .abundance = enquo(.abundance) - # Check if package is installed, otherwise install - if (find.package("cluster", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing cluster") - install.packages("cluster", repos = "https://cloud.r-project.org") - } - if (find.package("Seurat", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing Seurat") - install.packages("Seurat", repos = "https://cloud.r-project.org") - } - if (find.package("KernSmooth", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing KernSmooth") - install.packages("KernSmooth", repos = "https://cloud.r-project.org") - } + # Check if required packages are installed, otherwise stop with instructions + check_and_install_packages(c("cluster", "Seurat", "KernSmooth")) + my_df = .data %>% @@ -2140,7 +2083,6 @@ we suggest to partition the dataset for sample clusters. 
#' @import tibble #' @importFrom rlang := #' @importFrom stats setNames -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -2183,11 +2125,8 @@ get_reduced_dimensions_TSNE_bulk <- arguments = arguments %>% c(dims = .dims) - # Check if package is installed, otherwise install - if (find.package("Rtsne", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing Rtsne") - install.packages("Rtsne", repos = "https://cloud.r-project.org") - } + # Check if 'Rtsne' package is installed, otherwise stop with instructions + check_and_install_packages("Rtsne") # Set perprexity to not be too high if (!"perplexity" %in% names(arguments)) @@ -2253,7 +2192,6 @@ get_reduced_dimensions_TSNE_bulk <- #' @import tibble #' @importFrom rlang := #' @importFrom stats setNames -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -2305,11 +2243,8 @@ get_reduced_dimensions_UMAP_bulk <- if (!"init" %in% names(arguments)) arguments = arguments %>% c(init = "spca") - # Check if package is installed, otherwise install - if (find.package("uwot", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing uwot") - install.packages("uwot", repos = "https://cloud.r-project.org") - } + # Check if 'uwot' package is installed, otherwise stop with instructions + check_and_install_packages("uwot") df_source = .data %>% @@ -2780,11 +2715,8 @@ remove_redundancy_elements_through_correlation <- function(.data, if(.data %>% distinct(!!.element) %>% nrow() <= 1 ) stop("tidybulk says: You must have more than one element (trancripts if of_samples == FALSE) to perform remove_redundancy") - # Check if package is installed, otherwise install - if (find.package("widyr", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing widyr 
needed for correlation analyses") - install.packages("widyr", repos = "https://cloud.r-project.org") - } + # Check if 'widyr' package is installed, otherwise stop with instructions + check_and_install_packages("widyr") # Get the redundant data frame .data.correlated = @@ -3106,17 +3038,8 @@ run_llsr = function(mix, reference = X_cibersort, intercept= TRUE) { #' run_epic = function(mix, reference = NULL) { - # Check if package is installed, otherwise install - if (find.package("devtools", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing class needed for EPIC") - install.packages("devtools", repos = "https://cloud.r-project.org", dependencies = c("Depends", "Imports")) - } - - # Check if package is installed, otherwise install - if (find.package("EPIC", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing class needed for EPIC") - devtools::install_github("GfellerLab/EPIC") - } + # Check if 'EPIC' package is installed, otherwise stop with instructions + check_and_install_packages("EPIC") if("EPIC" %in% .packages() %>% not) stop("tidybulk says: Please install and then load the package EPIC manually (i.e. library(EPIC)). This is because EPIC is not in Bioconductor or CRAN so it is not possible to seamlessly make EPIC part of the dependencies.") @@ -3157,7 +3080,6 @@ run_epic = function(mix, reference = NULL) { #' @importFrom stats setNames #' @importFrom rlang dots_list #' @importFrom magrittr equals -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .sample The name of the sample column @@ -3204,27 +3126,10 @@ get_cell_type_proportions = function(.data, # Execute do.call because I have to deal with ... 
method %>% tolower %>% equals("cibersort") ~ { - - # Check if package is installed, otherwise install - if (find.package("class", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing class needed for Cibersort") - install.packages("class", repos = "https://cloud.r-project.org", dependencies = c("Depends", "Imports")) - } - - # Check if package is installed, otherwise install - if (find.package("e1071", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing e1071 needed for Cibersort") - install.packages("e1071", repos = "https://cloud.r-project.org", dependencies = c("Depends", "Imports")) - } - - # Check if package is installed, otherwise install - if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing preprocessCore needed for Cibersort") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("preprocessCore", ask = FALSE) - - } + + # Check if 'preprocessCore' package is installed, otherwise stop with instructions + check_and_install_packages(c("class", "e1071", "preprocessCore")) + # Choose reference reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .) 
@@ -3267,6 +3172,9 @@ get_cell_type_proportions = function(.data, method %>% tolower %in% c("mcp_counter", "quantiseq", "xcell") ~ { # # Check if package is installed, otherwise install + check_and_install_packages(c("SummarizedExperiment", "S4Vectors")) + + if (find.package("immunedeconv", quiet = TRUE) %>% length %>% equals(0)) { message("tidybulk says: Installing immunedeconv") devtools::install_github("icbi-lab/immunedeconv", upgrade = FALSE) @@ -3311,7 +3219,6 @@ get_cell_type_proportions = function(.data, #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix #' @importFrom stats as.formula -#' @importFrom utils install.packages #' @importFrom stats rnorm #' @importFrom stringr str_c #' @@ -3341,14 +3248,8 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, .factor_of_interest = enquo(.factor_of_interest) .factor_unwanted = enquo(.factor_unwanted) - # Check if package is installed, otherwise install - if (find.package("sva", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing sva - Combat needed for adjustment for unwanted variation") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("sva", ask = FALSE) - } - + # Check if 'sva' package is installed, otherwise stop with instructions + check_and_install_packages("sva") # New column name value_adjusted = as.symbol(sprintf("%s%s", quo_name(.abundance), adjusted_string)) @@ -3461,7 +3362,7 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, adjusted_df = mat |> - edgeR::cpm(log = T) |> + edgeR::cpm(log = TRUE) |> limma::removeBatchEffect( design = design, covariates = unwanted_covariate_matrix, @@ -3582,19 +3483,9 @@ tidybulk_to_SummarizedExperiment = function(.data, .transcript = col_names$.transcript .abundance = col_names$.abundance - # Check if package is installed, otherwise install - if (find.package("SummarizedExperiment", quiet 
= TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing SummarizedExperiment") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("SummarizedExperiment", ask = FALSE) - } - if (find.package("S4Vectors", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing S4Vectors") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("S4Vectors", ask = FALSE) - } + # Check if 'SummarizedExperiment' and 'S4Vectors' packages are installed, otherwise stop with instructions + check_and_install_packages(c("SummarizedExperiment", "S4Vectors")) + # If present get the scaled abundance .abundance_scaled = .data %>% @@ -3660,7 +3551,6 @@ tidybulk_to_SummarizedExperiment = function(.data, #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix #' @importFrom stats as.formula -#' @importFrom utils install.packages #' @importFrom tidyr complete #' @importFrom rlang quo_is_symbol #' @@ -4008,16 +3898,7 @@ entrez_over_to_gsea = function(my_entrez_rank, species, gene_collections = NULL # https://yulab-smu.github.io/clusterProfiler-book/chapter5.html # Check if package is installed, otherwise install - if (find.package("fastmatch", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing fastmatch needed for analyses") - install.packages("fastmatch", repos = "https://cloud.r-project.org") - } - - if (find.package("clusterProfiler", quiet = TRUE) %>% length %>% equals(0)) { - message("clusterProfiler not installed. 
Installing.") - BiocManager::install("clusterProfiler", ask = FALSE) - } - + check_and_install_packages(c("fastmatch", "clusterProfiler")) @@ -4071,25 +3952,7 @@ entrez_rank_to_gsea = function(my_entrez_rank, species, gene_collections = NULL # https://yulab-smu.github.io/clusterProfiler-book/chapter5.html # Check if package is installed, otherwise install - if (find.package("fastmatch", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing fastmatch needed for analyses") - install.packages("fastmatch", repos = "https://cloud.r-project.org") - } - - if (find.package("clusterProfiler", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: clusterProfiler not installed. Installing.") - BiocManager::install("clusterProfiler", ask = FALSE) - } - - if (find.package("enrichplot", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: enrichplot not installed. tidybulk says: Installing.") - BiocManager::install("enrichplot", ask = FALSE) - } - - if (find.package("ggplot2", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing ggplot2 needed for analyses") - install.packages("ggplot2", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("fastmatch", "clusterProfiler", "enrichplot", "ggplot2")) # Get gene sets signatures if(is.null(gene_collections ) ) diff --git a/R/functions_SE.R b/R/functions_SE.R index ca94af1f..a8dc9999 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -59,7 +59,6 @@ get_clusters_kmeans_bulk_SE <- #' #' @import tibble #' @importFrom rlang := -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -79,18 +78,7 @@ get_clusters_SNN_bulk_SE <- # Check if package is installed, otherwise install - if (find.package("cluster", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing cluster") - install.packages("cluster", repos = 
"https://cloud.r-project.org") - } - if (find.package("Seurat", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing Seurat") - install.packages("Seurat", repos = "https://cloud.r-project.org") - } - if (find.package("KernSmooth", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing KernSmooth") - install.packages("KernSmooth", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("cluster", "Seurat", "KernSmooth")) ndims = min(c(nrow(.data), ncol(.data), 30))-1 @@ -314,7 +302,6 @@ we suggest to partition the dataset for sample clusters. #' @import tibble #' @importFrom rlang := #' @importFrom stats setNames -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -354,10 +341,8 @@ get_reduced_dimensions_TSNE_bulk_SE <- # Check if package is installed, otherwise install - if (find.package("Rtsne", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing Rtsne") - install.packages("Rtsne", repos = "https://cloud.r-project.org") - } + check_and_install_packages("Rtsne") + # Set perprexity to not be too high if (!"perplexity" %in% names(arguments)) @@ -397,7 +382,6 @@ get_reduced_dimensions_TSNE_bulk_SE <- #' @import tibble #' @importFrom rlang := #' @importFrom stats setNames -#' @importFrom utils install.packages #' #' @param .data A tibble #' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`) @@ -437,10 +421,7 @@ get_reduced_dimensions_UMAP_bulk_SE <- # Check if package is installed, otherwise install - if (find.package("uwot", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing uwot") - install.packages("uwot", repos = "https://cloud.r-project.org") - } + check_and_install_packages("uwot") # Calculate based on PCA @@ -584,10 +565,8 @@ remove_redundancy_elements_through_correlation_SE <- function(.data, . 
= NULL # Check if package is installed, otherwise install - if (find.package("widyr", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing widyr needed for correlation analyses") - install.packages("widyr", repos = "https://cloud.r-project.org") - } + check_and_install_packages("widyr") + # Get the redundant data frame .data %>% @@ -699,7 +678,6 @@ remove_redundancy_elements_though_reduced_dimensions_SE <- #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' @importFrom magrittr extract2 #' @@ -762,12 +740,7 @@ get_differential_transcript_abundance_bulk_SE <- function(.data, ~ NULL) # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing edgeR needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } + check_and_install_packages("edgeR") # If no assay is specified take first my_assay = ifelse( @@ -880,7 +853,6 @@ get_differential_transcript_abundance_bulk_SE <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' #' @@ -928,19 +900,17 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data, ) ) + # Check if package is installed, otherwise install + check_and_install_packages("limma") + + my_contrasts = .contrasts %>% ifelse_pipe(length(.) 
> 0, ~ limma::makeContrasts(contrasts = .x, levels = design), ~ NULL) - # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing limma needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) - } + # If no assay is specified take first my_assay = ifelse( @@ -1061,7 +1031,6 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' #' @@ -1118,12 +1087,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, # } # Check if package is installed, otherwise install - if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("glmmSeq", ask = FALSE) - } + check_and_install_packages("glmmSeq") # If no assay is specified take first my_assay = ifelse( @@ -1225,7 +1189,6 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, #' @import tibble #' @importFrom magrittr set_colnames #' @importFrom stats model.matrix -#' @importFrom utils install.packages #' @importFrom purrr when #' #' @@ -1268,12 +1231,8 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data, } # Check if package is installed, otherwise install - if (find.package("DESeq2", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing DESeq2 needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - 
install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("DESeq2", ask = FALSE) - } + check_and_install_packages("DESeq2") + if (is.null(test_above_log2_fold_change)) { test_above_log2_fold_change <- 0 @@ -1396,15 +1355,8 @@ multivariable_differential_tissue_composition_SE = function( when( grepl("Surv", .my_formula) %>% any ~ { # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } - - if (find.package("boot", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing boot needed for analyses") - install.packages("boot", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "boot")) + (.) %>% survival::coxph(.my_formula, .) %>% @@ -1466,15 +1418,8 @@ univariable_differential_tissue_composition_SE = function( when( grepl("Surv", .my_formula) %>% any ~ { # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } - - if (find.package("boot", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing boot needed for analyses") - install.packages("boot", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "boot")) + (.) %>% mutate(.proportion_0_corrected = .proportion_0_corrected %>% boot::logit()) %>% @@ -1484,10 +1429,8 @@ univariable_differential_tissue_composition_SE = function( } , ~ { # Check if package is installed, otherwise install - if (find.package("betareg", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("betareg", repos = "https://cloud.r-project.org") - } + check_and_install_packages("betareg") + (.) 
%>% betareg::betareg(.my_formula, .) %>% broom::tidy() %>% diff --git a/R/ggplot.R b/R/ggplot.R index 489c3f62..70882635 100644 --- a/R/ggplot.R +++ b/R/ggplot.R @@ -52,11 +52,7 @@ log10_reverse_trans <- function() { #' @export logit_trans <- function(){ - - if (find.package("functional", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing functional needed for analyses") - install.packages("functional", repos = "https://cloud.r-project.org") - } + check_and_install_packages("functional") trans <- qlogis inv <- plogis diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 9eb0b9db..630c551e 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -184,7 +184,7 @@ glmmTMB_to_confidence_intervals_random_effects = function(fit){ pivot_longer(-group_id, names_to = "parameter", values_to = "CI") ) - mod = glmmTMB::ranef(fit, condVar=T)$cond + mod = glmmTMB::ranef(fit, condVar=TRUE)$cond mod = map2_dfr( mod, names(mod), ~ .x |> @@ -230,7 +230,7 @@ lmer_to_confidence_intervals_random_effects = function(fit){ pivot_longer(-group_id, names_to = "parameter", values_to = "CI") ) - mod = lme4::ranef(fit, condVar=T) + mod = lme4::ranef(fit, condVar=TRUE) mod = map2_dfr( mod, names(mod), ~ .x |> @@ -581,12 +581,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N if (progress) { # Check if package is installed, otherwise install - if (find.package("pblapply", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing pblapply needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("pblapply", ask = FALSE) - } + check_and_install_packages("pbapply") resultList <- pbapply::pblapply(fullList, function(geneList) { args <- c(list(geneList = geneList, fullFormula = fullFormula, @@ -635,12 +630,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N else if 
(progress) { # Check if package is installed, otherwise install - if (find.package("pbmcapply", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing pbmcapply needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("pbmcapply", ask = FALSE) - } + check_and_install_packages("pbmcapply") resultList <- pbmcapply::pbmclapply(fullList, function(geneList) { glmerCore(geneList, fullFormula, reduced, @@ -688,12 +678,8 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N if (progress) { # Check if package is installed, otherwise install - if (find.package("pblapply", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing pblapply needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("pblapply", ask = FALSE) - } + check_and_install_packages("pbapply") + resultList <- pbapply::pblapply(fullList, function(geneList) { args <- c(list(geneList = geneList, fullFormula = fullFormula, @@ -721,12 +707,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N if (progress) { # Check if package is installed, otherwise install - if (find.package("pbmcapply", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing pbmcapply needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("pbmcapply", ask = FALSE) - } + check_and_install_packages("pbmcapply") resultList <- pbmcapply::pbmclapply(fullList, function(geneList) { glmmTMBcore(geneList, fullFormula, reduced, diff --git a/R/methods.R b/R/methods.R index 
4dab28a3..28093457 100755 --- a/R/methods.R +++ b/R/methods.R @@ -150,18 +150,7 @@ setGeneric("as_SummarizedExperiment", function(.data, .abundance = col_names$.abundance # Check if package is installed, otherwise install - if (find.package("SummarizedExperiment", quiet = TRUE) |> length() |> equals(0)) { - message("Installing SummarizedExperiment") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("SummarizedExperiment", ask = FALSE) - } - if (find.package("S4Vectors", quiet = TRUE) |> length() %>% equals(0)) { - message("Installing S4Vectors") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("S4Vectors", ask = FALSE) - } + check_and_install_packages(c("SummarizedExperiment", "S4Vectors")) # If present get the scaled abundance .abundance_scaled = @@ -673,12 +662,8 @@ setGeneric("quantile_normalise_abundance", function(.data, if(tolower(method) == "limma_normalize_quantiles"){ # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing limma needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) - } + check_and_install_packages("limma") + .data_norm = .data_norm |> @@ -687,12 +672,7 @@ setGeneric("quantile_normalise_abundance", function(.data, else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){ # Check if package is installed, otherwise install - if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing preprocessCore needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = 
"https://cloud.r-project.org") - BiocManager::install("preprocessCore", ask = FALSE) - } + check_and_install_packages("preprocessCore") if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm) @@ -2298,12 +2278,7 @@ symbol_to_entrez = function(.data, .transcript = col_names$.transcript # Check if package is installed, otherwise install - if (find.package("org.Hs.eg.db", quiet = TRUE) |> length() |> equals(0)) { - message("Installing org.Hs.eg.db needed for annotation") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("org.Hs.eg.db", ask = FALSE) - } + check_and_install_packages("org.Hs.eg.db") .data |> @@ -2367,28 +2342,7 @@ setGeneric("describe_transcript", function(.data, # Check if package is installed, otherwise install - if (find.package("org.Hs.eg.db", quiet = TRUE) |> length() |> equals(0)) { - message("Installing org.Hs.eg.db needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("org.Hs.eg.db", ask = FALSE) - } - - # Check if package is installed, otherwise install - if (find.package("org.Mm.eg.db", quiet = TRUE) |> length() |> equals(0)) { - message("Installing org.Mm.eg.db needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("org.Mm.eg.db", ask = FALSE) - } - - # Check if package is installed, otherwise install - if (find.package("AnnotationDbi", quiet = TRUE) |> length() |> equals(0)) { - message("Installing AnnotationDbi needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = 
"https://cloud.r-project.org") - BiocManager::install("AnnotationDbi", ask = FALSE) - } + check_and_install_packages(c("org.Hs.eg.db", "org.Mm.eg.db", "AnnotationDbi")) description_df = @@ -3223,12 +3177,7 @@ setGeneric("identify_abundant", function(.data, # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing edgeR needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } + check_and_install_packages("edgeR") # If character fail if( @@ -3813,10 +3762,8 @@ setGeneric("test_gene_overrepresentation", function(.data, # Check packages msigdbr # Check if package is installed, otherwise install - if (find.package("msigdbr", quiet = TRUE) |> length() |> equals(0)) { - message("msigdbr not installed. Installing.") - BiocManager::install("msigdbr", ask = FALSE) - } + check_and_install_packages("msigdbr") + # Check is correct species name if(species %in% msigdbr::msigdbr_species()$species_name |> not()) @@ -4000,10 +3947,8 @@ setGeneric("test_gene_rank", function(.data, # Check packages msigdbr # Check if package is installed, otherwise install - if (find.package("msigdbr", quiet = TRUE) |> length() |> equals(0)) { - message("msigdbr not installed. Installing.") - BiocManager::install("msigdbr", ask = FALSE) - } + check_and_install_packages("msigdbr") + # Check is correct species name if(species %in% msigdbr::msigdbr_species()$species_name |> not()) diff --git a/R/methods_SE.R b/R/methods_SE.R index d3aeac8d..0d14b7e9 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -8,13 +8,8 @@ . 
= NULL # Check if package is installed, otherwise install - if (find.package("SummarizedExperiment", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing SummarizedExperiment") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("SummarizedExperiment", ask = FALSE) - } - + check_and_install_packages("SummarizedExperiment") + # Make col names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -99,12 +94,8 @@ setMethod("tidybulk", "RangedSummarizedExperiment", .tidybulk_se) . = NULL # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing edgeR needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } + check_and_install_packages("edgeR") + # DEPRECATION OF reference function if (is_present(reference_selection_function) & !is.null(reference_selection_function)) { @@ -281,12 +272,8 @@ setMethod("scale_abundance", else if(tolower(method) == "limma_normalize_quantiles"){ # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing limma needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) - } + check_and_install_packages("limma") + .data_norm <- .data %>% @@ -299,12 +286,8 @@ setMethod("scale_abundance", else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){ # Check if package is installed, otherwise install - if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing preprocessCore needed for analyses") - if 
(!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("preprocessCore", ask = FALSE) - } + check_and_install_packages("preprocessCore") + .data_norm = .data |> @@ -821,12 +804,7 @@ setMethod("remove_redundancy", .abundance = enquo(.abundance) # Check if package is installed, otherwise install - if (find.package("sva", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing sva - Combat needed for adjustment for unwanted variation") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("sva", ask = FALSE) - } + check_and_install_packages("sva") # DEPRECATION OF log_transform @@ -949,7 +927,7 @@ setMethod("remove_redundancy", my_assay_adjusted = .data |> assay(my_assay) |> - edgeR::cpm(log = T) |> + edgeR::cpm(log = TRUE) |> limma::removeBatchEffect( design = design, covariates = unwanted_covariate_matrix, @@ -1240,25 +1218,7 @@ setMethod("aggregate_duplicates", method %>% tolower %>% equals("cibersort") ~ { # Check if package is installed, otherwise install - if (find.package("class", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing class needed for Cibersort") - install.packages("class", repos = "https://cloud.r-project.org", dependencies = c("Depends", "Imports")) - } - - # Check if package is installed, otherwise install - if (find.package("e1071", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing e1071 needed for Cibersort") - install.packages("e1071", repos = "https://cloud.r-project.org", dependencies = c("Depends", "Imports")) - } - - # Check if package is installed, otherwise install - if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing preprocessCore needed for Cibersort") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = 
"https://cloud.r-project.org") - BiocManager::install("preprocessCore", ask = FALSE) - - } + check_and_install_packages(c("class", "e1071", "preprocessCore")) # Choose reference reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .) @@ -1301,11 +1261,8 @@ setMethod("aggregate_duplicates", method %>% tolower %in% c("mcp_counter", "quantiseq", "xcell") ~ { # Check if package is installed, otherwise install - if (find.package("immunedeconv", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing immunedeconv") - devtools::install_github("icbi-lab/immunedeconv", upgrade = FALSE) - } - + check_and_install_packages("immunedeconv") + if(method %in% c("mcp_counter", "quantiseq", "xcell") & !"immunedeconv" %in% (.packages())) stop("tidybulk says: for xcell, mcp_counter, or quantiseq deconvolution you should have the package immunedeconv attached. Please execute library(immunedeconv)") @@ -1702,12 +1659,8 @@ setMethod("keep_variable", # Check if package is installed, otherwise install - if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing edgeR needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("edgeR", ask = FALSE) - } + check_and_install_packages("edgeR") + # If no assay is specified take first my_assay = ifelse( @@ -1922,12 +1875,8 @@ setMethod("keep_abundant", ) # Check if package is installed, otherwise install - if (find.package("EGSEA", quiet = TRUE) %>% length %>% equals(0)) { - stop(" - EGSEA not installed. Please install it. EGSEA require manual installation for not overwelming the user in case it is not needed. - BiocManager::install(\"EGSEA\", ask = FALSE) - ") - } + check_and_install_packages("EGSEA") + if (!"EGSEA" %in% (.packages())) { stop("EGSEA package not loaded. Please run library(\"EGSEA\"). 
With this setup, EGSEA require manual loading, for technical reasons.") } @@ -2161,10 +2110,8 @@ setMethod("test_gene_enrichment", # Check packages msigdbr # Check if package is installed, otherwise install - if (find.package("msigdbr", quiet = TRUE) %>% length %>% equals(0)) { - message("msigdbr not installed. Installing.") - BiocManager::install("msigdbr", ask = FALSE) - } + check_and_install_packages("msigdbr") + # Check is correct species name if(species %in% msigdbr::msigdbr_species()$species_name %>% not()) @@ -2248,10 +2195,8 @@ setMethod("test_gene_overrepresentation", # Check packages msigdbr # Check if package is installed, otherwise install - if (find.package("msigdbr", quiet = TRUE) %>% length %>% equals(0)) { - message("msigdbr not installed. Installing.") - BiocManager::install("msigdbr", ask = FALSE) - } + check_and_install_packages("msigdbr") + # Check is correct species name if(species %in% msigdbr::msigdbr_species()$species_name %>% not()) @@ -2525,10 +2470,11 @@ setMethod("impute_missing_abundance", # Fix NOTEs . = NULL - if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing broom needed for analyses") - install.packages("broom", repos = "https://cloud.r-project.org") - } +# +# if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { +# message("Installing broom needed for analyses") +# install.packages("broom", repos = "https://cloud.r-project.org") +# } deconvoluted = .data %>% @@ -2792,29 +2738,9 @@ setMethod("get_bibliography", . 
= NULL # Check if package is installed, otherwise install - if (find.package("org.Hs.eg.db", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing org.Hs.eg.db needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("org.Hs.eg.db", ask = FALSE) - } - - # Check if package is installed, otherwise install - if (find.package("org.Mm.eg.db", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing org.Mm.eg.db needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("org.Mm.eg.db", ask = FALSE) - } - - # Check if package is installed, otherwise install - if (find.package("AnnotationDbi", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing AnnotationDbi needed for differential transcript abundance analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("AnnotationDbi", ask = FALSE) - } - + check_and_install_packages(c("org.Hs.eg.db", "org.Mm.eg.db", "AnnotationDbi")) + + .transcript = enquo(.transcript) # Transcript rownames by default diff --git a/R/utilities.R b/R/utilities.R index ad09fc19..3f1dc2bb 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1133,16 +1133,10 @@ multivariable_differential_tissue_composition = function( # Beta or Cox when( grepl("Surv", .my_formula) %>% any ~ { + # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } - - if (find.package("boot", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing boot needed for 
analyses") - install.packages("boot", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "boot")) + (.) %>% survival::coxph(.my_formula, .) %>% @@ -1202,15 +1196,8 @@ univariable_differential_tissue_composition = function( when( grepl("Surv", .my_formula) %>% any ~ { # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } - - if (find.package("boot", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing boot needed for analyses") - install.packages("boot", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "boot")) + (.) %>% mutate(.proportion_0_corrected = .proportion_0_corrected %>% boot::logit()) %>% @@ -1220,10 +1207,8 @@ univariable_differential_tissue_composition = function( } , ~ { # Check if package is installed, otherwise install - if (find.package("betareg", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing betareg needed for analyses") - install.packages("betareg", repos = "https://cloud.r-project.org") - } + check_and_install_packages("betareg") + (.) %>% betareg::betareg(.my_formula, .) 
%>% broom::tidy() %>% @@ -1246,22 +1231,12 @@ univariable_differential_tissue_stratification = function( ){ # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing survival needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "survminer")) - # Check if package is installed, otherwise install - if (find.package("survminer", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing survminer needed for analyses") - install.packages("survminer", repos = "https://cloud.r-project.org") - } - if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing broom needed for analyses") - install.packages("broom", repos = "https://cloud.r-project.org") - } + check_and_install_packages("broom") + deconvoluted %>% @@ -1324,22 +1299,8 @@ univariable_differential_tissue_stratification_SE = function( ){ # Check if package is installed, otherwise install - if (find.package("survival", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing survival needed for analyses") - install.packages("survival", repos = "https://cloud.r-project.org") - } - - # Check if package is installed, otherwise install - if (find.package("survminer", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing survminer needed for analyses") - install.packages("survminer", repos = "https://cloud.r-project.org") - } - - - if (find.package("broom", quiet = TRUE) %>% length %>% equals(0)) { - message("Installing broom needed for analyses") - install.packages("broom", repos = "https://cloud.r-project.org") - } + check_and_install_packages(c("survival", "survminer", "broom")) + deconvoluted %>% @@ -1523,4 +1484,33 @@ get_special_column_name_symbol = function(name){ feature__ = get_special_column_name_symbol(".feature") sample__ = get_special_column_name_symbol(".sample") - 
+check_and_install_packages <- function(packages) { + # Separate GitHub packages from CRAN/Bioconductor packages + github_packages <- packages[grepl("/", packages)] + regular_packages <- packages[!grepl("/", packages)] + + # Check if regular packages are installed + missing_regular_packages <- regular_packages[!sapply(regular_packages, requireNamespace, quietly = TRUE)] + + # Check if GitHub packages are installed + missing_github_packages <- github_packages[!sapply(gsub(".*/", "", github_packages), requireNamespace, quietly = TRUE)] + + # Combine all missing packages + missing_packages <- c(missing_regular_packages, missing_github_packages) + + # If any packages are missing, print installation instructions + if (length(missing_packages) > 0) { + stop( + "tidybulk says: The following packages are required:\n", + paste(" -", missing_packages, collapse = "\n"), "\n", + "Please install them by running:\n", + " if (!requireNamespace('BiocManager', quietly = TRUE))\n", + " install.packages('BiocManager', repos = 'https://cloud.r-project.org')\n", + paste0( + " BiocManager::install(c(", + paste0("'", missing_packages, "'", collapse = ", "), + "), ask = FALSE)" + ) + ) + } +} diff --git a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd index 163b194c..c2324470 100644 --- a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd @@ -28,7 +28,7 @@ get_reduced_dimensions_UMAP_bulk_SE( \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. 
If NULL all variable features are considered} -\item{...}{Further parameters passed to the function uwot} +\item{...}{Further parameters passed to the function uwot::tumap} \item{.abundance}{A column symbol with the value the clustering is based on (e.g., `count`)} diff --git a/man/reduce_dimensions-methods.Rd b/man/reduce_dimensions-methods.Rd index b0c60955..ed316dc4 100644 --- a/man/reduce_dimensions-methods.Rd +++ b/man/reduce_dimensions-methods.Rd @@ -129,7 +129,7 @@ reduce_dimensions( \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} -\item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"} +\item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE", or uwot::tumap if you choose method="umap"} \item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 7343a083..ef49695b 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -655,11 +655,8 @@ test_that("New method choice",{ test_that("DESeq2 differential trancript abundance - no object",{ - if (find.package("DESeq2", quiet = TRUE) |> length() |> equals(0)) { - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("DESeq2", ask = FALSE) - } + check_and_install_packages("DESeq2") + test_deseq2_df = DESeq2::DESeqDataSet(se_mini,design=~condition) colData(test_deseq2_df)$condition = factor(colData(test_deseq2_df)$condition) From 6c1fd0d4b3fdb64f4b2d67e579873f6df3a17e53 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 17 Oct 2024 12:47:24 +1030 Subject: [PATCH 08/11] version UP 
--- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f19ca1a5..d338a420 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.17.5 +Version: 1.17.6 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", From 6bef9e7982d9279861922534b1a1fc7adb81ff2a Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Sat, 19 Oct 2024 11:36:28 +1030 Subject: [PATCH 09/11] add test tollerance for Bioc CHECK --- DESCRIPTION | 2 +- tests/testthat/test-bulk_methods.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d338a420..d2f17e51 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.17.6 +Version: 1.17.7 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index ef49695b..ecfd78ce 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1523,7 +1523,7 @@ test_that("Add reduced dimensions UMAP - no object",{ res |> pull(UMAP1) |> magrittr::extract2(1) |> - expect_equal(-2.12, tolerance = 0.01) + expect_equal(-2.12, tolerance = 0.3) # this because of Linux (openEuler 22.03 LTS-SP1) / aarch64 expect_equal(ncol(res), 8) From d8dc27d03d65a74001c1d8542742cc139f47125f Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 21 Nov 2024 18:24:42 +1030 Subject: [PATCH 10/11] accept df --- NAMESPACE | 1 + R/functions_SE.R | 42 ++++++++--- R/methods.R | 70 +++++++++++++++--- R/methods_SE.R | 32 ++------ ...ete_confounders_of_non_interest-methods.Rd | 28 
++----- ...ve_complete_confounders_of_non_interest.Rd | 74 +++++++++++++++++++ 6 files changed, 182 insertions(+), 65 deletions(-) create mode 100644 man/resolve_complete_confounders_of_non_interest.Rd diff --git a/NAMESPACE b/NAMESPACE index 17b09661..7c9a52af 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -87,6 +87,7 @@ importFrom(GenomicRanges,makeGRangesListFromDataFrame) importFrom(Matrix,colSums) importFrom(S4Vectors,metadata) importFrom(SummarizedExperiment,SummarizedExperiment) +importFrom(SummarizedExperiment,as.data.frame) importFrom(SummarizedExperiment,assays) importFrom(SummarizedExperiment,colData) importFrom(SummarizedExperiment,rowData) diff --git a/R/functions_SE.R b/R/functions_SE.R index a8dc9999..cd7488e4 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1446,6 +1446,33 @@ univariable_differential_tissue_composition_SE = function( unnest(surv_test, keep_empty = TRUE) } +.resolve_complete_confounders_of_non_interest_df <- function(df, ...){ + + combination_of_factors_of_NON_interest = + # Factors + df |> + as_tibble(rownames = ".sample") |> + select(...) 
|> + suppressWarnings() |> + colnames() |> + + # Combinations + combn(2) |> + t() |> + as_tibble() |> + set_names(c("factor_1", "factor_2")) + + for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){ + df = + df |> + resolve_complete_confounders_of_non_interest_pair_df( + !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1), + !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2) + ) + } + + df +} #' Resolve Complete Confounders of Non-Interest #' @@ -1475,13 +1502,13 @@ univariable_differential_tissue_composition_SE = function( #' # se is a SummarizedExperiment object #' resolve_complete_confounders_of_non_interest(se, .factor_1 = factor1, .factor_2 = factor2) #' @noRd -resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, .factor_2){ +resolve_complete_confounders_of_non_interest_pair_df <- function(df, .factor_1, .factor_2){ .factor_1 <- enquo(.factor_1) .factor_2 <- enquo(.factor_2) cd = - colData(se) |> + df |> as_tibble() |> rowid_to_column() |> distinct(rowid, !!.factor_1, !!.factor_2) |> @@ -1516,15 +1543,12 @@ resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, cd = cd |> mutate(!!.factor_2 := if_else(n1 + n2 < 3, dummy_factor_2, !!.factor_2)) } - - colData(se)[,c(quo_name(.factor_1), quo_name(.factor_2))] = + + df[,c(quo_name(.factor_1), quo_name(.factor_2))] = cd |> unnest(se_data) |> arrange(rowid) |> - select(!!.factor_1, !!.factor_2) |> - DataFrame() - - se + select(!!.factor_1, !!.factor_2) + df } - diff --git a/R/methods.R b/R/methods.R index 28093457..c79c3d9f 100755 --- a/R/methods.R +++ b/R/methods.R @@ -4914,24 +4914,70 @@ as_matrix <- function(tbl, #' Resolve Complete Confounders of Non-Interest #' -#' This generic function processes a SummarizedExperiment object to handle confounders -#' that are not of interest in the analysis. 
It dynamically handles combinations -#' of provided factors, adjusting the data by nesting and summarizing over these factors. +#' This function identifies and resolves complete confounders among specified factors of non-interest within a `SummarizedExperiment` object. +#' Complete confounders occur when the levels of one factor are entirely predictable based on the levels of another factor. +#' Such relationships can interfere with downstream analyses by introducing redundancy or collinearity. #' +#' The function systematically examines pairs of specified factors and determines whether they are completely confounded. +#' If a pair of factors is found to be confounded, one of the factors is adjusted or removed to resolve the issue. +#' The adjusted `SummarizedExperiment` object is returned, preserving all assays and metadata except the resolved factors. #' -#' @param se A SummarizedExperiment object that contains the data to be processed. -#' @param ... Arbitrary number of factor variables represented as symbols or quosures -#' to be considered for resolving confounders. These factors are processed -#' in combinations of two. +#' @param se A `SummarizedExperiment` object. This object contains assay data, row data (e.g., gene annotations), and column data (e.g., sample annotations). +#' @param ... Factors of non-interest (column names from `colData(se)`) to examine for complete confounders. #' -#' @rdname resolve_complete_confounders_of_non_interest-methods +#' @details +#' Complete confounders of non-interest can create dependencies between variables that may bias statistical models or violate their assumptions. +#' This function systematically addresses this by: +#' 1. Identifying pairs of factors in the specified columns that are fully confounded. +#' 2. Resolving confounding by adjusting or removing one of the factors from the `colData` slot. #' -#' @return A modified SummarizedExperiment object with confounders resolved. 
+#' The resolution strategy depends on the analysis context and can be modified in the helper function +#' `resolve_complete_confounders_of_non_interest_pair_df()`. By default, the function removes one of the confounded factors. +#' +#' @return +#' A `SummarizedExperiment` object with resolved confounders. The object retains its structure, including assays and metadata, +#' but the column data (`colData`) is updated to reflect the resolved factors. #' #' @examples -#' # Not run: -#' # se is a SummarizedExperiment object -#' # resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3) +#' # Load necessary libraries +#' library(SummarizedExperiment) +#' library(dplyr) +#' +#' # Sample annotations +#' sample_annotations <- data.frame( +#' sample_id = paste0("Sample", seq(1, 9)), +#' factor_of_interest = c(rep("treated", 4), rep("untreated", 5)), +#' A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"), +#' B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"), +#' C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"), +#' stringsAsFactors = FALSE +#' ) +#' +#' # Simulated assay data +#' assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9) +#' +#' # Row data (e.g., gene annotations) +#' row_data <- data.frame(gene_id = paste0("Gene", seq_len(100))) +#' +#' # Create SummarizedExperiment object +#' se <- SummarizedExperiment( +#' assays = list(counts = assay_data), +#' rowData = row_data, +#' colData = DataFrame(sample_annotations) +#' ) +#' +#' # Apply the function to resolve confounders +#' se_resolved <- resolve_complete_confounders_of_non_interest(se, A, B, C) +#' +#' # View the updated column data +#' colData(se_resolved) +#' +#' @seealso +#' \code{\link[SummarizedExperiment]{SummarizedExperiment}} for creating and handling `SummarizedExperiment` objects. 
+#' +#' @importFrom dplyr select +#' @importFrom rlang set_names +#' @importFrom tibble as_tibble #' @export setGeneric("resolve_complete_confounders_of_non_interest", function(se, ...) { standardGeneric("resolve_complete_confounders_of_non_interest") diff --git a/R/methods_SE.R b/R/methods_SE.R index 0d14b7e9..f3e8d1f5 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -2816,33 +2816,17 @@ setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcr #' @importFrom dplyr select #' @importFrom rlang set_names #' @importFrom tibble as_tibble +#' @importFrom SummarizedExperiment as.data.frame .resolve_complete_confounders_of_non_interest <- function(se, ...){ - combination_of_factors_of_NON_interest = - # Factors - se[1,1, drop=FALSE] |> - colData() |> - as_tibble(rownames = ".sample") |> - select(...) |> - suppressWarnings() |> - colnames() |> - - # Combinations - combn(2) |> - t() |> - as_tibble() |> - set_names(c("factor_1", "factor_2")) - - for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){ - se = - se |> - resolve_complete_confounders_of_non_interest_pair_SE( - !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1), - !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2) - ) - } - + colData(se) = + colData(se) |> + as.data.frame() |> + .resolve_complete_confounders_of_non_interest_df(...) 
|> + DataFrame() + se + } #' resolve_complete_confounders_of_non_interest diff --git a/man/resolve_complete_confounders_of_non_interest-methods.Rd b/man/resolve_complete_confounders_of_non_interest-methods.Rd index b828aaf8..934b0ad5 100644 --- a/man/resolve_complete_confounders_of_non_interest-methods.Rd +++ b/man/resolve_complete_confounders_of_non_interest-methods.Rd @@ -1,39 +1,27 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/methods.R, R/methods_SE.R +% Please edit documentation in R/methods_SE.R \docType{methods} -\name{resolve_complete_confounders_of_non_interest} -\alias{resolve_complete_confounders_of_non_interest} +\name{resolve_complete_confounders_of_non_interest,SummarizedExperiment-method} \alias{resolve_complete_confounders_of_non_interest,SummarizedExperiment-method} \alias{resolve_complete_confounders_of_non_interest,RangedSummarizedExperiment-method} -\title{Resolve Complete Confounders of Non-Interest} +\title{resolve_complete_confounders_of_non_interest} \usage{ -resolve_complete_confounders_of_non_interest(se, ...) - \S4method{resolve_complete_confounders_of_non_interest}{SummarizedExperiment}(se, ...) \S4method{resolve_complete_confounders_of_non_interest}{RangedSummarizedExperiment}(se, ...) } \arguments{ -\item{se}{A SummarizedExperiment object that contains the data to be processed.} +\item{se}{A `SummarizedExperiment` object. This object contains assay data, row data (e.g., gene annotations), and column data (e.g., sample annotations).} -\item{...}{Arbitrary number of factor variables represented as symbols or quosures -to be considered for resolving confounders. These factors are processed -in combinations of two.} +\item{...}{Factors of non-interest (column names from `colData(se)`) to examine for complete confounders.} } \value{ -A modified SummarizedExperiment object with confounders resolved. 
- A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). } \description{ -This generic function processes a SummarizedExperiment object to handle confounders -that are not of interest in the analysis. It dynamically handles combinations -of provided factors, adjusting the data by nesting and summarizing over these factors. -} -\examples{ -# Not run: -# se is a SummarizedExperiment object -# resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3) +resolve_complete_confounders_of_non_interest + +resolve_complete_confounders_of_non_interest } diff --git a/man/resolve_complete_confounders_of_non_interest.Rd b/man/resolve_complete_confounders_of_non_interest.Rd new file mode 100644 index 00000000..01ad6808 --- /dev/null +++ b/man/resolve_complete_confounders_of_non_interest.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R +\name{resolve_complete_confounders_of_non_interest} +\alias{resolve_complete_confounders_of_non_interest} +\title{Resolve Complete Confounders of Non-Interest} +\usage{ +resolve_complete_confounders_of_non_interest(se, ...) +} +\arguments{ +\item{se}{A `SummarizedExperiment` object. This object contains assay data, row data (e.g., gene annotations), and column data (e.g., sample annotations).} + +\item{...}{Factors of non-interest (column names from `colData(se)`) to examine for complete confounders.} +} +\value{ +A `SummarizedExperiment` object with resolved confounders. The object retains its structure, including assays and metadata, +but the column data (`colData`) is updated to reflect the resolved factors. 
+} +\description{ +This function identifies and resolves complete confounders among specified factors of non-interest within a `SummarizedExperiment` object. +Complete confounders occur when the levels of one factor are entirely predictable based on the levels of another factor. +Such relationships can interfere with downstream analyses by introducing redundancy or collinearity. +} +\details{ +The function systematically examines pairs of specified factors and determines whether they are completely confounded. +If a pair of factors is found to be confounded, one of the factors is adjusted or removed to resolve the issue. +The adjusted `SummarizedExperiment` object is returned, preserving all assays and metadata except the resolved factors. + + +Complete confounders of non-interest can create dependencies between variables that may bias statistical models or violate their assumptions. +This function systematically addresses this by: +1. Identifying pairs of factors in the specified columns that are fully confounded. +2. Resolving confounding by adjusting or removing one of the factors from the `colData` slot. + +The resolution strategy depends on the analysis context and can be modified in the helper function +`resolve_complete_confounders_of_non_interest_pair_df()`. By default, the function removes one of the confounded factors. 
+} +\examples{ +# Load necessary libraries +library(SummarizedExperiment) +library(dplyr) + +# Sample annotations +sample_annotations <- data.frame( + sample_id = paste0("Sample", seq(1, 9)), + factor_of_interest = c(rep("treated", 4), rep("untreated", 5)), + A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"), + B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"), + C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"), + stringsAsFactors = FALSE +) + +# Simulated assay data +assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9) + +# Row data (e.g., gene annotations) +row_data <- data.frame(gene_id = paste0("Gene", seq_len(100))) + +# Create SummarizedExperiment object +se <- SummarizedExperiment( + assays = list(counts = assay_data), + rowData = row_data, + colData = DataFrame(sample_annotations) +) + +# Apply the function to resolve confounders +se_resolved <- resolve_complete_confounders_of_non_interest(se, A, B, C) + +# View the updated column data +colData(se_resolved) + +} +\seealso{ +\code{\link[SummarizedExperiment]{SummarizedExperiment}} for creating and handling `SummarizedExperiment` objects. +} From b5f31b0d0e1af04e4c67dd3722ee9fda198d8698 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 21 Nov 2024 18:25:55 +1030 Subject: [PATCH 11/11] version UP --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2f17e51..1a695be5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.17.7 +Version: 1.17.8 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",