Skip to content

Commit

Permalink
Merge pull request #275 from stemangiola/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
stemangiola authored Jul 15, 2023
2 parents 963cb22 + 43aeb1e commit 5365cac
Show file tree
Hide file tree
Showing 15 changed files with 1,737 additions and 276 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ egsea_report_*
/Meta/
/doc/
_targets.R
_targets*
11 changes: 8 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: tidybulk
Title: Brings transcriptomics to the tidyverse
Version: 1.11.4
Version: 1.11.5
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
role = c("aut", "cre")),
person("Maria", "Doyle", email = "[email protected]",
Expand All @@ -17,7 +17,7 @@ Depends:
Imports:
tibble,
readr,
dplyr,
dplyr (>= 1.1.0),
magrittr,
tidyr,
stringi,
Expand Down Expand Up @@ -78,7 +78,12 @@ Suggests:
igraph,
EGSEA,
IRanges,
here
here,
glmmSeq,
pbapply,
pbmcapply,
lme4,
MASS
VignetteBuilder:
knitr
RdMacros:
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export(mutate)
export(nest)
export(pivot_sample)
export(pivot_transcript)
export(quantile_normalise_abundance)
export(reduce_dimensions)
export(remove_redundancy)
export(rename)
Expand All @@ -70,6 +71,7 @@ export(tidybulk)
export(tidybulk_SAM_BAM)
export(unnest)
exportMethods(as_SummarizedExperiment)
exportMethods(quantile_normalise_abundance)
exportMethods(scale_abundance)
exportMethods(tidybulk)
exportMethods(tidybulk_SAM_BAM)
Expand Down Expand Up @@ -103,6 +105,7 @@ importFrom(dplyr,group_by_drop_default)
importFrom(dplyr,group_cols)
importFrom(dplyr,if_else)
importFrom(dplyr,inner_join)
importFrom(dplyr,join_by)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_if)
Expand All @@ -125,13 +128,15 @@ importFrom(magrittr,"%$%")
importFrom(magrittr,"%>%")
importFrom(magrittr,divide_by)
importFrom(magrittr,equals)
importFrom(magrittr,extract2)
importFrom(magrittr,multiply_by)
importFrom(magrittr,set_colnames)
importFrom(magrittr,set_rownames)
importFrom(parallel,mclapply)
importFrom(purrr,as_mapper)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_dfc)
importFrom(purrr,map2_dfr)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
Expand Down
154 changes: 152 additions & 2 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -355,8 +355,6 @@ get_scaled_counts_bulk <- function(.data,

}



#' Get differential transcription information to a tibble using edgeR.
#'
#' @keywords internal
Expand Down Expand Up @@ -643,6 +641,158 @@ get_differential_transcript_abundance_bulk <- function(.data,
}
}

#' Get differential transcription information to a tibble using glmmSeq
#'
#' @keywords internal
#' @noRd
#'
#'
#'
#' @import tibble
#' @importFrom magrittr set_colnames
#' @importFrom stats model.matrix
#' @importFrom utils install.packages
#' @importFrom purrr when
#' @importFrom rlang inform
#' @importFrom tidyr spread
#' @importFrom tidyr pivot_wider
#' @importFrom dplyr slice
#'
#'
#' @param .data A tibble
#' @param .formula a formula with no response variable, referring only to numeric variables
#' @param .sample The name of the sample column
#' @param .transcript The name of the transcript/gene column
#' @param .abundance The name of the transcript/gene abundance column
#' @param .contrasts A character vector. Not used for this method
#' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)
#' @param test_above_log2_fold_change A positive real value. This works for edgeR and limma_voom methods. It uses the `treat` function, which tests that the difference in abundance is bigger than this threshold rather than zero \url{https://pubmed.ncbi.nlm.nih.gov/19176553}.
#' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
#' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames.
#' @param .sample_total_read_count
#'
#' @return A tibble with glmmSeq results
#'
get_differential_transcript_abundance_glmmSeq <- function(.data,
.formula,
.sample = NULL,
.transcript = NULL,
.abundance = NULL,
.contrasts = NULL,
method ,
test_above_log2_fold_change = NULL,
scaling_method = NULL,
omit_contrast_in_colnames = FALSE,
prefix = "",
.sample_total_read_count = NULL,
...) {
# Get column names
.sample = enquo(.sample)
.transcript = enquo(.transcript)
.abundance = enquo(.abundance)
.sample_total_read_count = enquo(.sample_total_read_count)

# Check if omit_contrast_in_colnames is correctly setup
if(omit_contrast_in_colnames & length(.contrasts) > 1){
warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
omit_contrast_in_colnames = FALSE
}

# # Specify the design column tested
# if(is.null(.contrasts))
# message(
# sprintf(
# "tidybulk says: The design column being tested is %s",
# design %>% colnames %>% .[1]
# )
# )

# # If I don't have intercept or I have categorical factor of interest BUT I don't have contrasts
# if(
# is.null(.contrasts) &
# (
# (
# ! .data |> pull(parse_formula(.formula)[1]) |> is("numeric") &
# .data |> pull(parse_formula(.formula)[1]) |> unique() |> length() |> gt(2)
# ) |
# colnames(design)[1] != "(Intercept)"
# )
# )
# warning("tidybulk says: If you have (i) an intercept-free design (i.e. ~ 0 + factor) or you have a categorical factor of interest with more than 2 values you should use the `contrasts` argument.")
#
# my_contrasts =
# .contrasts %>%
# ifelse_pipe(length(.) > 0,
# ~ limma::makeContrasts(contrasts = .x, levels = design),
# ~ NULL)

# Check if package is installed, otherwise install
if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("edgeR", ask = FALSE)
}

# Check if package is installed, otherwise install
if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("glmmSeq", ask = FALSE)
}

metadata =
.data |>
pivot_sample(!!.sample) |>
as_data_frame(rownames = !!.sample)

counts =
.data %>%
select(!!.transcript,!!.sample,!!.abundance) %>%
spread(!!.sample,!!.abundance) %>%
as_matrix(rownames = !!.transcript)

# Reorder counts
counts = counts[,rownames(metadata),drop=FALSE]

glmmSeq_object =
glmmSeq( .formula,
countdata = counts ,
metadata = metadata,
dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
progress = TRUE,
method = method |> str_remove("(?i)^glmmSeq_"),
...
)

glmmSeq_object |>
summary() |>
as_tibble(rownames = "gene") |>
mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>

# Attach attributes
reattach_internals(.data) %>%

# select method
memorise_methods_used("glmmSeq") |>

# Add raw object
attach_to_internals(glmmSeq_object, "glmmSeq") %>%
# Communicate the attribute added
{

rlang::inform("\ntidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once")

(.)
} %>%

# Attach prefix
setNames(c(
colnames(.)[1],
sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
))
}

#' Get differential transcription information to a tibble using voom.
#'
Expand Down
Loading

0 comments on commit 5365cac

Please sign in to comment.