diff --git a/DESCRIPTION b/DESCRIPTION index dc049d25..792a3be2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.8.00 +Version: 0.9.1 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. Important features are described in Taudière A. (2023) . @@ -28,11 +28,14 @@ Suggests: DT, edgeR, formattable, + ggalluvial, + ggfittext, gghalves, ggh4x, ggstatsplot, ggridges, ggVennDiagram, + glmulti, gtsummary, grDevices, grid, diff --git a/NAMESPACE b/NAMESPACE index 43e283e9..d6431b84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(unique_or_na,factor) export(LCBD_pq) export(SRS_curve_pq) export(accu_plot) +export(accu_plot_balanced_modality) export(accu_samp_threshold) export(add_blast_info) export(add_dna_to_phyloseq) @@ -13,6 +14,7 @@ export(add_info_to_sam_data) export(add_new_taxonomy_pq) export(adonis_phyloseq) export(adonis_pq) +export(adonis_rarperm_pq) export(all_object_size) export(ancombc_pq) export(are_modality_even_depth) @@ -45,12 +47,16 @@ export(funky_color) export(get_file_extension) export(get_funguild_db) export(ggVenn_phyloseq) +export(ggaluv_pq) export(ggbetween_pq) +export(ggscatt_pq) export(ggvenn_pq) +export(glmutli_pq) export(graph_test_pq) export(heat_tree_pq) export(hill_phyloseq) export(hill_pq) +export(hill_test_rarperm_pq) export(hill_tuckey_phyloseq) export(hill_tuckey_pq) export(iNEXT_pq) @@ -91,7 +97,9 @@ export(plot_guild_pq) export(plot_mt) export(plot_tax_pq) export(plot_tsne_pq) +export(plot_var_part_pq) export(psmelt_samples_pq) +export(rarefy_sample_count_by_modality) export(read_phyloseq) export(read_pq) export(rename_samples) @@ -116,6 +124,8 @@ export(summary_plot_pq) export(swarm_clustering) export(tax_bar_pq) export(tax_datatable) +export(taxa_as_columns) +export(taxa_as_rows) export(taxa_only_in_one_level) export(tbl_sum_samdata) export(track_wkflow) @@ -126,6 +136,8 @@ export(tsne_pq) export(unique_or_na) export(upset_pq) export(upset_test_pq) +export(var_par_pq) +export(var_par_rarperm_pq) export(venn_phyloseq) export(venn_pq) export(verify_pq) @@ -141,10 +153,13 @@ import(purrr) importFrom(grDevices,col2rgb) importFrom(lifecycle,deprecated) importFrom(rlang,.data) +importFrom(stats,anova) +importFrom(stats,as.formula) importFrom(stats,ave) importFrom(stats,kruskal.test) importFrom(stats,na.exclude) importFrom(stats,na.omit) +importFrom(stats,quantile) importFrom(stats,reformulate) importFrom(stats,reorder) importFrom(stats,runif) diff --git a/NEWS.md b/NEWS.md index 4406aef6..c2ea00c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,25 @@ -# MiscMetabar 0.8 (in development) +# MiscMetabar 0.9.1 (in development) + +## New functions +- Add functions [taxa_as_rows()] and [taxa_as_columns()] to replace verbose called to [clean_pq()] +- Add function [ggscatt_pq()] to plot and test for effect of a numerical columns in sam_data on Hill number. Its the equivalent for numerical variables of [ggbetween_pq()] which focus on the effect of a factor. +- Add functions [var_par_pq()] , [var_par_rarperm_pq()] and [plot_var_part_pq()] to compute the partition of the variation of community and plot it. It introduce the notion of `rarperm` part in the function name. It refers to the fact that this function compute permutation of samples depth rarefaction to measure the variation due to the random process in rarefaction. +- Add function [hill_test_rarperm_pq()] to test the effect of a factor on hill diversity accounting for the variation due to random nature of the rarefaction by sample depth. +- Add function [rarefy_sample_count_by_modality()] to equalize the number of samples for each levels of a modality (factor) +- Add function [accu_plot_balanced_modality()] to plot accumulation curves with balanced modality (same number of samples per level) and depth rarefaction (same number of sequences per sample) +- Add function [adonis_rarperm_pq()] to compute multiple Permanova analyses on different sample depth rarefaction. +- Add function [ggaluv_pq()] to plot taxonomic distribution in alluvial fashion with ggplot2 (using the [ggalluvial] package) +- Add function [glmutli_pq()] to use automated model selection and multimodel inference with (G)LMs for phyloseq object + + +## New parameters + +- Add param `taxa_ranks` in function [psmelt_samples_pq()] to group results by samples AND taxonomic ranks. +- Add param `hill_scales` in functions [hill_tuckey_pq()] and [hill_pq()] to choose the level of the hill number. +- Add param `na_remove` in function `hill_pq()` to remove samples with NA in the factor fact. + + +# MiscMetabar 0.8.1 - Add param `plot_with_tuckey` to `hill_pq()`., - Add function `formattable_pq()` to make beautiful table of the distribution of taxa across a modality using visualization inside in the table. diff --git a/R/MiscMetabar-package.R b/R/MiscMetabar-package.R index 75e29fb6..24d4e77b 100644 --- a/R/MiscMetabar-package.R +++ b/R/MiscMetabar-package.R @@ -23,7 +23,9 @@ if (getRversion() >= "2.15.1") { "update", "upr", "upViewport", "val", "value", "vegdist", "viewport", "write.table", "x", "x1", "X1", "x2", "y", "y1", "y2", "ymax", "ymin", ".group", "archetype", "nOTUid", "taxon", "total", - "chim_rm", "condition", "physeq", "seq_tab_Pairs", "nb_samp", "silent" + "chim_rm", "condition", "physeq", "seq_tab_Pairs", "nb_samp", "silent", + "X1_lim1", "X1_lim2", "aicc", "variable", "pos_letters", "alluvium", + "na_remove", "stratum", "to_lodes_form" )) } diff --git a/R/alpha_div_test.R b/R/alpha_div_test.R new file mode 100644 index 00000000..0a9bab27 --- /dev/null +++ b/R/alpha_div_test.R @@ -0,0 +1,383 @@ +################################################################################ +#' Calculate hill number and compute Tuckey post-hoc test +#' @description +#' +#' `r lifecycle::badge("maturing")` +#' Note that, by default, this function use a sqrt of the read numbers in the linear +#' model in order to correct for uneven sampling depth. +#' @aliases hill_tuckey_pq +#' @inheritParams clean_pq +#' @param modality (required) the variable to test +#' @param hill_scales (a vector of integer) The list of q values to compute +#' the hill number H^q. If Null, no hill number are computed. Default value +#' compute the Hill number 0 (Species richness), the Hill number 1 +#' (exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +#' Index). +#' @param silent (logical) If TRUE, no message are printing. +#' @param correction_for_sample_size (logical, default TRUE) This function +#' use a sqrt of the read numbers in the linear model in order to +#' correct for uneven sampling depth. +#' @return A ggplot2 object +#' +#' @export +#' +#' @author Adrien Taudière +#' @examples +#' data("GlobalPatterns", package = "phyloseq") +#' GlobalPatterns@sam_data[, "Soil_logical"] <- +#' ifelse(GlobalPatterns@sam_data[, "SampleType"] == "Soil", "Soil", "Not Soil") +#' hill_tuckey_pq(GlobalPatterns, "Soil_logical") +#' hill_tuckey_pq(GlobalPatterns, "Soil_logical", hill_scales = 1:2) +hill_tuckey_pq <- function( + physeq, + modality, + hill_scales = c(0, 1, 2), + silent = TRUE, + correction_for_sample_size = TRUE) { + modality_vector <- + as.factor(as.vector(unlist(unclass(physeq@sam_data[, modality])))) + + if (length(modality_vector) != dim(physeq@otu_table)[2]) { + physeq@otu_table <- t(physeq@otu_table) + } + read_numbers <- apply(physeq@otu_table, 2, sum) + + physeq <- taxa_as_rows(physeq) + otu_hill <- + vegan::renyi(t(physeq@otu_table), + scales = hill_scales, + hill = TRUE + ) + + colnames(otu_hill) <- paste0("Hill_", hill_scales) + tuk <- list() + for (i in seq_along(hill_scales)) { + if (correction_for_sample_size) { + tuk[[i]] <- + stats::TukeyHSD(stats::aov(lm(otu_hill[, i] ~ sqrt(read_numbers))$residuals ~ modality_vector)) + } else { + tuk[[i]] <- + stats::TukeyHSD(stats::aov(otu_hill[, i] ~ modality_vector)) + } + } + df <- do.call( + "rbind", + sapply(tuk, function(x) { + data.frame(x$modality_vector) + }, simplify = FALSE) + ) + colnames(df) <- colnames(tuk[[1]]$modality_vector) + df$x <- paste0( + "Hill_", + c( + sort(rep(hill_scales, dim( + tuk[[1]]$modality_vector + )[1])) + ), "__", + rownames(tuk[[1]]$modality_vector) + ) + + df$modality <- rownames(tuk[[1]]$modality_vector) + + p <- ggplot(data = df) + + geom_linerange(aes(ymax = upr, ymin = lwr, x = x), linewidth = 2) + + geom_point(aes(x = x, y = diff), + size = 4, + shape = 21, + fill = "white" + ) + + coord_flip() + + theme_gray() + + geom_hline(yintercept = 0) + + ylab("Differences in mean levels (value and confidence intervals at 95%)") + + xlab("") + + ggtitle("Results of the Tuckey HSD testing for differences + in mean Hill numbers") + + return(p) +} +################################################################################ + + +################################################################################ +#' Test multiple times effect of factor on Hill diversity +#' with different rarefaction even depth +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @inheritParams clean_pq +#' @param fact (required) Name of the factor in `physeq@sam_data` used to plot +#' different lines +#' @param hill_scales (a vector of integer) The list of q values to compute +#' the hill number H^q. If Null, no hill number are computed. Default value +#' compute the Hill number 0 (Species richness), the Hill number 1 +#' (exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +#' Index). +#' @param nperm (int) The number of permutations to perform. +#' @param sample.size (int) A single integer value equal to the number of +#' reads being simulated, also known as the depth. See +#' [phyloseq::rarefy_even_depth()]. +#' @param verbose (logical). If TRUE, print additional informations. +#' @param progress_bar (logical, default TRUE) Do we print progress during +#' the calculation? +#' @param p_val_signif (float, `[0:1]`) The mimimum value of p-value to count a +#' test as significant int the `prop_signif` result. +#' @param type A character specifying the type of statistical approach +#' (See [ggstatsplot::ggbetweenstats()] for more details): +#' +#' - "parametric" +#' - "nonparametric" +#' - "robust" +#' - "bayes" +#' +#' @param ... Others arguments passed on to [ggstatsplot::ggbetweenstats()] function +#' @seealso [ggstatsplot::ggbetweenstats()], [hill_pq()] +#' @return A list of 6 components : +#' +#' - method +#' - expressions +#' - plots +#' - pvals +#' - prop_signif +#' - statistics +#' +#' @export +#' @author Adrien Taudière +#' +#' @examples +#' \donttest{ +#' if (requireNamespace("ggstatsplot")) { +#' hill_test_rarperm_pq(data_fungi, "Time", nperm = 2) +#' res <- hill_test_rarperm_pq(data_fungi, "Height", nperm = 9, p.val = 0.9) +#' patchwork::wrap_plots(res$plots[[1]]) +#' res$plots[[1]][[1]] + res$plots[[2]][[1]] + res$plots[[3]][[1]] +#' res$prop_signif +#' res_para <- hill_test_rarperm_pq(data_fungi, "Height", nperm = 9, type = "parametrique") +#' res_para$plots[[1]][[1]] + res_para$plots[[2]][[1]] + res_para$plots[[3]][[1]] +#' res_para$pvals +#' res_para$method +#' res_para$expressions[[1]] +#' } +#' } +hill_test_rarperm_pq <- function(physeq, + fact, + hill_scales = c(0, 1, 2), + nperm = 99, + sample.size = min(sample_sums(physeq)), + verbose = FALSE, + progress_bar = TRUE, + p_val_signif = 0.05, + type = "non-parametrique", + ...) { + verify_pq(physeq) + res_perm <- list() + p_perm <- list() + if (progress_bar) { + pb <- txtProgressBar( + min = 0, + max = nperm * length(hill_scales), + style = 3, + width = 50, + char = "=" + ) + } + for (i in 1:nperm) { + if (verbose) { + psm <- + psmelt_samples_pq( + physeq = rarefy_even_depth( + physeq, + rngseed = i, + sample.size = sample.size, + verbose = verbose + ), + hill_scales = hill_scales + ) + } else { + psm <- + suppressMessages(psmelt_samples_pq( + physeq = rarefy_even_depth( + physeq, + rngseed = i, + sample.size = sample.size, + verbose = verbose + ), + hill_scales = hill_scales + )) + } + p_perm[[i]] <- list() + res_perm[[i]] <- list() + for (j in seq_along(hill_scales)) { + p_perm[[i]][[j]] <- + ggstatsplot::ggbetweenstats(psm, !!fact, !!paste0("Hill_", hill_scales[[j]]), + type = type, + ... + ) + res_perm[[i]][[j]] <- + ggstatsplot::extract_stats(p_perm[[i]][[j]]) + } + if (progress_bar) { + setTxtProgressBar(pb, i * length(hill_scales)) + } + } + + method <- res_perm[[1]][[1]]$subtitle_data[, c("method", "effectsize", "conf.method")] + + expressions <- sapply(res_perm, function(x) { + sapply(x, function(xx) { + xx$subtitle_data$expression + }) + }) + rownames(expressions) <- paste0("Hill_", hill_scales) + colnames(expressions) <- paste0("ngseed", 1:nperm) + + statistics <- sapply(res_perm, function(x) { + sapply(x, function(xx) { + xx$subtitle_data$statistic + }) + }) + rownames(statistics) <- paste0("Hill_", hill_scales) + colnames(statistics) <- paste0("ngseed", 1:nperm) + + pvals <- sapply(res_perm, function(x) { + sapply(x, function(xx) { + xx$subtitle_data$p.value + }) + }) + rownames(pvals) <- paste0("Hill_", hill_scales) + colnames(pvals) <- paste0("ngseed_", 1:nperm) + + prop_signif <- rowSums(pvals < p_val_signif) / ncol(pvals) + names(prop_signif) <- paste0("Hill_", hill_scales) + res <- + list( + "method" = method, + "expressions" = expressions, + "plots" = p_perm, + "pvals" = pvals, + "prop_signif" = prop_signif, + "statistics" = statistics + ) + return(res) +} +################################################################################ + + + +################################################################################ +#' Automated model selection and multimodel inference with (G)LMs for phyloseq +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @inheritParams clean_pq +#' @param formula (required) a formula for [glmulti::glmulti()] +#' Variables must be present in the `physeq@sam_data` slot or be one +#' of hill number defined in hill_scales or the variable Abundance which +#' refer to the number of sequences per sample. +#' @param fitfunction (default "lm") +#' @param hill_scales (a vector of integer) The list of q values to compute +#' the hill number H^q. If Null, no hill number are computed. Default value +#' compute the Hill number 0 (Species richness), the Hill number 1 +#' (exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +#' Index). +#' @param aic_step The value between AIC scores to cut for. +#' @param confsetsize The number of models to be looked for, i.e. the size of the returned confidence set. +#' @param plotty (logical) Whether to plot the progress of the IC profile when running. +#' @param level If 1, only main effects (terms of order 1) are used to build +#' the candidate set. If 2, pairwise interactions are also used (higher order +#' interactions are currently ignored) +#' @param method The method to be used to explore the candidate set of models. +#' If "h" (default) an exhaustive screening is undertaken. +#' If "g" the genetic algorithm is employed (recommended for large candidate sets). +#' If "l", a very fast exhaustive branch-and-bound algorithm is used. +#' Package leaps must then be loaded, and this can only be applied to linear models +#' with covariates and no interactions. If "d", a simple summary of the candidate set +#' is printed, including the number of candidate models. +#' @param crit The Information Criterion to be used. Default is the small-sample corrected AIC (aicc). This should be a function that accepts a fitted model as first argument. Other provided functions are the classic AIC, the Bayes IC (bic), and QAIC/QAICc (qaic and qaicc). +#' @param ... Others arguments passed on to [glmulti::glmulti()] function +#' +#' @return A data.frame summarizing the glmulti results with columns +#' +#' -estimates +#' -unconditional_interval +#' -nb_model" +#' -importance +#' -alpha +#' @export +#' @seealso [glmulti::glmulti()] +#' @examples +#' \donttest{ +#' if (requireNamespace("glmulti")) { +#' res_glmulti <- +#' glmutli_pq(data_fungi, "Hill_0 ~ Hill_1 + Abundance + Time + Height", level = 1) +#' res_glmulti +#' res_glmulti_interaction <- +#' glmutli_pq(data_fungi, "Hill_0 ~ Abundance + Time + Height", level = 2) +#' res_glmulti +#' } +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to [glmulti::glmulti()] if you +#' use this function. +glmutli_pq <- + function(physeq, + formula, + fitfunction = "lm", + hill_scales = c(0, 1, 2), + aic_step = 2, + confsetsize = 100, + plotty = FALSE, + level = 1, + method = "h", + crit = "aicc", + ...) { + psm_samp <- psmelt_samples_pq(physeq, hill_scales = hill_scales) + + res_glmulti <- do.call(glmulti::glmulti, list( + y = formula(formula), + data = psm_samp, + crit = crit, + level = level, + method = method, + fitfunction = fitfunction, + confsetsize = confsetsize, + plotty = plotty, + ... + )) + + ## AICc + top_glmulti <- glmulti::weightable(res_glmulti) + condition_crit <- top_glmulti[[crit]] <= (min(top_glmulti[[crit]]) + aic_step) + if (sum(condition_crit) == 0) { + stop("None modele are selected. Try a aic_step lower or another crit") + } + top_glmulti <- top_glmulti[condition_crit, ] + + ## Stockage des meilleurs modèles + cf <- data.frame(stats::coef(res_glmulti, icmethod = "Burnham")) + + colnames(cf) <- + c( + "estimates", + "unconditional_interval", + "nb_model", + "importance", + "alpha" + ) + cf$variable <- rownames(cf) + cf <- cf %>% filter(!grepl("Intercept", variable)) + + if (fitfunction == "lm") { + test <- list() + R2__h0 <- NULL + for (i in 1:nrow(top_glmulti)) { + test[[i]] <- summary(res_glmulti@objects[[i]]) + R2__h0[i] <- test[[i]]$adj.r.squared + } + + # message(paste0("Mean adjust r squared: ", round(mean(R2__h0), 3))) + } + return(cf) + } diff --git a/R/beta_div_test.R b/R/beta_div_test.R index 6c1184c4..78fd74d4 100644 --- a/R/beta_div_test.R +++ b/R/beta_div_test.R @@ -149,14 +149,7 @@ adonis_pq <- function(physeq, rarefy_nb_seqs = FALSE, verbose = TRUE, ...) { - physeq <- clean_pq( - physeq, - force_taxa_as_columns = TRUE, - remove_empty_samples = TRUE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE, - silent = TRUE - ) + physeq <- taxa_as_columns(physeq) if (dist_method %in% c("aitchison", "robust.aitchison")) { phy_dist <- @@ -174,7 +167,7 @@ adonis_pq <- function(physeq, termf <- stats::terms(.formula) term_lab <- attr(termf, "term.labels")[attr(termf, "order") == 1] - verify_pq(physeq, ...) + verify_pq(physeq) if (na_remove) { new_physeq <- physeq @@ -220,6 +213,117 @@ adonis_pq <- function(physeq, } ################################################################################ +################################################################################ +#' Permanova (adonis) on permutations of rarefaction even depth +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @inheritParams adonis_pq +#' @param nperm (int) The number of permutations to perform. +#' @param progress_bar (logical, default TRUE) Do we print progress during +#' the calculation. +#' @param quantile_prob (float, `[0:1]`) the value to compute the quantile. +#' Minimum quantile is compute using 1-quantile_prob. +#' @param sample.size (int) A single integer value equal to the number of +#' reads being simulated, also known as the depth. See +#' [phyloseq::rarefy_even_depth()]. +#' @param ... Other params for be passed on to [adonis_pq()] function +#' +#' @return A list of three dataframe representing the mean, the minimum quantile +#' and the maximum quantile value for adonis results. See [adonis_pq()]. +#' @export +#' @author Adrien Taudière +#' @seealso [adonis_pq()] +#' @examples +#' if (requireNamespace("vegan")) { +#' data_fungi_woNA <- +#' subset_samples(data_fungi, !is.na(Time) & !is.na(Height)) +#' adonis_rarperm_pq(data_fungi_woNA, "Time*Height", na_remove = TRUE, nperm = 3) +#' } +adonis_rarperm_pq <- function(physeq, + formula, + dist_method = "bray", + merge_sample_by = NULL, + na_remove = FALSE, + rarefy_nb_seqs = FALSE, + verbose = TRUE, + nperm = 99, + progress_bar = TRUE, + quantile_prob = 0.975, + sample.size = min(sample_sums(physeq)), + ...) { + res_perm <- list() + if (progress_bar) { + pb <- txtProgressBar( + min = 0, + max = nperm, + style = 3, + width = 50, + char = "=" + ) + } + for (i in 1:nperm) { + res_perm[[i]] <- + adonis_pq( + rarefy_even_depth( + physeq, + rngseed = i, + sample.size = sample.size, + verbose = verbose + ), + formula, + dist_method = dist_method, + merge_sample_by = merge_sample_by, + na_remove = na_remove, + correction_for_sample_size = FALSE, + rarefy_nb_seqs = rarefy_nb_seqs, + verbose = verbose, + sample.size = sample.size, + ... + ) + if (progress_bar) { + setTxtProgressBar(pb, i) + } + } + res_adonis <- list() + res_adonis[["mean"]] <- + apply(array(unlist(res_perm), c(dim( + as.data.frame(res_perm[[1]]) + ), nperm)), c(1, 2), mean) + colnames(res_adonis[["mean"]]) <- colnames(res_perm[[1]]) + rownames(res_adonis[["mean"]]) <- rownames(res_perm[[1]]) + + + res_adonis[["quantile_min"]] <- + apply( + array(unlist(res_perm), c(dim( + as.data.frame(res_perm[[1]]) + ), nperm)), + c(1, 2), + quantile, + na.rm = TRUE, + probs = 1 - quantile_prob + ) + colnames(res_adonis[["quantile_min"]]) <- colnames(res_perm[[1]]) + rownames(res_adonis[["quantile_min"]]) <- rownames(res_perm[[1]]) + + res_adonis[["quantile_max"]] <- + apply( + array(unlist(res_perm), c(dim( + as.data.frame(res_perm[[1]]) + ), nperm)), + c(1, 2), + quantile, + na.rm = TRUE, + probs = quantile_prob + ) + colnames(res_adonis[["quantile_max"]]) <- colnames(res_perm[[1]]) + rownames(res_adonis[["quantile_max"]]) <- rownames(res_perm[[1]]) + + return(res_adonis) +} +################################################################################ ################################################################################ #' @title Compute and test local contributions to beta diversity (LCBD) of @@ -258,14 +362,7 @@ adonis_pq <- function(physeq, LCBD_pq <- function(physeq, p_adjust_method = "BH", ...) { - physeq <- clean_pq( - physeq, - force_taxa_as_columns = TRUE, - remove_empty_samples = TRUE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE, - silent = TRUE - ) + physeq <- taxa_as_columns(physeq) mat <- as.matrix(unclass(physeq@otu_table)) resBeta <- adespatial::beta.div(mat, adj = FALSE, ...) @@ -537,10 +634,7 @@ multipatt_pq <- function(physeq, pval = 0.05, control = permute::how(nperm = 999), ...) { - physeq <- clean_pq(physeq, - clean_samples_names = FALSE, - force_taxa_as_columns = TRUE - ) + physeq <- taxa_as_columns(physeq) res <- indicspecies::multipatt(as.matrix(physeq@otu_table), @@ -955,7 +1049,7 @@ taxa_only_in_one_level <- function(physeq, ################################################################################ ################################################################################ -#' Distribution of sequences across a factor for one taxa +#' Distribution of sequences across a factor for one taxon #' #' @description #' `r lifecycle::badge("experimental")` @@ -982,7 +1076,7 @@ taxa_only_in_one_level <- function(physeq, #' distri_1_taxa(data_fungi, "Time", "ASV81", digits = 1) #' @importFrom stats sd distri_1_taxa <- function(physeq, fact, taxa_name, digits = 2) { - physeq <- clean_pq(physeq, force_taxa_as_rows = TRUE) + physeq <- taxa_as_rows(physeq) df <- data.frame( "nb_seq" = tapply( @@ -1014,3 +1108,298 @@ distri_1_taxa <- function(physeq, fact, taxa_name, digits = 2) { return(df) } ################################################################################ + +################################################################################ +#' Partition the Variation of a phyloseq object by 2, 3, or 4 Explanatory Matrices +#' @description +#' `r lifecycle::badge("experimental")` +#' The function partitions the variation in otu_table using +#' distance (Bray per default) with respect to two, three, or four explanatory +#' tables, using +#' adjusted R² in redundancy analysis ordination (RDA) or distance-based +#' redundancy analysis. If response is a single vector, partitioning is by +#' partial regression. Collinear variables in the explanatory tables do NOT +#' have to be removed prior to partitioning. See [vegan::varpart()] for more +#' information. +#' +#' @inheritParams clean_pq +#' @param list_component (required) A named list of 2, 3 or four vectors with +#' names from the `@sam_data` slot. +#' @param dist_method (default "bray") the distance used. See +#' [phyloseq::distance()] for all available distances or run +#' [phyloseq::distanceMethodList()]. +#' For "aitchison" and "robust.aitchison" distance, [vegan::vegdist()] +#' function is directly used. +#' @param dbrda_computation (logical) Do dbrda computations are runned for each +#' individual component (each name of the list component) ? +#' +#' @return an object of class "varpart", see [vegan::varpart()] +#' @export +#' @author Adrien Taudière +#' @seealso [var_par_rarperm_pq()], [vegan::varpart()], [plot_var_part_pq()] +#' @examples +#' \donttest{ +#' if (requireNamespace("vegan")) { +#' data_fungi_woNA <- +#' subset_samples(data_fungi, !is.na(Time) & !is.na(Height)) +#' res_var <- var_par_pq(data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' dbrda_computation = TRUE +#' ) +#' } +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `vegan::varpart()` if you +#' use this function. +var_par_pq <- + function(physeq, + list_component, + dist_method = "bray", + dbrda_computation = TRUE) { + physeq <- taxa_as_columns(physeq) + + verify_pq(physeq) + + if (dist_method %in% c("robust.aitchison", "aitchison")) { + dist_physeq <- + vegan::vegdist(as(otu_table(physeq, taxa_are_rows = FALSE), "matrix"), + method = dist_method + ) + } else { + dist_physeq <- phyloseq::distance(physeq, method = dist_method) + } + + for (i in 1:length(list_component)) { + assign( + names(list_component)[i], + as.data.frame(unclass(physeq@sam_data[, list_component[[i]]])) + ) + } + + if (length(list_component) == 2) { + res_varpart <- + vegan::varpart( + dist_physeq, + eval(sym(names(list_component)[1])), + eval(sym(names(list_component)[2])) + ) + } else if (length(list_component) == 3) { + res_varpart <- + vegan::varpart( + dist_physeq, + eval(sym(names(list_component)[1])), + eval(sym(names(list_component)[2])), + eval(sym(names(list_component)[3])) + ) + } else if (length(list_component) == 4) { + res_varpart <- + vegan::varpart( + dist_physeq, + eval(sym(names(list_component)[1])), + eval(sym(names(list_component)[2])), + eval(sym(names(list_component)[3])), + eval(sym(names(list_component)[4])) + ) + } else { + stop("The list_component must be of length 2, 3 or 4") + } + + if (dbrda_computation) { + res_varpart$dbrda_result <- list() + for (i in 1:length(list_component)) { + res_varpart$dbrda_result[[i]] <- + anova(vegan::dbrda( + as.formula(paste0( + "dist_physeq ~ ", paste(c(list_component[[i]]), collapse = " + ") + )), + data = as.data.frame(unclass( + physeq@sam_data + )) + )) + } + } + res_varpart$Xnames <- names(list_component) + return(res_varpart) + } +################################################################################ + + +################################################################################ +#' Partition the Variation of a phyloseq object with rarefaction permutations +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' This is an extension of the function [var_par_pq()]. The main addition is +#' the computation of nperm permutations with rarefaction even depth by +#' sample. The return object +#' +#' +#' @inheritParams clean_pq +#' @param list_component (required) A named list of 2, 3 or four vectors with +#' names from the `@sam_data` slot. +#' @param dist_method (default "bray") the distance used. See +#' [phyloseq::distance()] for all available distances or run +#' [phyloseq::distanceMethodList()]. +#' For aitchison and robust.aitchison distance, [vegan::vegdist()] +#' function is directly used.#' @param fill_bg +#' @param nperm (int) The number of permutations to perform. +#' @param quantile_prob (float, `[0:1]`) the value to compute the quantile. +#' Minimum quantile is compute using 1-quantile_prob. +#' @param dbrda_computation (logical) Do dbrda computations are runned for each +#' individual component (each name of the list component) ? +#' @param dbrda_signif_pval (float, `[0:1]`) The value under which the dbrda is +#' considered significant. +#' @param sample.size (int) A single integer value equal to the number of +#' reads being simulated, also known as the depth. See +#' [phyloseq::rarefy_even_depth()]. +#' @param verbose (logical). If TRUE, print additional informations. +#' @param progress_bar (logical, default TRUE) Do we print progress during +#' the calculation? +#' +#' @return A list of class varpart with additional information in the +#' `$part$indfract` part. Adj.R.square is the mean across permutation. +#' Adj.R.squared_quantil_min and Adj.R.squared_quantil_max represent +#' the quantile values of adjuste R squared +#' @export +#' @seealso [var_par_pq()], [vegan::varpart()], [plot_var_part_pq()] +#' @author Adrien Taudière +#' @examples +#' \donttest{ +#' if (requireNamespace("vegan")) { +#' data_fungi_woNA <- subset_samples(data_fungi, !is.na(Time) & !is.na(Height)) +#' res_var_9 <- var_par_rarperm_pq( +#' data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' nperm = 9, +#' dbrda_computation = TRUE +#' ) +#' res_var_2 <- var_par_rarperm_pq( +#' data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' nperm = 2, +#' dbrda_computation = TRUE +#' ) +#' } +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `vegan::varpart()` if you +#' use this function. +var_par_rarperm_pq <- + function(physeq, + list_component, + dist_method = "bray", + nperm = 99, + quantile_prob = 0.975, + dbrda_computation = FALSE, + dbrda_signif_pval = 0.05, + sample.size = min(sample_sums(physeq)), + verbose = FALSE, + progress_bar = TRUE) { + physeq <- taxa_as_columns(physeq) + verify_pq(physeq) + + if (progress_bar) { + pb <- txtProgressBar( + min = 0, + max = nperm, + style = 3, + width = 50, + char = "=" + ) + } + + if (dist_method %in% c("robust.aitchison", "aitchison")) { + dist_physeq <- + vegdist(as(otu_table(physeq, taxa_are_rows = FALSE), "matrix"), method = dist_method) + } else { + dist_physeq <- phyloseq::distance(physeq, method = dist_method) + } + + res_perm <- list() + for (i in 1:nperm) { + res_perm[[i]] <- + var_par_pq( + physeq = + rarefy_even_depth( + physeq, + rngseed = i, + sample.size = sample.size, + verbose = verbose + ), + list_component = list_component, + dist_method = dist_method, + dbrda_computation = dbrda_computation + ) + + if (progress_bar) { + setTxtProgressBar(pb, i) + } + } + res_varpart <- var_par_pq( + physeq = physeq, + list_component = list_component, + dist_method = dist_method, + dbrda_computation = dbrda_computation + ) + + if (dbrda_computation) { + res_varpart$dbrda_result_prop_pval_signif <- + rowSums(sapply(res_perm, function(x) { + sapply(x$dbrda_result, function(xx) { + xx$`Pr(>F)`[[1]] + }) + }) < dbrda_signif_pval) / nperm + } + + + res_varpart$part$indfract$R.square <- + rowMeans(sapply(res_perm, function(x) { + (x$part$indfract$R.square) + })) + res_varpart$part$indfract$R.square_quantil_max <- + apply(sapply(res_perm, function(x) { + (x$part$indfract$R.square) + }), 1, function(xx) { + quantile(xx, probs = quantile_prob, na.rm = TRUE) + }) + res_varpart$part$indfract$R.square_quantil_min <- + apply(sapply(res_perm, function(x) { + (x$part$indfract$R.square) + }), 1, function(xx) { + quantile(xx, probs = 1 - quantile_prob, na.rm = TRUE) + }) + + res_varpart$part$indfract$Adj.R.square <- + rowMeans(sapply(res_perm, function(x) { + (x$part$indfract$Adj.R.square) + })) + res_varpart$part$indfract$Adj.R.squared_quantil_max <- + apply(sapply(res_perm, function(x) { + (x$part$indfract$Adj.R.square) + }), 1, function(xx) { + quantile(xx, probs = quantile_prob, na.rm = TRUE) + }) + res_varpart$part$indfract$Adj.R.squared_quantil_min <- + apply(sapply(res_perm, function(x) { + (x$part$indfract$Adj.R.square) + }), 1, function(xx) { + quantile(xx, probs = 1 - quantile_prob, na.rm = TRUE) + }) + return(res_varpart) + + + res_varpart$Xnames <- names(list_component) + return(res_varpart) + } +################################################################################ diff --git a/R/blast.R b/R/blast.R index d813bc6d..40861fbc 100644 --- a/R/blast.R +++ b/R/blast.R @@ -331,7 +331,7 @@ blast_pq <- function(physeq, ################################################################################ -#' Filter undesirable taxa using blast against a against a custom database. +#' Filter undesirable taxa using blast against a custom database. #' #' `r lifecycle::badge("experimental")` #' diff --git a/R/controls.R b/R/controls.R index 48619374..3393f33c 100644 --- a/R/controls.R +++ b/R/controls.R @@ -176,7 +176,7 @@ subset_taxa_tax_control <- } for (i in seq_along(sample_names(physeq))) { - # for each samples + # for each sample if (method %in% c("min", "max", "mean", "cutoff_mixt")) { find_cutoff <- function(proba = 0.5, il = index_lower) { diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index 6667ec49..32d096e7 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -424,7 +424,7 @@ track_wkflow <- function(list_of_objects, ################################################################################ #' Track the number of reads (= sequences), samples and cluster (e.g. ASV) -#' for each samples. +#' for each sample #' #' @description #' `r lifecycle::badge("experimental")` @@ -1215,7 +1215,7 @@ mumu_pq <- function(physeq, stderr = TRUE ) otu_tab <- - data.frame(unclass(clean_pq(physeq, force_taxa_as_rows = TRUE)@otu_table)) + data.frame(unclass(taxa_as_rows(physeq)@otu_table)) otu_tab <- cbind("ASV" = rownames(otu_tab), otu_tab) write.table( otu_tab, @@ -2530,12 +2530,12 @@ taxa_only_in_one_level <- function(physeq, ################################################################################ -#' Normalize otu table using samples depth +#' Normalize OTU table using samples depth #' @description #' `r lifecycle::badge("experimental")` #' #' This function implement the method proposed by -#' [McKnight et al. 2018](https://doi.org/10.5061/dryad.tn8qs35) +#' McKnight et al. 2018 (\doi{doi:10.5061/dryad.tn8qs35}) #' #' @inheritParams clean_pq #' @@ -2587,7 +2587,7 @@ normalize_prop_pq <- function(physeq, base_log = 2, constante = 10000, digits = ################################################################################ -#' Build a samples information data.frame from physeq object +#' Build a sample information tibble from physeq object #' #' @description #' `r lifecycle::badge("experimental")` @@ -2610,9 +2610,12 @@ normalize_prop_pq <- function(physeq, base_log = 2, constante = 10000, digits = #' is grouped by samples with abundance summed across OTU. #' @param rarefy_by_sample (logical, default FALSE) If TRUE, rarefy #' samples using [phyloseq::rarefy_even_depth()] function. +#' @param taxa_ranks A vector of taxonomic ranks. For examples c("Family","Genus"). +#' If taxa ranks is not set (default value = NULL), taxonomic information are not +#' present in the resulting tibble. #' @author Adrien Taudière #' @export -#' @return A tibble with a row for each samples. Columns provide information +#' @return A tibble with a row for each sample. Columns provide information #' from `sam_data` slot as well as hill numbers, Abundance (nb of sequences), #' and Abundance_log10 (*log10(1+Abundance)*). #' @examples @@ -2620,18 +2623,18 @@ normalize_prop_pq <- function(physeq, base_log = 2, constante = 10000, digits = #' psm_tib <- psmelt_samples_pq(data_fungi_mini, hill_scales = c(0, 2, 7)) #' ggstatsplot::ggbetweenstats(psm_tib, Height, Hill_0) #' ggstatsplot::ggbetweenstats(psm_tib, Height, Hill_7) -#' ggstatsplot::ggscatterstats(psmelt_samples_pq(data_fungi_mini, -#' filter_zero = TRUE -#' ), Abundance_log10, Hill_1) -#' ggstatsplot::ggscatterstats(psmelt_samples_pq(data_fungi, -#' rarefy_by_sample = TRUE -#' ), Time, Hill_1) +#' +#' psm_tib_tax <- psmelt_samples_pq(data_fungi_mini, taxa_ranks = c("Class", "Family")) +#' ggplot(filter(psm_tib_tax, Abundance > 2000), aes(y = Family, x = Abundance, fill = Time)) + +#' geom_bar(stat = "identity") + +#' facet_wrap(~Height) #' } psmelt_samples_pq <- function(physeq, hill_scales = c(0, 1, 2), filter_zero = TRUE, - rarefy_by_sample = FALSE) { + rarefy_by_sample = FALSE, + taxa_ranks = NULL) { verify_pq(physeq) if (rarefy_by_sample) { physeq <- rarefy_even_depth(physeq) @@ -2640,17 +2643,56 @@ psmelt_samples_pq <- if (filter_zero) { psm <- psm |> filter(Abundance > 0) } - psm <- psm |> - select(Sample, OTU, Abundance, colnames(physeq@sam_data)) + if (is.null(taxa_ranks)) { + psm <- psm |> + select(Sample, OTU, Abundance, colnames(physeq@sam_data)) + nb_distinct_samp <- psm |> + group_by(Sample) |> + select(-OTU, -Abundance) |> + distinct() |> + nrow() + + if (nsamples(physeq) != nb_distinct_samp) { + stop("The number of samples in physeq is different from the resulting + number in psm tibble.") + } + } else { + psm <- psm |> + select(Sample, OTU, Abundance, colnames(physeq@sam_data), !!taxa_ranks) + } + + if (is.null(taxa_ranks)) { + psm_samp <- psm |> + select(-OTU) |> + group_by(Sample) |> + summarise( + Abundance = sum(Abundance), + across(where(is.numeric) & + !Abundance, ~ mean(.x, na.rm = TRUE)), + across(where(is.character), ~ .x[1]) + ) + } else { + psm_temp <- psm |> + select(-OTU) |> + group_by(Sample) + + for (i in seq_along(taxa_ranks)) { + psm_temp <- psm_temp |> + group_by(.data[[taxa_ranks[[i]]]], .add = TRUE) + } + + psm_samp <- psm_temp |> + summarise( + Abundance = sum(Abundance), + across(where(is.numeric) & + !Abundance, ~ mean(.x, na.rm = TRUE)), + across(where(is.character), ~ .x[1]), + .groups = "drop" + ) + } if (!is.null(hill_scales)) { - physeq <- clean_pq( - physeq, - force_taxa_as_rows = TRUE, - remove_empty_samples = FALSE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE - ) + physeq <- taxa_as_rows(physeq) df_hill <- vegan::renyi(t(physeq)@otu_table, scales = hill_scales, @@ -2660,30 +2702,120 @@ psmelt_samples_pq <- colnames(df_hill) <- paste0("Hill_", hill_scales) df_hill$Sample <- rownames(df_hill) - psm <- full_join(psm, df_hill) - } - nb_distinct_samp <- psm |> - group_by(Sample) |> - select(-OTU, -Abundance) |> - distinct() |> - nrow() - - if (nsamples(physeq) != nb_distinct_samp) { - stop("The number of samples in physeq is different from the resulting - number in psm tibble.") + psm_samp <- full_join(psm_samp, df_hill) } - psm <- psm |> - select(-OTU) |> - group_by(Sample) |> - summarise( - Abundance = sum(Abundance), - across(where(is.numeric) & - !Abundance, ~ mean(.x, na.rm = TRUE)), - across(where(is.character), ~ .x[1]) - ) + psm_samp <- psm_samp |> mutate(Abundance_log10 = log10(1 + Abundance)) + return(tibble(psm_samp)) + } +################################################################################ + +################################################################################ +#' Force taxa to be in columns in the otu_table of a physeq object +#' +#' @description +#' `r lifecycle::badge("maturing")` +#' @inheritParams clean_pq +#' @author Adrien Taudière +#' @export +#' @return A new \code{\link{phyloseq-class}} object +taxa_as_columns <- function(physeq) { + physeq <- clean_pq( + physeq, + clean_samples_names = FALSE, + remove_empty_samples = FALSE, + remove_empty_taxa = FALSE, + force_taxa_as_columns = TRUE, + silent = TRUE + ) + return(physeq) +} +################################################################################ - psm <- psm |> mutate(Abundance_log10 = log10(1 + Abundance)) - return(tibble(psm)) +################################################################################ +#' Force taxa to be in columns in the otu_table of a physeq object +#' +#' @description +#' `r lifecycle::badge("maturing")` +#' @inheritParams clean_pq +#' @author Adrien Taudière +#' @export +#' @return A new \code{\link{phyloseq-class}} object +taxa_as_rows <- function(physeq) { + physeq <- clean_pq( + physeq, + clean_samples_names = FALSE, + remove_empty_samples = FALSE, + remove_empty_taxa = FALSE, + force_taxa_as_rows = TRUE, + silent = TRUE + ) + return(physeq) +} +################################################################################ + +################################################################################ +#' Rarefy (equalize) the number of samples per modality of a factor +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @inheritParams clean_pq +#' @param fact (required): The variable to rarefy. Must be present in +#' the `sam_data` slot of the physeq object. +#' @param rngseed (Optional). A single integer value passed to set.seed, +#' which is used to fix a seed for reproducibly random number generation +#' (in this case, reproducibly random subsampling). If set to FALSE, then no +#' iddling with the RNG seed is performed, +#' and it is up to the user to appropriately call +#' @param verbose (logical). If TRUE, print additional informations. +#' @export +#' @author Adrien Taudière +#' @return A new \code{\link{phyloseq-class}} object. +#' @seealso [accu_plot_balanced_modality()] +#' @examples +#' table(data_fungi_mini@sam_data$Height) +#' data_fungi_mini2 <- rarefy_sample_count_by_modality(data_fungi_mini, "Height") +#' table(data_fungi_mini2@sam_data$Height) +#' if (requireNamespace("patchwork")) { +#' ggvenn_pq(data_fungi_mini, "Height") + ggvenn_pq(data_fungi_mini2, "Height") +#' } +rarefy_sample_count_by_modality <- + function(physeq, fact, rngseed = FALSE, verbose = TRUE) { + if (as(rngseed, "logical")) { + set.seed(rngseed) + if (verbose) { + message("`set.seed(", rngseed, ")` was used to initialize repeatable random subsampling.") + message("Please record this for your records so others can reproduce.") + message("Try `set.seed(", rngseed, "); .Random.seed` for the full vector", + sep = "" + ) + message("...") + } + } else if (verbose) { + message( + "You set `rngseed` to FALSE. Make sure you've set & recorded\n", + " the random seed of your session for reproducibility.\n", + "See `?set.seed`\n" + ) + message("...") + } + mod <- physeq@sam_data[[fact]] + n_mod <- table(mod) + samples_names <- sample_names(physeq) + samp_to_keep <- c() + for (modality in levels(as.factor(mod))) { + samp_to_keep <- + c( + samp_to_keep, + sample( + as.numeric(grep(modality, mod)), + size = min(n_mod), + replace = FALSE + ) + ) + } + new_physeq <- + subset_samples_pq(physeq, 1:nsamples(physeq) %in% samp_to_keep) + return(new_physeq) } ################################################################################ diff --git a/R/data.R b/R/data.R index cafcf309..cafef09a 100644 --- a/R/data.R +++ b/R/data.R @@ -65,8 +65,8 @@ "data_fungi_mini" -#' This tutorial explore the dataset from Tengeler et al. (2020) available in the `mia` package. -#' obtain using `mia::makePhyloseqFromTreeSE(Tengeler2020)` +#' This tutorial explores the dataset from Tengeler et al. (2020) available in the `mia` package. +#' obtained using `mia::makePhyloseqFromTreeSE(Tengeler2020)` #' #' This is a phyloseq version of the Tengeler2020 dataset. #' diff --git a/R/miscellanous.R b/R/miscellanous.R index 9025bc0d..84ad4be5 100644 --- a/R/miscellanous.R +++ b/R/miscellanous.R @@ -9,7 +9,7 @@ #' #' @inheritParams clean_pq #' @param min_number (int) the minimum number of sequences to put -#' a 1 in the otu table. +#' a 1 in the OTU table. #' @author Adrien Taudière #' #' @return A \code{physeq} object with only 0/1 in the OTU table diff --git a/R/plot_functions.R b/R/plot_functions.R index cb61f116..a01d6577 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -152,8 +152,7 @@ accu_plot <- transp(funky_color(nlevels(factor_interm) + 1), 0.3) } - plot( - accu_all, + plot(accu_all, # ci_type = "poly", # ci_col = ci_col[1], col = col[1], @@ -164,13 +163,9 @@ accu_plot <- ) for (i in seq_along(levels(factor_interm))) { - graphics::lines( - accu[[i]], - # ci_type = "poly", - # ci_col = ci_col[i + 1], + graphics::lines(accu[[i]], col = col[i + 1], - lwd = lwd # , - # ci_lty = 0 + lwd = lwd ) } if (leg) { @@ -210,7 +205,8 @@ accu_plot <- if (n[length(n)] != tot[i]) { n <- c(n, tot[i]) } - res_interm <- vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE) + res_interm <- + vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE) res <- cbind(as.matrix(res_interm)[1, ], as.matrix(res_interm)[2, ]) return(res) @@ -277,6 +273,181 @@ accu_plot <- } ################################################################################ + +################################################################################ +#' Plot accumulation curves with balanced modality and depth rarefaction +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' This function (i) rarefy (equalize) the number of samples per modality of a +#' factor and (ii) rarefy the number of sequences per sample (depth). The +#' seed is set to 1:nperm. Thus, with exacly the same parameter, including +#' nperm values, results must be identical. +#' +#' @inheritParams clean_pq +#' @param fact (required) The variable to rarefy. Must be present in +#' the `sam_data` slot of the physeq object. +#' @param nperm (int) The number of permutations to perform. +#' @param step (int) distance among points calculated to plot lines. +#' A low value give better plot but is more time consuming. +#' @param by.fact (logical, default TRUE) +#' First merge the OTU table by factor to plot only one line by factor +#' @param progress_bar (logical, default TRUE) Do we print progress during +#' the calculation? +#' @param quantile_prob (float, `[0:1]`) the value to compute the quantile. +#' Minimum quantile is compute using 1-quantile_prob. +#' @param rarefy_by_sample_before_merging (logical, default TRUE): +#' rarefy_by_sample_before_merging = FALSE is buggy for the moment.Please +#' only use rarefy_by_sample_before_merging = TRUE +#' @param sample.size (int) A single integer value equal to the number of +#' reads being simulated, also known as the depth. See +#' [phyloseq::rarefy_even_depth()]. +#' @param verbose (logical). If TRUE, print additional informations. +#' @param ... Other params for be passed on to [accu_plot()] function +#' +#' @export +#' @author Adrien Taudière +#' @seealso [accu_plot()], [rarefy_sample_count_by_modality()], [phyloseq::rarefy_even_depth()] +#' +#' @return A ggplot2 plot representing the richness accumulation plot +#' @examples +#' \donttest{ +#' data_fungi_woNA4Time <- +#' subset_samples(data_fungi, !is.na(Time)) +#' data_fungi_woNA4Time@sam_data$Time <- paste0("time-", data_fungi_woNA4Time@sam_data$Time) +#' accu_plot_balanced_modality(data_fungi_woNA4Time, "Time", nperm = 3) +#' +#' data_fungi_woNA4Height <- +#' subset_samples(data_fungi, !is.na(Height)) +#' accu_plot_balanced_modality(data_fungi_woNA4Height, "Height", nperm = 3) +#' } +accu_plot_balanced_modality <- function(physeq, + fact, + nperm = 99, + step = 2000, + by.fact = TRUE, + progress_bar = TRUE, + quantile_prob = 0.975, + rarefy_by_sample_before_merging = TRUE, + sample.size = 1000, + verbose = FALSE, + ...) { + if (rarefy_by_sample_before_merging) { + p_for_dim <- accu_plot( + rarefy_sample_count_by_modality( + rarefy_even_depth( + physeq, + rngseed = 1, + sample.size = sample.size, + verbose = verbose + ), + fact, + rngseed = 1 + ), + fact = fact, + step = step, + by.fact = by.fact + )$data + } else { + p_for_dim <- accu_plot(physeq, + fact = fact, + step = step, + by.fact = by.fact + )$data + dim_for_plist <- + max(tapply(sample_sums(physeq), physeq@sam_data[[fact]], sum)) + } + + dim_for_plist <- dim(p_for_dim) + plist <- array(dim = c(dim_for_plist[1], 5, nperm)) + + if (progress_bar) { + pb <- txtProgressBar( + min = 0, + max = nperm, + style = 3, + width = 50, + char = "=" + ) + } + for (i in 1:nperm) { + if (rarefy_by_sample_before_merging) { + plist[, , i] <- + as.matrix(suppressWarnings(suppressMessages( + accu_plot( + rarefy_sample_count_by_modality( + rarefy_even_depth( + physeq, + rngseed = i, + sample.size = + sample.size, + verbose = verbose + ), + fact, + rngseed = i + ), + fact = fact, + step = step, + by.fact = by.fact, + ... + ) + ))$data[, c(2:4, 6, 7)]) + } else { + res_interm <- + as.matrix(suppressWarnings(suppressMessages( + accu_plot( + rarefy_sample_count_by_modality(physeq, + fact, + rngseed = i, + verbose = verbose + ), + fact = fact, + step = step, + by.fact = by.fact, + ... + ) + ))$data[, c(2:4, 6, 7)]) + plist[1:nrow(res_interm), , i] <- res_interm + } + if (progress_bar) { + setTxtProgressBar(pb, i) + } + } + + res_mean <- data.frame(apply(plist, 1:2, mean, na.rm = TRUE)) + colnames(res_mean) <- colnames(p_for_dim[, c(2:4, 6, 7)]) + res_mean$fact <- p_for_dim$.id + + res_mean$X1_lim1 <- apply(plist, 1:2, function(x) { + quantile(x, probs = quantile_prob, na.rm = TRUE) + })[, 1] + res_mean$X1_lim2 <- apply(plist, 1:2, function(x) { + quantile(x, probs = 1 - quantile_prob, na.rm = TRUE) + })[, 1] + + res_mean$factor <- p_for_dim$.id + + res_mean <- res_mean %>% + dplyr::filter(!is.na(X1)) %>% + arrange(X1) + + p <- ggplot(res_mean, aes(x = x, y = X1, color = factor)) + + geom_line(linewidth = 1.5) + + geom_ribbon( + aes( + ymin = X1_lim1, + ymax = X1_lim2, + fill = factor + ), + alpha = 0.2, + linetype = 2, + linewidth = 0.2 + ) + return(p) +} +################################################################################ + + ################################################################################ #' Compute the number of sequence to obtain a given proportion of ASV in #' accumulation curves @@ -286,7 +457,7 @@ accu_plot <- #' @param res_accuplot the result of the function accu_plot() #' @param threshold the proportion of ASV to obtain in each samples #' -#' @return a value for each samples of the number of sequences needed +#' @return a value for each sample of the number of sequences needed #' to obtain `threshold` proportion of the ASV #' #' @examples @@ -580,7 +751,7 @@ circle_pq <- #' of \code{fact} #' #' @export -#' @seealso \code{\link[networkD3]{sankeyNetwork}} +#' @seealso \code{\link[networkD3]{sankeyNetwork}}, [ggaluv_pq()] sankey_pq <- function(physeq = NULL, @@ -1039,14 +1210,7 @@ ggvenn_pq <- function(physeq = NULL, physeq@sam_data[[fact]] <- as.factor(physeq@sam_data[[fact]]) } - physeq <- clean_pq( - physeq, - force_taxa_as_columns = TRUE, - remove_empty_samples = FALSE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE, - silent = TRUE - ) + physeq <- taxa_as_columns(physeq) if (rarefy_before_merging) { physeq <- rarefy_even_depth(physeq) @@ -1080,17 +1244,20 @@ ggvenn_pq <- function(physeq = NULL, taxonomic_rank ]))) } - nb_seq <- c(nb_seq, sum(physeq@otu_table[physeq@sam_data[[fact]] == f, ], na.rm = TRUE)) + nb_seq <- + c(nb_seq, sum(physeq@otu_table[physeq@sam_data[[fact]] == f, ], na.rm = TRUE)) } if (max(nb_seq) / min(nb_seq) > 2) { - message(paste0( - "Two modalities differ greatly (more than x2) in their number of sequences (", - max(nb_seq), - " vs ", - min(nb_seq), - "). You may be interested by the parameter rarefy_after_merging" - )) + message( + paste0( + "Two modalities differ greatly (more than x2) in their number of sequences (", + max(nb_seq), + " vs ", + min(nb_seq), + "). You may be interested by the parameter rarefy_after_merging" + ) + ) } if (add_nb_samples) { @@ -1185,7 +1352,8 @@ multiplot <- matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) - print(plots[[i]], + print( + plots[[i]], vp = grid::viewport( layout.pos.row = matchidx$row, layout.pos.col = matchidx$col @@ -1215,7 +1383,14 @@ multiplot <- #' @param fact (required): The variable to test. Must be present in #' the `sam_data` slot of the physeq object. #' @param variable : Alias for factor. Kept only for backward compatibility. -#' @param color_fac (optional): The variable to color the barplot +#' @param hill_scales (a vector of integer) The list of q values to compute +#' the hill number H^q. If Null, no hill number are computed. Default value +#' compute the Hill number 0 (Species richness), the Hill number 1 +#' (exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +#' Index). +#' @param color_fac (optional): The variable to color the barplot. For ex. +#' same as fact. Not very useful because ggplot2 plot colors can be +#' change using `scale_color_XXX()` function. #' @param letters (optional, default=FALSE): If set to TRUE, the plot #' show letters based on p-values for comparison. Use the #' \code{\link[multcompView]{multcompLetters}} function from the package @@ -1238,7 +1413,9 @@ multiplot <- #' do not change value of Hill number but only the test associated #' values (including the pvalues). To rarefy samples, you may use the #' function [phyloseq::rarefy_even_depth()]. -#' +#' @param na_remove (logical, default TRUE) Do we remove samples with NA in +#' the factor fact ? Note that na_remove is always TRUE when using +#' letters = TRUE #' @return Either an unique ggplot2 object (if one_plot is TRUE) or #' a list of 4 ggplot2 plot: #' - plot_Hill_0 : the boxplot of Hill number 0 (= species richness) @@ -1253,214 +1430,156 @@ multiplot <- #' @author Adrien Taudière #' @examples #' -#' p <- hill_pq(data_fungi_mini, "Height") +#' p <- hill_pq(data_fungi_mini, "Height", hill_scales = 1:2) #' p_h1 <- p[[1]] + theme(legend.position = "none") #' p_h2 <- p[[2]] + theme(legend.position = "none") -#' p_h3 <- p[[3]] + theme(legend.position = "none") -#' multiplot(plotlist = list(p_h1, p_h2, p_h3, p[[4]]), cols = 4) -#' +#' multiplot(plotlist = list(p_h1, p_h2, p[[3]]), cols = 4) #' \donttest{ #' if (requireNamespace("multcompView")) { +#' p2 <- hill_pq(data_fungi, "Time", +#' correction_for_sample_size = FALSE, +#' letters = TRUE, add_points = TRUE, plot_with_tuckey = FALSE +#' ) +#' if (requireNamespace("patchwork")) { +#' patchwork::wrap_plots(p2, guides = "collect") +#' } #' # Artificially modify data_fungi to force alpha-diversity effect #' data_fungi_modif <- clean_pq(subset_samples_pq(data_fungi, !is.na(data_fungi@sam_data$Height))) #' data_fungi_modif@otu_table[data_fungi_modif@sam_data$Height == "High", ] <- #' data_fungi_modif@otu_table[data_fungi_modif@sam_data$Height == "High", ] + #' sample(c(rep(0, ntaxa(data_fungi_modif) / 2), rep(100, ntaxa(data_fungi_modif) / 2))) -#' p2 <- hill_pq(data_fungi_modif, "Height", letters = TRUE) +#' p3 <- hill_pq(data_fungi_modif, "Height", letters = TRUE) +#' p3[[1]] #' } #' } #' @seealso [psmelt_samples_pq()] and [ggbetween_pq()] -hill_pq <- - function(physeq, - fact = NULL, - variable = NULL, - color_fac = NA, - letters = FALSE, - add_points = FALSE, - add_info = TRUE, - one_plot = FALSE, - plot_with_tuckey = TRUE, - correction_for_sample_size = TRUE) { - if (!is.null(variable)) { - if (!is.null(fact)) { - stop("You must set only one parameter of variable or fact. This 2 - parameters are strictly equivalent.") - } +hill_pq <- function(physeq, + fact = NULL, + variable = NULL, + hill_scales = c(0, 1, 2), + color_fac = NA, + letters = FALSE, + add_points = FALSE, + add_info = TRUE, + one_plot = FALSE, + plot_with_tuckey = TRUE, + correction_for_sample_size = TRUE, + na_remove = TRUE) { + if (!is.null(variable)) { + if (!is.null(fact)) { + stop( + "You must set only one parameter of variable or fact. This 2 + parameters are strictly equivalent." + ) } else { - if (!is.null(fact)) { - variable <- fact - } else { - stop("You must set the parameter fact.") - } + variable_fac <- variable } - var <- sym(variable) - if (is.na(color_fac)) { - color_fac <- sym(variable) + } else { + if (!is.null(fact)) { + variable_fac <- fact } else { - color_fac <- sym(color_fac) + stop("You must set the parameter fact.") } + } + var <- sym(variable_fac) + if (is.na(color_fac)) { + color_fac <- sym(variable_fac) + } else { + color_fac <- sym(color_fac) + } - physeq <- clean_pq( - physeq, - force_taxa_as_rows = TRUE, - remove_empty_samples = FALSE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE - ) + physeq <- taxa_as_rows(physeq) + if (na_remove || letters) { + physeq <- subset_samples_pq(physeq, !is.na(physeq@sam_data[[fact]])) + } + physeq@sam_data[[fact]] <- as.factor(physeq@sam_data[[fact]]) - otu_hill <- - vegan::renyi(t(physeq)@otu_table, - scales = c(0, 1, 2), - hill = TRUE - ) - colnames(otu_hill) <- c("Hill_0", "Hill_1", "Hill_2") - df_hill <- data.frame(otu_hill, physeq@sam_data) - df_hill[, 1:3] <- apply(df_hill[, 1:3], 2, as.numeric) + otu_hill <- + vegan::renyi(t(physeq)@otu_table, + scales = hill_scales, + hill = TRUE + ) + colnames(otu_hill) <- paste0("Hill_", hill_scales) - p_var <- hill_tuckey_pq(physeq, variable, correction_for_sample_size = correction_for_sample_size) + df_hill <- data.frame(otu_hill, physeq@sam_data) + df_hill[, seq_along(hill_scales)] <- + apply(df_hill[, seq_along(hill_scales)], 2, as.numeric) - p_0 <- ggplot(df_hill, aes(group = !!var, Hill_0)) + - geom_boxplot(outlier.size = 2, aes(colour = as.factor(!!color_fac), y = !!var)) + - labs(x = "Richness (Hill 0)") - p_1 <- ggplot(df_hill, aes(group = !!var, Hill_1)) + - geom_boxplot(outlier.size = 2, aes(colour = as.factor(!!color_fac), y = !!var)) + - labs(x = "Shannon (Hill 1)") - p_2 <- ggplot(df_hill, aes(group = !!var, Hill_2)) + + p_var <- + hill_tuckey_pq( + physeq, + modality = variable_fac, + hill_scales = hill_scales, + correction_for_sample_size = correction_for_sample_size + ) + p_list <- list() + for (i in seq_along(hill_scales)) { + p_list[[i]] <- + ggplot(df_hill, aes(group = !!var, .data[[paste0("Hill_", hill_scales[[i]])]])) + geom_boxplot(outlier.size = 2, aes(colour = as.factor(!!color_fac), y = !!var)) + - labs(x = "Simpson (Hill 2)") - + labs(x = paste0("Hill_", hill_scales[[i]])) if (add_points) { - p_0 <- - p_0 + geom_jitter(aes(y = !!var, colour = as.factor(!!color_fac)), alpha = 0.5) - p_1 <- - p_1 + geom_jitter(aes(y = !!var, colour = as.factor(!!color_fac)), alpha = 0.5) - p_2 <- - p_2 + geom_jitter(aes(y = !!var, colour = as.factor(!!color_fac)), alpha = 0.5) + p_list[[i]] <- + p_list[[i]] + geom_jitter(aes(y = !!var, colour = as.factor(!!color_fac)), alpha = 0.5) } - if (add_info) { subtitle_plot <- paste0( "Nb of samples: '", - paste0(names(table(physeq@sam_data[[variable]])), + paste0( + names(table(physeq@sam_data[[variable_fac]])), sep = "' : ", - table(physeq@sam_data[[variable]]), collapse = " - '" + table(physeq@sam_data[[variable_fac]]), + collapse = " - '" ) ) - p_0 <- p_0 + labs(subtitle = subtitle_plot) - p_1 <- p_1 + labs(subtitle = subtitle_plot) - p_2 <- p_2 + labs(subtitle = subtitle_plot) + p_list[[i]] <- p_list[[i]] + labs(subtitle = subtitle_plot) } - if (letters) { - ### HILL 0 - data_h0 <- - p_var$data[grep("Hill Number 0", p_var$data[, 5]), ] - data_h0_pval <- data_h0$p.adj - names(data_h0_pval) <- data_h0$modality + data_h <- + p_var$data[grep(paste0("Hill_", hill_scales[[i]]), p_var$data[, 5]), ] + data_h_pval <- data_h$`p adj` + names(data_h_pval) <- data_h$modality Letters <- - multcompView::multcompLetters(data_h0_pval, reversed = TRUE)$Letters + multcompView::multcompLetters(data_h_pval, reversed = TRUE)$Letters dt <- data.frame(variab = names(Letters), Letters = Letters) names(dt) <- c(var, "Letters") - data_letters <- p_0$data %>% + data_letters <- p_list[[i]]$data %>% group_by(!!var) %>% - summarize(max_Hill = max(Hill_0)) %>% + summarize(pos_letters = max(.data[[paste0("Hill_", hill_scales[[i]])]]) + 1) %>% inner_join(dt) - p_0 <- p_0 + + p_list[[i]] <- p_list[[i]] + geom_label( data = data_letters, aes( - x = max_Hill + 1, + x = pos_letters, label = Letters ), - y = ggplot_build(p_0)$data[[1]]$y, - size = 4, - stat = "unique", - parse = TRUE - ) - - ### HILL 1 - data_h1 <- - p_var$data[grep("Hill Number 1", p_var$data[, 5]), ] - data_h1_pval <- data_h1$p.adj - names(data_h1_pval) <- data_h1$modality - Letters <- - multcompView::multcompLetters(data_h1_pval, reversed = TRUE)$Letters - - dt <- data.frame(variab = names(Letters), Letters = Letters) - names(dt) <- c(var, "Letters") - data_letters <- p_1$data %>% - group_by(!!var) %>% - summarize(max_Hill = max(Hill_1)) %>% - inner_join(dt) - - p_1 <- p_1 + - geom_label( - data = data_letters, - aes( - x = max_Hill + 1, - label = Letters - ), - y = ggplot_build(p_0)$data[[1]]$y, - size = 4, - stat = "unique", - parse = TRUE - ) - - ### HILL 2 - data_h2 <- - p_var$data[grep("Hill Number 2", p_var$data[, 5]), ] - data_h2_pval <- data_h2$p.adj - names(data_h2_pval) <- data_h2$modality - Letters <- - multcompView::multcompLetters(data_h2_pval, reversed = TRUE)$Letters - - dt <- data.frame(variab = names(Letters), Letters = Letters) - names(dt) <- c(var, "Letters") - data_letters <- p_2$data %>% - group_by(!!var) %>% - summarize(max_Hill = max(Hill_2)) %>% - inner_join(dt) - - p_2 <- p_2 + - geom_label( - data = data_letters, - aes( - x = max_Hill + 1, - label = Letters - ), - y = ggplot_build(p_0)$data[[1]]$y, + y = ggplot_build(p_list[[i]])$data[[1]]$y, size = 4, stat = "unique", parse = TRUE ) } + } - res <- list( - "plot_Hill_0" = p_0, - "plot_Hill_1" = p_1, - "plot_Hill_2" = p_2, - "plot_tuckey" = p_var - ) + res <- p_list + if (plot_with_tuckey) { + res[["tuckey"]] <- p_var + } - if (one_plot) { - requireNamespace("patchwork", quietly = TRUE) - if (letters || !plot_with_tuckey) { - res <- ((p_0 + theme(legend.position = "none")) + labs(subtitle = element_blank()) + - (p_1 + theme(legend.position = "none", axis.text.y = element_blank()) + labs(subtitle = element_blank()) + ylab(NULL)) + - (p_2 + theme(legend.position = "none", axis.text.y = element_blank()) + labs(subtitle = element_blank()) + ylab(NULL))) - } else { - res <- ((p_0 + theme(legend.position = "none")) + labs(subtitle = element_blank()) + - (p_1 + theme(legend.position = "none", axis.text.y = element_blank()) + labs(subtitle = element_blank()) + ylab(NULL)) + - (p_2 + theme(legend.position = "none", axis.text.y = element_blank()) + labs(subtitle = element_blank()) + ylab(NULL))) / - p_var + ggtitle("Tuckey HSD testing for differences in mean Hill numbers") - } + if (one_plot) { + requireNamespace("patchwork", quietly = TRUE) + if (letters || !plot_with_tuckey) { + res[["tuckey"]] <- NULL } - return(res) + res <- patchwork::wrap_plots(res) } + return(res) +} ################################################################################ ################################################################################ @@ -1509,44 +1628,53 @@ hill_pq <- #' Please make a reference to `ggstatsplot::ggbetweenstats()` if you #' use this function. -ggbetween_pq <- function(physeq, fact, one_plot = FALSE, rarefy_by_sample = FALSE, ...) { - physeq <- clean_pq(physeq, force_taxa_as_columns = TRUE) +ggbetween_pq <- + function(physeq, + fact, + one_plot = FALSE, + rarefy_by_sample = FALSE, + ...) { + verify_pq(physeq) + physeq <- taxa_as_columns(physeq) - if (rarefy_by_sample) { - physeq <- clean_pq(rarefy_even_depth(physeq)) - } + if (rarefy_by_sample) { + physeq <- clean_pq(rarefy_even_depth(physeq)) + } - if (are_modality_even_depth(physeq, fact)$p.value < 0.05) { - warning(paste0( - "The mean number of sequences per samples vary across modalities of the variable '", - fact, - "' You should use rarefy_by_sample = TRUE or try hill_pq() with correction_for_sample_size = TRUE" - )) - } + if (are_modality_even_depth(physeq, fact)$p.value < 0.05) { + warning( + paste0( + "The mean number of sequences per samples vary across modalities of the variable '", + fact, + "' You should use rarefy_by_sample = TRUE or try hill_pq() with correction_for_sample_size = TRUE" + ) + ) + } - df <- cbind( - "nb_asv" = sample_sums(physeq@otu_table), physeq@sam_data, - "hill_0" = vegan::renyi(physeq@otu_table, scales = 0, hill = TRUE), - "hill_1" = vegan::renyi(physeq@otu_table, scales = 1, hill = TRUE), - "hill_2" = vegan::renyi(physeq@otu_table, scales = 2, hill = TRUE) - ) - fact <- sym(fact) - p0 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_0, ...) - p1 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_1, ...) - p2 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_2, ...) - - res <- list( - "plot_Hill_0" = p0, - "plot_Hill_1" = p1, - "plot_Hill_2" = p2 - ) + df <- cbind( + "nb_asv" = sample_sums(physeq@otu_table), + physeq@sam_data, + "hill_0" = vegan::renyi(physeq@otu_table, scales = 0, hill = TRUE), + "hill_1" = vegan::renyi(physeq@otu_table, scales = 1, hill = TRUE), + "hill_2" = vegan::renyi(physeq@otu_table, scales = 2, hill = TRUE) + ) + fact <- sym(fact) + p0 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_0, ...) + p1 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_1, ...) + p2 <- ggstatsplot::ggbetweenstats(df, !!fact, hill_2, ...) - if (one_plot) { - requireNamespace("patchwork", quietly = TRUE) - res <- res[[1]] + res[[2]] + res[[3]] + res <- list( + "plot_Hill_0" = p0, + "plot_Hill_1" = p1, + "plot_Hill_2" = p2 + ) + + if (one_plot) { + requireNamespace("patchwork", quietly = TRUE) + res <- res[[1]] + res[[2]] + res[[3]] + } + return(res) } - return(res) -} @@ -1559,7 +1687,7 @@ ggbetween_pq <- function(physeq, fact, one_plot = FALSE, rarefy_by_sample = FALS ################################################################################ -#' Summarise a \code{\link{phyloseq-class}} object using a plot. +#' Summarize a \code{\link{phyloseq-class}} object using a plot. #' @description #' `r lifecycle::badge("maturing")` #' @inheritParams clean_pq @@ -1612,12 +1740,17 @@ summary_plot_pq <- function(physeq, ), paste( "Sequences length:\n", - ifelse(is.null(physeq@refseq), + ifelse( + is.null(physeq@refseq), "No refseq slot", paste( - round(mean(Biostrings::width(physeq@refseq)), 2), + round(mean( + Biostrings::width(physeq@refseq) + ), 2), "+/-", - round(stats::sd(Biostrings::width(physeq@refseq)), 2) + round(stats::sd( + Biostrings::width(physeq@refseq) + ), 2) ) ) ) @@ -1715,7 +1848,8 @@ summary_plot_pq <- function(physeq, " ASV)", "\n", "Min seq length: ", - ifelse(is.null(physeq@refseq), + ifelse( + is.null(physeq@refseq), "No refseq slot", min(Biostrings::width(physeq@refseq)) ), @@ -1723,7 +1857,7 @@ summary_plot_pq <- function(physeq, "Max nb seq 1 taxa in 1 sample: ", max(otu_tab), "\n", - "Max nb of sample for one taxa (", + "Max nb of sample for one taxon (", names(sort(taxa_sums(otu_tab > 0), decreasing = TRUE))[1], "): ", max(taxa_sums(otu_tab > 0)), @@ -1797,15 +1931,16 @@ rotl_pq <- function(physeq, taxa_names_rotl <- taxa_names_rotl[!grepl("NA", taxa_names_rotl)] taxa_names_rotl <- c(unclass(gsub("_", " ", taxa_names_rotl))) - resolved_names <- tnrs_match_names(taxa_names_rotl) + resolved_names <- rotl::tnrs_match_names(taxa_names_rotl) resolved_names <- resolved_names[resolved_names$flags == "", ] clean_taxa_names_rotl <- taxa_names_rotl[taxa_names_rotl %in% resolved_names$unique_name] resolved_names2 <- - tnrs_match_names(clean_taxa_names_rotl, context_name = context_name) + rotl::tnrs_match_names(clean_taxa_names_rotl, context_name = context_name) - tr <- tol_induced_subtree(ott_ids = ott_id(resolved_names2)) + tr <- + rotl::tol_induced_subtree(ott_ids = rotl::ott_id(resolved_names2)) return(tr) } ################################################################################ @@ -1831,6 +1966,7 @@ rotl_pq <- function(physeq, #' @examples #' \donttest{ #' if (requireNamespace("metacoder")) { +#' library("metacoder") #' data("GlobalPatterns", package = "phyloseq") #' #' GPsubset <- subset_taxa( @@ -1872,8 +2008,10 @@ heat_tree_pq <- function(physeq, taxonomic_level = NULL, ...) { } data_metacoder <- metacoder::parse_phyloseq(physeq) - data_metacoder$data$taxon_counts <- metacoder::calc_taxon_abund(data_metacoder, data = "otu_table") - data_metacoder$data$taxon_counts$nb_sequences <- rowSums(data_metacoder$data$taxon_counts[, -1]) + data_metacoder$data$taxon_counts <- + metacoder::calc_taxon_abund(data_metacoder, data = "otu_table") + data_metacoder$data$taxon_counts$nb_sequences <- + rowSums(data_metacoder$data$taxon_counts[, -1]) p <- heat_tree(data_metacoder, ...) @@ -1974,13 +2112,15 @@ biplot_pq <- function(physeq, if (sample_sums(physeq)[1] / sample_sums(physeq)[2] > 2 || sample_sums(physeq)[2] / sample_sums(physeq)[1] > 2) { - message(paste0( - "The two modalities differ greatly (more than x2) in their number of sequences (", - sample_sums(physeq)[1], - " vs ", - sample_sums(physeq)[2], - "). You may be interested by the parameter rarefy_after_merging" - )) + message( + paste0( + "The two modalities differ greatly (more than x2) in their number of sequences (", + sample_sums(physeq)[1], + " vs ", + sample_sums(physeq)[2], + "). You may be interested by the parameter rarefy_after_merging" + ) + ) } if (is.null(fact)) { @@ -2011,7 +2151,8 @@ biplot_pq <- function(physeq, if (!is.null(merge_sample_by) && nb_samples_info) { left_name <- paste0(left_name, " (", modality_1_nb, " samples)") - right_name <- paste0(right_name, " (", modality_2_nb, " samples)") + right_name <- + paste0(right_name, " (", modality_2_nb, " samples)") } physeq@sam_data$modality <- modality @@ -2190,19 +2331,23 @@ multi_biplot_pq <- function(physeq, stop("You must set one of split_by or pairs.") } else if (!is.null(pairs) && !is.null(split_by)) { stop("You must set either split_by or pairs, not both.") - } else if (!is.null(split_by) && is.null(physeq@sam_data[[split_by]])) { + } else if (!is.null(split_by) && + is.null(physeq@sam_data[[split_by]])) { stop("split_by must be set and must be a variable in physeq@sam_data") } else if (!is.null(pairs) && is.null(physeq@sam_data[[pairs]])) { stop("pairs must be set and must be a variable in physeq@sam_data") } if (na_remove && !is.null(split_by)) { - new_physeq <- subset_samples_pq(physeq, !is.na(physeq@sam_data[[split_by]])) + new_physeq <- + subset_samples_pq(physeq, !is.na(physeq@sam_data[[split_by]])) if (nsamples(physeq) - nsamples(new_physeq) > 0) { - message(paste0( - nsamples(physeq) - nsamples(new_physeq), - " were discarded due to NA in variables present in formula." - )) + message( + paste0( + nsamples(physeq) - nsamples(new_physeq), + " were discarded due to NA in variables present in formula." + ) + ) } physeq <- new_physeq } @@ -2210,7 +2355,8 @@ multi_biplot_pq <- function(physeq, if (!is.null(pairs)) { p <- list() for (c in levels(as.factor(physeq@sam_data[[pairs]]))) { - new_physeq <- subset_samples_pq(physeq, physeq@sam_data[[pairs]] %in% c) + new_physeq <- + subset_samples_pq(physeq, physeq@sam_data[[pairs]] %in% c) p[[c]] <- biplot_pq(new_physeq, ...) + ggtitle(c) } } else { @@ -2220,8 +2366,9 @@ multi_biplot_pq <- function(physeq, p <- list() for (c in seq_along(ncol(couples))) { names_p <- paste0(couples[1, c], " - ", couples[2, c]) - new_physeq <- subset_samples_pq(physeq, physeq@sam_data[[split_by]] %in% - c(couples[1, c], couples[2, c])) + new_physeq <- + subset_samples_pq(physeq, physeq@sam_data[[split_by]] %in% + c(couples[1, c], couples[2, c])) p[[names_p]] <- biplot_pq(new_physeq, fact = split_by, merge_sample_by = split_by, @@ -2309,12 +2456,15 @@ plot_tax_pq <- na_remove = TRUE, clean_pq = TRUE) { if (na_remove) { - new_physeq <- subset_samples_pq(physeq, !is.na(physeq@sam_data[[fact]])) + new_physeq <- + subset_samples_pq(physeq, !is.na(physeq@sam_data[[fact]])) if (nsamples(physeq) - nsamples(new_physeq) > 0) { - message(paste0( - nsamples(physeq) - nsamples(new_physeq), - " were discarded due to NA in variables present in formula." - )) + message( + paste0( + nsamples(physeq) - nsamples(new_physeq), + " were discarded due to NA in variables present in formula." + ) + ) } physeq <- new_physeq } @@ -2388,9 +2538,11 @@ plot_tax_pq <- title = paste("Total nb of sequences: ", sum(physeq_old@otu_table)), subtitle = paste0( "Nb of samples: '", - paste0(names(table(physeq_old@sam_data[[fact]])), + paste0( + names(table(physeq_old@sam_data[[fact]])), sep = "' : ", - table(physeq_old@sam_data[[fact]]), collapse = " - '" + table(physeq_old@sam_data[[fact]]), + collapse = " - '" ) ) ) @@ -2401,9 +2553,11 @@ plot_tax_pq <- title = paste("Total nb of sequences: ", sum(physeq_old@otu_table)), subtitle = paste0( "Nb of samples: '", - paste0(names(table(physeq_old@sam_data[[fact]])), + paste0( + names(table(physeq_old@sam_data[[fact]])), sep = "' : ", - table(physeq_old@sam_data[[fact]]), collapse = " - '" + table(physeq_old@sam_data[[fact]]), + collapse = " - '" ) ) ) @@ -2476,8 +2630,10 @@ multitax_bar_pq <- function(physeq, group_by(OTU) %>% summarise(Abundance = sum(Abundance)) - psm <- inner_join(psm_2, psm_1[, c("OTU", lvl1, lvl2, lvl3)], - by = join_by("OTU" == "OTU"), multiple = + psm <- inner_join(psm_2, + psm_1[, c("OTU", lvl1, lvl2, lvl3)], + by = join_by("OTU" == "OTU"), + multiple = "first" ) @@ -2511,8 +2667,10 @@ multitax_bar_pq <- function(physeq, summarise(Abundance = sum(Abundance)) %>% filter(Abundance > 0) - psm <- inner_join(psm_2, psm_1[, c("OTU", lvl1, lvl2, lvl3)], - by = join_by("OTU" == "OTU"), multiple = + psm <- inner_join(psm_2, + psm_1[, c("OTU", lvl1, lvl2, lvl3)], + by = join_by("OTU" == "OTU"), + multiple = "first" ) @@ -2572,14 +2730,7 @@ tsne_pq <- theta = 0.0, perplexity = 30, ...) { - physeq <- clean_pq( - physeq, - force_taxa_as_rows = TRUE, - remove_empty_samples = TRUE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE, - silent = TRUE - ) + physeq <- taxa_as_rows(physeq) res_tsne <- Rtsne::Rtsne( @@ -2719,13 +2870,7 @@ SRS_curve_pq <- function(physeq, clean_pq = FALSE, ...) { physeq <- clean_pq(physeq) } - physeq <- clean_pq( - physeq, - force_taxa_as_rows = TRUE, - remove_empty_samples = FALSE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE - ) + physeq <- taxa_as_rows(physeq) df <- data.frame(physeq@otu_table) @@ -2788,7 +2933,7 @@ iNEXT_pq <- function(physeq, ...) { if (!is.null(merge_sample_by)) { physeq <- merge_samples2(physeq, merge_sample_by) - physeq <- clean_pq(physeq, force_taxa_as_columns = TRUE) + physeq <- taxa_as_columns(physeq) } df <- data.frame(t(as.matrix(unclass(physeq@otu_table)))) @@ -2984,18 +3129,20 @@ upset_pq <- function(physeq, colnames(psm2) <- colnames(psm) if (is.null(taxa_fill)) { - p <- ComplexUpset::upset(psm2, intersect = samp_names, ...) + xlab(fact) + p <- + ComplexUpset::upset(psm2, intersect = samp_names, ...) + xlab(fact) } else { - p <- ComplexUpset::upset(psm2, + p <- ComplexUpset::upset( + psm2, intersect = samp_names, - base_annotations = list(), annotations = list( - "ASV" = ( - ggplot(mapping = aes(fill = .data[[taxa_fill]])) + - geom_bar() + - ylab("ASV per Class") + - theme(legend.key.size = unit(0.2, "cm")) + - theme(axis.text = element_text(size = 12))) - ), + base_annotations = list(), + annotations = list("ASV" = ( + ggplot(mapping = aes(fill = .data[[taxa_fill]])) + + geom_bar() + + ylab("ASV per Class") + + theme(legend.key.size = unit(0.2, "cm")) + + theme(axis.text = element_text(size = 12)) + )), ... ) + xlab(fact) } @@ -3161,7 +3308,10 @@ diff_fct_diff_class <- return(names(sort(table(x), decreasing = TRUE)[1])) } } else { - stop(paste0(character_method, " is not a valid method for character_method params.")) + stop(paste0( + character_method, + " is not a valid method for character_method params." + )) } } else if (is.numeric(x)) { return(numeric_fonction(x, ...)) @@ -3176,9 +3326,11 @@ diff_fct_diff_class <- if (logical_method == "NA_if_not_all_TRUE") { if (sum(x, na.rm = TRUE) > 0 && sum(!x, na.rm = TRUE) == 0) { return(TRUE) - } else if (sum(!x, na.rm = TRUE) > 0 && sum(x, na.rm = TRUE) > 0) { + } else if (sum(!x, na.rm = TRUE) > 0 && + sum(x, na.rm = TRUE) > 0) { return(NA) - } else if (sum(!x, na.rm = TRUE) > 0 && sum(x, na.rm = TRUE) == 0) { + } else if (sum(!x, na.rm = TRUE) > 0 && + sum(x, na.rm = TRUE) == 0) { return(FALSE) } } @@ -3189,7 +3341,10 @@ diff_fct_diff_class <- return(FALSE) } } else { - stop(paste0(logical_method, " is not a valid method for character_method params.")) + stop(paste0( + logical_method, + " is not a valid method for character_method params." + )) } } else { stop("At least one column is neither numeric nor character or logical") @@ -3229,23 +3384,29 @@ diff_fct_diff_class <- #' @author Adrien Taudière #' @seealso [plot_tax_pq()] and [multitax_bar_pq()] #' -tax_bar_pq <- function(physeq, fact = "Sample", taxa = "Order", percent_bar = FALSE, nb_seq = TRUE) { - if (!nb_seq) { - physeq <- as_binary_otu_table(physeq) - } - psm <- psmelt(physeq) - if (percent_bar) { - ggplot(psm) + - geom_bar(aes(x = .data[[fact]], fill = .data[[taxa]], y = Abundance), - stat = "identity", position = "fill" - ) - } else { - ggplot(psm) + - geom_bar(aes(x = .data[[fact]], fill = .data[[taxa]], y = Abundance), - stat = "identity" - ) +tax_bar_pq <- + function(physeq, + fact = "Sample", + taxa = "Order", + percent_bar = FALSE, + nb_seq = TRUE) { + if (!nb_seq) { + physeq <- as_binary_otu_table(physeq) + } + psm <- psmelt(physeq) + if (percent_bar) { + ggplot(psm) + + geom_bar(aes(x = .data[[fact]], fill = .data[[taxa]], y = Abundance), + stat = "identity", + position = "fill" + ) + } else { + ggplot(psm) + + geom_bar(aes(x = .data[[fact]], fill = .data[[taxa]], y = Abundance), + stat = "identity" + ) + } } -} ################################################################################ ################################################################################ @@ -3308,7 +3469,8 @@ ridges_pq <- function(physeq, group_by(.data[[fact]], OTU, Class) %>% summarise("count" = n()) - p <- ggplot(psm_asv, aes(y = factor(.data[[fact]]), x = count)) + + p <- + ggplot(psm_asv, aes(y = factor(.data[[fact]]), x = count)) + ggridges::geom_density_ridges( aes(fill = Class), ... @@ -3450,3 +3612,416 @@ treemap_pq <- function(physeq, return(p) } ################################################################################ + + +################################################################################ +#' Plot the partition the variation of a phyloseq object +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @param res_varpart (required) the result of the functions [var_par_pq()] +#' or [var_par_rarperm_pq()] +#' @param cutoff The values below cutoff will not be displayed. +#' @param digits The number of significant digits. +#' @param digits_quantile The number of significant digits for quantile. +#' @param fill_bg Fill colours of ellipses. +#' @param show_quantiles Do quantiles are printed ? +#' @param filter_quantile_zero Do we filter out value with quantile encompassing +#' the zero value? +#' @param show_dbrda_signif Do dbrda significance for each component is printed +#' using *? +#' @param show_dbrda_signif_pval (float, `[0:1]`) The value under which the +#' dbrda is considered significant. +#' @param alpha (int, `[0:255]`) Transparency of the fill colour. +#' @param id.size A numerical value giving the character expansion factor for the names of circles or ellipses. +#' @param min_prop_pval_signif_dbrda (float, `[0:1]`) Only used if using the +#' result of [var_par_rarperm_pq()] function. The * for dbrda_signif is only add if +#' at least `min_prop_pval_signif_dbrda` of permutations show significance. +#' +#' @return A plot +#' @export +#' @author Adrien Taudière +#' @seealso [var_par_rarperm_pq()], [var_par_pq()] +#' @examples +#' \donttest{ +#' if (requireNamespace("vegan")) { +#' data_fungi_woNA <- subset_samples(data_fungi, !is.na(Time) & !is.na(Height)) +#' res_var_9 <- var_par_rarperm_pq( +#' data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' nperm = 9, +#' dbrda_computation = TRUE +#' ) +#' res_var_2 <- var_par_rarperm_pq( +#' data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' nperm = 2, +#' dbrda_computation = TRUE +#' ) +#' res_var0 <- var_par_pq(data_fungi_woNA, +#' list_component = list( +#' "Time" = c("Time"), +#' "Size" = c("Height", "Diameter") +#' ), +#' dbrda_computation = TRUE +#' ) +#' plot_var_part_pq(res_var0, digits_quantile = 2, show_dbrda_signif = TRUE) +#' plot_var_part_pq(res_var_9, +#' digits_quantile = 2, show_quantiles = TRUE, +#' show_dbrda_signif = TRUE +#' ) +#' plot_var_part_pq( +#' res_var_2, +#' digits = 5, +#' digits_quantile = 2, +#' cutoff = 0, +#' show_quantiles = TRUE +#' ) +#' } +#' } +#' @importFrom stats anova as.formula quantile +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `vegan::varpart()` if you +#' use this function. +plot_var_part_pq <- + function(res_varpart, + cutoff = 0, + digits = 1, + digits_quantile = 2, + fill_bg = c("seagreen3", "mediumpurple", "blue", "orange"), + show_quantiles = FALSE, + filter_quantile_zero = TRUE, + show_dbrda_signif = FALSE, + show_dbrda_signif_pval = 0.05, + alpha = 63, + id.size = 1.2, + min_prop_pval_signif_dbrda = 0.95) { + if (show_dbrda_signif_pval > 1 | show_dbrda_signif_pval < 0) { + stop("show_dbrda_signif_pval value must be within the range [0-1]") + } + if (min_prop_pval_signif_dbrda > 1 | + min_prop_pval_signif_dbrda < 0) { + stop("show_dbrda_signif_pval value must be within the range [0-1]") + } + x <- res_varpart$part + vals <- x$indfract$Adj.R.square + is.na(vals) <- vals < cutoff + vals <- round(vals, digits + 1) + labs_text <- format(vals, digits = digits, nsmall = digits + 1) + labs_text <- gsub("NA", "", labs_text) + if (show_quantiles) { + labs_text <- paste0( + labs_text, + "\n (", + round(x$indfract$Adj.R.squared_quantil_min, digits_quantile + 1), + "...", + round(x$indfract$Adj.R.squared_quantil_max, digits_quantile + 1), + ")" + ) + labs_text[is.na(vals)] <- "" + } + + if (filter_quantile_zero) { + labs_text[x$indfract$Adj.R.squared_quantil_min < 0] <- "" + } + + if (show_dbrda_signif) { + if (is.null(res_varpart$dbrda_result_prop_pval_signif)) { + cond <- + c(1:length(res_varpart$dbrda_result))[sapply(res_varpart$dbrda_result, function(x) { + x$`Pr(>F)`[[1]] < show_dbrda_signif_pval + })] + res_varpart$Xnames[cond] <- + paste0(res_varpart$Xnames[cond], "*") + } else { + cond <- + c(1:length(res_varpart$dbrda_result))[res_varpart$dbrda_result_prop_pval_signif >= + min_prop_pval_signif_dbrda] + res_varpart$Xnames[cond] <- + paste0(res_varpart$Xnames[cond], "*") + } + } + + vegan::showvarparts( + x$nsets, + labs_text, + bg = fill_bg, + alpha = alpha, + id.size = id.size, + Xnames = res_varpart$Xnames + ) + if (any(is.na(vals))) { + graphics::mtext(paste("Values <", cutoff, " not shown", sep = ""), 1) + } + if (sum(x$indfract$Adj.R.squared_quantil_min) > 0 && + filter_quantile_zero) { + graphics::mtext( + paste("Values with min quantile <0 not shown", sep = ""), + side = 1, + line = 1 + ) + } + if (show_dbrda_signif) { + if (is.null(res_varpart$dbrda_result_prop_pval_signif)) { + graphics::mtext( + paste( + "* indicate significant anova of dbRDA for each component at p=", + show_dbrda_signif_pval, + sep = "" + ) + ) + } else { + graphics::mtext( + paste( + "* indicate significant anova of dbRDA, for each component, in at least ", + round(min_prop_pval_signif_dbrda * 100, 2), + "% of rarefaction permutations", + sep = "" + ) + ) + } + } + return(invisible()) + } +################################################################################ + + +################################################################################ +#' Scatterplot with marginal distributions and statistical results against +#' Hill diversity of phyloseq object +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' Basically a wrapper of function [ggstatsplot::ggscatterstats()] for +#' object of class phyloseq and Hill number. +#' +#' @inheritParams clean_pq +#' @param num_modality (required) Name of the numeric column in +#' `physeq@sam_data` to plot and test against hill numberk +#' @param hill_scales (a vector of integer) The list of q values to compute +#' the hill number H^q. If Null, no hill number are computed. Default value +#' compute the Hill number 0 (Species richness), the Hill number 1 +#' (exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +#' Index). +#' @param rarefy_by_sample (logical, default FALSE) If TRUE, rarefy +#' samples using [phyloseq::rarefy_even_depth()] function. +#' @param one_plot (logical, default FALSE) If TRUE, return a unique +#' plot with the three plot inside using the patchwork package. +#' @param ... Other arguments passed on to [ggstatsplot::ggscatterstats()] +#' function. +#' +#' @return Either an unique ggplot2 object (if one_plot is TRUE) or +#' a list of ggplot2 plot for each hill_scales. +#' @export +#' @author Adrien Taudière +#' +#' @examples +#' if (requireNamespace("ggstatsplot")) { +#' ggscatt_pq(data_fungi_mini, "Time", type = "non-parametric") +#' ggscatt_pq(data_fungi_mini, "Time", hill_scales = 1:4, type = "parametric") +#' ggscatt_pq(data_fungi_mini, "Sample_id", +#' hill_scales = c(0, 0.5), +#' one_plot = FALSE +#' ) +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `ggstatsplot::ggscatterstats()` if you +#' use this function. +#' @seealso [ggbetween_pq()] +ggscatt_pq <- function(physeq, + num_modality, + hill_scales = c(0, 1, 2), + rarefy_by_sample = FALSE, + one_plot = TRUE, + ...) { + verify_pq(physeq) + physeq <- clean_pq(physeq, force_taxa_as_columns = TRUE) + + if (rarefy_by_sample) { + physeq <- clean_pq(rarefy_even_depth(physeq)) + } + + p_list <- list() + psm_res <- psmelt_samples_pq(physeq, hill_scales = hill_scales) + for (i in seq_along(hill_scales)) { + p_list[[i]] <- + ggstatsplot::ggscatterstats( + psm_res, !!paste0("Hill_", hill_scales[[i]]), !!num_modality, + ... + ) + } + + if (one_plot) { + return(patchwork::wrap_plots(p_list)) + } else { + return(p_list) + } +} +################################################################################ + + + + + +################################################################################ +#' Alluvial plot for taxonomy and samples factor vizualisation +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' Basically a wrapper of [ggalluvial](https://corybrunson.github.io/ggalluvial/index.html) +#' package +#' +#' @inheritParams clean_pq +#' @param taxa_ranks A vector of taxonomic ranks. For examples c("Family","Genus"). +#' If taxa ranks is not set +#' (default value = c("Phylum", "Class", "Order", "Family")). +#' @param wrap_factor A name to determine +#' which samples to merge using [merge_samples2()] function. +#' Need to be in \code{physeq@sam_data}. +#' Need to be use when you want to wrap by factor the final plot +#' with the number of taxa (type="nb_asv") +#' @param by_sample (logical) If FALSE (default), sample information is not taking +#' into account, so the taxonomy is studied globally. If fact is not NULL, by_sample +#' is automatically set to TRUE. +#' @param rarefy_by_sample (logical, default FALSE) If TRUE, rarefy +#' samples using [phyloseq::rarefy_even_depth()] function. +#' @param fact (required) Name of the factor in `physeq@sam_data` used to plot the last column +#' @param type If "nb_seq" (default), the number of sequences is +#' used in plot. If "nb_asv", the number of ASV is plotted. +#' @param width (passed on to [ggalluvial::geom_flow()]) the width of each stratum, +#' as a proportion of the distance between axes. Defaults to 1/3. +#' @param min.size (passed on to [ggfittext::geom_fit_text()]) Minimum font size, +#' in points. Text that would need to be shrunk below this size to fit the box will +#' be hidden. Defaults to 4 pt. +#' @param na_remove (logical, default FALSE) If set to TRUE, remove samples with +#' NA in the variables set in formula. +#' @param use_ggfittext (logical, default FALSE) Do we use ggfittext to plot labels? +#' @param use_geom_label (logical, default FALSE) Do we use geom_label to plot labels? +#' @param size_lab Size for label if use_ggfittext is FALSE +#' @param ... Other arguments passed on to [ggalluvial::geom_flow()] function. +#' +#' @return A ggplot object +#' @export +#' @author Adrien Taudière +#' @examples +#' if (requireNamespace("ggalluvial")) { +#' ggaluv_pq(data_fungi_mini) +#' } +#' \donttest{ +#' if (requireNamespace("ggalluvial")) { +#' ggaluv_pq(data_fungi_mini, type = "nb_asv") +#' +#' ggaluv_pq(data_fungi_mini, wrap_factor = "Height", by_sample = TRUE, type = "nb_asv") + +#' facet_wrap("Height") +#' +#' ggaluv_pq(data_fungi_mini, +#' width = 0.9, min.size = 10, +#' type = "nb_asv", taxa_ranks = c("Phylum", "Class", "Order", "Family", "Genus") +#' ) + +#' coord_flip() + scale_x_discrete(limits = rev) +#' } +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `ggalluvial` package if you +#' use this function. +#' @seealso [sankey_pq()] +ggaluv_pq <- function(physeq, + taxa_ranks = c("Phylum", "Class", "Order", "Family"), + wrap_factor = NULL, + by_sample = FALSE, + rarefy_by_sample = FALSE, + fact = NULL, + type = "nb_seq", + width = 1.2, + min.size = 3, + na_remove = FALSE, + use_ggfittext = FALSE, + use_geom_label = FALSE, + size_lab = 2, + ...) { + verify_pq(physeq) + if (rarefy_by_sample) { + physeq <- rarefy_even_depth(physeq) + } + + if (na_remove && !is.null(fact)) { + physeq <- subset_samples_pq(physeq, !is.na(physeq@sam_data[[fact]])) + } + + if (!is.null(wrap_factor)) { + physeq <- + merge_samples2(physeq, physeq@sam_data[[wrap_factor]]) + } else if (!by_sample || !is.null(fact)) { + physeq <- + merge_samples2(physeq, group = rep("all_samples_together", nsamples(physeq))) + } + + if (type == "nb_asv") { + physeq <- as_binary_otu_table(physeq) + } else if (type != "nb_seq") { + stop("Type must be eiter nb_seq or nb_asv") + } + + psm_samp <- + psmelt_samples_pq( + physeq, + taxa_ranks = taxa_ranks, + hill_scales = NULL, + rarefy_by_sample = FALSE + ) + + if (is.null(fact)) { + psm_samp <- ggalluvial::to_lodes_form(psm_samp, axes = taxa_ranks) + } else { + psm_samp <- ggalluvial::to_lodes_form(psm_samp, axes = c(taxa_ranks, fact)) + } + + p <- ggplot( + data = psm_samp, + aes( + alluvium = alluvium, + x = x, + stratum = stratum, + y = Abundance, + fill = after_stat(stratum) + ) + ) + + ggalluvial::geom_flow(...) + + ggalluvial::geom_stratum() + + theme_minimal() + + theme(legend.position = "none") + + if (use_ggfittext) { + p <- p + + ggfittext::geom_fit_text( + aes(label = stratum), + stat = "stratum", + width = width, + min.size = min.size + ) + } else if (use_geom_label) { + p <- p + + geom_label( + aes(label = stratum), + stat = "stratum", + size = size_lab + ) + } + + if (!is.null(wrap_factor)) { + p <- p + facet_wrap(wrap_factor) + } + return(p) +} +################################################################################ diff --git a/R/speedyseq_functions.R b/R/speedyseq_functions.R index bab3473a..3bded139 100644 --- a/R/speedyseq_functions.R +++ b/R/speedyseq_functions.R @@ -75,7 +75,7 @@ setMethod( x <- prune_taxa(!is.na(group), x) group <- group[!is.na(group)] } - # Get the merged otu table with new taxa named by most abundant + # Get the merged otu_table with new taxa named by most abundant otu <- merge_taxa_vec(otu_table(x), group, reorder = reorder) # Adjust taxonomy if necessary if (!is.null(x@tax_table) & tax_adjust != 0) { @@ -326,7 +326,7 @@ bad_flush_right <- function(x, bad = "BAD", na_bad = FALSE, k = length(x)) { #' @param x A `phyloseq`, `otu_table`, or `sample_data` object #' @param group A sample variable or a vector of length `nsamples(x)` defining #' the sample grouping. A vector must be supplied if x is an otu_table -#' @param fun_otu Function for combining abundances in the otu table; default +#' @param fun_otu Function for combining abundances in the otu_table; default #' is `sum`. Can be a formula to be converted to a function by #' [purrr::as_mapper()] #' @param funs Named list of merge functions for sample variables; default is @@ -436,7 +436,7 @@ setMethod( x.merged <- x.merged %>% tibble::column_to_rownames(".group") } - # Return an otu table in the proper orientation + # Return an otu_table in the proper orientation x.merged <- x.merged %>% otu_table(taxa_are_rows = FALSE) if (needs_flip) { x.merged <- t(x.merged) diff --git a/R/table_functions.R b/R/table_functions.R index 344c29b9..76c29399 100644 --- a/R/table_functions.R +++ b/R/table_functions.R @@ -129,11 +129,7 @@ compare_pairs_pq <- function(physeq = NULL, nb_min_seq = 0, veg_index = "shannon", na_remove = TRUE) { - physeq <- clean_pq(physeq, - clean_samples_names = FALSE, - force_taxa_as_columns = TRUE, - silent = TRUE - ) + physeq <- taxa_as_columns(physeq) if (na_remove) { new_physeq <- subset_samples_pq(physeq, !is.na(physeq@sam_data[[bifactor]])) @@ -335,9 +331,9 @@ compare_pairs_pq <- function(physeq = NULL, #' taxonomic_levels = c("Order", "Family", "Genus", "Species"), #' formattable_args = list( #' Order = FALSE, -#' Species = formatter( +#' Species = formattable::formatter( #' "span", -#' style = x ~ style( +#' style = x ~ formattable::style( #' "font-style" = "italic", #' `color` = ifelse(is.na(x), "white", "grey") #' ) @@ -346,7 +342,9 @@ compare_pairs_pq <- function(physeq = NULL, #' arrange_by = "Family", #' descending_order = FALSE #' ) -#' +#' } +#' \donttest{ +#' if (requireNamespace("formattable")) { #' ## Distribution of the nb of sequences per OTU across Height modality #' ## (nb of sequences are log-transformed). #' ## OTU name background is light gray for Basidiomycota @@ -358,58 +356,62 @@ compare_pairs_pq <- function(physeq = NULL, #' taxonomic_levels = c("Phylum", "Family", "Genus"), #' void_style = TRUE, #' formattable_args = list( -#' OTU = formatter( +#' OTU = formattable::formatter( #' "span", -#' style = ~ style( +#' style = ~ formattable::style( #' "display" = "block", #' `border-radius` = "5px", #' `background-color` = ifelse(Phylum == "Basidiomycota", transp("gray"), "gray") #' ), #' `padding-right` = "2px" #' ), -#' High = formatter( +#' High = formattable::formatter( #' "span", -#' style = x ~ style( +#' style = x ~ formattable::style( #' "font-size" = "80%", #' "display" = "inline-block", #' direction = "rtl", #' `border-radius` = "0px", #' `padding-right` = "2px", -#' `background-color` = csscolor(gradient( +#' `background-color` = formattable::csscolor(formattable::gradient( #' as.numeric(x), transp("#1a91ff"), "#1a91ff" #' )), -#' width = percent(proportion(as.numeric(x), na.rm = TRUE)) +#' width = formattable::percent(formattable::proportion(as.numeric(x), na.rm = TRUE)) #' ) #' ), -#' Low = formatter( +#' Low = formattable::formatter( #' "span", -#' style = x ~ style( +#' style = x ~ formattable::style( #' "font-size" = "80%", #' "display" = "inline-block", #' direction = "rtl", #' `border-radius` = "0px", #' `padding-right` = "2px", -#' `background-color` = csscolor(gradient(as.numeric(x), transp("green"), "green")), -#' width = percent(proportion(as.numeric(x), na.rm = TRUE)) +#' `background-color` = formattable::csscolor(formattable::gradient( +#' as.numeric(x), +#' transp("green"), "green" +#' )), +#' width = formattable::percent(formattable::proportion(as.numeric(x), na.rm = TRUE)) #' ) #' ), -#' Middle = formatter( +#' Middle = formattable::formatter( #' "span", -#' style = x ~ style( +#' style = x ~ formattable::style( #' "font-size" = "80%", #' "display" = "inline-block", #' direction = "rtl", #' `border-radius` = "0px", #' `padding-right` = "2px", -#' `background-color` = csscolor(gradient( +#' `background-color` = formattable::csscolor(formattable::gradient( #' as.numeric(x), transp("orange"), "orange" #' )), -#' width = percent(proportion(as.numeric(x), na.rm = TRUE)) +#' width = formattable::percent(formattable::proportion(as.numeric(x), na.rm = TRUE)) #' ) #' ) #' ) #' ) #' } +#' } #' @details #' This function is mainly a wrapper of the work of others. #' Please make a reference to `formattable::formattable()` if you @@ -518,7 +520,7 @@ formattable_pq <- function(physeq, `background-color` = ifelse(x == 0, "white", formattable::csscolor( formattable::gradient(as.numeric(x), transp("#1a9641ff"), "#1a9641ff") )), - width = formattable::percent(proportion(as.numeric(x), na.rm = TRUE)) + width = formattable::percent(formattable::proportion(as.numeric(x), na.rm = TRUE)) ) ), Family = formattable::formatter( @@ -551,7 +553,7 @@ formattable_pq <- function(physeq, `background-color` = ifelse(x == 0, "white", formattable::csscolor( formattable::gradient(as.numeric(x), transp("#4d4888ff"), "#4d4888ff") )), - width = formattable::percent(proportion(as.numeric(x), na.rm = TRUE)) + width = formattable::percent(formattable::proportion(as.numeric(x), na.rm = TRUE)) ) ), proportion_samp = formattable::formatter( diff --git a/R/tuckeyTest.R b/R/tuckeyTest.R deleted file mode 100644 index da50526c..00000000 --- a/R/tuckeyTest.R +++ /dev/null @@ -1,116 +0,0 @@ -################################################################################ -#' Calculate hill number and compute Tuckey post-hoc test -#' @description -#' -#' `r lifecycle::badge("maturing")` -#' Note that, by default, this function use a sqrt of the read numbers in the linear -#' model in order to correct for uneven sampling depth. -#' @aliases hill_tuckey_pq -#' @inheritParams clean_pq -#' @param modality (required) the variable to test -#' @param silent (logical) If TRUE, no message are printing. -#' @param correction_for_sample_size (logical, default TRUE) This function -#' use a sqrt of the read numbers in the linear model in order to -#' correct for uneven sampling depth. -#' @return A ggplot2 object -#' -#' @export -#' -#' @author Adrien Taudière -#' @examples -#' data("GlobalPatterns", package = "phyloseq") -#' GlobalPatterns@sam_data[, "Soil_logical"] <- -#' ifelse(GlobalPatterns@sam_data[, "SampleType"] == "Soil", "Soil", "Not Soil") -#' hill_tuckey_pq(GlobalPatterns, "Soil_logical") -hill_tuckey_pq <- function( - physeq, - modality, - silent = TRUE, - correction_for_sample_size = TRUE) { - modality_vector <- - as.factor(as.vector(unlist(unclass(physeq@sam_data[, modality])))) - - if (length(modality_vector) != dim(physeq@otu_table)[2]) { - physeq@otu_table <- t(physeq@otu_table) - } - read_numbers <- apply(physeq@otu_table, 2, sum) - - physeq <- clean_pq(physeq, - force_taxa_as_rows = TRUE, - remove_empty_samples = FALSE, - remove_empty_taxa = FALSE, - clean_samples_names = FALSE, - silent = silent - ) - otu_hill <- - vegan::renyi(t(physeq@otu_table), - scales = c(0, 1, 2), - hill = TRUE - ) - - hill_1 <- otu_hill$"0" - hill_2 <- otu_hill$"1" - hill_3 <- otu_hill$"2" - - if (correction_for_sample_size) { - tuk1 <- - stats::TukeyHSD(stats::aov(lm(hill_1 ~ sqrt(read_numbers))$residuals ~ modality_vector)) - tuk2 <- - stats::TukeyHSD(stats::aov(lm(hill_2 ~ sqrt(read_numbers))$residuals ~ modality_vector)) - tuk3 <- - stats::TukeyHSD(stats::aov(lm(hill_3 ~ sqrt(read_numbers))$residuals ~ modality_vector)) - } else { - tuk1 <- - stats::TukeyHSD(stats::aov(hill_1 ~ modality_vector)) - tuk2 <- - stats::TukeyHSD(stats::aov(hill_2 ~ modality_vector)) - tuk3 <- - stats::TukeyHSD(stats::aov(hill_3 ~ modality_vector)) - } - - df <- - rbind( - tuk1$modality_vector, - tuk2$modality_vector, - tuk3$modality_vector - ) - df <- - data.frame( - df, - "x" = paste( - "Hill Number", - c( - rep("0", dim( - tuk3$modality_vector - )[1]), - rep("1", dim( - tuk3$modality_vector - )[1]), - rep("2", dim( - tuk3$modality_vector - )[1]) - ), - rownames(tuk1$modality_vector) - ), - "modality" = - rownames(tuk1$modality_vector) - ) - - p <- ggplot(data = df) + - geom_linerange(aes(ymax = upr, ymin = lwr, x = x), linewidth = 2) + - geom_point(aes(x = x, y = diff), - size = 4, - shape = 21, - fill = "white" - ) + - coord_flip() + - theme_gray() + - geom_hline(yintercept = 0) + - ylab("Differences in mean levels (value and confidence intervals at 95%)") + - xlab("") + - ggtitle("Results of the Tuckey HSD testing for differences - in mean Hill numbers") - - return(p) -} -################################################################################ diff --git a/README.Rmd b/README.Rmd index df53ea02..86fe5107 100644 --- a/README.Rmd +++ b/README.Rmd @@ -30,15 +30,13 @@ knitr::opts_chunk$set( See the pkgdown documentation site [here](https://adrientaudiere.github.io/MiscMetabar/) and the [package paper](https://doi.org/10.21105/joss.06038) in the Journal Of Open Softwares. -Biological studies, especially in ecology, health sciences and taxonomy, need to describe the biological composition of samples. During the last twenty years, (i) the development of DNA sequencing, (ii) reference databases, (iii) high-throughput sequencing (HTS), and (iv) bioinformatics resources have allowed the description of biological communities through metabarcoding. Metabarcoding involves the sequencing of millions (*meta*-) of short regions of specific DNA (*-barcoding*, @valentini2009) often from environmental samples (eDNA, @taberlet2012) such as human stomach contents, lake water, soil and air. - -`MiscMetabar` aims to facilitate the **description**, **transformation**, **exploration** and **reproducibility** of metabarcoding analysis using R. The development of `MiscMetabar` relies heavily on the R packages [`dada2`](https://benjjneb.github.io/dada2/index.html) [@callahan2016], [`phyloseq`](https://joey711.github.io/phyloseq/) [@mcmurdie2013] and [`targets`](https://books.ropensci.org/targets/) [@landau2021]. - +Biological studies, especially in ecology, health sciences and taxonomy, need to describe the biological composition of samples. Over the last twenty years, (i) the development of DNA sequencing, (ii) reference databases, (iii) high-throughput sequencing (HTS), and (iv) bioinformatics resources have enabled the description of biological communities through metabarcoding. Metabarcoding involves the sequencing of millions (*meta*-) of short regions of specific DNA (*-barcoding*, @valentini2009) often from environmental samples (eDNA, @taberlet2012) such as human stomach contents, lake water, soil, and air. +`MiscMetabar` aims to facilitate the **description**, **transformation**, **exploration** and **reproducibility** of metabarcoding analyses using R. The development of `MiscMetabar` relies heavily on the R packages [`dada2`](https://benjjneb.github.io/dada2/index.html) [@callahan2016], [`phyloseq`](https://joey711.github.io/phyloseq/) [@mcmurdie2013] and [`targets`](https://books.ropensci.org/targets/) [@landau2021]. ## Installation -There is a CRAN version of MiscMetabar. +A CRAN version of MiscMetabar is available. ```{r, results = 'hide', eval=FALSE} install.packages("MiscMetabar") @@ -68,13 +66,13 @@ devtools::install_github("adrientaudiere/MiscMetabar", ref = "dev") See articles in the [MiscMetabar](https://adrientaudiere.github.io/MiscMetabar/) website for more examples. -For an introduction to metabarcoding in R, Please visite the [state of the field](https://adrientaudiere.github.io/MiscMetabar/articles/states_of_fields_in_R.html) articles. The [import, export and track](https://adrientaudiere.github.io/MiscMetabar/articles/import_export_track.html) article explains how import and export `phyloseq` object. Its also show how to summarize useful information (number of sequences, samples and clusters) accross bioinformatic pipelines. The article [explore data](https://adrientaudiere.github.io/MiscMetabar/articles/explore_data.html) takes a closer look to different way of explore samples and taxonomical data from `phyloseq` object. +For an introduction to metabarcoding in R, see the [state of the field](https://adrientaudiere.github.io/MiscMetabar/articles/states_of_fields_in_R.html) article. The [import, export and tracking](https://adrientaudiere.github.io/MiscMetabar/articles/import_export_track.html) article explains how to import and export `phyloseq` objects. It also shows how to summarize useful information (number of sequences, samples and clusters) across bioinformatic pipelines. The article [explore data](https://adrientaudiere.github.io/MiscMetabar/articles/explore_data.html) takes a closer look at different ways to explore samples and taxonomic data from `phyloseq` object. If you are interested in ecological metrics, see the articles describing [alpha-diversity](https://adrientaudiere.github.io/MiscMetabar/articles/alpha-div.html) and [beta-diversity](https://adrientaudiere.github.io/MiscMetabar/articles/beta-div.html) analysis. -The article [filter taxa and samples](https://adrientaudiere.github.io/MiscMetabar/articles/filter.html) describes some data-filtering processes using MiscMetabar and the [reclustering](https://adrientaudiere.github.io/MiscMetabar/articles/Reclustering.html) tutorial introduces the different way of clustering already-clustered OTU/ASV. The article [tengeler](https://adrientaudiere.github.io/MiscMetabar/articles/tengeler.html) explore the dataset from Tengeler et al. (2020) using some MiscMetabar functions. +The article [filter taxa and samples](https://adrientaudiere.github.io/MiscMetabar/articles/filter.html) describes some data filtering processes using MiscMetabar and the [reclustering](https://adrientaudiere.github.io/MiscMetabar/articles/Reclustering.html) tutorial introduces the different way of clustering already-clustered OTU/ASV. The article [tengeler](https://adrientaudiere.github.io/MiscMetabar/articles/tengeler.html) explore the dataset from Tengeler et al. (2020) using some MiscMetabar functions. -For developers, I also wrote a article describing som [rules of codes](https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html). +For developers, I also wrote an article describing some [rules of codes](https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html). ### Summarize a physeq object @@ -89,7 +87,7 @@ summary_plot_pq(data_fungi) ### Alpha-diversity analysis ```{r, fig.cap="Hill number 1"} -p <- MiscMetabar::hill_pq(data_fungi, variable = "Height") +p <- MiscMetabar::hill_pq(data_fungi, fact = "Height") p$plot_Hill_0 ``` @@ -108,11 +106,11 @@ ggvenn_pq(data_fungi, fact = "Height") + labs(title = "Share number of ASV among Height in tree") ``` -### Note for non-linux users +### Note for non-Linux users -Some functions may not work on windows (*e.g.* `track_wflow()`, `cutadapt_remove_primers()`, `krona()`, `vsearch_clustering()`, ...). A solution is to exploit docker container, for example the using the great [rocker project](https://rocker-project.org/). +Some functions may not work on Windows (*e.g.* `track_wflow()`, `cutadapt_remove_primers()`, `krona()`, `vsearch_clustering()`, ...). A solution is to exploit docker container, for example the using the great [rocker project](https://rocker-project.org/). -Here is a list of functions with some limitations or not working at all on windows OS: +Here is a list of functions with some limitations or not working at all on Windows OS: - `build_phytree_pq()` - `count_seq()` @@ -129,9 +127,9 @@ Here is a list of functions with some limitations or not working at all on windo - `tsne_pq()` - `venn_pq()` -MiscMetabar is developed under Linux and the vast majority of functions may works on Unix system, but its functionning is not test under iOS. +MiscMetabar is developed under Linux and the vast majority of functions may works on Unix system, but its functionning is not tested under iOS. -### Installation of other softwares for debian Linux distributions +### Installation of other softwares for Debian Linux distributions If you encounter any errors or have any questions about the installation of these softwares, please visit their dedicated websites. diff --git a/README.md b/README.md index dd8647e7..1f1ba3bc 100644 --- a/README.md +++ b/README.md @@ -21,19 +21,19 @@ paper](https://doi.org/10.21105/joss.06038) in the Journal Of Open Softwares. Biological studies, especially in ecology, health sciences and taxonomy, -need to describe the biological composition of samples. During the last +need to describe the biological composition of samples. Over the last twenty years, (i) the development of DNA sequencing, (ii) reference databases, (iii) high-throughput sequencing (HTS), and (iv) -bioinformatics resources have allowed the description of biological +bioinformatics resources have enabled the description of biological communities through metabarcoding. Metabarcoding involves the sequencing of millions (*meta*-) of short regions of specific DNA (*-barcoding*, Valentini, Pompanon, and Taberlet (2009)) often from environmental samples (eDNA, Taberlet et al. (2012)) such as human stomach contents, -lake water, soil and air. +lake water, soil, and air. `MiscMetabar` aims to facilitate the **description**, **transformation**, **exploration** and **reproducibility** of -metabarcoding analysis using R. The development of `MiscMetabar` relies +metabarcoding analyses using R. The development of `MiscMetabar` relies heavily on the R packages [`dada2`](https://benjjneb.github.io/dada2/index.html) (Callahan et al. 2016), [`phyloseq`](https://joey711.github.io/phyloseq/) (McMurdie and @@ -42,7 +42,7 @@ Holmes 2013) and [`targets`](https://books.ropensci.org/targets/) ## Installation -There is a CRAN version of MiscMetabar. +A CRAN version of MiscMetabar is available. ``` r install.packages("MiscMetabar") @@ -74,16 +74,15 @@ See articles in the [MiscMetabar](https://adrientaudiere.github.io/MiscMetabar/) website for more examples. -For an introduction to metabarcoding in R, Please visite the [state of -the +For an introduction to metabarcoding in R, see the [state of the field](https://adrientaudiere.github.io/MiscMetabar/articles/states_of_fields_in_R.html) -articles. The [import, export and -track](https://adrientaudiere.github.io/MiscMetabar/articles/import_export_track.html) -article explains how import and export `phyloseq` object. Its also show -how to summarize useful information (number of sequences, samples and -clusters) accross bioinformatic pipelines. The article [explore +article. The [import, export and +tracking](https://adrientaudiere.github.io/MiscMetabar/articles/import_export_track.html) +article explains how to import and export `phyloseq` objects. It also +shows how to summarize useful information (number of sequences, samples +and clusters) across bioinformatic pipelines. The article [explore data](https://adrientaudiere.github.io/MiscMetabar/articles/explore_data.html) -takes a closer look to different way of explore samples and taxonomical +takes a closer look at different ways to explore samples and taxonomic data from `phyloseq` object. If you are interested in ecological metrics, see the articles describing @@ -92,7 +91,7 @@ and [beta-diversity](https://adrientaudiere.github.io/MiscMetabar/articles/beta-div.html) analysis. The article [filter taxa and samples](https://adrientaudiere.github.io/MiscMetabar/articles/filter.html) -describes some data-filtering processes using MiscMetabar and the +describes some data filtering processes using MiscMetabar and the [reclustering](https://adrientaudiere.github.io/MiscMetabar/articles/Reclustering.html) tutorial introduces the different way of clustering already-clustered OTU/ASV. The article @@ -100,7 +99,7 @@ OTU/ASV. The article explore the dataset from Tengeler et al. (2020) using some MiscMetabar functions. -For developers, I also wrote a article describing som [rules of +For developers, I also wrote an article describing some [rules of codes](https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html). ### Summarize a physeq object @@ -118,32 +117,16 @@ summary_plot_pq(data_fungi) ### Alpha-diversity analysis ``` r -p <- MiscMetabar::hill_pq(data_fungi, variable = "Height") +p <- MiscMetabar::hill_pq(data_fungi, fact = "Height") p$plot_Hill_0 +#> NULL ``` -
- -Hill number 1 -

-Hill number 1 -

- -
- ``` r p$plot_tuckey +#> NULL ``` -
- -Result of the Tuckey post-hoc test -

-Result of the Tuckey post-hoc test -

- -
- ### Beta-diversity analysis ``` r @@ -157,15 +140,15 @@ ggvenn_pq(data_fungi, fact = "Height") + -### Note for non-linux users +### Note for non-Linux users -Some functions may not work on windows (*e.g.* `track_wflow()`, +Some functions may not work on Windows (*e.g.* `track_wflow()`, `cutadapt_remove_primers()`, `krona()`, `vsearch_clustering()`, …). A solution is to exploit docker container, for example the using the great [rocker project](https://rocker-project.org/). Here is a list of functions with some limitations or not working at all -on windows OS: +on Windows OS: - `build_phytree_pq()` - `count_seq()` @@ -183,9 +166,9 @@ on windows OS: - `venn_pq()` MiscMetabar is developed under Linux and the vast majority of functions -may works on Unix system, but its functionning is not test under iOS. +may works on Unix system, but its functionning is not tested under iOS. -### Installation of other softwares for debian Linux distributions +### Installation of other softwares for Debian Linux distributions If you encounter any errors or have any questions about the installation of these softwares, please visit their dedicated websites. diff --git a/_pkgdown.yml b/_pkgdown.yml index 4b8cb53e..2c1d9a25 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -53,9 +53,6 @@ reference: - title: Transform phyloseq object - subtitle: Taxonomy contents: - - add_funguild_info - - funguild_assign - - get_funguild_db - add_new_taxonomy_pq - add_blast_info - blast_to_phyloseq @@ -73,6 +70,8 @@ reference: contents: - as_binary_otu_table - normalize_prop_pq + - taxa_as_rows + - taxa_as_columns - subtitle: Subset/merge taxa contents: @@ -83,8 +82,10 @@ reference: - subtitle: Subset/merge samples contents: - - select_one_sample - merge_samples2 + - rarefy_sample_count_by_modality + - select_one_sample + - subset_samples_pq - subtitle: (Re)clustering OTU table contents: @@ -102,7 +103,7 @@ reference: - subtitle: Sample data contents: - - subset_samples_pq + - add_info_to_sam_data - subtitle: Phylogenetic tree contents: @@ -118,17 +119,20 @@ reference: - subtitle: alpha-diversity contents: - accu_plot + - accu_plot_balanced_modality - accu_samp_threshold + - ggbetween_pq + - ggscatt_pq + - glmutli_pq - hill_pq + - hill_test_rarperm_pq - hill_tuckey_pq - iNEXT_pq - - ggbetween_pq - - subtitle: beta-diversity contents: - - accu_samp_threshold - adonis_pq + - adonis_rarperm_pq - biplot_pq - circle_pq - compare_pairs_pq @@ -141,11 +145,13 @@ reference: - plot_SCBD_pq - plot_tsne_pq - ridges_pq - - sankey_pq - SRS_curve_pq - tsne_pq - upset_pq - upset_test_pq + - var_par_pq + - var_par_rarperm_pq + - plot_var_part_pq - venn_pq - subtitle: Differential abundance analysis @@ -161,6 +167,7 @@ reference: - subtitle: Taxonomy contents: + - ggaluv_pq - heat_tree_pq - krona - merge_krona @@ -169,6 +176,7 @@ reference: - plot_guild_pq - plot_tax_pq - rotl_pq + - sankey_pq - search_exact_seq_pq - tax_bar_pq - tax_datatable @@ -202,7 +210,6 @@ reference: - rename_samples_otu_table - rename_samples - sample_data_with_new_names - - add_info_to_sam_data - psmelt_samples_pq - subtitle: Taxonomy management contents: @@ -216,7 +223,7 @@ reference: - data_fungi_mini - Tengeler2020_pq -- title: Others utilities +- title: Other utilities - subtitle: Color management contents: - funky_color diff --git a/docs/404.html b/docs/404.html index 8cb91e94..9366bbb7 100644 --- a/docs/404.html +++ b/docs/404.html @@ -31,7 +31,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+

This function (i) rarefy (equalize) the number of samples per modality of a +factor and (ii) rarefy the number of sequences per sample (depth). The +seed is set to 1:nperm. Thus, with exacly the same parameter, including +nperm values, results must be identical.

+
+ +
+

Usage

+
accu_plot_balanced_modality(
+  physeq,
+  fact,
+  nperm = 99,
+  step = 2000,
+  by.fact = TRUE,
+  progress_bar = TRUE,
+  quantile_prob = 0.975,
+  rarefy_by_sample_before_merging = TRUE,
+  sample.size = 1000,
+  verbose = FALSE,
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
fact
+

(required) The variable to rarefy. Must be present in +the sam_data slot of the physeq object.

+ + +
nperm
+

(int) The number of permutations to perform.

+ + +
step
+

(int) distance among points calculated to plot lines. +A low value give better plot but is more time consuming.

+ + +
by.fact
+

(logical, default TRUE) +First merge the OTU table by factor to plot only one line by factor

+ + +
progress_bar
+

(logical, default TRUE) Do we print progress during +the calculation?

+ + +
quantile_prob
+

(float, [0:1]) the value to compute the quantile. +Minimum quantile is compute using 1-quantile_prob.

+ + +
rarefy_by_sample_before_merging
+

(logical, default TRUE): +rarefy_by_sample_before_merging = FALSE is buggy for the moment.Please +only use rarefy_by_sample_before_merging = TRUE

+ + +
sample.size
+

(int) A single integer value equal to the number of +reads being simulated, also known as the depth. See +phyloseq::rarefy_even_depth().

+ + +
verbose
+

(logical). If TRUE, print additional informations.

+ + +
...
+

Other params for be passed on to accu_plot() function

+ +
+
+

Value

+ + +

A ggplot2 plot representing the richness accumulation plot

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+data_fungi_woNA4Time <-
+  subset_samples(data_fungi, !is.na(Time))
+data_fungi_woNA4Time@sam_data$Time <- paste0("time-", data_fungi_woNA4Time@sam_data$Time)
+accu_plot_balanced_modality(data_fungi_woNA4Time, "Time", nperm = 3)
+#> `set.seed(1)` was used to initialize repeatable random subsampling.
+#> Please record this for your records so others can reproduce.
+#> Try `set.seed(1); .Random.seed` for the full vector
+#> ...
+#> Warning: no non-missing arguments to max; returning -Inf
+#> 
  |                                                        
  |                                                  |   0%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |==================================================| 100%
+#> Warning: Removed 4 rows containing missing values or values outside the scale range
+#> (`geom_line()`).
+
+
+data_fungi_woNA4Height <-
+  subset_samples(data_fungi, !is.na(Height))
+accu_plot_balanced_modality(data_fungi_woNA4Height, "Height", nperm = 3)
+#> `set.seed(1)` was used to initialize repeatable random subsampling.
+#> Please record this for your records so others can reproduce.
+#> Try `set.seed(1); .Random.seed` for the full vector
+#> ...
+#> Warning: no non-missing arguments to max; returning -Inf
+#> 
  |                                                        
  |                                                  |   0%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |==================================================| 100%
+#> Warning: Removed 3 rows containing missing values or values outside the scale range
+#> (`geom_line()`).
+
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/accu_samp_threshold-1.png b/docs/reference/accu_samp_threshold-1.png index 067cebd4..287a9ca5 100644 Binary files a/docs/reference/accu_samp_threshold-1.png and b/docs/reference/accu_samp_threshold-1.png differ diff --git a/docs/reference/accu_samp_threshold.html b/docs/reference/accu_samp_threshold.html index f5e96f59..7f6e0cda 100644 --- a/docs/reference/accu_samp_threshold.html +++ b/docs/reference/accu_samp_threshold.html @@ -12,7 +12,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+
+ +
+

Usage

+
adonis_rarperm_pq(
+  physeq,
+  formula,
+  dist_method = "bray",
+  merge_sample_by = NULL,
+  na_remove = FALSE,
+  rarefy_nb_seqs = FALSE,
+  verbose = TRUE,
+  nperm = 99,
+  progress_bar = TRUE,
+  quantile_prob = 0.975,
+  sample.size = min(sample_sums(physeq)),
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
formula
+

(required) the right part of a formula for vegan::adonis2(). +Variables must be present in the physeq@sam_data slot.

+ + +
dist_method
+

(default "bray") the distance used. See +phyloseq::distance() for all available distances or run +phyloseq::distanceMethodList(). +For aitchison and robust.aitchison distance, vegan::vegdist() +function is directly used.

+ + +
merge_sample_by
+

a vector to determine +which samples to merge using the merge_samples2() +function. Need to be in physeq@sam_data

+ + +
na_remove
+

(logical, default FALSE) If set to TRUE, remove samples with +NA in the variables set in formula.

+ + +
rarefy_nb_seqs
+

(logical, default FALSE) Rarefy each sample +(before merging if merge_sample_by is set) using +phyloseq::rarefy_even_depth(). +if correction_for_sample_size is TRUE, rarefy_nb_seqs will have no +effect.

+ + +
verbose
+

(logical, default TRUE) If TRUE, prompt some messages.

+ + +
nperm
+

(int) The number of permutations to perform.

+ + +
progress_bar
+

(logical, default TRUE) Do we print progress during +the calculation.

+ + +
quantile_prob
+

(float, [0:1]) the value to compute the quantile. +Minimum quantile is compute using 1-quantile_prob.

+ + +
sample.size
+

(int) A single integer value equal to the number of +reads being simulated, also known as the depth. See +phyloseq::rarefy_even_depth().

+ + +
...
+

Other params for be passed on to adonis_pq() function

+ +
+
+

Value

+ + +

A list of three dataframe representing the mean, the minimum quantile +and the maximum quantile value for adonis results. See adonis_pq().

+
+
+

See also

+ +
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
if (requireNamespace("vegan")) {
+  data_fungi_woNA <-
+    subset_samples(data_fungi, !is.na(Time) & !is.na(Height))
+  adonis_rarperm_pq(data_fungi_woNA, "Time*Height", na_remove = TRUE, nperm = 3)
+}
+#> 
  |                                                        
  |                                                  |   0%
+#> `set.seed(1)` was used to initialize repeatable random subsampling.
+#> Please record this for your records so others can reproduce.
+#> Try `set.seed(1); .Random.seed` for the full vector
+#> ...
+#> 1156OTUs were removed because they are no longer 
+#> present in any sample after random subsampling
+#> ...
+#> Time
+#> Height
+#> 
  |                                                        
  |=================                                 |  33%
+#> `set.seed(2)` was used to initialize repeatable random subsampling.
+#> Please record this for your records so others can reproduce.
+#> Try `set.seed(2); .Random.seed` for the full vector
+#> ...
+#> 1160OTUs were removed because they are no longer 
+#> present in any sample after random subsampling
+#> ...
+#> Time
+#> Height
+#> 
  |                                                        
  |=================================                 |  67%
+#> `set.seed(3)` was used to initialize repeatable random subsampling.
+#> Please record this for your records so others can reproduce.
+#> Try `set.seed(3); .Random.seed` for the full vector
+#> ...
+#> 1180OTUs were removed because they are no longer 
+#> present in any sample after random subsampling
+#> ...
+#> Time
+#> Height
+#> 
  |                                                        
  |==================================================| 100%
+#> $mean
+#>              Df   SumOfSqs         R2         F Pr(>F)
+#> Time          1  1.0135889 0.01886192 2.0866054  0.001
+#> Height        2  0.8289606 0.01541881 0.8527159  0.941
+#> Time:Height   2  0.8818878 0.01639702 0.9067231  0.766
+#> Residual    105 51.0407446 0.94932225        NA     NA
+#> Total       110 53.7651818 1.00000000        NA     NA
+#> 
+#> $quantile_min
+#>              Df   SumOfSqs         R2         F  Pr(>F)
+#> Time          1  0.8756634 0.01622743 1.7913365 0.00100
+#> Height        2  0.8216582 0.01522648 0.8404060 0.90975
+#> Time:Height   2  0.7854405 0.01468994 0.8135536 0.65915
+#> Residual    105 50.6817233 0.94795991        NA      NA
+#> Total       110 53.4639765 1.00000000        NA      NA
+#> 
+#> $quantile_max
+#>              Df   SumOfSqs         R2         F  Pr(>F)
+#> Time          1  1.1624151 0.02174335 2.4083989 0.00100
+#> Height        2  0.8343976 0.01560680 0.8643377 0.97055
+#> Time:Height   2  0.9361960 0.01734900 0.9575539 0.96125
+#> Residual    105 51.3289949 0.95119710        NA      NA
+#> Total       110 53.9625125 1.00000000        NA      NA
+#> 
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/all_object_size.html b/docs/reference/all_object_size.html index 97d2874f..3c3e836b 100644 --- a/docs/reference/all_object_size.html +++ b/docs/reference/all_object_size.html @@ -12,7 +12,7 @@ MiscMetabar - 0.8.00 + 0.9.1
+ + + + + +
+
+
+ +
+

[Experimental]

+

Basically a wrapper of ggalluvial +package

+
+ +
+

Usage

+
ggaluv_pq(
+  physeq,
+  taxa_ranks = c("Phylum", "Class", "Order", "Family"),
+  wrap_factor = NULL,
+  by_sample = FALSE,
+  rarefy_by_sample = FALSE,
+  fact = NULL,
+  type = "nb_seq",
+  width = 1.2,
+  min.size = 3,
+  na_remove = FALSE,
+  use_ggfittext = FALSE,
+  use_geom_label = FALSE,
+  size_lab = 2,
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
taxa_ranks
+

A vector of taxonomic ranks. For examples c("Family","Genus"). +If taxa ranks is not set +(default value = c("Phylum", "Class", "Order", "Family")).

+ + +
wrap_factor
+

A name to determine +which samples to merge using merge_samples2() function. +Need to be in physeq@sam_data. +Need to be use when you want to wrap by factor the final plot +with the number of taxa (type="nb_asv")

+ + +
by_sample
+

(logical) If FALSE (default), sample information is not taking +into account, so the taxonomy is studied globally. If fact is not NULL, by_sample +is automatically set to TRUE.

+ + +
rarefy_by_sample
+

(logical, default FALSE) If TRUE, rarefy +samples using phyloseq::rarefy_even_depth() function.

+ + +
fact
+

(required) Name of the factor in physeq@sam_data used to plot the last column

+ + +
type
+

If "nb_seq" (default), the number of sequences is +used in plot. If "nb_asv", the number of ASV is plotted.

+ + +
width
+

(passed on to ggalluvial::geom_flow()) the width of each stratum, +as a proportion of the distance between axes. Defaults to 1/3.

+ + +
min.size
+

(passed on to ggfittext::geom_fit_text()) Minimum font size, +in points. Text that would need to be shrunk below this size to fit the box will +be hidden. Defaults to 4 pt.

+ + +
na_remove
+

(logical, default FALSE) If set to TRUE, remove samples with +NA in the variables set in formula.

+ + +
use_ggfittext
+

(logical, default FALSE) Do we use ggfittext to plot labels?

+ + +
use_geom_label
+

(logical, default FALSE) Do we use geom_label to plot labels?

+ + +
size_lab
+

Size for label if use_ggfittext is FALSE

+ + +
...
+

Other arguments passed on to ggalluvial::geom_flow() function.

+ +
+
+

Value

+ + +

A ggplot object

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to ggalluvial package if you +use this function.

+
+
+

See also

+ +
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
if (requireNamespace("ggalluvial")) {
+  ggaluv_pq(data_fungi_mini)
+}
+#> Loading required namespace: ggalluvial
+
+# \donttest{
+if (requireNamespace("ggalluvial")) {
+  ggaluv_pq(data_fungi_mini, type = "nb_asv")
+
+  ggaluv_pq(data_fungi_mini, wrap_factor = "Height", by_sample = TRUE, type = "nb_asv") +
+    facet_wrap("Height")
+
+  ggaluv_pq(data_fungi_mini,
+    width = 0.9, min.size = 10,
+    type = "nb_asv", taxa_ranks = c("Phylum", "Class", "Order", "Family", "Genus")
+  ) +
+    coord_flip() + scale_x_discrete(limits = rev)
+}
+#> Warning: `group` has missing values; corresponding samples will be dropped
+
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/ggbetween_pq-1.png b/docs/reference/ggbetween_pq-1.png index 8a977a3c..dfad3323 100644 Binary files a/docs/reference/ggbetween_pq-1.png and b/docs/reference/ggbetween_pq-1.png differ diff --git a/docs/reference/ggbetween_pq.html b/docs/reference/ggbetween_pq.html index 2d887b36..4123c04e 100644 --- a/docs/reference/ggbetween_pq.html +++ b/docs/reference/ggbetween_pq.html @@ -22,7 +22,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+

Basically a wrapper of function ggstatsplot::ggscatterstats() for +object of class phyloseq and Hill number.

+
+ +
+

Usage

+
ggscatt_pq(
+  physeq,
+  num_modality,
+  hill_scales = c(0, 1, 2),
+  rarefy_by_sample = FALSE,
+  one_plot = TRUE,
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
num_modality
+

(required) Name of the numeric column in +physeq@sam_data to plot and test against hill numberk

+ + +
hill_scales
+

(a vector of integer) The list of q values to compute +the hill number H^q. If Null, no hill number are computed. Default value +compute the Hill number 0 (Species richness), the Hill number 1 +(exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +Index).

+ + +
rarefy_by_sample
+

(logical, default FALSE) If TRUE, rarefy +samples using phyloseq::rarefy_even_depth() function.

+ + +
one_plot
+

(logical, default FALSE) If TRUE, return a unique +plot with the three plot inside using the patchwork package.

+ + +
...
+

Other arguments passed on to ggstatsplot::ggscatterstats() +function.

+ +
+
+

Value

+ + +

Either an unique ggplot2 object (if one_plot is TRUE) or +a list of ggplot2 plot for each hill_scales.

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to ggstatsplot::ggscatterstats() if you +use this function.

+
+
+

See also

+ +
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
if (requireNamespace("ggstatsplot")) {
+  ggscatt_pq(data_fungi_mini, "Time", type = "non-parametric")
+  ggscatt_pq(data_fungi_mini, "Time", hill_scales = 1:4, type = "parametric")
+  ggscatt_pq(data_fungi_mini, "Sample_id",
+    hill_scales = c(0, 0.5),
+    one_plot = FALSE
+  )
+}
+#> Taxa are now in columns.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Registered S3 method overwritten by 'ggside':
+#>   method from   
+#>   +.gg   ggplot2
+#> Taxa are now in columns.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Taxa are now in columns.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> [[1]]
+#> `stat_xsidebin()` using `bins = 30`. Pick better value with `binwidth`.
+#> `stat_ysidebin()` using `bins = 30`. Pick better value with `binwidth`.
+
+#> 
+#> [[2]]
+#> `stat_xsidebin()` using `bins = 30`. Pick better value with `binwidth`.
+#> `stat_ysidebin()` using `bins = 30`. Pick better value with `binwidth`.
+
+#> 
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/ggvenn_pq-6.png b/docs/reference/ggvenn_pq-6.png index 42168d24..e9a9d34f 100644 Binary files a/docs/reference/ggvenn_pq-6.png and b/docs/reference/ggvenn_pq-6.png differ diff --git a/docs/reference/ggvenn_pq.html b/docs/reference/ggvenn_pq.html index a93045e8..0787cd7b 100644 --- a/docs/reference/ggvenn_pq.html +++ b/docs/reference/ggvenn_pq.html @@ -20,7 +20,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+
+ +
+

Usage

+
glmutli_pq(
+  physeq,
+  formula,
+  fitfunction = "lm",
+  hill_scales = c(0, 1, 2),
+  aic_step = 2,
+  confsetsize = 100,
+  plotty = FALSE,
+  level = 1,
+  method = "h",
+  crit = "aicc",
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
formula
+

(required) a formula for glmulti::glmulti() +Variables must be present in the physeq@sam_data slot or be one +of hill number defined in hill_scales or the variable Abundance which +refer to the number of sequences per sample.

+ + +
fitfunction
+

(default "lm")

+ + +
hill_scales
+

(a vector of integer) The list of q values to compute +the hill number H^q. If Null, no hill number are computed. Default value +compute the Hill number 0 (Species richness), the Hill number 1 +(exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +Index).

+ + +
aic_step
+

The value between AIC scores to cut for.

+ + +
confsetsize
+

The number of models to be looked for, i.e. the size of the returned confidence set.

+ + +
plotty
+

(logical) Whether to plot the progress of the IC profile when running.

+ + +
level
+

If 1, only main effects (terms of order 1) are used to build +the candidate set. If 2, pairwise interactions are also used (higher order +interactions are currently ignored)

+ + +
method
+

The method to be used to explore the candidate set of models. +If "h" (default) an exhaustive screening is undertaken. +If "g" the genetic algorithm is employed (recommended for large candidate sets). +If "l", a very fast exhaustive branch-and-bound algorithm is used. +Package leaps must then be loaded, and this can only be applied to linear models +with covariates and no interactions. If "d", a simple summary of the candidate set +is printed, including the number of candidate models.

+ + +
crit
+

The Information Criterion to be used. Default is the small-sample corrected AIC (aicc). This should be a function that accepts a fitted model as first argument. Other provided functions are the classic AIC, the Bayes IC (bic), and QAIC/QAICc (qaic and qaicc).

+ + +
...
+

Others arguments passed on to glmulti::glmulti() function

+ +
+
+

Value

+ + +

A data.frame summarizing the glmulti results with columns

+ + +

-estimates +-unconditional_interval +-nb_model" +-importance +-alpha

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to glmulti::glmulti() if you +use this function.

+
+
+

See also

+ +
+ +
+

Examples

+
# \donttest{
+if (requireNamespace("glmulti")) {
+  res_glmulti <-
+    glmutli_pq(data_fungi, "Hill_0 ~ Hill_1 + Abundance + Time + Height", level = 1)
+  res_glmulti
+  res_glmulti_interaction <-
+    glmutli_pq(data_fungi, "Hill_0 ~ Abundance + Time + Height", level = 2)
+  res_glmulti
+}
+#> Loading required namespace: glmulti
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Initialization...
+#> TASK: Exhaustive screening of candidate set.
+#> Fitting...
+#> Completed.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Initialization...
+#> TASK: Exhaustive screening of candidate set.
+#> Fitting...
+#> 
+#> After 50 models:
+#> Best model: Hill_0~1+Abundance+Time+Time:Abundance+Height:Abundance+Height:Time
+#> Crit= 1069.11608982306
+#> Mean crit= 1218.19009955263
+#> Completed.
+#>                estimates unconditional_interval nb_model importance
+#> Hill_1       3.062117997           1.868174e-01        8          1
+#> Abundance    0.002959644           8.478374e-08        8          1
+#> Time         0.789091999           2.443263e-01        8          1
+#> HeightLow    6.884340946           3.444196e+01        8          1
+#> HeightMiddle 0.339123798           3.727962e+01        8          1
+#>                     alpha     variable
+#> Hill_1       8.570200e-01       Hill_1
+#> Abundance    5.773492e-04    Abundance
+#> Time         9.800932e-01         Time
+#> HeightLow    1.163660e+01    HeightLow
+#> HeightMiddle 1.210648e+01 HeightMiddle
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/graph_test_pq-1.png b/docs/reference/graph_test_pq-1.png index 76f3bd6b..03cf0a0a 100644 Binary files a/docs/reference/graph_test_pq-1.png and b/docs/reference/graph_test_pq-1.png differ diff --git a/docs/reference/graph_test_pq.html b/docs/reference/graph_test_pq.html index 4a7747e5..22187eea 100644 --- a/docs/reference/graph_test_pq.html +++ b/docs/reference/graph_test_pq.html @@ -14,7 +14,7 @@ MiscMetabar - 0.8.00 + 0.9.1
+ + + + + +
+
+
+ +
+

[Experimental]

+
+ +
+

Usage

+
hill_test_rarperm_pq(
+  physeq,
+  fact,
+  hill_scales = c(0, 1, 2),
+  nperm = 99,
+  sample.size = min(sample_sums(physeq)),
+  verbose = FALSE,
+  progress_bar = TRUE,
+  p_val_signif = 0.05,
+  type = "non-parametrique",
+  ...
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
fact
+

(required) Name of the factor in physeq@sam_data used to plot +different lines

+ + +
hill_scales
+

(a vector of integer) The list of q values to compute +the hill number H^q. If Null, no hill number are computed. Default value +compute the Hill number 0 (Species richness), the Hill number 1 +(exponential of Shannon Index) and the Hill number 2 (inverse of Simpson +Index).

+ + +
nperm
+

(int) The number of permutations to perform.

+ + +
sample.size
+

(int) A single integer value equal to the number of +reads being simulated, also known as the depth. See +phyloseq::rarefy_even_depth().

+ + +
verbose
+

(logical). If TRUE, print additional informations.

+ + +
progress_bar
+

(logical, default TRUE) Do we print progress during +the calculation?

+ + +
p_val_signif
+

(float, [0:1]) The mimimum value of p-value to count a +test as significant int the prop_signif result.

+ + +
type
+

A character specifying the type of statistical approach +(See ggstatsplot::ggbetweenstats() for more details):

  • "parametric"

  • +
  • "nonparametric"

  • +
  • "robust"

  • +
  • "bayes"

  • +
+ + +
...
+

Others arguments passed on to ggstatsplot::ggbetweenstats() function

+ +
+
+

Value

+ + +

A list of 6 components :

  • method

  • +
  • expressions

  • +
  • plots

  • +
  • pvals

  • +
  • prop_signif

  • +
  • statistics

  • +
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+if (requireNamespace("ggstatsplot")) {
+  hill_test_rarperm_pq(data_fungi, "Time", nperm = 2)
+  res <- hill_test_rarperm_pq(data_fungi, "Height", nperm = 9, p.val = 0.9)
+  patchwork::wrap_plots(res$plots[[1]])
+  res$plots[[1]][[1]] + res$plots[[2]][[1]] + res$plots[[3]][[1]]
+  res$prop_signif
+  res_para <- hill_test_rarperm_pq(data_fungi, "Height", nperm = 9, type = "parametrique")
+  res_para$plots[[1]][[1]] + res_para$plots[[2]][[1]] + res_para$plots[[3]][[1]]
+  res_para$pvals
+  res_para$method
+  res_para$expressions[[1]]
+}
+#> 
  |                                                        
  |                                                  |   0%
  |                                                        
  |=========================                         |  50%
  |                                                        
  |==================================================| 100%
  |                                                        
  |                                                  |   0%
  |                                                        
  |======                                            |  11%
  |                                                        
  |===========                                       |  22%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |======================                            |  44%
  |                                                        
  |============================                      |  56%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |=======================================           |  78%
  |                                                        
  |============================================      |  89%
  |                                                        
  |==================================================| 100%
  |                                                        
  |                                                  |   0%
  |                                                        
  |======                                            |  11%
  |                                                        
  |===========                                       |  22%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |======================                            |  44%
  |                                                        
  |============================                      |  56%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |=======================================           |  78%
  |                                                        
  |============================================      |  89%
  |                                                        
  |==================================================| 100%
+#> list(italic("F")["Welch"](2, 84.92) == "0.08", italic(p) == "0.92", 
+#>     widehat(omega["p"]^2) == "0.00", CI["95%"] ~ "[" * "0.00", 
+#>     "1.00" * "]", italic("n")["obs"] == "131")
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/hill_tuckey_pq-1.png b/docs/reference/hill_tuckey_pq-1.png index 927caec2..8034993b 100644 Binary files a/docs/reference/hill_tuckey_pq-1.png and b/docs/reference/hill_tuckey_pq-1.png differ diff --git a/docs/reference/hill_tuckey_pq-2.png b/docs/reference/hill_tuckey_pq-2.png new file mode 100644 index 00000000..4d2a78d8 Binary files /dev/null and b/docs/reference/hill_tuckey_pq-2.png differ diff --git a/docs/reference/hill_tuckey_pq.html b/docs/reference/hill_tuckey_pq.html index a23f3c5a..6fb9fac8 100644 --- a/docs/reference/hill_tuckey_pq.html +++ b/docs/reference/hill_tuckey_pq.html @@ -14,7 +14,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+
+ +
+

Usage

+
plot_var_part_pq(
+  res_varpart,
+  cutoff = 0,
+  digits = 1,
+  digits_quantile = 2,
+  fill_bg = c("seagreen3", "mediumpurple", "blue", "orange"),
+  show_quantiles = FALSE,
+  filter_quantile_zero = TRUE,
+  show_dbrda_signif = FALSE,
+  show_dbrda_signif_pval = 0.05,
+  alpha = 63,
+  id.size = 1.2,
+  min_prop_pval_signif_dbrda = 0.95
+)
+
+ +
+

Arguments

+
res_varpart
+

(required) the result of the functions var_par_pq() +or var_par_rarperm_pq()

+ + +
cutoff
+

The values below cutoff will not be displayed.

+ + +
digits
+

The number of significant digits.

+ + +
digits_quantile
+

The number of significant digits for quantile.

+ + +
fill_bg
+

Fill colours of ellipses.

+ + +
show_quantiles
+

Do quantiles are printed ?

+ + +
filter_quantile_zero
+

Do we filter out value with quantile encompassing +the zero value?

+ + +
show_dbrda_signif
+

Do dbrda significance for each component is printed +using *?

+ + +
show_dbrda_signif_pval
+

(float, [0:1]) The value under which the +dbrda is considered significant.

+ + +
alpha
+

(int, [0:255]) Transparency of the fill colour.

+ + +
id.size
+

A numerical value giving the character expansion factor for the names of circles or ellipses.

+ + +
min_prop_pval_signif_dbrda
+

(float, [0:1]) Only used if using the +result of var_par_rarperm_pq() function. The * for dbrda_signif is only add if +at least min_prop_pval_signif_dbrda of permutations show significance.

+ +
+
+

Value

+ + +

A plot

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to vegan::varpart() if you +use this function.

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+if (requireNamespace("vegan")) {
+  data_fungi_woNA <- subset_samples(data_fungi, !is.na(Time) & !is.na(Height))
+  res_var_9 <- var_par_rarperm_pq(
+    data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    nperm = 9,
+    dbrda_computation = TRUE
+  )
+  res_var_2 <- var_par_rarperm_pq(
+    data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    nperm = 2,
+    dbrda_computation = TRUE
+  )
+  res_var0 <- var_par_pq(data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    dbrda_computation = TRUE
+  )
+  plot_var_part_pq(res_var0, digits_quantile = 2, show_dbrda_signif = TRUE)
+  plot_var_part_pq(res_var_9,
+    digits_quantile = 2, show_quantiles = TRUE,
+    show_dbrda_signif = TRUE
+  )
+  plot_var_part_pq(
+    res_var_2,
+    digits = 5,
+    digits_quantile = 2,
+    cutoff = 0,
+    show_quantiles = TRUE
+  )
+}
+#> 
  |                                                        
  |                                                  |   0%
  |                                                        
  |======                                            |  11%
  |                                                        
  |===========                                       |  22%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |======================                            |  44%
  |                                                        
  |============================                      |  56%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |=======================================           |  78%
  |                                                        
  |============================================      |  89%
  |                                                        
  |==================================================| 100%
  |                                                        
  |                                                  |   0%
  |                                                        
  |=========================                         |  50%
  |                                                        
  |==================================================| 100%
+
+
+
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/psmelt_samples_pq-1.png b/docs/reference/psmelt_samples_pq-1.png index eace79e5..54cef919 100644 Binary files a/docs/reference/psmelt_samples_pq-1.png and b/docs/reference/psmelt_samples_pq-1.png differ diff --git a/docs/reference/psmelt_samples_pq.html b/docs/reference/psmelt_samples_pq.html index b141274a..c8209b8b 100644 --- a/docs/reference/psmelt_samples_pq.html +++ b/docs/reference/psmelt_samples_pq.html @@ -5,7 +5,7 @@ Note that contrary to hill_pq(), this function does not take into account for difference in the number of sequences per samples/modalities. You may use rarefy_by_sample = TRUE if the mean number of sequences per -samples differs among modalities.">Build a samples information data.frame from physeq object — psmelt_samples_pq • MiscMetabarBuild a sample information tibble from physeq object — psmelt_samples_pq • MiscMetabarMiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental]

+
+ +
+

Usage

+
rarefy_sample_count_by_modality(physeq, fact, rngseed = FALSE, verbose = TRUE)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
fact
+

(required): The variable to rarefy. Must be present in +the sam_data slot of the physeq object.

+ + +
rngseed
+

(Optional). A single integer value passed to set.seed, +which is used to fix a seed for reproducibly random number generation +(in this case, reproducibly random subsampling). If set to FALSE, then no +iddling with the RNG seed is performed, +and it is up to the user to appropriately call

+ + +
verbose
+

(logical). If TRUE, print additional informations.

+ +
+
+

Value

+ + +

A new phyloseq-class object.

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
table(data_fungi_mini@sam_data$Height)
+#> 
+#>   High    Low Middle 
+#>     28     32     30 
+data_fungi_mini2 <- rarefy_sample_count_by_modality(data_fungi_mini, "Height")
+#> You set `rngseed` to FALSE. Make sure you've set & recorded
+#>  the random seed of your session for reproducibility.
+#> See `?set.seed`
+#> ...
+table(data_fungi_mini2@sam_data$Height)
+#> 
+#>   High    Low Middle 
+#>     28     28     28 
+if (requireNamespace("patchwork")) {
+  ggvenn_pq(data_fungi_mini, "Height") + ggvenn_pq(data_fungi_mini2, "Height")
+}
+#> Taxa are now in columns.
+#> Taxa are now in columns.
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/read_pq.html b/docs/reference/read_pq.html index b02cdb52..2cd9e391 100644 --- a/docs/reference/read_pq.html +++ b/docs/reference/read_pq.html @@ -12,7 +12,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Maturing]

+
+ +
+

Usage

+
taxa_as_columns(physeq)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ +
+
+

Value

+ + +

A new phyloseq-class object

+
+
+

Author

+

Adrien Taudière

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/taxa_as_rows.html b/docs/reference/taxa_as_rows.html new file mode 100644 index 00000000..a471aaad --- /dev/null +++ b/docs/reference/taxa_as_rows.html @@ -0,0 +1,121 @@ + +Force taxa to be in columns in the otu_table of a physeq object — taxa_as_rows • MiscMetabar + Skip to contents + + +
+
+
+ +
+

[Maturing]

+
+ +
+

Usage

+
taxa_as_rows(physeq)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ +
+
+

Value

+ + +

A new phyloseq-class object

+
+
+

Author

+

Adrien Taudière

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/taxa_only_in_one_level.html b/docs/reference/taxa_only_in_one_level.html index facbcfb7..57b8cf3c 100644 --- a/docs/reference/taxa_only_in_one_level.html +++ b/docs/reference/taxa_only_in_one_level.html @@ -12,7 +12,7 @@ MiscMetabar - 0.8.00 + 0.9.1 + + + + + +
+
+
+ +
+

[Experimental] +The function partitions the variation in otu_table using +distance (Bray per default) with respect to two, three, or four explanatory +tables, using +adjusted R² in redundancy analysis ordination (RDA) or distance-based +redundancy analysis. If response is a single vector, partitioning is by +partial regression. Collinear variables in the explanatory tables do NOT +have to be removed prior to partitioning. See vegan::varpart() for more +information.

+
+ +
+

Usage

+
var_par_pq(
+  physeq,
+  list_component,
+  dist_method = "bray",
+  dbrda_computation = TRUE
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
list_component
+

(required) A named list of 2, 3 or four vectors with +names from the @sam_data slot.

+ + +
dist_method
+

(default "bray") the distance used. See +phyloseq::distance() for all available distances or run +phyloseq::distanceMethodList(). +For "aitchison" and "robust.aitchison" distance, vegan::vegdist() +function is directly used.

+ + +
dbrda_computation
+

(logical) Do dbrda computations are runned for each +individual component (each name of the list component) ?

+ +
+
+

Value

+ + +

an object of class "varpart", see vegan::varpart()

+ + +
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to vegan::varpart() if you +use this function.

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+if (requireNamespace("vegan")) {
+  data_fungi_woNA <-
+    subset_samples(data_fungi, !is.na(Time) & !is.na(Height))
+  res_var <- var_par_pq(data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    dbrda_computation = TRUE
+  )
+}
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/var_par_rarperm_pq.html b/docs/reference/var_par_rarperm_pq.html new file mode 100644 index 00000000..db582fc0 --- /dev/null +++ b/docs/reference/var_par_rarperm_pq.html @@ -0,0 +1,229 @@ + +Partition the Variation of a phyloseq object with rarefaction permutations — var_par_rarperm_pq • MiscMetabar + Skip to contents + + +
+
+
+ +
+

[Experimental]

+

This is an extension of the function var_par_pq(). The main addition is +the computation of nperm permutations with rarefaction even depth by +sample. The return object

+
+ +
+

Usage

+
var_par_rarperm_pq(
+  physeq,
+  list_component,
+  dist_method = "bray",
+  nperm = 99,
+  quantile_prob = 0.975,
+  dbrda_computation = FALSE,
+  dbrda_signif_pval = 0.05,
+  sample.size = min(sample_sums(physeq)),
+  verbose = FALSE,
+  progress_bar = TRUE
+)
+
+ +
+

Arguments

+
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
list_component
+

(required) A named list of 2, 3 or four vectors with +names from the @sam_data slot.

+ + +
dist_method
+

(default "bray") the distance used. See +phyloseq::distance() for all available distances or run +phyloseq::distanceMethodList(). +For aitchison and robust.aitchison distance, vegan::vegdist() +function is directly used.#' @param fill_bg

+ + +
nperm
+

(int) The number of permutations to perform.

+ + +
quantile_prob
+

(float, [0:1]) the value to compute the quantile. +Minimum quantile is compute using 1-quantile_prob.

+ + +
dbrda_computation
+

(logical) Do dbrda computations are runned for each +individual component (each name of the list component) ?

+ + +
dbrda_signif_pval
+

(float, [0:1]) The value under which the dbrda is +considered significant.

+ + +
sample.size
+

(int) A single integer value equal to the number of +reads being simulated, also known as the depth. See +phyloseq::rarefy_even_depth().

+ + +
verbose
+

(logical). If TRUE, print additional informations.

+ + +
progress_bar
+

(logical, default TRUE) Do we print progress during +the calculation?

+ +
+
+

Value

+ + +

A list of class varpart with additional information in the +$part$indfract part. Adj.R.square is the mean across permutation. +Adj.R.squared_quantil_min and Adj.R.squared_quantil_max represent +the quantile values of adjuste R squared

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to vegan::varpart() if you +use this function.

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+if (requireNamespace("vegan")) {
+  data_fungi_woNA <- subset_samples(data_fungi, !is.na(Time) & !is.na(Height))
+  res_var_9 <- var_par_rarperm_pq(
+    data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    nperm = 9,
+    dbrda_computation = TRUE
+  )
+  res_var_2 <- var_par_rarperm_pq(
+    data_fungi_woNA,
+    list_component = list(
+      "Time" = c("Time"),
+      "Size" = c("Height", "Diameter")
+    ),
+    nperm = 2,
+    dbrda_computation = TRUE
+  )
+}
+#> 
  |                                                        
  |                                                  |   0%
  |                                                        
  |======                                            |  11%
  |                                                        
  |===========                                       |  22%
  |                                                        
  |=================                                 |  33%
  |                                                        
  |======================                            |  44%
  |                                                        
  |============================                      |  56%
  |                                                        
  |=================================                 |  67%
  |                                                        
  |=======================================           |  78%
  |                                                        
  |============================================      |  89%
  |                                                        
  |==================================================| 100%
  |                                                        
  |                                                  |   0%
  |                                                        
  |=========================                         |  50%
  |                                                        
  |==================================================| 100%
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/venn_pq.html b/docs/reference/venn_pq.html index 33d78c12..442d2d02 100644 --- a/docs/reference/venn_pq.html +++ b/docs/reference/venn_pq.html @@ -10,7 +10,7 @@ MiscMetabar - 0.8.00 + 0.9.1