diff --git a/DESCRIPTION b/DESCRIPTION index b6678523..527190d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.10.4 +Version: 0.11.0 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. Important features are described in Taudière A. (2023) . @@ -74,6 +74,8 @@ Suggests: tibble, tidyr, treemapify, + umap, + uwot, vegan, venneuler, vctrs, diff --git a/MiscMetabar.Rproj b/MiscMetabar.Rproj index b987bfed..4b60472d 100644 --- a/MiscMetabar.Rproj +++ b/MiscMetabar.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 3859fc88-3ad3-46d5-8186-259f982e0e31 RestoreWorkspace: No SaveWorkspace: No diff --git a/NAMESPACE b/NAMESPACE index 6b760058..55f60280 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,8 @@ export(all_object_size) export(ancombc_pq) export(are_modality_even_depth) export(as_binary_otu_table) +export(assign_sintax) +export(assign_vsearch_lca) export(asv2otu) export(biplot_physeq) export(biplot_pq) @@ -39,6 +41,7 @@ export(dist_bycol) export(dist_pos_control) export(distri_1_taxa) export(fac2col) +export(filt_taxa_pq) export(filter_asv_blast) export(filter_taxa_blast) export(filter_trim) @@ -54,6 +57,7 @@ export(ggscatt_pq) export(ggvenn_pq) export(glmutli_pq) export(graph_test_pq) +export(hill_curves_pq) export(hill_phyloseq) export(hill_pq) export(hill_test_rarperm_pq) @@ -79,6 +83,7 @@ export(multipatt_pq) export(multiplot) export(multitax_bar_pq) export(mumu_pq) +export(no_legend) export(normalize_prop_pq) export(otu_circle) export(perc) @@ -88,6 +93,7 @@ export(physeq_or_string_to_dna) export(plot_LCBD_pq) export(plot_SCBD_pq) export(plot_ancombc_pq) +export(plot_complexity_pq) export(plot_deseq2_phyloseq) export(plot_deseq2_pq) export(plot_edgeR_phyloseq) @@ -136,6 +142,7 @@ export(track_wkflow_samples) export(transp) export(treemap_pq) export(tsne_pq) +export(umap_pq) export(unique_or_na) export(upset_pq) export(upset_test_pq) diff --git a/NEWS.md b/NEWS.md index 046f2b53..b292fee2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,14 @@ -# MiscMetabar 0.10.5 (in development) +# MiscMetabar 0.11 (in development) + +- Add function `filt_taxa_pq()` to filter taxa based on the number of sequences/occurences +- Add functions `no_legend()` and `hill_curves_pq()` to plot hill diversity accumulation curves for phyloseq +- Add function `umap_pq()` to compute Dimensionality Reduction with UMAP +- Add function `plot_complexity_pq()` to plot kmer complexity of references sequences of a phyloseq object +- Add param `type` to `ridge_pq()` to plot a cumulative version (type="ecdf") version of ridge +- Introduce the idea of a pq-verse: some other packages will complete the MiscMetabar packages to make package maintenance easier. The [comparpq](https://github.com/adrientaudiere/comparpq) package will facilitate the comparison of phyloseq object with different taxonomy, different clustering method, different samples with same modality or different primers. +- Add functions [assign_vsearch_lca()], [assign_sintax()] and internal function [write_temp_fasta()] +- Add param `method` to `add_new_taxonomy_pq()` to allow the use of [dada2::assign_taxonomy()] (default, precedent only method available), [assign_sintax()] or [assign_vsearch_lca()] + # MiscMetabar 0.10.4 diff --git a/R/MiscMetabar-package.R b/R/MiscMetabar-package.R index 385cecd3..dd4fafda 100644 --- a/R/MiscMetabar-package.R +++ b/R/MiscMetabar-package.R @@ -26,7 +26,8 @@ if (getRversion() >= "2.15.1") { "chim_rm", "condition", "physeq", "seq_tab_Pairs", "nb_samp", "silent", "X1_lim1", "X1_lim2", "aicc", "variable", "pos_letters", "alluvium", "na_remove", "stratum", "to_lodes_form", "clean_fastq", "clean_sam", - "samples_names_common", "seq_id" + "samples_names_common", "seq_id", "alpha_hill", "Modality", "Type", "complexity", + "value_bootstrap" )) } diff --git a/R/alpha_div_test.R b/R/alpha_div_test.R index aa4495d2..ec5483c9 100644 --- a/R/alpha_div_test.R +++ b/R/alpha_div_test.R @@ -122,7 +122,7 @@ hill_tuckey_pq <- function( #' @param sample.size (int) A single integer value equal to the number of #' reads being simulated, also known as the depth. See #' [phyloseq::rarefy_even_depth()]. -#' @param verbose (logical). If TRUE, print additional informations. +#' @param verbose (logical). If TRUE, print additional information. #' @param progress_bar (logical, default TRUE) Do we print progress during #' the calculation? #' @param p_val_signif (float, `[0:1]`) The mimimum value of p-value to count a diff --git a/R/beta_div_test.R b/R/beta_div_test.R index 11553491..1e95f64f 100644 --- a/R/beta_div_test.R +++ b/R/beta_div_test.R @@ -128,10 +128,10 @@ graph_test_pq <- function(physeq, #' @param verbose (logical, default TRUE) If TRUE, prompt some messages. #' @param ... Other arguments passed on to [vegan::adonis2()] function. #' Note that the parameter `by` is important. If by is set to NULL -#' (default) the p-value is computed for the entire model. -#' by = NULL will assess the overall significance of all terms together, -#' by = "terms" will assess significance for each term (sequentially from first to last), -#' setting by = "margin" will assess the marginal effects of the terms (each marginal term analysed in a model with all other variables), +#' (default) the p-value is computed for the entire model. +#' by = NULL will assess the overall significance of all terms together, +#' by = "terms" will assess significance for each term (sequentially from first to last), +#' setting by = "margin" will assess the marginal effects of the terms (each marginal term analysed in a model with all other variables), #' by = "onedf" will analyse one-degree-of-freedom contrasts sequentially. The argument is passed on to anova.cca. #' @return The function returns an anova.cca result object with a #' new column for partial R^2. See help of [vegan::adonis2()] for @@ -143,7 +143,7 @@ graph_test_pq <- function(physeq, #' adonis_pq(enterotype, "SeqTech*Enterotype", na_remove = TRUE, by = "terms") #' adonis_pq(enterotype, "SeqTech*Enterotype", na_remove = TRUE, by = "onedf") #' adonis_pq(enterotype, "SeqTech*Enterotype", na_remove = TRUE, by = "margin") -#' +#' #' adonis_pq(enterotype, "SeqTech", dist_method = "jaccard", by = "terms") #' adonis_pq(enterotype, "SeqTech", dist_method = "robust.aitchison", by = "terms") #' } @@ -1048,20 +1048,19 @@ plot_ancombc_pq <- #' #' @author Adrien Taudière #' @examples -#' data_fungi_mini_woNA4height <- subset_samples( -#' data_fungi_mini, -#' !is.na(data_fungi_mini@sam_data$Height) -#' ) -#' taxa_only_in_one_level(data_fungi_mini_woNA4height, "Height", "High") -#' #' # Taxa present only in low height samples -#' suppressMessages(suppressWarnings( -#' taxa_only_in_one_level(data_fungi, "Height", "Low") -#' )) -#' # Number of taxa present only in sample of time equal to 15 -#' suppressMessages(suppressWarnings( -#' length(taxa_only_in_one_level(data_fungi, "Time", "15")) -#' )) - +#' data_fungi_mini_woNA4height <- subset_samples( +#' data_fungi_mini, +#' !is.na(data_fungi_mini@sam_data$Height) +#' ) +#' taxa_only_in_one_level(data_fungi_mini_woNA4height, "Height", "High") +#' #' # Taxa present only in low height samples +#' suppressMessages(suppressWarnings( +#' taxa_only_in_one_level(data_fungi, "Height", "Low") +#' )) +#' # Number of taxa present only in sample of time equal to 15 +#' suppressMessages(suppressWarnings( +#' length(taxa_only_in_one_level(data_fungi, "Time", "15")) +#' )) taxa_only_in_one_level <- function(physeq, modality, level, @@ -1309,7 +1308,7 @@ var_par_pq <- #' @param sample.size (int) A single integer value equal to the number of #' reads being simulated, also known as the depth. See #' [phyloseq::rarefy_even_depth()]. -#' @param verbose (logical). If TRUE, print additional informations. +#' @param verbose (logical). If TRUE, print additional information. #' @param progress_bar (logical, default TRUE) Do we print progress during #' the calculation? #' diff --git a/R/controls.R b/R/controls.R index 832819c5..244c44f3 100644 --- a/R/controls.R +++ b/R/controls.R @@ -7,7 +7,8 @@ #' lifecycle-experimental #' #' Search for exact matching of sequences using complement, -#' reverse and reverse-complement +#' reverse and reverse-complement. It is useful to check for +#' primers issues after cutadapt step. #' #' @inheritParams clean_pq #' @param seq2search A DNAStringSet object of sequences to search for. diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index d3c4a152..71e29f2e 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -18,6 +18,7 @@ if (getRversion() >= "2.15.1") { #' #' @return A new \code{\link[phyloseq]{phyloseq-class}} object with `refseq` slot and new #' taxa names +#' @author Adrien Taudière #' @export add_dna_to_phyloseq <- function(physeq, prefix_taxa_names = "Taxa_") { @@ -70,6 +71,7 @@ add_dna_to_phyloseq <- function(physeq, prefix_taxa_names = "Taxa_") { #' @param prefix_taxa_names (default "Taxa_"): the prefix of taxa names (eg. "ASV_" or "OTU_") #' @return A new \code{\link[phyloseq]{phyloseq-class}} object #' @export +#' @author Adrien Taudière clean_pq <- function(physeq, remove_empty_samples = TRUE, remove_empty_taxa = TRUE, @@ -232,23 +234,27 @@ clean_pq <- function(physeq, #' @return The number of sequences, clusters (e.g. OTUs, ASVs) and samples for #' each object. #' @export +#' @author Adrien Taudière #' @seealso [track_wkflow_samples()] #' @examplesIf tolower(Sys.info()[["sysname"]]) != "windows" #' data(enterotype) #' if (requireNamespace("pbapply")) { #' track_wkflow(list(data_fungi, enterotype), taxonomy_rank = c(3, 5)) -#' track_wkflow(list("data FUNGI"=data_fungi, +#' track_wkflow(list( +#' "data FUNGI" = data_fungi, #' "fastq files forward" = -#' unlist(list_fastq_files(system.file("extdata", package = "MiscMetabar"), -#' paired_end = FALSE)))) +#' unlist(list_fastq_files(system.file("extdata", package = "MiscMetabar"), +#' paired_end = FALSE +#' )) +#' )) #' } track_wkflow <- function(list_of_objects, obj_names = NULL, clean_pq = FALSE, taxonomy_rank = NULL, - verbose=TRUE, + verbose = TRUE, ...) { - if(verbose) { + if (verbose) { message("Compute the number of sequences") } if (!is.null(obj_names)) { @@ -265,8 +271,8 @@ track_wkflow <- function(list_of_objects, track_nb_seq_per_obj <- pbapply::pblapply(list_of_objects, function(object) { - if(verbose) { - message(paste("Start object of class:", class(object), sep = " ")) + if (verbose) { + message(paste("Start object of class:", class(object), sep = " ")) } if (inherits(object, "phyloseq")) { sum(object@otu_table) @@ -302,8 +308,8 @@ track_wkflow <- function(list_of_objects, message("Compute the number of clusters") track_nb_cluster_per_obj <- pbapply::pblapply(list_of_objects, function(object) { - if(verbose) { - message(paste("Start object of class:", class(object), sep = " ")) + if (verbose) { + message(paste("Start object of class:", class(object), sep = " ")) } if (inherits(object, "phyloseq")) { ntaxa(object) @@ -330,8 +336,8 @@ track_wkflow <- function(list_of_objects, message("Compute the number of samples") track_nb_sam_per_obj <- pbapply::pblapply(list_of_objects, function(object) { - if(verbose) { - message(paste("Start object of class:", class(object), sep = " ")) + if (verbose) { + message(paste("Start object of class:", class(object), sep = " ")) } if (inherits(object, "phyloseq")) { nsamples(object) @@ -361,8 +367,8 @@ track_wkflow <- function(list_of_objects, message("Compute the number of values in taxonomic rank") track_nb_tax_value_per_obj <- pbapply::pblapply(list_of_objects, function(object) { - if(verbose) { - message(paste("Start object of class:", class(object), sep = " ")) + if (verbose) { + message(paste("Start object of class:", class(object), sep = " ")) } if (inherits(object, "phyloseq")) { if (taxa_are_rows(object)) { @@ -381,8 +387,8 @@ track_wkflow <- function(list_of_objects, names_taxonomic_rank <- pbapply::pblapply(list_of_objects, function(object) { - if(verbose) { - message(paste("Start object of class:", class(object), sep = " ")) + if (verbose) { + message(paste("Start object of class:", class(object), sep = " ")) } if (inherits(object, "phyloseq")) { if (taxa_are_rows(object)) { @@ -945,7 +951,7 @@ save_pq <- function(physeq, path = NULL, ...) { #' @return One to four csv tables (refseq.csv, otu_table.csv, tax_table.csv, sam_data.csv) #' and if present a phy_tree in Newick format. At least the otu_table.csv need to be present. #' @export -#' +#' @author Adrien Taudière #' @examples #' write_pq(data_fungi, path = paste0(tempdir(), "/phyloseq")) #' read_pq(path = paste0(tempdir(), "/phyloseq")) @@ -1371,7 +1377,7 @@ mumu_pq <- function(physeq, #' @return Nothing if the phyloseq object is valid. An error in the other case. #' Warnings if verbose = TRUE #' @export -#' +#' @author Adrien Taudière verify_pq <- function(physeq, verbose = FALSE, min_nb_seq_sample = 500, @@ -1440,7 +1446,7 @@ verify_pq <- function(physeq, #' @inheritParams clean_pq #' @param condition A boolean vector to subset samples. Length must fit #' the number of samples -#' +#' @author Adrien Taudière #' @examples #' #' cond_samp <- grepl("A1", data_fungi@sam_data[["Sample_names"]]) @@ -1510,7 +1516,7 @@ subset_samples_pq <- function(physeq, condition) { #' #' @return a new phyloseq object #' @export -#' +#' @author Adrien Taudière subset_taxa_pq <- function(physeq, condition, verbose = TRUE, @@ -1581,6 +1587,89 @@ subset_taxa_pq <- function(physeq, } ################################################################################ +################################################################################ +#' Filter taxa of a phyloseq object based on the minimum number of +#' sequences/samples +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Basically a wraper of [subset_taxa_pq()]. +#' +#' @inheritParams clean_pq +#' @param min_nb_seq (int default NULL) minimum number of sequences by taxa. +#' @param min_occurence (int default NULL) minimum number of sample by taxa. +#' @param combination Either "AND" (default) or "OR". If set to "AND" and both +#' min_nb_seq and min_occurence are not NULL, the taxa must match the two +#' condition to passe the filter. If set to "OR", taxa matching only one +#' condition are kept. +#' @param clean_pq (logical) +#' If set to TRUE, empty samples and empty taxa (ASV, OTU) are discarded +#' after filtering. +#' +#' @return a new phyloseq object +#' @export +#' @author Adrien Taudière +#' @examples +#' filt_taxa_pq(data_fungi, min_nb_seq = 20) +#' filt_taxa_pq(data_fungi, min_occurence = 2) +#' filt_taxa_pq(data_fungi, +#' min_occurence = 2, +#' min_nb_seq = 10, clean_pq = FALSE +#' ) +#' filt_taxa_pq(data_fungi, +#' min_occurence = 2, +#' min_nb_seq = 10, +#' combination = "OR" +#' ) +filt_taxa_pq <- function(physeq, + min_nb_seq = NULL, + min_occurence = NULL, + combination = "AND", + clean_pq = TRUE) { + new_physeq <- physeq + if (is.null(min_nb_seq) && is.null(min_occurence)) { + stop("You must inform either min_nb_seq or min_occurence or both params!") + } + if (!is.null(min_nb_seq) && is.null(min_occurence)) { + new_physeq <- + subset_taxa_pq(physeq, taxa_sums(physeq) >= min_nb_seq) + } + + if (is.null(min_nb_seq) && !is.null(min_occurence)) { + new_physeq <- + subset_taxa_pq( + new_physeq, + taxa_sums(as_binary_otu_table(new_physeq)) >= min_occurence + ) + } + + if (!is.null(min_nb_seq) && !is.null(min_occurence)) { + if (combination == "AND") { + new_physeq <- + subset_taxa_pq( + new_physeq, + taxa_sums(as_binary_otu_table(new_physeq)) >= 1 & + taxa_sums((new_physeq)) >= 100 + ) + } else if (combination == "OR") { + new_physeq <- + subset_taxa_pq( + new_physeq, + taxa_sums(as_binary_otu_table(new_physeq)) >= 1 | + taxa_sums((new_physeq)) >= 100 + ) + } + } + + if (clean_pq) { + new_physeq <- clean_pq(new_physeq) + } + return(new_physeq) +} + ################################################################################ #' Select one sample from a physeq object @@ -1660,27 +1749,45 @@ select_one_sample <- function(physeq, sam_name, silent = FALSE) { #' @inheritParams clean_pq #' @param ref_fasta (required) A link to a database. #' Pass on to `dada2::assignTaxonomy`. +#' @param method (required, default "dada2") :.add +#' +#' - "dada2" : [dada2::assignTaxonomy()] +#' +#' - "sintax" : see [assign_sintax()] +#' +#' - "lca" : see [assign_vsearch_lca()] +#' #' @param suffix (character) The suffix to name the new columns. #' If set to NULL (the default), the basename of the file reFasta -#' is used. -#' @param ... Other arguments pass on to `dada2::assignTaxonomy`. +#' is used with the name of the method. Set suffix to "" in order +#' to remove any suffix. +#' @param ... Other arguments pass on to the taxonomic assignation method. #' @return A new \code{\link[phyloseq]{phyloseq-class}} object with a larger slot tax_table" -#' +#' @seealso [dada2::assignTaxonomy()], [assign_sintax()], [assign_vsearch_lca()] #' @export #' #' @author Adrien Taudière #' -add_new_taxonomy_pq <- function(physeq, ref_fasta, suffix = NULL, ...) { +add_new_taxonomy_pq <- function(physeq, ref_fasta, suffix = NULL, method = "dada2", ...) { if (is.null(suffix)) { - suffix <- basename(ref_fasta) - } - tax_tab <- - dada2::assignTaxonomy(physeq@refseq, refFasta = ref_fasta, ...) - colnames(tax_tab) <- - make.unique(paste0(colnames(tax_tab), "_", suffix)) - new_tax_tab <- tax_table(cbind(physeq@tax_table, tax_tab)) - new_physeq <- physeq - tax_table(new_physeq) <- new_tax_tab + suffix <- paste0(basename(ref_fasta), "_", method) + } + if (method == "dada2") { + tax_tab <- + dada2::assignTaxonomy(physeq@refseq, refFasta = ref_fasta, ...) + colnames(tax_tab) <- + make.unique(paste0(colnames(tax_tab), "_", suffix)) + new_tax_tab <- tax_table(cbind(physeq@tax_table, tax_tab)) + new_physeq <- physeq + tax_table(new_physeq) <- new_tax_tab + } else if (method == "sintax") { + new_physeq <- assign_sintax(physeq, ref_fasta = ref_fasta, suffix = suffix, ...) + } else if (method == "lca") { + new_physeq <- assign_vsearch_lca(physeq, ref_fasta = ref_fasta, suffix = suffix, ...) + } # else if(method=="idtaxa") { + # new_physeq <- assign_idtaxa(physeq, ref_fasta = ref_fasta, suffix=suffix,...) + # } + return(new_physeq) } ################################################################################ @@ -2420,8 +2527,7 @@ cutadapt_remove_primers <- function(path_to_fastq, nb_files = Inf, cmd_is_run = TRUE, return_file_path = FALSE, - args_before_cutadapt = "source ~/miniconda3/etc/profile.d/conda.sh && conda activate cutadaptenv && " - ) { + args_before_cutadapt = "source ~/miniconda3/etc/profile.d/conda.sh && conda activate cutadaptenv && ") { cmd <- list() if (!dir.exists(folder_output)) { dir.create(folder_output) @@ -2513,7 +2619,7 @@ cutadapt_remove_primers <- function(path_to_fastq, )) unlink(paste0(tempdir(), "/script_cutadapt.sh")) } - if(return_file_path){ + if (return_file_path) { return(normalizePath(folder_output)) } else { return(cmd) @@ -2544,20 +2650,19 @@ cutadapt_remove_primers <- function(path_to_fastq, #' #' @author Adrien Taudière #' @examples -#' data_fungi_mini_woNA4height <- subset_samples( -#' data_fungi_mini, -#' !is.na(data_fungi_mini@sam_data$Height) -#' ) -#' taxa_only_in_one_level(data_fungi_mini_woNA4height, "Height", "High") -#' #' # Taxa present only in low height samples -#' suppressMessages(suppressWarnings( -#' taxa_only_in_one_level(data_fungi, "Height", "Low") -#' )) -#' # Number of taxa present only in sample of time equal to 15 -#' suppressMessages(suppressWarnings( -#' length(taxa_only_in_one_level(data_fungi, "Time", "15")) -#' )) - +#' data_fungi_mini_woNA4height <- subset_samples( +#' data_fungi_mini, +#' !is.na(data_fungi_mini@sam_data$Height) +#' ) +#' taxa_only_in_one_level(data_fungi_mini_woNA4height, "Height", "High") +#' #' # Taxa present only in low height samples +#' suppressMessages(suppressWarnings( +#' taxa_only_in_one_level(data_fungi, "Height", "Low") +#' )) +#' # Number of taxa present only in sample of time equal to 15 +#' suppressMessages(suppressWarnings( +#' length(taxa_only_in_one_level(data_fungi, "Time", "15")) +#' )) taxa_only_in_one_level <- function(physeq, modality, level, @@ -2856,7 +2961,7 @@ taxa_as_rows <- function(physeq) { #' (in this case, reproducibly random subsampling). If set to FALSE, then no #' iddling with the RNG seed is performed, and it is up to the user to #' appropriately call -#' @param verbose (logical). If TRUE, print additional informations. +#' @param verbose (logical). If TRUE, print additional information. #' @export #' @author Adrien Taudière #' @return A new \code{\link[phyloseq]{phyloseq-class}} object. diff --git a/R/krona.R b/R/krona.R index f9982d37..0b2a5f7f 100644 --- a/R/krona.R +++ b/R/krona.R @@ -18,7 +18,8 @@ #' @param add_unassigned_rank (int) Add unassigned for rank #' inferior to 'add_unassigned_rank' when necessary. #' @param name A name for intermediary files, Useful to name -#' your krona result files before merging using [merge_krona()] +#' your krona result files before merging using [merge_krona()]. +#' Must not contain space. #' #' @examplesIf tolower(Sys.info()[["sysname"]]) != "windows" && MiscMetabar::is_krona_installed() #' data("GlobalPatterns", package = "phyloseq") @@ -43,13 +44,6 @@ krona <- ranks = "All", add_unassigned_rank = 0, name = NULL) { - if (ranks[1] == "All") { - ranks <- seq_along(physeq@tax_table[1, ]) - } - - df <- data.frame(unclass(physeq@tax_table[, ranks])) - df$ASVs <- rownames(physeq@tax_table) - if (is.null(name)) { if (nb_seq) { name <- "Number.of.sequences_temp" @@ -58,6 +52,21 @@ krona <- } } + if (grepl(" ", name)) { + stop("The name parameter must not contain space") + } + + if (ranks[1] == "All") { + message( + "All the ", ncol(physeq@tax_table), + " taxonomic present in the tax_table will be used for krona plot!" + ) + ranks <- seq_along(physeq@tax_table[1, ]) + } + + df <- data.frame(unclass(physeq@tax_table[, ranks])) + df$ASVs <- rownames(physeq@tax_table) + if (nb_seq) { df$nb_seq <- taxa_sums(physeq) } else { diff --git a/R/plot_functions.R b/R/plot_functions.R index 530c5527..68a810fa 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -313,7 +313,7 @@ accu_plot <- #' @param sample.size (int) A single integer value equal to the number of #' reads being simulated, also known as the depth. See #' [phyloseq::rarefy_even_depth()]. -#' @param verbose (logical). If TRUE, print additional informations. +#' @param verbose (logical). If TRUE, print additional information. #' @param ... Other params for be passed on to [accu_plot()] function #' #' @export @@ -2613,7 +2613,7 @@ multi_biplot_pq <- function(physeq, #' with NA in the `split_by` variable of the `physeq@sam_data` slot #' @param clean_pq (logical) #' If set to TRUE, empty samples are discarded after subsetting ASV -#' @return A ggplot2 graphic +#' @return A ggplot2 object #' @export #' @author Adrien Taudière #' @seealso [tax_bar_pq()] and [multitax_bar_pq()] @@ -2800,7 +2800,7 @@ plot_tax_pq <- #' @param log10trans (logical, default TRUE) If TRUE, #' the number of sequences (or ASV if nb_seq = FALSE) is log10 #' transformed. -#' @return A ggplot2 graphic +#' @return A ggplot2 object #' @export #' #' @author Adrien Taudière @@ -3653,6 +3653,8 @@ tax_bar_pq <- #' the number of sequences (or ASV if nb_seq = FALSE) is log10 #' transformed. #' @param tax_level The taxonomic level to fill ridges +#' @param type Either "density" (the default) or "ecdf" to plot a +#' plot a cumulative version using [ggplot2::stat_ecdf()] #' @param ... Other params passed on to [ggridges::geom_density_ridges()] #' #' @return A \code{\link[ggplot2]{ggplot}}2 plot with bar representing the number of sequence en each @@ -3666,6 +3668,7 @@ tax_bar_pq <- #' \donttest{ #' if (requireNamespace("ggridges")) { #' ridges_pq(data_fungi_mini, "Time", alpha = 0.5, scale = 0.9) +#' ridges_pq(data_fungi_mini, "Time", alpha = 0.5, scale = 0.9, type = "ecdf") #' ridges_pq(data_fungi_mini, "Sample_names", log10trans = TRUE) + facet_wrap("~Height") #' #' ridges_pq(data_fungi_mini, @@ -3682,30 +3685,49 @@ ridges_pq <- function(physeq, nb_seq = TRUE, log10trans = TRUE, tax_level = "Class", + type = "density", ...) { psm <- psmelt(physeq) - psm <- psm %>% filter(Abundance > 0) + psm <- psm %>% dplyr::filter(Abundance > 0) if (log10trans) { psm$Abundance <- log10(psm$Abundance) } if (nb_seq) { - p <- ggplot(psm, aes(y = factor(.data[[fact]]), x = Abundance)) + - ggridges::geom_density_ridges(aes(fill = .data[[tax_level]]), ...) + - xlim(c(0, NA)) + p <- ggplot( + psm, + aes( + y = factor(.data[[fact]]), + x = Abundance, + fill = .data[[tax_level]], + color = .data[[tax_level]] + ) + ) } else { psm_asv <- psm %>% group_by(.data[[fact]], OTU, .data[[tax_level]]) %>% summarise("count" = n()) - p <- - ggplot(psm_asv, aes(y = factor(.data[[fact]]), x = count)) + - ggridges::geom_density_ridges( - aes(fill = .data[[tax_level]]), - ... - ) + + p <- ggplot( + psm_asv, + aes( + y = factor(.data[[fact]]), + x = count, + fill = .data[[tax_level]], + color = .data[[tax_level]] + ) + ) + } + + if (type == "density") { + p <- p + + ggridges::geom_density_ridges(aes(), ...) + xlim(c(0, NA)) + } else if (type == "ecdf") { + p <- p + stat_ecdf(aes(y = NULL)) + + facet_wrap(fact, ncol = 2) + + theme_minimal() + ylab("Probability") } return(p) } @@ -3736,7 +3758,7 @@ ridges_pq <- function(physeq, #' legend of color for lvl 1 #' @param ... Other arguments passed on to [treemapify::geom_treemap()] function. #' -#' @return A ggplot2 graphic +#' @return A ggplot2 object #' @export #' #' @author Adrien Taudière @@ -4324,7 +4346,6 @@ plot_refseq_extremity_pq <- function( last_n = 10, hill_scales = c(1, 2), min_width = 0) { - if (min_width > 0) { cond <- Biostrings::width(physeq@refseq) > min_width names(cond) <- taxa_names(physeq) @@ -4337,10 +4358,12 @@ plot_refseq_extremity_pq <- function( letters_sequences <- c(strsplit(as.vector(IRanges::narrow(physeq@refseq, end = end_n)), - split = "" - )) - letters_sequences <- lapply(letters_sequences, `length<-`, - max(lengths(letters_sequences))) + split = "" + )) + letters_sequences <- lapply( + letters_sequences, `length<-`, + max(lengths(letters_sequences)) + ) tib_interm <- t(data.frame(letters_sequences)) nucleotide_first_interm <- data.frame( @@ -4375,7 +4398,8 @@ plot_refseq_extremity_pq <- function( if (!is.null(hill_scales)) { renyi_nucleotide <- vegan::renyi(nucleotide_first_interm[, c("nb_A", "nb_C", "nb_G", "nb_T")], - scales = hill_scales) + scales = hill_scales + ) if (inherits(renyi_nucleotide, "numeric")) { renyi_nucleotide <- data.frame(renyi_nucleotide) @@ -4399,7 +4423,8 @@ plot_refseq_extremity_pq <- function( renyi_nucleotide$seq_id <- sort(rep(1:ncol(tib_interm), - times = nrow(renyi_nucleotide) / ncol(tib_interm))) + times = nrow(renyi_nucleotide) / ncol(tib_interm) + )) p_start <- p_start + geom_line(data = renyi_nucleotide, aes(x = seq_id, y = value, color = name)) @@ -4411,10 +4436,14 @@ plot_refseq_extremity_pq <- function( if (!is.null(last_n)) { tib_interm_last <- t(data.frame(letters = c( - strsplit(as.vector( - IRanges::narrow(physeq@refseq, - start = Biostrings::width(physeq@refseq) - last_n)), - split = "") + strsplit( + as.vector( + IRanges::narrow(physeq@refseq, + start = Biostrings::width(physeq@refseq) - last_n + ) + ), + split = "" + ) ))) nucleotide_last_interm <- data.frame( @@ -4531,3 +4560,427 @@ plot_refseq_pq <- function(physeq, hill_scales = NULL, first_n = min(Biostrings: res <- plot_refseq_extremity_pq(physeq = physeq, hill_scales = hill_scales, first_n = first_n, last_n = last_n, min_width = min_width) return(res$plot_start) } + + +################################################################################ +#' Discard legend in ggplot2 +#' +#' @description +#' +#' +#' lifecycle-stable +#' +#' A more memorable shortcut for theme(legend.position = "none"). +#' +#' @export +#' @return A ggplot2 object +#' @author Adrien Taudière +#' @examples +#' plot_refseq_pq(data_fungi) +#' plot_refseq_pq(data_fungi) + no_legend() +no_legend <- function() { + list(theme(legend.position = "none")) +} +################################################################################ + +################################################################################ +#' Hill Diversities and Corresponding Accumulation Curves for phyloseq +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Basically a wrapper of [vegan::renyi()] and +#' [vegan::renyiaccum()] functions +#' +#' @inheritParams clean_pq +#' @param merge_sample_by a vector to determine which samples to merge using +#' the [merge_samples2()] function. Need to be in `physeq@sam_data` +#' @param color_fac (optional): The variable to color the barplot. For ex. +#' same as fact. If merge_sample_by is set, color_fac must be nested in +#' the merge_sample_by factor. See examples. +#' @param hill_scales Scales of Rényi diversity. +#' @param nperm (int Default NULL) If a integer is set to nperm, nperm +#' permutation are computed to draw confidence interval for each curves. +#' The function use [vegan::renyi()] if nperm is NULL and +#' [vegan::renyiaccum()] else. +#' @param na_remove (logical, default FALSE) If set to TRUE, remove samples with +#' NA in the variables set in merge_sample_by. Not used if merge_sample_by is +#' NULL. +#' @param wrap_factor (logical, default TRUE) Do the plot is wrap by the factor +#' @param plot_legend (logical, default TRUE) If set to FALSE, +#' no legend are plotted. +#' @param linewidth (int, default 2) The linewidth of lines. +#' @param size_point (int, default 1) The size of the point. +#' +#' @param ... Other arguments passed on to [vegan::renyi()] function or +#' [vegan::renyiaccum()] if nperm is not NULL. +#' +#' @export +#' @author Adrien Taudière +#' @return A ggplot2 object +#' @examples +#' if (requireNamespace("vegan")) { +#' hill_curves_pq(data_fungi_mini, merge_sample_by = "Time") +#' hill_curves_pq(data_fungi_mini, color_fac = "Time", plot_legend = FALSE) +#' hill_curves_pq(data_fungi_mini, +#' color_fac = "Time", plot_legend = FALSE, +#' nperm = 9, size_point = 1, linewidth = 0.5 +#' ) +#' +#' hill_curves_pq(data_fungi_mini, +#' nperm = 9, plot_legend = FALSE, size_point = 1, +#' linewidth = 0.5 +#' ) +#' hill_curves_pq(data_fungi_mini, "Height", +#' hill_scales = c(0, 1, 2, 8), plot_legend = FALSE +#' ) +#' hill_curves_pq(data_fungi_mini, "Height", +#' hill_scales = c(0, 0.5, 1, 2, 4, 8), +#' nperm = 9 +#' ) +#' hill_curves_pq(data_fungi_mini, "Height", nperm = 9, wrap_factor = FALSE) +#' +#' data_fungi_mini@sam_data$H_T <- paste0( +#' data_fungi_mini@sam_data$Height, +#' "_", data_fungi_mini@sam_data$Time +#' ) +#' merge_samples2(data_fungi_mini, "H_T") +#' hill_curves_pq(data_fungi_mini, "H_T", color_fac = "Time", nperm = 9) +#' } +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to [vegan::renyi()] or +#' [vegan::renyiaccum()] functions +#' +hill_curves_pq <- function(physeq, + merge_sample_by = NULL, + color_fac = NULL, + hill_scales = c(0, 0.25, 0.5, 1, 2, 4, 8, 16, 32, 64, Inf), + nperm = NULL, + na_remove = TRUE, + wrap_factor = TRUE, + plot_legend = TRUE, + linewidth = 2, + size_point = 2, + ...) { + verify_pq(physeq) + + if (na_remove && !is.null(merge_sample_by)) { + new_physeq <- + subset_samples_pq(physeq, !is.na(physeq@sam_data[[merge_sample_by]])) + if (nsamples(physeq) - nsamples(new_physeq) > 0) { + message( + paste0( + nsamples(physeq) - nsamples(new_physeq), + " were discarded due to NA in variables present in formula." + ) + ) + } + physeq <- new_physeq + } + if (!is.null(merge_sample_by)) { + physeq <- merge_samples2(physeq, merge_sample_by) + } + + physeq <- clean_pq( + physeq, + force_taxa_as_rows = TRUE, + remove_empty_samples = FALSE, + remove_empty_taxa = FALSE, + clean_samples_names = FALSE + ) + + if (!is.null(nperm)) { + df_hill <- + vegan::renyiaccum( + t(physeq)@otu_table, + scales = hill_scales, + permutation = nperm, + hill = TRUE, + ... + ) + what <- c("Collector", "mean", "Qnt 0.025", "Qnt 0.975") + what <- what[what %in% dimnames(df_hill)[[3]]] + if (any(what %in% dimnames(df_hill)[[3]])) { + df_hill <- df_hill[, , what, drop = FALSE] + } + dm <- dim(df_hill) + dnam <- dimnames(df_hill) + lin <- rep(dnam[[3]], each = dm[1] * dm[2]) + alp <- factor(dnam[[2]], levels = dnam[[2]]) + alpha <- rep(rep(alp, each = dm[1]), len = prod(dm)) + diversity <- as.vector(df_hill) + + if (is.null(color_fac)) { + modality <- rep(sample_names(physeq), len = prod(dm)) + } else { + modality <- rep(levels(as.factor(physeq@sam_data[, color_fac][[1]])), len = prod(dm)) + } + + samp_names <- rep(sample_names(physeq), len = prod(dm)) + + df_plot <- data.frame( + diversity = diversity, + Type = lin, + Modality = modality, + alpha_hill = alpha, + samp_names = samp_names + ) + + p <- ggplot( + df_plot, + aes( + y = diversity, + x = alpha_hill, + color = Modality, + group = samp_names + ) + ) + + geom_point(data = subset(df_plot, Type == "mean"), size = size_point) + + geom_line( + data = subset(df_plot, Type == "mean"), + linetype = 1, + linewidth = linewidth + ) + + geom_line( + data = subset(df_plot, Type == "Qnt 0.025"), + linetype = 2, + linewidth = linewidth / 3 + ) + + geom_line( + data = subset(df_plot, Type == "Qnt 0.975"), + linetype = 2, + linewidth = linewidth / 3 + ) + if (wrap_factor) { + p <- p + + facet_wrap("Modality") + } + } else { + df_hill <- + vegan::renyi(t(physeq)@otu_table, scales = hill_scales, hill = TRUE) + if (inherits(df_hill, "data.frame")) { + if (is.null(color_fac)) { + modality <- sample_names(physeq) + } else { + modality <- factor(rep(rownames(df_hill), ncol(df_hill)), levels = rownames(df_hill)) + } + + alp <- factor(rep(colnames(df_hill), each = nrow(df_hill)), levels = colnames(df_hill)) + div <- as.vector(as.matrix(df_hill)) + df_plot <- data.frame( + diversity = div, + Modality = modality, + alpha_hill = alp + ) + } else { + df_plot <- data.frame( + diversity = x, + alpha = factor(names(x), levels = names(x)), + plot = "plot" + ) + lo <- hi <- med <- NA + } + p <- ggplot( + df_plot, + aes( + y = diversity, + x = alpha_hill, + color = Modality, + group = Modality + ) + ) + + geom_point(size = 2) + + geom_line() + } + + if (!plot_legend) { + p <- p + no_legend() + } + return(p) +} +################################################################################ + + +################################################################################ +#' Computes a manifold approximation and projection (UMAP) for +#' phyloseq object +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' https://journals.asm.org/doi/full/10.1128/msystems.00691-21 +#' +#' @inheritParams clean_pq +#' @param pkg Which R packages to use, either "umap" or "uwot". +#' @param ... Others arguments passed on to [umap::umap()] or +#' [uwot::umap2()] function. +#' For example `n_neighbors` set the number of nearest neighbors (Default 15). +#' See [umap::umap.defaults()] or [uwot::umap2()] for the list of +#' parameters and default values. +#' +#' @return A dataframe with samples informations and the x_umap and y_umap position +#' @author Adrien Taudière +#' @export +#' @seealso [umap::umap()], [tsne_pq()], [phyloseq::plot_ordination()] +#' @examples +#' library("umap") +#' df_umap <- umap_pq(data_fungi_mini) +#' ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) + +#' geom_point(size = 2) +#' +#' # library(patchwork) +#' # physeq <- data_fungi_mini +#' # res_tsne <- tsne_pq(data_fungi_mini) +#' # df_umap_tsne <- df_umap +#' # df_umap_tsne$x_tsne <- res_tsne$Y[, 1] +#' # df_umap_tsne$y_tsne <- res_tsne$Y[, 2] +#' # ((ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) + +#' # geom_point(size = 2) + +#' # ggtitle("UMAP")) + (plot_ordination(physeq, +#' # ordination = +#' # ordinate(physeq, method = "PCoA", distance = "bray"), color = "Height" +#' # )) + +#' # ggtitle("PCoA")) / +#' # ((ggplot(df_umap_tsne, aes(x = x_tsne, y = y_tsne, col = Height)) + +#' # geom_point(size = 2) + +#' # ggtitle("tsne")) + +#' # (plot_ordination(physeq, +#' # ordination = ordinate(physeq, method = "NMDS", distance = "bray"), +#' # color = "Height" +#' # ) + +#' # ggtitle("NMDS"))) + +#' # patchwork::plot_layout(guides = "collect") +#' +#' df_uwot <- umap_pq(data_fungi_mini, pkg = "uwot") +#' +#' (ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) + +#' geom_point(size = 2) + +#' ggtitle("umap::umap")) / +#' (ggplot(df_uwot, aes(x = x_umap, y = y_umap, col = Height)) + +#' geom_point(size = 2) + +#' ggtitle("uwot::umap2")) +#' +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to `umap::umap()` if you +#' use this function. + +umap_pq <- function(physeq, pkg = "umap", ...) { + verify_pq(physeq) + physeq <- MiscMetabar::taxa_as_columns(physeq) + + psm_samp <- psmelt_samples_pq(physeq) + if (pkg == "umap") { + res_umap <- umap::umap(as.matrix(unclass(physeq@otu_table)), ...) + umap_layout <- as_tibble(res_umap$layout) + umap_layout$Sample <- rownames(res_umap$layout) + names(umap_layout) <- c("x_umap", "y_umap", "Sample") + } else if (pkg == "uwot") { + res_umap <- uwot::umap2(as.matrix(unclass(physeq@otu_table)), ...) + umap_layout <- as_tibble(res_umap) + umap_layout$Sample <- rownames(res_umap) + names(umap_layout) <- c("x_umap", "y_umap", "Sample") + } else { + stop("Param pkg must be set to 'umap' or 'uwot'.") + } + + df_umap <- left_join(umap_layout, psm_samp) + + return(df_umap) +} +################################################################################ + +################################################################################ +#' Plot kmer complexity of references sequences of a phyloseq object +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Basically a wrapper of [dada2::seqComplexity()] +#' +#' @inheritParams clean_pq +#' @param kmer_size int (default 2) The size of the kmers +#' (or "oligonucleotides" or "words") to use. +#' @param window (int, default NULL) The width in nucleotides of the moving +#' window. If NULL the whole sequence is used. +#' @param by (int, default 5) The step size in nucleotides between each moving +#' window tested. +#' @param bins (int, default 100). The number of bins to use for the histogram. +#' @param aggregate (logical, default FALSE) If TRUE, compute an aggregate quality profile +#' for all samples +#' @param vline_random_kmer (logical, default TRUE) If TRUE, add a vertical line +#' at the value for random kmer (equal to 4^kmerSize)) +#' @param ... Arguments passed on to geom_histogram. +#' +#' @return A ggplot2 object +#' @export +#' @author Adrien Taudière +#' @seealso [dada2::seqComplexity()], [dada2::plotComplexity()] +#' @examples +#' plot_complexity_pq(subset_samples(data_fungi_mini, Height == "High"), +#' vline_random_kmer = FALSE +#' ) +#' plot_complexity_pq(subset_samples(data_fungi_mini, Height == "Low"), +#' aggregate = FALSE, kmer_size = 4 +#' ) +#' # plot_complexity_pq(subset_samples(data_fungi, Height == "Low"), kmer_size = 4) +#' +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please make a reference to [dada2::seqComplexity()] + +plot_complexity_pq <- function(physeq, + kmer_size = 2, + window = NULL, + by = 5, + bins = 100, + aggregate = FALSE, + vline_random_kmer = TRUE, + ...) { + if (aggregate) { + refseq_complex <- seqComplexity(physeq@refseq, + kmerSize = kmer_size, window = window, + by = by + ) + } else { + refseq_complex <- vector("list", length = nsamples(physeq)) + names(refseq_complex) <- sample_names(physeq) + for (sam in sample_names(physeq)) { + physeq_interm <- subset_samples_pq(physeq, sample_names(physeq) == sam) + refseq_complex[[sam]] <- seqComplexity(physeq_interm@refseq, + kmerSize = kmer_size, window = window, + by = by + ) + } + df <- data.frame(complexity = unlist(refseq_complex), file = rep(sample_names(physeq), + times = sapply(refseq_complex, length) + )) + } + + p <- ggplot(data = df, aes(x = complexity)) + + geom_histogram( + bins = bins, + na.rm = TRUE, ... + ) + + ylab("Count") + + xlab("Effective Oligonucleotide Number") + + theme_bw() + + facet_wrap(~file) + + scale_x_continuous( + limits = c(0, 4^kmer_size), + breaks = seq(0, 4^kmer_size, (4^kmer_size) / 4) + ) + if (vline_random_kmer) { + p <- p + + geom_vline(xintercept = 4^kmer_size, color = "red") + } + return(p) +} +################################################################################ diff --git a/R/targets_misc.R b/R/targets_misc.R index 1fdca990..8f64c6ab 100644 --- a/R/targets_misc.R +++ b/R/targets_misc.R @@ -21,7 +21,8 @@ #' @examples #' list_fastq_files(system.file("extdata", package = "MiscMetabar")) #' list_fastq_files(system.file("extdata", package = "MiscMetabar"), -#' paired_end = FALSE, pattern_R1 = "") +#' paired_end = FALSE, pattern_R1 = "" +#' ) #' #' @author Adrien Taudière @@ -174,20 +175,28 @@ filter_trim <- ) dir.create(output_rev) dir.create(output_fw) - file.rename(paste0(output_rev, "interm"), - paste0(output_rev, "/", basename(rev))) - file.rename(paste0(output_fw, "interm"), - paste0(output_fw, "/", basename(fw))) + file.rename( + paste0(output_rev, "interm"), + paste0(output_rev, "/", basename(rev)) + ) + file.rename( + paste0(output_fw, "interm"), + paste0(output_fw, "/", basename(fw)) + ) return(list("fw" = output_fw, "rv" = output_rev)) } else { - dada2::filterAndTrim(filt = paste0(output_fw, "interm"), - fwd = fw, - ...) + dada2::filterAndTrim( + filt = paste0(output_fw, "interm"), + fwd = fw, + ... + ) dir.create(output_fw) - file.rename(paste0(output_fw, "interm"), - paste0(output_fw, "/", basename(fw))) + file.rename( + paste0(output_fw, "interm"), + paste0(output_fw, "/", basename(fw)) + ) return(output_fw) } } else { @@ -291,7 +300,7 @@ rename_samples <- function(phyloseq_component, names_of_samples, taxa_are_rows = FALSE) { if (is.null(sample_names(phyloseq_component)) && - inherits(phyloseq_component, "matrix")) { + inherits(phyloseq_component, "matrix")) { phyloseq_component <- otu_table(phyloseq_component, taxa_are_rows = taxa_are_rows) } if (length(names_of_samples) != length(sample_names(phyloseq_component))) { @@ -381,20 +390,28 @@ sam_data_matching_names <- function(path_sam_data, rename(common_names = clean_fastq) if (sum(is.na(tib_j$raw_fastq)) > 0) { - message(sum(is.na(tib_j$raw_fastq)), - " samples in sam_data files are not present in fastq_files") + message( + sum(is.na(tib_j$raw_fastq)), + " samples in sam_data files are not present in fastq_files" + ) if (verbose) { - warning(tib_j$raw_sam[is.na(tib_j$raw_fastq)], - "not_matching_names_from_sam_data.txt") + warning( + tib_j$raw_sam[is.na(tib_j$raw_fastq)], + "not_matching_names_from_sam_data.txt" + ) } } if (sum(is.na(tib_j$raw_sam)) > 0) { - message(sum(is.na(tib_j$raw_sam)), - " samples in fastq files are not present in sam_data") + message( + sum(is.na(tib_j$raw_sam)), + " samples in fastq files are not present in sam_data" + ) if (verbose) { - warning(tib_j$raw_fastq[is.na(tib_j$raw_sam)], - "not_matching_names_from_fastq_files.txt") + warning( + tib_j$raw_fastq[is.na(tib_j$raw_sam)], + "not_matching_names_from_fastq_files.txt" + ) } if (remove_undocumented_fastq_files) { if (verbose) { diff --git a/R/vsearch.R b/R/vsearch.R index 2f1ed378..67283c5d 100644 --- a/R/vsearch.R +++ b/R/vsearch.R @@ -13,7 +13,7 @@ #' or (ii) a character vector that will be convert to DNAstringSet using #' [Biostrings::DNAStringSet()] #' @param path_to_fasta (required if seq2search is NULL) a path to fasta file if seq2search is est to NULL. -#' @param vsearchpath (default: vsearch) path to vsearch +#' @param vsearchpath (default: "vsearch") path to vsearch #' @param id (default: 0.8) id for the option `--usearch_global` of the vsearch software #' @param iddef (default: 0) iddef for the option `--usearch_global` of the vsearch software #' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files @@ -351,7 +351,7 @@ swarm_clustering <- function(physeq = NULL, #' @param nproc (default: 1) #' Set to number of cpus/processors to use for the clustering #' @param id (default: 0.97) level of identity to cluster -#' @param vsearchpath (default: vsearch) path to vsearch +#' @param vsearchpath (default: "vsearch") path to vsearch #' @param tax_adjust (Default 0) See the man page #' of [merge_taxa_vec()] for more details. #' To conserved the taxonomic rank of the most abundant ASV, @@ -611,7 +611,11 @@ chimera_removal_vs <- return(new_physeq) } } +################################################################################ + + +################################################################################ #' Detect for chimera taxa using [vsearch](https://github.com/torognes/vsearch) #' #' @description @@ -624,7 +628,7 @@ chimera_removal_vs <- #' [Biostrings::DNAStringSet()] #' @param nb_seq (required) a numeric vector giving the number of sequences for #' each DNA sequences -#' @param vsearchpath (default: vsearch) path to vsearch +#' @param vsearchpath (default: "vsearch") path to vsearch #' @param abskew (int, default 2) The abundance skew is used to distinguish in a #' three way alignment which sequence is the chimera and which are the #' parents. The assumption is that chimeras appear later in the PCR @@ -748,3 +752,491 @@ chimera_detection_vs <- function(seq2search, ) ) } +################################################################################ + +################################################################################ +#' Write a temporary fasta file (internal use) +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Write a fasta file from either a biostring object seq2search or a +#' refseq slot from a phyloseq object +#' +#' @inheritParams assign_sintax +#' @param temporary_fasta_file The name of a temporary_fasta_file (default "temp.fasta") +#' @seealso [assign_sintax()], [assign_vsearch_lca] +#' @return Nothing, produce a fasta file +#' @keywords internal +#' @noRd +#' + +write_temp_fasta <- function(physeq, + seq2search, + temporary_fasta_file = "temp.fasta", + behavior = NULL, + clean_pq = TRUE, + verbose = TRUE) { + if (!is.null(physeq) && !is.null(seq2search)) { + stop("You must enter a single parameter from physeq and seq2search.") + } else if (is.null(seq2search)) { + verify_pq(physeq) + if (is.null(physeq@refseq)) { + stop("The phyloseq object do not contain a @refseq slot") + } + if (clean_pq) { + physeq <- clean_pq(physeq, silent = !verbose) + } + dna <- Biostrings::DNAStringSet(physeq@refseq) + Biostrings::writeXStringSet(dna, temporary_fasta_file) + } else if (is.null(physeq)) { + Biostrings::writeXStringSet(seq2search, temporary_fasta_file) + if (behavior == "add_to_phyloseq") { + stop("You can't use behavior = 'add_to_phyloseq' with seq2search param.") + } + } else if (is.null(physeq) && is.null(seq2search)) { + stop("You must specify either physeq or seq2search parameter.") + } +} +################################################################################ + +################################################################################ +#' Assign Taxonomy using Sintax algorithm of Vsearch +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Please cite [Vsearch](https://github.com/torognes/vsearch) +#' if you use this function to assign taxonomy. +#' +#' @inheritParams clean_pq +#' @param seq2search A DNAStringSet object of sequences to search for. +#' @param ref_fasta (required) A link to a database in vsearch format +#' The reference database must contain taxonomic information in the header of +#' each sequence in the form of a string starting with ";tax=" and followed +#' by a comma-separated list of up to nine taxonomic identifiers. Each taxonomic +#' identifier must start with an indication of the rank by one of the letters d +#' (for domain) k (kingdom), p (phylum), c (class), o (order), f (family), +#' g (genus), s (species), or t (strain). The letter is followed by a colon +#' (:) and the name of that rank. Commas and semicolons are not allowed in +#' the name of the rank. Non-ascii characters should be avoided in the names. +#' +#' Example: +#' +#' \>X80725_S000004313;tax=d:Bacteria,p:Proteobacteria,c:Gammaproteobacteria,o:Enterobacteriales,f:Enterobacteriaceae,g:Escherichia/Shigella,s:Escherichia_coli,t:str._K-12_substr._MG1655 +#' @param behavior Either "return_matrix" (default), "return_cmd", +#' or "add_to_phyloseq": +#' +#' - "return_matrix" return a list of two matrix with taxonomic value in the +#' first element of the list and bootstrap value in the second one. +#' +#' - "return_cmd" return the command to run without running it. +#' +#' - "add_to_phyloseq" return a phyloseq object with amended slot `@taxtable`. +#' Only available if using physeq input and not seq2search input. +#' +#' @param vsearchpath (default: "vsearch") path to vsearch +#' @param clean_pq (logical, default TRUE) +#' If set to TRUE, empty samples and empty ASV are discarded +#' before clustering. +#' @param nproc (default: 1) +#' Set to number of cpus/processors to use +#' @param suffix (character) The suffix to name the new columns. +#' If set to "" (the default), the taxo_rank algorithm is used +#' without suffix. +#' @param taxo_rank A list with the name of the taxonomic rank present in +#' ref_fasta +#' @param min_boostrap (Int. \[0:1\], default 0.5) +#' Minimum bootstrap value to inform taxonomy. For each bootstrap +#' below the min_boostrap value, the taxonomy information is set to NA. +#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files? +#' +#' - temporary_fasta_file (default "temp.fasta") : the fasta file from physeq +#' or seq2search +#' +#' - "output_taxo_vs.txt" : see Vsearch Manual for parameter --tabbedout +#' +#' @param verbose (logical). If TRUE, print additional information. +#' @param temporary_fasta_file The name of a temporary_fasta_file (default "temp.fasta") +#' @param cmd_args Other arguments to be passed on to vsearch sintax cmd. +#' By default cmd_args is equal to "--sintax_random" as recommended by +#' [Torognes](https://github.com/torognes/vsearch/issues/535). +#' @return See param behavior +#' @examplesIf MiscMetabar::is_vsearch_installed() +#' \donttest{ +#' assign_sintax(data_fungi_mini, +#' ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"), +#' behavior = "return_cmd" +#' ) +#' +#' data_fungi_mini_new <- assign_sintax(data_fungi_mini, +#' ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"), +#' behavior = "add_to_phyloseq" +#' ) +#' +#' assignation_results <- assign_sintax(data_fungi_mini, +#' ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar") +#' ) +#' +#' left_join( +#' tidyr::pivot_longer(assignation_results$taxo_value, -taxa_names), +#' tidyr::pivot_longer(assignation_results$taxo_boostrap, -taxa_names), +#' by = join_by(taxa_names, name), +#' suffix = c("rank", "bootstrap") +#' ) |> +#' mutate(name = factor(name, levels = c("K", "P", "C", "O", "F", "G", "S"))) |> +#' # mutate(valuerank = forcats::fct_reorder(valuerank, as.integer(name), .desc = TRUE)) |> +#' ggplot(aes(valuebootstrap, +#' valuerank, +#' fill = name +#' )) + +#' geom_jitter(alpha = 0.8, aes(color = name)) + +#' geom_boxplot(alpha = 0.3) +#' } +#' @export +#' @author Adrien Taudière +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please cite [vsearch](https://github.com/torognes/vsearch). +assign_sintax <- function(physeq = NULL, + seq2search = NULL, + ref_fasta = NULL, + behavior = "return_matrix", + vsearchpath = "vsearch", + clean_pq = TRUE, + nproc = 1, + suffix = "", + taxo_rank = c("K", "P", "C", "O", "F", "G", "S"), + min_boostrap = 0.5, + keep_temporary_files = FALSE, + verbose = TRUE, + temporary_fasta_file = "temp.fasta", + cmd_args = "--sintax_random") { + write_temp_fasta( + physeq = physeq, + seq2search = seq2search, + temporary_fasta_file = temporary_fasta_file, + behavior = behavior, + clean_pq = clean_pq, + verbose = verbose + ) + + if (verbose) { + message("Start Vsearch sintax") + } + + cmd_sintax <- + paste0( + " --sintax ", + temporary_fasta_file, + " --db ", + ref_fasta, + " --tabbedout output_taxo_vs.txt ", + " --threads ", + nproc, + " ", + cmd_args + ) + + if (behavior == "return_cmd") { + if (!keep_temporary_files) { + unlink(temporary_fasta_file) + } + return("sintax" = paste0(vsearchpath, " ", cmd_sintax)) + } + + system2(vsearchpath, + args = cmd_sintax, + stdout = TRUE, + stderr = TRUE + ) + + res_sintax <- read.csv("output_taxo_vs.txt", sep = "\t", header = F) + taxa_names <- res_sintax$V1 + res_sintax <- tibble(res_sintax$V2, taxa_names) + res_sintax <- res_sintax |> + tidyr::separate_wider_delim(-taxa_names, names = paste0(taxo_rank, suffix), delim = ",") |> + tidyr::pivot_longer(-taxa_names) |> + tidyr::separate_wider_delim( + value, + names_sep = "", + names = c("", "_bootstrap"), + delim = "(" + ) |> + mutate(across(value_bootstrap, ~ as.numeric(gsub(")", "", .x)))) |> + mutate(across(value, ~ gsub(".:", "", .x))) + + res_sintax_wide_bootstrap <- + res_sintax |> + select(-value) |> + tidyr::pivot_wider(names_from = name, values_from = value_bootstrap) + + res_sintax_wide_taxo <- + res_sintax |> + select(-value_bootstrap) |> + tidyr::pivot_wider(names_from = name, values_from = value) + + if (!is.null(min_boostrap)) { + res_sintax_wide_taxo_filter <- res_sintax_wide_taxo + res_sintax_wide_taxo_filter[res_sintax_wide_bootstrap < min_boostrap] <- NA + } + + if (!keep_temporary_files) { + unlink(temporary_fasta_file) + unlink("output_taxo_vs.txt") + } + + if (behavior == "add_to_phyloseq") { + tax_tab <- as.data.frame(as.matrix(physeq@tax_table)) + tax_tab$taxa_names <- taxa_names(physeq) + + new_physeq <- physeq + new_tax_tab <- left_join(tax_tab, res_sintax_wide_taxo_filter, + by = join_by(taxa_names) + ) |> + dplyr::select(-taxa_names) |> + as.matrix() + new_physeq@tax_table <- tax_table(new_tax_tab) + taxa_names(new_physeq@tax_table) <- taxa_names(physeq) + + return(new_physeq) + } else if (behavior == "return_matrix") { + return(list( + "taxo_value" = res_sintax_wide_taxo, + "taxo_boostrap" = res_sintax_wide_bootstrap + )) + } +} +################################################################################ + +################################################################################ +#' Assign taxonomy using LCA **à la** [stampa](https://github.com/frederic-mahe/stampa) +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' Please cite [Vsearch](https://github.com/torognes/vsearch) and +#' [stampa](https://github.com/frederic-mahe/stampa) if you use this function +#' to assign taxonomy. +#' +#' +#' @inheritParams clean_pq +#' @param seq2search A DNAStringSet object of sequences to search for. +#' @param ref_fasta (required) A link to a database in vsearch format +#' The reference database must contain taxonomic information in the header of +#' each sequence in the form of a string starting with ";tax=" and followed +#' by a comma-separated list of up to nine taxonomic identifiers. Each taxonomic +#' identifier must start with an indication of the rank by one of the letters d +#' (for domain) k (kingdom), p (phylum), c (class), o (order), f (family), +#' g (genus), s (species), or t (strain). The letter is followed by a colon +#' (:) and the name of that rank. Commas and semicolons are not allowed in +#' the name of the rank. Non-ascii characters should be avoided in the names. +#' +#' Example: +#' +#' \>X80725_S000004313;tax=d:Bacteria,p:Proteobacteria,c:Gammaproteobacteria,o:Enterobacteriales,f:Enterobacteriaceae,g:Escherichia/Shigella,s:Escherichia_coli,t:str._K-12_substr._MG1655 +#' @param behavior Either "return_matrix" (default), "return_cmd", +#' or "add_to_phyloseq": +#' +#' - "return_matrix" return a list of two matrix with taxonomic value in the +#' first element of the list and bootstrap value in the second one. +#' +#' - "return_cmd" return the command to run without running it. +#' +#' - "add_to_phyloseq" return a phyloseq object with amended slot `@taxtable`. +#' Only available if using physeq input and not seq2search input. +#' +#' @param vsearchpath (default: "vsearch") path to vsearch +#' @param clean_pq (logical, default TRUE) +#' If set to TRUE, empty samples and empty ASV are discarded +#' before clustering. +#' @param taxo_rank A list with the name of the taxonomic rank present in +#' ref_fasta +#' @param nproc (int, default: 1) +#' Set to number of cpus/processors to use +#' @param suffix (character) The suffix to name the new columns. +#' If set to "" (the default), the taxo_rank algorithm is used +#' without suffix. +#' @param id (Int. \[0:1\] default 0.5). Default value is based on +#' [stampa](https://github.com/frederic-mahe/stampa). +#' See Vsearch Manual for parameter `--id` +#' @param lca_cutoff (int, default 1). Fraction of matching hits +#' required for the last common ancestor (LCA) output. For example, a value +#' of 0.9 imply that if less than 10% of assigned species are not congruent +#' the taxonomy is filled. +#' Default value is based on [stampa](https://github.com/frederic-mahe/stampa). +#' See Vsearch Manual for parameter `--lca_cutoff` +#' +#' Text from vsearch manual : +#' "Adjust the fraction of matching hits required for the last +#' common ancestor (LCA) output with the --lcaout option during searches. +#' The default value is 1.0 which requires all hits to match at each taxonomic +#' rank for that rank to be included. If a lower cutoff value is used, +#' e.g. 0.95, a small fraction of non-matching hits are allowed while that +#' rank will still be reported. The argument to this option must be larger +#' than 0.5, but not larger than 1.0" +#' @param maxrejects (int, default: 32) +#' Maximum number of non-matching target sequences to consider before +#' stopping the search for a given query. +#' Default value is based on [stampa](https://github.com/frederic-mahe/stampa) +#' See Vsearch Manual for parameter `--maxrejects`. + +#' @param top_hits_only (Logical, default TRUE) +#' Only the top hits with an equally high percentage of identity between the query and +#' database sequence sets are written to the output. If you set top_hits_only +#' you may need to set a lower `maxaccepts` and/or `lca_cutoof`. +#' Default value is based on [stampa](https://github.com/frederic-mahe/stampa) +#' See Vsearch Manual for parameter `--top_hits_only` +#' @param maxaccepts (int, default: 0) +#' Default value is based on [stampa](https://github.com/frederic-mahe/stampa). +#' Maximum number of matching target sequences to accept before stopping the search +#' for a given query. +#' See Vsearch Manual for parameter `--maxaccepts` +#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files? +#' +#' - temporary_fasta_file (default "temp.fasta") : the fasta file from physeq or +#' seq2search +#' +#' - "out_lca.txt" : see Vsearch Manual for parameter --lcaout +#' +#' - "userout.txt" : see Vsearch Manual for parameter --userout +#' +#' @param verbose (logical). If TRUE, print additional information. +#' @param temporary_fasta_file Name of the temporary fasta file. Only useful +#' with keep_temporary_files = TRUE. +#' @param cmd_args Other arguments to be passed on to vsearch usearch_global cmd. +#' @return See param behavior +#' @seealso [assign_sintax()], [add_new_taxonomy_pq()] +#' @examplesIf MiscMetabar::is_vsearch_installed() +#' \donttest{ +#' data_fungi_mini_new <- assign_vsearch_lca(data_fungi_mini, +#' ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"), +#' lca_cutoff = 0.9 +#' ) +#' } +#' @export +#' @author Adrien Taudière +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please cite [vsearch](https://github.com/torognes/vsearch) and +#' [stampa](https://github.com/frederic-mahe/stampa) +assign_vsearch_lca <- function(physeq = NULL, + seq2search = NULL, + ref_fasta = NULL, + behavior = "return_matrix", + vsearchpath = "vsearch", + clean_pq = TRUE, + taxo_rank = c("K", "P", "C", "O", "F", "G", "S"), + nproc = 1, + suffix = "", + id = 0.5, + lca_cutoff = 1, + maxrejects = 32, + top_hits_only = TRUE, + maxaccepts = 0, + keep_temporary_files = FALSE, + verbose = TRUE, + temporary_fasta_file = "temp.fasta", + cmd_args = "") { + write_temp_fasta( + physeq = physeq, + seq2search = seq2search, + temporary_fasta_file = temporary_fasta_file, + behavior = behavior, + clean_pq = clean_pq, + verbose = verbose + ) + + cmd_usearch <- + paste0( + " --usearch_global temp.fasta --db ", + ref_fasta, + " --lcaout out_lca.txt -id ", + id, + " --threads ", + nproc, + " --userfields query+id+target", + " --maxaccepts ", + maxaccepts, + " --maxrejects ", + maxrejects, + " --lca_cutoff ", + lca_cutoff, + " --userout userout.txt ", + cmd_args + ) + + if (top_hits_only) { + cmd_usearch <- + paste0(cmd_usearch, " --top_hits_only") + } + + if (behavior == "return_cmd") { + if (!keep_temporary_files) { + unlink(temporary_fasta_file) + } + return("sintax" = paste0(vsearchpath, " ", cmd_usearch)) + } + + system2(vsearchpath, + args = cmd_usearch, + stdout = TRUE, + stderr = TRUE + ) + + res_usearch <- read.csv("out_lca.txt", sep = "\t", header = F) + taxa_names <- res_usearch$V1 + res_usearch <- tibble(res_usearch$V2, taxa_names) + if (sum(is.na(res_usearch)) == nrow(res_usearch)) { + message("None match were found using usearch global and id=", id) + if (behavior == "add_to_phyloseq") { + return(physeq) + } else { + return(NULL) + } + } + + res_usearch <- res_usearch |> + tidyr::separate_wider_delim(-taxa_names, + names = paste0(taxo_rank, suffix), + delim = ",", + too_few = "align_start" + ) |> + tidyr::pivot_longer(-taxa_names) |> + mutate(across(value, ~ gsub(".:", "", .x))) + + res_usearch_wide_taxo <- + res_usearch |> + tidyr::pivot_wider(names_from = name, values_from = value) + + if (!keep_temporary_files) { + unlink(temporary_fasta_file) + unlink("out_lca.txt") + unlink("userout.txt") + } + + if (behavior == "add_to_phyloseq") { + tax_tab <- as.data.frame(as.matrix(physeq@tax_table)) + tax_tab$taxa_names <- taxa_names(physeq) + + new_physeq <- physeq + new_tax_tab <- left_join(tax_tab, res_usearch_wide_taxo, + by = join_by(taxa_names) + ) |> + dplyr::select(-taxa_names) |> + as.matrix() + new_physeq@tax_table <- tax_table(new_tax_tab) + taxa_names(new_physeq@tax_table) <- taxa_names(physeq) + + return(new_physeq) + } else if (behavior == "return_matrix") { + return(res_usearch_wide_taxo) + } +} +################################################################################ diff --git a/README.Rmd b/README.Rmd index fd68ee05..907e4d76 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,6 +16,8 @@ bibliography: paper/bibliography.bib +A panel showing some github statistics of the repositories using repobeats.axiom + ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -87,9 +89,9 @@ For developers, I also wrote an article describing some [rules of codes](https:/ ```{r example} #| fig.alt: > -#| Four rectangles represent the four component of an example phyloseq -#| dataset. In each rectangle, some informations about the component are -#| shown. +#| Four rectangles represent the four component of an example phyloseq +#| dataset. In each rectangle, some informations about the component are +#| shown. library("MiscMetabar") library("phyloseq") @@ -102,8 +104,8 @@ summary_plot_pq(data_fungi) ```{r, fig.cap="Hill number 0"} #| fig.alt: > -#| Hill number 0, aka richness are plot in function of -#| the height modality +#| Hill number 0, aka richness are plot in function of +#| the height modality p <- MiscMetabar::hill_pq(data_fungi, fact = "Height") p$plot_Hill_0 ``` diff --git a/README.md b/README.md index e41a085f..fa3ae358 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org +A panel showing some github statistics of the repositories using repobeats.axiom + # MiscMetabar MiscMetabar website See the pkgdown documentation site diff --git a/_pkgdown.yml b/_pkgdown.yml index 3c83eaf8..963e4f67 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,9 +52,14 @@ reference: contents: - summary_plot_pq - tbl_sum_samdata - - verify_pq + - title: Transform phyloseq object +- subtitle: Verify and clean phyloseq object + contents: + - clean_pq + - verify_pq + - subtitle: Taxonomy contents: - add_new_taxonomy_pq @@ -63,6 +68,8 @@ reference: - blast_pq - blast_to_derep - filter_asv_blast + - assign_vsearch_lca + - assign_sintax - subtitle: Function to add information from taxonomy contents: @@ -83,6 +90,7 @@ reference: - subset_taxa_tax_control - select_taxa - merge_taxa_vec + - filt_taxa_pq - subtitle: Subset/merge samples contents: @@ -116,15 +124,10 @@ reference: - subtitle: References sequences contents: + - add_dna_to_phyloseq + - plot_complexity_pq - plot_refseq_extremity_pq - plot_refseq_pq - - add_dna_to_phyloseq - -- subtitle: Others - contents: - - add_dna_to_phyloseq - - clean_pq - - title: Explore phyloseq object - subtitle: alpha-diversity @@ -135,6 +138,7 @@ reference: - ggbetween_pq - ggscatt_pq - glmutli_pq + - hill_curves_pq - hill_pq - hill_test_rarperm_pq - hill_tuckey_pq @@ -158,6 +162,7 @@ reference: - ridges_pq - SRS_curve_pq - tsne_pq + - umap_pq - upset_pq - upset_test_pq - var_par_pq @@ -254,6 +259,7 @@ reference: - subtitle: Plot management contents: - multiplot + - no_legend - subtitle: Variable management contents: - diff_fct_diff_class diff --git a/docs/404.html b/docs/404.html index 0e754e30..c7896628 100644 --- a/docs/404.html +++ b/docs/404.html @@ -14,8 +14,8 @@ - - + + @@ -27,7 +27,7 @@ MiscMetabar - 0.10.4 + 0.11.0 + + + + + +
+
+
+ +
+

+lifecycle-experimental

+ +

Please cite Vsearch +if you use this function to assign taxonomy.

+
+ +
+

Usage

+
assign_sintax(
+  physeq = NULL,
+  seq2search = NULL,
+  ref_fasta = NULL,
+  behavior = "return_matrix",
+  vsearchpath = "vsearch",
+  clean_pq = TRUE,
+  nproc = 1,
+  suffix = "",
+  taxo_rank = c("K", "P", "C", "O", "F", "G", "S"),
+  min_boostrap = 0.5,
+  keep_temporary_files = FALSE,
+  verbose = TRUE,
+  temporary_fasta_file = "temp.fasta",
+  cmd_args = "--sintax_random"
+)
+
+ +
+

Arguments

+ + +
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + + +

A DNAStringSet object of sequences to search for.

+ + +
ref_fasta
+

(required) A link to a database in vsearch format +The reference database must contain taxonomic information in the header of +each sequence in the form of a string starting with ";tax=" and followed +by a comma-separated list of up to nine taxonomic identifiers. Each taxonomic +identifier must start with an indication of the rank by one of the letters d +(for domain) k (kingdom), p (phylum), c (class), o (order), f (family), +g (genus), s (species), or t (strain). The letter is followed by a colon +(:) and the name of that rank. Commas and semicolons are not allowed in +the name of the rank. Non-ascii characters should be avoided in the names.

+

Example:

+

\>X80725_S000004313;tax=d:Bacteria,p:Proteobacteria,c:Gammaproteobacteria,o:Enterobacteriales,f:Enterobacteriaceae,g:Escherichia/Shigella,s:Escherichia_coli,t:str._K-12_substr._MG1655

+ + +
behavior
+

Either "return_matrix" (default), "return_cmd", +or "add_to_phyloseq":

  • "return_matrix" return a list of two matrix with taxonomic value in the +first element of the list and bootstrap value in the second one.

  • +
  • "return_cmd" return the command to run without running it.

  • +
  • "add_to_phyloseq" return a phyloseq object with amended slot @taxtable. +Only available if using physeq input and not seq2search input.

  • +
+ + +
vsearchpath
+

(default: "vsearch") path to vsearch

+ + +
clean_pq
+

(logical, default TRUE) +If set to TRUE, empty samples and empty ASV are discarded +before clustering.

+ + +
nproc
+

(default: 1) +Set to number of cpus/processors to use

+ + +
suffix
+

(character) The suffix to name the new columns. +If set to "" (the default), the taxo_rank algorithm is used +without suffix.

+ + +
taxo_rank
+

A list with the name of the taxonomic rank present in +ref_fasta

+ + +
min_boostrap
+

(Int. [0:1], default 0.5) +Minimum bootstrap value to inform taxonomy. For each bootstrap +below the min_boostrap value, the taxonomy information is set to NA.

+ + +
keep_temporary_files
+

(logical, default: FALSE) Do we keep temporary files?

  • temporary_fasta_file (default "temp.fasta") : the fasta file from physeq +or seq2search

  • +
  • "output_taxo_vs.txt" : see Vsearch Manual for parameter –tabbedout

  • +
+ + +
verbose
+

(logical). If TRUE, print additional information.

+ + +
temporary_fasta_file
+

The name of a temporary_fasta_file (default "temp.fasta")

+ + +
cmd_args
+

Other arguments to be passed on to vsearch sintax cmd. +By default cmd_args is equal to "–sintax_random" as recommended by +Torognes.

+ +
+
+

Value

+

See param behavior

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please cite vsearch.

+
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+assign_sintax(data_fungi_mini,
+  ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"),
+  behavior = "return_cmd"
+)
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Start Vsearch sintax
+#> [1] "vsearch  --sintax temp.fasta --db /tmp/RtmpbOMt4J/temp_libpath32c49814bbb/MiscMetabar/extdata/mini_UNITE_fungi.fasta.gz --tabbedout output_taxo_vs.txt  --threads 1 --sintax_random"
+
+data_fungi_mini_new <- assign_sintax(data_fungi_mini,
+  ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"),
+  behavior = "add_to_phyloseq"
+)
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Start Vsearch sintax
+
+assignation_results <- assign_sintax(data_fungi_mini,
+  ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar")
+)
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Start Vsearch sintax
+
+left_join(
+  tidyr::pivot_longer(assignation_results$taxo_value, -taxa_names),
+  tidyr::pivot_longer(assignation_results$taxo_boostrap, -taxa_names),
+  by = join_by(taxa_names, name),
+  suffix = c("rank", "bootstrap")
+) |>
+  mutate(name = factor(name, levels = c("K", "P", "C", "O", "F", "G", "S"))) |>
+  # mutate(valuerank = forcats::fct_reorder(valuerank, as.integer(name), .desc = TRUE)) |>
+  ggplot(aes(valuebootstrap,
+    valuerank,
+    fill = name
+  )) +
+  geom_jitter(alpha = 0.8, aes(color = name)) +
+  geom_boxplot(alpha = 0.3)
+
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/assign_vsearch_lca.html b/docs/reference/assign_vsearch_lca.html new file mode 100644 index 00000000..1b27f409 --- /dev/null +++ b/docs/reference/assign_vsearch_lca.html @@ -0,0 +1,286 @@ + +Assign taxonomy using LCA à la stampa — assign_vsearch_lca • MiscMetabar + Skip to contents + + +
+
+
+ +
+

+lifecycle-experimental

+ +

Please cite Vsearch and +stampa if you use this function +to assign taxonomy.

+
+ +
+

Usage

+
assign_vsearch_lca(
+  physeq = NULL,
+  seq2search = NULL,
+  ref_fasta = NULL,
+  behavior = "return_matrix",
+  vsearchpath = "vsearch",
+  clean_pq = TRUE,
+  taxo_rank = c("K", "P", "C", "O", "F", "G", "S"),
+  nproc = 1,
+  suffix = "",
+  id = 0.5,
+  lca_cutoff = 1,
+  maxrejects = 32,
+  top_hits_only = TRUE,
+  maxaccepts = 0,
+  keep_temporary_files = FALSE,
+  verbose = TRUE,
+  temporary_fasta_file = "temp.fasta",
+  cmd_args = ""
+)
+
+ +
+

Arguments

+ + +
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + + +

A DNAStringSet object of sequences to search for.

+ + +
ref_fasta
+

(required) A link to a database in vsearch format +The reference database must contain taxonomic information in the header of +each sequence in the form of a string starting with ";tax=" and followed +by a comma-separated list of up to nine taxonomic identifiers. Each taxonomic +identifier must start with an indication of the rank by one of the letters d +(for domain) k (kingdom), p (phylum), c (class), o (order), f (family), +g (genus), s (species), or t (strain). The letter is followed by a colon +(:) and the name of that rank. Commas and semicolons are not allowed in +the name of the rank. Non-ascii characters should be avoided in the names.

+

Example:

+

\>X80725_S000004313;tax=d:Bacteria,p:Proteobacteria,c:Gammaproteobacteria,o:Enterobacteriales,f:Enterobacteriaceae,g:Escherichia/Shigella,s:Escherichia_coli,t:str._K-12_substr._MG1655

+ + +
behavior
+

Either "return_matrix" (default), "return_cmd", +or "add_to_phyloseq":

  • "return_matrix" return a list of two matrix with taxonomic value in the +first element of the list and bootstrap value in the second one.

  • +
  • "return_cmd" return the command to run without running it.

  • +
  • "add_to_phyloseq" return a phyloseq object with amended slot @taxtable. +Only available if using physeq input and not seq2search input.

  • +
+ + +
vsearchpath
+

(default: "vsearch") path to vsearch

+ + +
clean_pq
+

(logical, default TRUE) +If set to TRUE, empty samples and empty ASV are discarded +before clustering.

+ + +
taxo_rank
+

A list with the name of the taxonomic rank present in +ref_fasta

+ + +
nproc
+

(int, default: 1) +Set to number of cpus/processors to use

+ + +
suffix
+

(character) The suffix to name the new columns. +If set to "" (the default), the taxo_rank algorithm is used +without suffix.

+ + +
id
+

(Int. [0:1] default 0.5). Default value is based on +stampa. +See Vsearch Manual for parameter --id

+ + +
lca_cutoff
+

(int, default 1). Fraction of matching hits +required for the last common ancestor (LCA) output. For example, a value +of 0.9 imply that if less than 10% of assigned species are not congruent +the taxonomy is filled. +Default value is based on stampa. +See Vsearch Manual for parameter --lca_cutoff

+

Text from vsearch manual : +"Adjust the fraction of matching hits required for the last +common ancestor (LCA) output with the –lcaout option during searches. +The default value is 1.0 which requires all hits to match at each taxonomic +rank for that rank to be included. If a lower cutoff value is used, +e.g. 0.95, a small fraction of non-matching hits are allowed while that +rank will still be reported. The argument to this option must be larger +than 0.5, but not larger than 1.0"

+ + +
maxrejects
+

(int, default: 32) +Maximum number of non-matching target sequences to consider before +stopping the search for a given query. +Default value is based on stampa +See Vsearch Manual for parameter --maxrejects.

+ + +
top_hits_only
+

(Logical, default TRUE) +Only the top hits with an equally high percentage of identity between the query and +database sequence sets are written to the output. If you set top_hits_only +you may need to set a lower maxaccepts and/or lca_cutoof. +Default value is based on stampa +See Vsearch Manual for parameter --top_hits_only

+ + +
maxaccepts
+

(int, default: 0) +Default value is based on stampa. +Maximum number of matching target sequences to accept before stopping the search +for a given query. +See Vsearch Manual for parameter --maxaccepts

+ + +
keep_temporary_files
+

(logical, default: FALSE) Do we keep temporary files?

  • temporary_fasta_file (default "temp.fasta") : the fasta file from physeq or +seq2search

  • +
  • "out_lca.txt" : see Vsearch Manual for parameter –lcaout

  • +
  • "userout.txt" : see Vsearch Manual for parameter –userout

  • +
+ + +
verbose
+

(logical). If TRUE, print additional information.

+ + +
temporary_fasta_file
+

Name of the temporary fasta file. Only useful +with keep_temporary_files = TRUE.

+ + +
cmd_args
+

Other arguments to be passed on to vsearch usearch_global cmd.

+ +
+
+

Value

+

See param behavior

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please cite vsearch and +stampa

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
# \donttest{
+data_fungi_mini_new <- assign_vsearch_lca(data_fungi_mini,
+  ref_fasta = system.file("extdata", "mini_UNITE_fungi.fasta.gz", package = "MiscMetabar"),
+  lca_cutoff = 0.9
+)
+#> Cleaning suppress 0 taxa and 0 samples.
+# }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/biplot_pq.html b/docs/reference/biplot_pq.html index 9a273529..55d8c7d5 100644 --- a/docs/reference/biplot_pq.html +++ b/docs/reference/biplot_pq.html @@ -1,5 +1,5 @@ -Visualization of two samples for comparison — biplot_pq • MiscMetabarVisualization of two samples for comparison — biplot_pq • MiscMetabarMiscMetabar - 0.10.4 + 0.11.0 + + + + + +
+
+
+ +
+

+lifecycle-experimental

+ +

Basically a wrapper of vegan::renyi() and +vegan::renyiaccum() functions

+
+ +
+

Usage

+
hill_curves_pq(
+  physeq,
+  merge_sample_by = NULL,
+  color_fac = NULL,
+  hill_scales = c(0, 0.25, 0.5, 1, 2, 4, 8, 16, 32, 64, Inf),
+  nperm = NULL,
+  na_remove = TRUE,
+  wrap_factor = TRUE,
+  plot_legend = TRUE,
+  linewidth = 2,
+  size_point = 2,
+  ...
+)
+
+ +
+

Arguments

+ + +
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
merge_sample_by
+

a vector to determine which samples to merge using +the merge_samples2() function. Need to be in physeq@sam_data

+ + +
color_fac
+

(optional): The variable to color the barplot. For ex. +same as fact. If merge_sample_by is set, color_fac must be nested in +the merge_sample_by factor. See examples.

+ + +
hill_scales
+

Scales of Rényi diversity.

+ + +
nperm
+

(int Default NULL) If a integer is set to nperm, nperm +permutation are computed to draw confidence interval for each curves. +The function use vegan::renyi() if nperm is NULL and +vegan::renyiaccum() else.

+ + +
na_remove
+

(logical, default FALSE) If set to TRUE, remove samples with +NA in the variables set in merge_sample_by. Not used if merge_sample_by is +NULL.

+ + +
wrap_factor
+

(logical, default TRUE) Do the plot is wrap by the factor

+ + +
plot_legend
+

(logical, default TRUE) If set to FALSE, +no legend are plotted.

+ + +
linewidth
+

(int, default 2) The linewidth of lines.

+ + +
size_point
+

(int, default 1) The size of the point.

+ + +
...
+

Other arguments passed on to vegan::renyi() function or +vegan::renyiaccum() if nperm is not NULL.

+ +
+
+

Value

+

A ggplot2 object

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to vegan::renyi() or +vegan::renyiaccum() functions

+
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
if (requireNamespace("vegan")) {
+  hill_curves_pq(data_fungi_mini, merge_sample_by = "Time")
+  hill_curves_pq(data_fungi_mini, color_fac = "Time", plot_legend = FALSE)
+  hill_curves_pq(data_fungi_mini,
+    color_fac = "Time", plot_legend = FALSE,
+    nperm = 9, size_point = 1, linewidth = 0.5
+  )
+
+  hill_curves_pq(data_fungi_mini,
+    nperm = 9, plot_legend = FALSE, size_point = 1,
+    linewidth = 0.5
+  )
+  hill_curves_pq(data_fungi_mini, "Height",
+    hill_scales = c(0, 1, 2, 8), plot_legend = FALSE
+  )
+  hill_curves_pq(data_fungi_mini, "Height",
+    hill_scales = c(0, 0.5, 1, 2, 4, 8),
+    nperm = 9
+  )
+  hill_curves_pq(data_fungi_mini, "Height", nperm = 9, wrap_factor = FALSE)
+
+  data_fungi_mini@sam_data$H_T <- paste0(
+    data_fungi_mini@sam_data$Height,
+    "_", data_fungi_mini@sam_data$Time
+  )
+  merge_samples2(data_fungi_mini, "H_T")
+  hill_curves_pq(data_fungi_mini, "H_T", color_fac = "Time", nperm = 9)
+}
+#> 17 were discarded due to NA in variables present in formula.
+#> At least one sample name start with a zero.
+#>     That can be a problem for some phyloseq functions such as
+#>     plot_bar and psmelt.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> 47 were discarded due to NA in variables present in formula.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> 47 were discarded due to NA in variables present in formula.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> 'nperm' >= set of all permutations: complete enumeration.
+#> Set of permutations < 'minperm'. Generating entire set.
+#> 47 were discarded due to NA in variables present in formula.
+#> Cleaning suppress 0 taxa and 0 samples.
+#> 'nperm' >= set of all permutations: complete enumeration.
+#> Set of permutations < 'minperm'. Generating entire set.
+#> Cleaning suppress 0 taxa and 0 samples.
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/hill_pq.html b/docs/reference/hill_pq.html index 080e5fb4..d13cdffb 100644 --- a/docs/reference/hill_pq.html +++ b/docs/reference/hill_pq.html @@ -1,5 +1,5 @@ -Graphical representation of hill number 0, 1 and 2 across a factor — hill_pq • MiscMetabarGraphical representation of hill number 0, 1 and 2 across a factor — hill_pq • MiscMetabarMiscMetabar - 0.10.4 + 0.11.0 + + + + + +
+
+
+ +
+

+lifecycle-stable

+ +

A more memorable shortcut for theme(legend.position = "none").

+
+ +
+

Usage

+
no_legend()
+
+ +
+

Value

+

A ggplot2 object

+
+
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
plot_refseq_pq(data_fungi)
+#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
+#> Number of non-matching ASV 0
+#> Number of matching ASV 1420
+#> Number of filtered-out ASV 1
+#> Number of kept ASV 1419
+#> Number of kept samples 185
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Warning: Removed 688 rows containing missing values or values outside the scale range
+#> (`geom_point()`).
+
+plot_refseq_pq(data_fungi) + no_legend()
+#> Cleaning suppress 0 taxa (  ) and 0 sample(s) (  ).
+#> Number of non-matching ASV 0
+#> Number of matching ASV 1420
+#> Number of filtered-out ASV 1
+#> Number of kept ASV 1419
+#> Number of kept samples 185
+#> Cleaning suppress 0 taxa and 0 samples.
+#> Warning: Removed 688 rows containing missing values or values outside the scale range
+#> (`geom_point()`).
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/normalize_prop_pq.html b/docs/reference/normalize_prop_pq.html index 768276cf..9dddced5 100644 --- a/docs/reference/normalize_prop_pq.html +++ b/docs/reference/normalize_prop_pq.html @@ -1,5 +1,5 @@ -Normalize OTU table using samples depth — normalize_prop_pq • MiscMetabarNormalize OTU table using samples depth — normalize_prop_pq • MiscMetabarMiscMetabar - 0.10.4 + 0.11.0 + + + + + +
+
+
+ +
+

+lifecycle-experimental

+ +

Basically a wrapper of dada2::seqComplexity()

+
+ +
+

Usage

+
plot_complexity_pq(
+  physeq,
+  kmer_size = 2,
+  window = NULL,
+  by = 5,
+  bins = 100,
+  aggregate = FALSE,
+  vline_random_kmer = TRUE,
+  ...
+)
+
+ +
+

Arguments

+ + +
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
kmer_size
+

int (default 2) The size of the kmers +(or "oligonucleotides" or "words") to use.

+ + +
window
+

(int, default NULL) The width in nucleotides of the moving +window. If NULL the whole sequence is used.

+ + +
by
+

(int, default 5) The step size in nucleotides between each moving +window tested.

+ + +
bins
+

(int, default 100). The number of bins to use for the histogram.

+ + +
aggregate
+

(logical, default FALSE) If TRUE, compute an aggregate quality profile +for all samples

+ + +
vline_random_kmer
+

(logical, default TRUE) If TRUE, add a vertical line +at the value for random kmer (equal to 4^kmerSize))

+ + +
...
+

Arguments passed on to geom_histogram.

+ +
+
+

Value

+

A ggplot2 object

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to dada2::seqComplexity()

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
plot_complexity_pq(subset_samples(data_fungi_mini, Height == "High"),
+  vline_random_kmer = FALSE
+)
+
+plot_complexity_pq(subset_samples(data_fungi_mini, Height == "Low"),
+  aggregate = FALSE, kmer_size = 4
+)
+
+# plot_complexity_pq(subset_samples(data_fungi, Height == "Low"), kmer_size = 4)
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/plot_deseq2_pq.html b/docs/reference/plot_deseq2_pq.html index 237cfdde..cc11c8db 100644 --- a/docs/reference/plot_deseq2_pq.html +++ b/docs/reference/plot_deseq2_pq.html @@ -1,5 +1,5 @@ -Plot DESeq2 results for a phyloseq or a DESeq2 object. — plot_deseq2_pq • MiscMetabarPlot DESeq2 results for a phyloseq or a DESeq2 object. — plot_deseq2_pq • MiscMetabarMiscMetabar - 0.10.4 + 0.11.0 + + + + + +
+
+
+ +
+

+lifecycle-experimental

+ +

https://journals.asm.org/doi/full/10.1128/msystems.00691-21

+
+ +
+

Usage

+
umap_pq(physeq, pkg = "umap", ...)
+
+ +
+

Arguments

+ + +
physeq
+

(required): a phyloseq-class object obtained +using the phyloseq package.

+ + +
pkg
+

Which R packages to use, either "umap" or "uwot".

+ + +
...
+

Others arguments passed on to umap::umap() or +uwot::umap2() function. +For example n_neighbors set the number of nearest neighbors (Default 15). +See umap::umap.defaults() or uwot::umap2() for the list of +parameters and default values.

+ +
+
+

Value

+

A dataframe with samples informations and the x_umap and y_umap position

+
+
+

Details

+

This function is mainly a wrapper of the work of others. +Please make a reference to umap::umap() if you +use this function.

+
+ +
+

Author

+

Adrien Taudière

+
+ +
+

Examples

+
library("umap")
+df_umap <- umap_pq(data_fungi_mini)
+#> Taxa are now in columns.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
+#> `.name_repair` is omitted as of tibble 2.0.0.
+#>  Using compatibility `.name_repair`.
+#>  The deprecated feature was likely used in the MiscMetabar package.
+#>   Please report the issue at
+#>   <https://github.com/adrientaudiere/MiscMetabar/issues>.
+#> Joining with `by = join_by(Sample)`
+ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) +
+  geom_point(size = 2)
+
+
+# library(patchwork)
+# physeq <- data_fungi_mini
+# res_tsne <- tsne_pq(data_fungi_mini)
+# df_umap_tsne <- df_umap
+# df_umap_tsne$x_tsne <- res_tsne$Y[, 1]
+# df_umap_tsne$y_tsne <- res_tsne$Y[, 2]
+# ((ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) +
+#  geom_point(size = 2) +
+#  ggtitle("UMAP")) + (plot_ordination(physeq,
+#  ordination =
+#    ordinate(physeq, method = "PCoA", distance = "bray"), color = "Height"
+# )) +
+#  ggtitle("PCoA")) /
+#  ((ggplot(df_umap_tsne, aes(x = x_tsne, y = y_tsne, col = Height)) +
+#    geom_point(size = 2) +
+#    ggtitle("tsne")) +
+#    (plot_ordination(physeq,
+#      ordination = ordinate(physeq, method = "NMDS", distance = "bray"),
+#      color = "Height"
+#    ) +
+#      ggtitle("NMDS"))) +
+#  patchwork::plot_layout(guides = "collect")
+
+df_uwot <- umap_pq(data_fungi_mini, pkg = "uwot")
+#> Taxa are now in columns.
+#> Taxa are now in rows.
+#> Joining with `by = join_by(Sample)`
+#> Joining with `by = join_by(Sample)`
+
+(ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) +
+  geom_point(size = 2) +
+  ggtitle("umap::umap")) /
+  (ggplot(df_uwot, aes(x = x_umap, y = y_umap, col = Height)) +
+    geom_point(size = 2) +
+    ggtitle("uwot::umap2"))
+
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/unique_or_na.html b/docs/reference/unique_or_na.html index 3b6a3a34..b9bba165 100644 --- a/docs/reference/unique_or_na.html +++ b/docs/reference/unique_or_na.html @@ -1,5 +1,5 @@ -Get the unique value in x or NA if none — unique_or_na • MiscMetabarGet the unique value in x or NA if none — unique_or_na • MiscMetabarMiscMetabar - 0.10.4 + 0.11.0