diff --git a/.vscode/settings.json b/.vscode/settings.json index 0e425dad..874da1a5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,23 @@ { "workbench.colorCustomizations": { - "activityBar.background": "#253202", - "titleBar.activeBackground": "#344602", - "titleBar.activeForeground": "#F4FEDC" - } + "activityBar.activeBackground": "#ab307e", + "activityBar.background": "#ab307e", + "activityBar.foreground": "#e7e7e7", + "activityBar.inactiveForeground": "#e7e7e799", + "activityBarBadge.background": "#25320e", + "activityBarBadge.foreground": "#e7e7e7", + "commandCenter.border": "#e7e7e799", + "sash.hoverBorder": "#ab307e", + "statusBar.background": "#832561", + "statusBar.foreground": "#e7e7e7", + "statusBarItem.hoverBackground": "#ab307e", + "statusBarItem.remoteBackground": "#832561", + "statusBarItem.remoteForeground": "#e7e7e7", + "titleBar.activeBackground": "#832561", + "titleBar.activeForeground": "#e7e7e7", + "titleBar.inactiveBackground": "#83256199", + "titleBar.inactiveForeground": "#e7e7e799", + "tab.activeBorder": "#ab307e" + }, + "peacock.color": "#832561" } \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 2bc35a7e..425b4a78 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.9.2 +Version: 0.9.3 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. 
Important features are described in Taudière A. (2023) . @@ -79,7 +79,7 @@ Suggests: vctrs, viridis, withr -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 URL: https://github.com/adrientaudiere/MiscMetabar, https://adrientaudiere.github.io/MiscMetabar/ biocViews: Sequencing, Microbiome, Metagenomics, diff --git a/NAMESPACE b/NAMESPACE index d6431b84..a4fdb020 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(dist_pos_control) export(distri_1_taxa) export(fac2col) export(filter_asv_blast) +export(filter_taxa_blast) export(filter_trim) export(formattable_pq) export(funguild_assign) @@ -98,6 +99,7 @@ export(plot_mt) export(plot_tax_pq) export(plot_tsne_pq) export(plot_var_part_pq) +export(postcluster_pq) export(psmelt_samples_pq) export(rarefy_sample_count_by_modality) export(read_phyloseq) diff --git a/NEWS.md b/NEWS.md index 08abd78b..ef4cf6c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,21 @@ -# MiscMetabar 0.9.2 (in development) + +# MiscMetabar 0.9.3 (in development) + +- Homogenize terminology replacing ASV by taxa/taxon in documentation and code +- Build an alias function `filter_taxa_blast()` for +`filter_asv_blast()` +- Build an alias function `postcluster_pq()` for +`asv2otu()` + + +## BREAKING CHANGES + +- Replacing misnamed param `rename_asv` by `rename_taxa` in `clean_pq()` and `write_pq()` +- Replacing misnamed param `reorder_asv` by `reorder_taxa` in `clean_pq()` and `write_pq()` + + + +# MiscMetabar 0.9.2 - Add param `default_fun` in function `merge_samples2()` in order to replace the default function that change the sample data in case of merging. A useful parameter is `default_fun=diff_fct_diff_class`. - Add param `kruskal_test` to `hill_pq()` function to prevent user to mis-interpret Tuckey HSD result (and letters) if the global effect of the tested factor on Hill diversity is non significant. 
diff --git a/R/Deseq2_edgeR.R b/R/Deseq2_edgeR.R index 8ded9156..1bd565d4 100644 --- a/R/Deseq2_edgeR.R +++ b/R/Deseq2_edgeR.R @@ -1,13 +1,13 @@ ################################################################################ #' Plot edgeR results for a phyloseq or a edgeR object. #' -#' @description -#' +#' @description +#' #' #' lifecycle-maturing #' #' Graphical representation of edgeR result. -#' +#' #' @inheritParams clean_pq #' @param contrast (required):This argument specifies what comparison #' to extract from the object to build a results table. @@ -125,12 +125,12 @@ plot_edgeR_pq <- # Plot the result of a DESeq2 test ################################################################################ #' Plot DESeq2 results for a phyloseq or a DESeq2 object. -#' +#' #' @description #' #' #' lifecycle-experimental -#' +#' #' Graphical representation of DESeq2 analysis. #' #' @param data (required) a \code{\link{phyloseq-class}} or a diff --git a/R/MiscMetabar-package.R b/R/MiscMetabar-package.R index 24d4e77b..975106d3 100644 --- a/R/MiscMetabar-package.R +++ b/R/MiscMetabar-package.R @@ -8,13 +8,13 @@ NULL if (getRversion() >= "2.15.1") { utils::globalVariables(c( - ".id", "%>%", "Ab", "Abundance", "ASV_names", "beta.div", "calc_taxon_abund", + ".id", "%>%", "Ab", "Abundance", "taxon_names", "beta.div", "calc_taxon_abund", "character_method", "Class", "col_tax", "colors", "combn", "complement", "e-value", "Family", "Genus", "grid.draw", "grid.layout", "group_by", "Guild", "heat_tree", "hill_0", "Hill_0", "hill_1", "Hill_1", "hill_2", "Hill_2", "how name", "install_github", "install.packages", "LCBD", "log2FoldChange", "logFC", "LVL1", "LVL3", "lwr", "max_Hill", - "modality", "multcompLetters", "name", "nb_asv", "nb_seq", "nb_values", + "modality", "multcompLetters", "name", "nb_taxa", "nb_seq", "nb_values", "ott_id", "OTU", "p.adj", "p.adjust", "plot_layout", "plot_layout value", "Proportion", "pushViewport", "Query name", "rarefy", "read.delim", "reverse", 
"reverseComplement", "rgb", diff --git a/R/alpha_div_test.R b/R/alpha_div_test.R index d8200531..96c4d3c3 100644 --- a/R/alpha_div_test.R +++ b/R/alpha_div_test.R @@ -105,11 +105,11 @@ hill_tuckey_pq <- function( #' Test multiple times effect of factor on Hill diversity #' with different rarefaction even depth #' -#' @description +#' @description #' #' lifecycle-experimental -#' -#' This reduce the risk of a random drawing of a exceptional situation of an unique rarefaction. +#' +#' This reduce the risk of a random drawing of a exceptional situation of an unique rarefaction. #' @inheritParams clean_pq #' @param fact (required) Name of the factor in `physeq@sam_data` used to plot #' different lines @@ -272,12 +272,12 @@ hill_test_rarperm_pq <- function(physeq, ################################################################################ #' Automated model selection and multimodel inference with (G)LMs for phyloseq #' -#' @description +#' @description #' #' lifecycle-experimental #' #' See [glmulti::glmulti()] for more information. -#' +#' #' @inheritParams clean_pq #' @param formula (required) a formula for [glmulti::glmulti()] #' Variables must be present in the `physeq@sam_data` slot or be one diff --git a/R/beta_div_test.R b/R/beta_div_test.R index 84af2e33..f691502a 100644 --- a/R/beta_div_test.R +++ b/R/beta_div_test.R @@ -220,15 +220,15 @@ adonis_pq <- function(physeq, ################################################################################ #' Permanova (adonis) on permutations of rarefaction even depth #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental #' -#' Permanova are computed on a given number of rarefaction with different -#' seed.number. This reduce the risk of a random drawing of a exceptional -#' situation of an unique rarefaction. -#' +#' Permanova are computed on a given number of rarefaction with different +#' seed.number. 
This reduce the risk of a random drawing of a exceptional +#' situation of an unique rarefaction. +#' #' @inheritParams adonis_pq #' @param nperm (int, default = 99) The number of permutations to perform. #' @param progress_bar (logical, default TRUE) Do we print progress during @@ -578,7 +578,7 @@ plot_LCBD_pq <- function(physeq, #' Please make a reference to `vegan::beta.div()` if you #' use this function. plot_SCBD_pq <- function(physeq, - tax_level = "ASV", + tax_level = "Taxa", tax_col = "Order", min_SCBD = 0.01, ...) { @@ -587,7 +587,7 @@ plot_SCBD_pq <- function(physeq, tax_tab <- data.frame(physeq@tax_table) resSCBD <- tibble( - "ASV" = taxa_names(physeq), + "Taxa" = taxa_names(physeq), "SCBD" = resBeta$SCBD, tax_tab ) @@ -663,7 +663,7 @@ multipatt_pq <- function(physeq, res_df <- res$sign res_df$p.adj <- p.adjust(res_df$p.value, method = p_adjust_method) - res_df$ASV_names <- rownames(res_df) + res_df$taxon_names <- rownames(res_df) res_df_signif <- res_df %>% filter(p.adj < pval) %>% @@ -672,7 +672,7 @@ multipatt_pq <- function(physeq, p <- ggplot( res_df_signif, aes( - x = ASV_names, + x = taxon_names, y = name, size = 2 * value, color = stat @@ -775,12 +775,12 @@ ancombc_pq <- function(physeq, fact, levels_fact = NULL, tax_level = "Class", .. ################################################################################ #' Filter ancombc_pq results #' -#' @description +#' @description #' #' lifecycle-experimental #' #' Internally used in [plot_ancombc_pq()]. -#' +#' #' @param ancombc_res (required) the result of the ancombc_pq function #' For the moment only bimodal factors are possible. 
#' @param filter_passed (logical, default TRUE) Do we filter using the column @@ -857,12 +857,12 @@ signif_ancombc <- function(ancombc_res, ################################################################################ #' Plot ANCOMBC2 result for phyloseq object #' -#' @description +#' @description #' #' lifecycle-experimental #' #' Graphical representation of ANCOMBC2 result. -#' +#' #' @inheritParams clean_pq #' @param ancombc_res (required) the result of the ancombc_pq function #' For the moment only bimodal factors are possible. @@ -961,7 +961,7 @@ plot_ancombc_pq <- ) taxtable <- data.frame(physeq@tax_table) - taxtable$taxon <- taxa_names(physeq) + taxtable$taxon <- taxa_names(physeq) df <- left_join(signif_ancombc_res, taxtable, by = join_by("taxon" == "taxon")) @@ -1080,13 +1080,13 @@ taxa_only_in_one_level <- function(physeq, ################################################################################ #' Distribution of sequences across a factor for one taxon #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental #' -#' Focus on one taxon and one factor. -#' +#' Focus on one taxon and one factor. +#' #' @inheritParams clean_pq #' @param fact (required) Name of the factor in `physeq@sam_data` used to plot #' different lines diff --git a/R/blast.R b/R/blast.R index 25bd789f..eb1782c0 100644 --- a/R/blast.R +++ b/R/blast.R @@ -2,17 +2,17 @@ #' Blast some sequence against `refseq` slot of a \code{\link{phyloseq-class}} #' object. #' -#' @description -#' +#' @description +#' #' #' lifecycle-maturing -#' +#' #' Use the blast software. -#' +#' #' @param physeq (required): a \code{\link{phyloseq-class}} object obtained #' using the `phyloseq` package. #' @param seq2search (required) path to a fasta file defining the sequences -#' you want to blast against the ASV sequences from the physeq object. +#' you want to blast against the taxa (ASV, OTU) sequences from the physeq object. 
#' @param blastpath path to blast program #' @param id_cut (default: 90) cut of in identity percent to keep result #' @param bit_score_cut (default: 50) cut of in bit score to keep result @@ -184,13 +184,13 @@ blast_to_phyloseq <- function(physeq, #' Blast all sequence of `refseq` slot of a \code{\link{phyloseq-class}} #' object against a custom database. #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental #' #' Use the blast software. -#' +#' #' @inheritParams blast_to_phyloseq #' @param fasta_for_db path to a fasta file to make the blast database #' @param database path to a blast database @@ -338,23 +338,21 @@ blast_pq <- function(physeq, return(blast_tab) } - - ################################################################################ #' Filter undesirable taxa using blast against a custom database. #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental #' #' Use the blast software. -#' +#' #' @inheritParams blast_to_phyloseq #' @param fasta_for_db path to a fasta file to make the blast database #' @param database path to a blast database #' @param clean_pq (logical) -#' If set to TRUE, empty samples and empty ASV are discarded +#' If set to TRUE, empty samples and empty taxa (ASV, OTU) are discarded #' after filtering. #' @param id_filter (default: 90) cut of in identity percent to keep result #' @param bit_score_filter (default: 50) cut of in bit score to keep result @@ -432,20 +430,27 @@ filter_asv_blast <- function(physeq, } +################################################################################ +#' @rdname filter_asv_blast +#' @export +filter_taxa_blast <- filter_asv_blast +################################################################################ + + #' Blast some sequence against sequences from of a \code{\link{derep-class}} #' object. #' -#' @description +#' @description #' #' lifecycle-experimental #' #' Use the blast software. 
-#' +#' #' @inheritParams blast_to_phyloseq #' @param derep The result of `dada2::derepFastq()`. A list of `derep-class` #' object. #' @param seq2search (required) path to a fasta file defining the sequences -#' you want to blast against the ASV sequences from the physeq object. +#' you want to blast against the taxa (ASV, OTU) sequences from the physeq object. #' @param min_length_seq (default: 200) Removed sequences with less than #' `min_length_seq` from derep before blast. Set to 0 to discard filtering #' sequences by length. diff --git a/R/controls.R b/R/controls.R index 44efa048..0d473e8a 100644 --- a/R/controls.R +++ b/R/controls.R @@ -1,14 +1,14 @@ ################################################################################ #' Search for exact matching of sequences #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental #' #' Search for exact matching of sequences using complement, #' reverse and reverse-complement -#' +#' #' @inheritParams clean_pq #' @param seq2search A DNAStringSet object of sequences to search for. #' @return A list of data-frames for each input sequences with the name, @@ -129,13 +129,13 @@ dist_pos_control <- function(physeq, samples_names, method = "bray") { ################################################################################ #' Subset taxa using a taxa control or distribution based method #' -#' @description +#' @description #' #' lifecycle-experimental -#' -#' There is 3 main methods : discard taxa (i) using a control taxa (e.g. truffle root tips), -#' (ii) using a mixture models to detect bimodality in pseudo-abundance distribution or -#' (iii) using a minimum difference threshold pseudo-abundance. Each cutoff is defined at +#' +#' There is 3 main methods : discard taxa (i) using a control taxa (e.g. truffle root tips), +#' (ii) using a mixture models to detect bimodality in pseudo-abundance distribution or +#' (iii) using a minimum difference threshold pseudo-abundance. 
Each cutoff is defined at #' the sample level. #' #' @aliases subset_taxa_tax_control diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index 37983eb4..a31f62ad 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -4,28 +4,29 @@ if (getRversion() >= "2.15.1") { ################################################################################ #' Add dna in `refseq` slot of a `physeq` object using taxa names and renames taxa -#' using ASV_1, ASV_2, … +#' using prefix_taxa_names and number (default Taxa_1, Taxa_2 ...) +#' +#' @description #' -#' @description -#' #' #' lifecycle-stable #' #' Useful in targets bioinformatic pipeline. -#' +#' #' @inheritParams clean_pq +#' @param prefix_taxa_names (default "Taxa_"): the prefix of taxa names (eg. "ASV_" or "OTU_") #' #' @return A new \code{\link{phyloseq-class}} object with `refseq` slot and new #' taxa names #' @export -add_dna_to_phyloseq <- function(physeq) { +add_dna_to_phyloseq <- function(physeq, prefix_taxa_names = "Taxa_") { verify_pq(physeq) dna <- Biostrings::DNAStringSet(phyloseq::taxa_names(physeq)) names(dna) <- phyloseq::taxa_names(physeq) physeq <- phyloseq::merge_phyloseq(physeq, dna) phyloseq::taxa_names(physeq) <- - paste0("ASV_", seq(phyloseq::ntaxa(physeq))) + paste0(prefix_taxa_names, seq(phyloseq::ntaxa(physeq))) return(physeq) } ################################################################################ @@ -35,11 +36,11 @@ add_dna_to_phyloseq <- function(physeq) { ################################################################################ #' Clean phyloseq object by removing empty samples and taxa #' -#' @description +#' @description #' #' #' lifecycle-experimental -#' +#' #' In addition, this function check for discrepancy (and rename) between #' (i) taxa names in refseq, taxonomy table and otu_table and between #' (ii) sample names in sam_data and otu_table. 
@@ -58,13 +59,15 @@ add_dna_to_phyloseq <- function(physeq) { #' transpose the otu_table and set taxa_are_rows to false #' @param force_taxa_as_rows (logical) If true, if the taxa are columns #' transpose the otu_table and set taxa_are_rows to true -#' @param reorder_asv (logical) if TRUE the otu_table is ordered by the number of -#' sequences of ASV (descending order). Default to FALSE. -#' @param rename_asv (logical) if TRUE, ASV are renamed by their position -#' in the OTU_table (asv_1, asv_2, ...). Default to FALSE. If rename ASV is true, -#' the ASV names in verbose information can be misleading. +#' @param reorder_taxa (logical) if TRUE the otu_table is ordered by the number of +#' sequences of taxa (ASV, OTU) in descending order. Default to FALSE. +#' @param rename_taxa (logical) if TRUE, taxa (ASV, OTU) are renamed by their position +#' in the OTU_table and prefix_taxa_names param (by default: Taxa_1, Taxa_2, ...). +#' Default to FALSE. If rename taxa (ASV, OTU) is true, +#' the taxa (ASV, OTU) names in verbose information can be misleading. #' @param simplify_taxo (logical) if TRUE, correct the taxonomy_table using the #' `MiscMetabar::simplify_taxo()` function +#' @param prefix_taxa_names (default "Taxa_"): the prefix of taxa names (eg. 
"ASV_" or "OTU_") #' @return A new \code{\link{phyloseq-class}} object #' @export clean_pq <- function(physeq, @@ -75,9 +78,10 @@ clean_pq <- function(physeq, verbose = FALSE, force_taxa_as_columns = FALSE, force_taxa_as_rows = FALSE, - reorder_asv = FALSE, - rename_asv = FALSE, - simplify_taxo = FALSE) { + reorder_taxa = FALSE, + rename_taxa = FALSE, + simplify_taxo = FALSE, + prefix_taxa_names = "Taxa_") { if (clean_samples_names) { if (!is.null(physeq@refseq)) { if (sum(!names(physeq@refseq) %in% taxa_names(physeq)) > 0) { @@ -108,15 +112,15 @@ clean_pq <- function(physeq, verify_pq(physeq) - if (reorder_asv) { + if (reorder_taxa) { physeq <- reorder_taxa_pq( physeq, taxa_names(physeq)[order(taxa_sums(physeq), decreasing = TRUE)] ) } - if (rename_asv) { - taxa_names(physeq) <- paste0("ASV_", seq(1, ntaxa(physeq))) + if (rename_taxa) { + taxa_names(physeq) <- paste0(prefix_taxa_names, seq(1, ntaxa(physeq))) } if (sum(grepl("^0", sample_names(physeq)) > 0) && !silent) { @@ -482,6 +486,8 @@ track_wkflow_samples <- function(list_pq_obj, ...) { ########################################################################### + + ################################################################################ #' Recluster sequences of an object of class `physeq` #' or a list of DNA sequences @@ -492,7 +498,7 @@ track_wkflow_samples <- function(list_pq_obj, ...) { #' lifecycle-maturing #' #' This function use the `merge_taxa_vec` function to merge taxa into clusters. -#' +#' #' @inheritParams clean_pq #' @param dna_seq You may directly use a character vector of DNA sequences #' in place of physeq args. When physeq is set, dna sequences take the value of @@ -510,7 +516,7 @@ track_wkflow_samples <- function(list_pq_obj, ...) { #' @param id (default: 0.97) level of identity to cluster #' @param tax_adjust (Default 0) See the man page #' of [merge_taxa_vec()] for more details. 
-#' To conserved the taxonomic rank of the most abundant ASV, +#' To conserved the taxonomic rank of the most abundant taxa (ASV, OTU,...), #' set tax_adjust to 0 (default). For the moment only tax_adjust = 0 is #' robust #' @param vsearch_cluster_method (default: "--cluster_size) See other possible @@ -543,17 +549,17 @@ track_wkflow_samples <- function(list_pq_obj, ...) { #' #' @examples #' if (requireNamespace("DECIPHER")) { -#' asv2otu(data_fungi_mini) +#' postcluster_pq(data_fungi_mini) #' } #' \donttest{ #' if (requireNamespace("DECIPHER")) { -#' asv2otu(data_fungi_mini, method_clusterize = "longest") +#' postcluster_pq(data_fungi_mini, method_clusterize = "longest") #' #' if (MiscMetabar::is_swarm_installed()) { -#' d_swarm <- asv2otu(data_fungi_mini, method = "swarm") +#' d_swarm <- postcluster_pq(data_fungi_mini, method = "swarm") #' } #' if (MiscMetabar::is_vsearch_installed()) { -#' d_vs <- asv2otu(data_fungi_mini, method = "vsearch") +#' d_vs <- postcluster_pq(data_fungi_mini, method = "vsearch") #' } #' } #' } @@ -566,21 +572,21 @@ track_wkflow_samples <- function(list_pq_obj, ...) { #' @export #' @author Adrien Taudière -asv2otu <- function(physeq = NULL, - dna_seq = NULL, - nproc = 1, - method = "clusterize", - id = 0.97, - vsearchpath = "vsearch", - tax_adjust = 0, - vsearch_cluster_method = "--cluster_size", - vsearch_args = "--strand both", - keep_temporary_files = FALSE, - swarmpath = "swarm", - d = 1, - swarm_args = "--fastidious", - method_clusterize = "overlap", - ...) { +postcluster_pq <- function(physeq = NULL, + dna_seq = NULL, + nproc = 1, + method = "clusterize", + id = 0.97, + vsearchpath = "vsearch", + tax_adjust = 0, + vsearch_cluster_method = "--cluster_size", + vsearch_args = "--strand both", + keep_temporary_files = FALSE, + swarmpath = "swarm", + d = 1, + swarm_args = "--fastidious", + method_clusterize = "overlap", + ...) 
{ if (inherits(physeq, "phyloseq")) { verify_pq(physeq) if (is.null(physeq@refseq)) { @@ -655,10 +661,19 @@ asv2otu <- function(physeq = NULL, ################################################################################ + +################################################################################ +#' @rdname postcluster_pq +#' @export +asv2otu <- postcluster_pq +################################################################################ + + + ################################################################################ #' Save phyloseq object in the form of multiple csv tables. #' -#' @description +#' @description #' #' lifecycle-maturing #' @@ -673,11 +688,11 @@ asv2otu <- function(physeq = NULL, #' @param sam_data_first (logical) if TRUE, put the sample data at the top of the table #' Only used if `one_file` and write_sam_data are both TRUE. #' @param clean_pq (logical) -#' If set to TRUE, empty samples are discarded after subsetting ASV -#' @param reorder_asv (logical) if TRUE the otu_table is ordered by the number of -#' sequences of ASV (descending order). Default to TRUE. Only possible if clean_pq +#' If set to TRUE, empty samples are discarded after subsetting taxa (ASV, OTU, ...) +#' @param reorder_taxa (logical) if TRUE the otu_table is ordered by the number of +#' sequences of taxa (ASV, OTU, ...) (descending order). Default to TRUE. Only possible if clean_pq #' is set to TRUE. -#' @param rename_asv reorder_asv (logical) if TRUE, ASV are renamed by their position +#' @param rename_taxa reorder_taxa (logical) if TRUE, taxa (ASV, OTU, ...) are renamed by their position #' in the OTU_table (asv_1, asv_2, ...). Default to FALSE. Only possible if clean_pq #' is set to TRUE. #' @param quote a logical value (default FALSE) or a numeric vector. 
@@ -705,8 +720,8 @@ write_pq <- function(physeq, write_sam_data = TRUE, sam_data_first = FALSE, clean_pq = TRUE, - reorder_asv = FALSE, - rename_asv = FALSE, + reorder_taxa = FALSE, + rename_taxa = FALSE, remove_empty_samples = TRUE, remove_empty_taxa = TRUE, clean_samples_names = TRUE, @@ -719,8 +734,8 @@ write_pq <- function(physeq, physeq <- clean_pq( physeq, - reorder_asv = reorder_asv, - rename_asv = rename_asv, + reorder_taxa = reorder_taxa, + rename_taxa = rename_taxa, remove_empty_samples = remove_empty_samples, remove_empty_taxa = remove_empty_taxa, clean_samples_names = clean_samples_names, @@ -922,10 +937,10 @@ save_pq <- function(physeq, path = NULL, ...) { #' Read phyloseq object from multiple csv tables and a phylogenetic tree #' in Newick format. #' -#' @description +#' @description #' #' lifecycle-maturing -#' +#' #' This is the reverse function of [write_pq()]. #' #' @param path (required) a path to the folder to read the phyloseq object @@ -1242,7 +1257,7 @@ mumu_pq <- function(physeq, ) otu_tab <- data.frame(unclass(taxa_as_rows(physeq)@otu_table)) - otu_tab <- cbind("ASV" = rownames(otu_tab), otu_tab) + otu_tab <- cbind("Taxa" = rownames(otu_tab), otu_tab) write.table( otu_tab, "otu_table.csv", @@ -1816,7 +1831,7 @@ add_funguild_info <- function(physeq, #' lifecycle-experimental #' #' Graphical function. -#' +#' #' @inheritParams clean_pq #' @param levels_order (Default NULL) A character vector to #' reorder the levels of guild. See examples. 
@@ -1893,9 +1908,9 @@ plot_guild_pq <- names(nb_seq_by_guild) <- guilds$Var1 guilds$seq <- nb_seq_by_guild - names(guilds) <- c("Guild", "nb_asv", "nb_seq") + names(guilds) <- c("Guild", "nb_taxa", "nb_seq") guilds$nb_seq <- as.numeric(guilds$nb_seq) - guilds$nb_asv <- as.numeric(guilds$nb_asv) + guilds$nb_taxa <- as.numeric(guilds$nb_taxa) guilds$Guild <- factor(as.vector(guilds$Guild), levels = guilds$Guild[order(guilds$nb_seq)] @@ -1915,7 +1930,7 @@ plot_guild_pq <- guilds, data.frame( "Guild" = "All ASV", - "nb_asv" = ntaxa(physeq), + "nb_taxa" = ntaxa(physeq), "nb_seq" = sum(physeq@otu_table), "colors" = "ALL" ) @@ -1942,7 +1957,7 @@ plot_guild_pq <- "lightpink4" ) ) + - geom_text(aes(label = nb_asv, x = log10(nb_seq) + 0.2), + geom_text(aes(label = nb_taxa, x = log10(nb_seq) + 0.2), family = "serif" ) + geom_text(aes(label = nb_seq, x = log10(nb_seq) / 2), @@ -2307,7 +2322,7 @@ add_info_to_sam_data <- function(physeq, #' lifecycle-stable #' #' Internally used in [vsearch_clustering()], [swarm_clustering()] and -#' [asv2otu()]. +#' [postcluster_pq()]. #' #' @inheritParams clean_pq #' @param dna_seq You may directly use a character vector of DNA sequences @@ -2390,7 +2405,7 @@ physeq_or_string_to_dna <- function(physeq = NULL, #' @param args_before_cutadapt (String) A one line bash command to run before #' to run cutadapt. 
For examples, "source ~/miniconda3/etc/profile.d/conda.sh && conda activate cutadaptenv &&" allow to bypass the conda init which asks to restart the shell #' -#' @return a list of command +#' @return a list of command #' @export #' @author Adrien Taudière #' @@ -2522,11 +2537,11 @@ cutadapt_remove_primers <- function(path_to_fastq, ################################################################################ #' List the taxa founded only in one given level of a modality #' -#' @description -#' +#' @description +#' #' #' lifecycle-experimental -#' +#' #' Given one modality name in sam_data and one level of the modality, #' return the taxa strictly specific of this level. #' @@ -2538,7 +2553,7 @@ cutadapt_remove_primers <- function(path_to_fastq, #' @param min_nb_samples_taxa (default 0 = no filter) The minimum number of samples per taxa #' #' @return A vector of taxa names -#' @export +#' @export #' #' @examples #' # Taxa present only in low height samples @@ -2776,7 +2791,7 @@ psmelt_samples_pq <- #' lifecycle-maturing #' #' Mainly for internal use. It is a special case of clean_pq function. -#' +#' #' @inheritParams clean_pq #' @author Adrien Taudière #' @export @@ -2803,7 +2818,7 @@ taxa_as_columns <- function(physeq) { #' lifecycle-maturing #' #' Mainly for internal use. It is a special case of clean_pq function. -#' +#' #' @inheritParams clean_pq #' @author Adrien Taudière #' @export @@ -2828,10 +2843,10 @@ taxa_as_rows <- function(physeq) { #' #' lifecycle-experimental #' -#' This function randomly draw the same number of samples for each modality of factor. -#' It is usefull to dissentangle the effect of different number of samples per modality +#' This function randomly draw the same number of samples for each modality of factor. +#' It is usefull to dissentangle the effect of different number of samples per modality #' on diversity. Internally used in [accu_plot_balanced_modality()]. 
-#' +#' #' @inheritParams clean_pq #' @param fact (required): The variable to rarefy. Must be present in #' the `sam_data` slot of the physeq object. diff --git a/R/krona.R b/R/krona.R index 7127171b..f9982d37 100644 --- a/R/krona.R +++ b/R/krona.R @@ -104,7 +104,7 @@ krona <- #' #' Need the installation of kronatools on the computer #' ([installation instruction](https://github.com/marbl/Krona/wiki/Installing)). -#' +#' #' Function merge_krona allows merging multiple html files in one interactive #' krona file #' diff --git a/R/miscellanous.R b/R/miscellanous.R index badcb55b..3bbe6959 100644 --- a/R/miscellanous.R +++ b/R/miscellanous.R @@ -6,7 +6,7 @@ #' #' #' lifecycle-maturing -#' +#' #' Useful to test if the results are not biased by sequences bias #' that appended during PCR or NGS pipeline. #' @@ -36,10 +36,10 @@ as_binary_otu_table <- function(physeq, min_number = 1) { #' Compute paired distances among matrix (e.g. otu_table) #' #' @description -#' +#' #' #' lifecycle-experimental -#' +#' #' May be used to verify ecological distance among samples. #' #' @note the first column of the first matrix is compare to the first column of @@ -120,7 +120,7 @@ all_object_size <- function() { ################################################################################ #' Simplify taxonomy by removing some unused characters such as "k__" -#' +#' #' @description #' #' @@ -148,7 +148,7 @@ simplify_taxo <- function(physeq, remove_space = TRUE) { ################################################################################ #' Get the extension of a file #' -#' @description +#' @description #' #' lifecycle-maturing #' @@ -169,12 +169,12 @@ get_file_extension <- function(file_path) { #' Convert a value (or a fraction x/y) in percentage #' #' @description -#' +#' #' #' lifecycle-maturing -#' +#' #' Mostly for internal use. 
-#' +#' #' @param x (required): value #' @param y if y is set, compute the division of x by y #' @param accuracy number of digits (number of digits after zero) @@ -206,7 +206,7 @@ perc <- function(x, y = NULL, accuracy = 0, add_symbol = FALSE) { #' @description #' #' lifecycle-experimental -#' +#' #' Use grep to count the number of line with only one '+' (fastq, fastq.gz) #' or lines starting with a '>' (fasta) to count sequences. #' @@ -357,7 +357,7 @@ transp <- function(col, alpha = 0.5) { #' lifecycle-experimental #' #' Useful to test a pipeline on small fastq files. -#' +#' #' @param fastq_files The path to one fastq file or a list of fastq files #' (see examples) #' @param folder_output The path to a folder for output files diff --git a/R/plot_functions.R b/R/plot_functions.R index f2daeaf9..4d6b6974 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -7,7 +7,7 @@ #' lifecycle-maturing #' #' Graphical representation of mt test. -#' +#' #' @param mt (required) Result of a mt test from the function [phyloseq::mt()]. #' @param alpha (default: 0.05) Choose the cut off p-value to plot taxa. #' @param color_tax (default: "Class") A taxonomic level to color the points. @@ -57,12 +57,12 @@ plot_mt <- ################################################################################ #' Plot accumulation curves for \code{\link{phyloseq-class}} object -#' -#' @description -#' +#' +#' @description +#' #' #' lifecycle-maturing -#' +#' #' Note that as most bioinformatic pipeline discard singleton, accumulation curves from metabarcoding #' cannot be interpreted in the same way as with conventional biodiversity sampling techniques. #' @@ -517,7 +517,7 @@ accu_samp_threshold <- function(res_accuplot, threshold = 0.95) { #' #' #' lifecycle-maturing -#' +#' #' Graphical representation of distribution of taxa across a factor. 
#' #' @inheritParams clean_pq @@ -530,7 +530,7 @@ accu_samp_threshold <- function(res_accuplot, threshold = 0.95) { #' number of OTUs (add_nb_seq = FALSE) #' @param rarefy (logical) Does each samples modalities need to be rarefy in #' order to compare them with the same amount of sequences? -#' @param min_prop_tax (default: 0.01) The minimum proportion for taxon to be +#' @param min_prop_tax (default: 0.01) The minimum proportion for taxa to be #' plotted #' @param min_prop_mod (default: 0.1) The minimum proportion for modalities #' to be plotted @@ -740,7 +740,7 @@ circle_pq <- #' lifecycle-maturing #' #' Graphical representation of distribution of taxa across Taxonomy and (optionnaly a factor). -#' +#' #' @inheritParams clean_pq #' @param fact Name of the factor to cluster samples by modalities. #' Need to be in \code{physeq@sam_data}. @@ -748,7 +748,7 @@ circle_pq <- #' @param add_nb_seq Represent the number of sequences or the #' number of OTUs (add_nb_seq = FALSE). Note that plotting the number of #' sequences is slower. -#' @param min_prop_tax (default: 0) The minimum proportion for taxon to be +#' @param min_prop_tax (default: 0) The minimum proportion for taxa to be #' plotted. EXPERIMENTAL. For the moment each links below the min.prop. #' tax is discard from the sankey network resulting in sometimes weird plot. #' @param tax2remove a vector of taxonomic groups to remove from the analysis @@ -961,7 +961,7 @@ sankey_pq <- #' #' #' lifecycle-maturing -#' +#' #' Graphical representation of distribution of taxa across combined modality of a factor. #' #' @inheritParams clean_pq @@ -1404,7 +1404,7 @@ multiplot <- ################################################################################ #' Graphical representation of hill number 0, 1 and 2 across a factor -#' +#' #' @description #' #' @@ -1421,7 +1421,7 @@ multiplot <- #' model in order to correct for uneven sampling depth. 
This correction #' is only done before tuckey HSD plot and do not change the hill number #' computed. -#' +#' #' @inheritParams clean_pq #' @param fact (required): The variable to test. Must be present in #' the `sam_data` slot of the physeq object. @@ -1752,7 +1752,7 @@ ggbetween_pq <- } df <- cbind( - "nb_asv" = sample_sums(physeq@otu_table), + "nb_taxa" = sample_sums(physeq@otu_table), physeq@sam_data, "hill_0" = vegan::renyi(physeq@otu_table, scales = 0, hill = TRUE), "hill_1" = vegan::renyi(physeq@otu_table, scales = 1, hill = TRUE), @@ -1794,7 +1794,7 @@ ggbetween_pq <- #' lifecycle-maturing #' #' Graphical representation of a phyloseq object. -#' +#' #' @inheritParams clean_pq #' @param add_info Does the bottom down corner contain #' extra informations? @@ -1950,7 +1950,7 @@ summary_plot_pq <- function(physeq, min(taxa_sums(otu_tab)), " (", sum(taxa_sums(otu_tab) == min(taxa_sums(otu_tab))), - " ASV)", + " Taxons)", "\n", "Min seq length: ", ifelse( @@ -2039,16 +2039,21 @@ rotl_pq <- function(physeq, taxa_names_rotl <- taxa_names_rotl[!grepl("NA", taxa_names_rotl)] taxa_names_rotl <- c(unclass(gsub("_", " ", taxa_names_rotl))) - resolved_names <- rotl::tnrs_match_names(taxa_names_rotl) + resolved_names <- httr::with_config( + httr::config(ssl_verifypeer = FALSE), + rotl::tnrs_match_names(taxa_names_rotl) + ) resolved_names <- resolved_names[resolved_names$flags == "", ] clean_taxa_names_rotl <- taxa_names_rotl[taxa_names_rotl %in% resolved_names$unique_name] - resolved_names2 <- - rotl::tnrs_match_names(clean_taxa_names_rotl, context_name = context_name) + resolved_names2 <- httr::with_config( + httr::config(ssl_verifypeer = FALSE), rotl::tnrs_match_names(clean_taxa_names_rotl, context_name = context_name) + ) - tr <- - rotl::tol_induced_subtree(ott_ids = rotl::ott_id(resolved_names2)) + tr <- httr::with_config( + httr::config(ssl_verifypeer = FALSE), rotl::tol_induced_subtree(ott_ids = rotl::ott_id(resolved_names2)) + ) return(tr) } 
################################################################################ @@ -2137,7 +2142,7 @@ heat_tree_pq <- function(physeq, taxonomic_level = NULL, ...) { #' lifecycle-maturing #' #' Graphical representation of distribution of taxa across two samples. -#' +#' #' @inheritParams clean_pq #' @param fact (default: NULL) Name of the factor in `physeq@sam_data`. #' If left to NULL use the `left_name` and `right_name` parameter as modality. @@ -2516,7 +2521,7 @@ multi_biplot_pq <- function(physeq, #' [merge_samples2()] function. #' Need to be in \code{physeq@sam_data} #' @param type If "nb_seq" (default), the number of sequences is -#' used in plot. If "nb_asv", the number of ASV is plotted. If both, +#' used in plot. If "nb_taxa", the number of taxa is plotted. If both, #' return a list of two plots, one for nbSeq and one for ASV. #' @param taxa_fill (default: 'Order'): Name of the taxonomic rank of interest #' @param print_values (logical, default TRUE): Do we print some values on plot? 
@@ -2632,12 +2637,12 @@ plot_tax_pq <- ) } } - if (type %in% c("nb_asv", "both")) { + if (type %in% c("nb_taxa", "both")) { mdf <- psmelt(as_binary_otu_table(physeq)) mdf <- mdf %>% mutate(percent = Abundance / sum(Abundance)) - p_asv <- + p_taxa <- ggplot(mdf, aes(x = .data[[fact]], y = .data[["Abundance"]], fill = .data[[taxa_fill]])) + geom_bar( aes(fill = .data[[taxa_fill]]), @@ -2666,8 +2671,8 @@ plot_tax_pq <- ) ) } - if (type %in% c("nb_asv", "both")) { - p_asv <- p_asv + + if (type %in% c("nb_taxa", "both")) { + p_taxa <- p_taxa + labs( title = paste("Total nb of sequences: ", sum(physeq_old@otu_table)), subtitle = paste0( @@ -2685,10 +2690,10 @@ plot_tax_pq <- if (type == "nb_seq") { return(p_seq) - } else if (type == "nb_asv") { - return(p_asv) + } else if (type == "nb_taxa") { + return(p_taxa) } else if (type == "both") { - return(list(p_seq, p_asv)) + return(list(p_seq, p_taxa)) } } ################################################################################ @@ -2872,7 +2877,7 @@ tsne_pq <- #' Plot a tsne low dimensional representation of a phyloseq object #' #' @description -#' +#' #' #' lifecycle-experimental #' @@ -2972,8 +2977,8 @@ plot_tsne_pq <- function(physeq, ################################################################################ #' Scaling with ranked subsampling (SRS) curve of phyloseq object -#' -#' @description +#' +#' @description #' #' lifecycle-experimental #' @@ -3015,7 +3020,7 @@ SRS_curve_pq <- function(physeq, clean_pq = FALSE, ...) { #' lifecycle-experimental #' #' Note that this function is quite time-consuming due to high dimensionality in metabarcoding community matrix. -#' +#' #' @inheritParams clean_pq #' @param merge_sample_by (default: NULL) if not `NULL` samples of #' physeq are merged using the vector set by `merge_sample_by`. 
This @@ -3268,10 +3273,10 @@ upset_pq <- function(physeq, psm2, intersect = samp_names, base_annotations = list(), - annotations = list("ASV" = ( + annotations = list("Taxa" = ( ggplot(mapping = aes(fill = .data[[taxa_fill]])) + geom_bar() + - ylab("ASV per Class") + + ylab("Taxa per Class") + theme(legend.key.size = unit(0.2, "cm")) + theme(axis.text = element_text(size = 12)) )), @@ -3287,10 +3292,10 @@ upset_pq <- function(physeq, ################################################################################ #' Test for differences between intersections #' -#' @description +#' @description #' #' lifecycle-experimental -#' +#' #' See [upset_pq()] to plot upset. #' #' @inheritParams upset_pq @@ -3500,7 +3505,7 @@ diff_fct_diff_class <- #' lifecycle-experimental #' #' Graphical representation of distribution of taxonomy, optionnaly across a factor. -#' +#' #' @inheritParams clean_pq #' @param fact Name of the factor to cluster samples by modalities. #' Need to be in \code{physeq@sam_data}. @@ -3560,8 +3565,8 @@ tax_bar_pq <- #' #' lifecycle-experimental #' -#' Graphical representation of distribution of taxa across a factor using ridges. -#' +#' Graphical representation of distribution of taxa across a factor using ridges. +#' #' @inheritParams clean_pq #' @param fact (required) Name of the factor in `physeq@sam_data` used to plot #' different lines @@ -4047,7 +4052,7 @@ ggscatt_pq <- function(physeq, #' which samples to merge using [merge_samples2()] function. #' Need to be in \code{physeq@sam_data}. #' Need to be use when you want to wrap by factor the final plot -#' with the number of taxa (type="nb_asv") +#' with the number of taxa (type="nb_taxa") #' @param by_sample (logical) If FALSE (default), sample information is not taking #' into account, so the taxonomy is studied globally. If fact is not NULL, by_sample #' is automatically set to TRUE. @@ -4055,7 +4060,7 @@ ggscatt_pq <- function(physeq, #' samples using [phyloseq::rarefy_even_depth()] function. 
#' @param fact (required) Name of the factor in `physeq@sam_data` used to plot the last column #' @param type If "nb_seq" (default), the number of sequences is -#' used in plot. If "nb_asv", the number of ASV is plotted. +#' used in plot. If "nb_taxa", the number of taxa is plotted. #' @param width (passed on to [ggalluvial::geom_flow()]) the width of each stratum, #' as a proportion of the distance between axes. Defaults to 1/3. #' @param min.size (passed on to [ggfittext::geom_fit_text()]) Minimum font size, @@ -4077,14 +4082,14 @@ ggscatt_pq <- function(physeq, #' } #' \donttest{ #' if (requireNamespace("ggalluvial")) { -#' ggaluv_pq(data_fungi_mini, type = "nb_asv") +#' ggaluv_pq(data_fungi_mini, type = "nb_taxa") #' -#' ggaluv_pq(data_fungi_mini, wrap_factor = "Height", by_sample = TRUE, type = "nb_asv") + +#' ggaluv_pq(data_fungi_mini, wrap_factor = "Height", by_sample = TRUE, type = "nb_taxa") + #' facet_wrap("Height") #' #' ggaluv_pq(data_fungi_mini, #' width = 0.9, min.size = 10, -#' type = "nb_asv", taxa_ranks = c("Phylum", "Class", "Order", "Family", "Genus") +#' type = "nb_taxa", taxa_ranks = c("Phylum", "Class", "Order", "Family", "Genus") #' ) + #' coord_flip() + scale_x_discrete(limits = rev) #' } @@ -4125,10 +4130,10 @@ ggaluv_pq <- function(physeq, merge_samples2(physeq, group = rep("all_samples_together", nsamples(physeq))) } - if (type == "nb_asv") { + if (type == "nb_taxa") { physeq <- as_binary_otu_table(physeq) } else if (type != "nb_seq") { - stop("Type must be eiter nb_seq or nb_asv") + stop("Type must be eiter nb_seq or nb_taxa") } psm_samp <- diff --git a/R/speedyseq_functions.R b/R/speedyseq_functions.R index 41503cba..fa26d9e1 100644 --- a/R/speedyseq_functions.R +++ b/R/speedyseq_functions.R @@ -43,7 +43,7 @@ #' @return A new phyloseq-class, otu_table, tax_table, XStringset or #' sam_data object depending on the class of the x param #' @seealso -#' Function in MiscMetabar +#' Function in MiscMetabar 
that use this function: [postcluster_pq()] #' #' [base::rowsum()] #' diff --git a/R/table_functions.R b/R/table_functions.R index a2dd759d..04e79bed 100644 --- a/R/table_functions.R +++ b/R/table_functions.R @@ -1,13 +1,13 @@ ################################################################################ #' Make a datatable with the taxonomy of a \code{\link{phyloseq-class}} object -#' -#' @description -#' +#' +#' @description +#' #' #' lifecycle-maturing #' #' An interactive table for phyloseq taxonomy. -#' +#' #' @inheritParams clean_pq #' @param abundance (default: TRUE) Does the number of sequences is print #' @param taxonomic_level (default: NULL) a vector of selected taxonomic @@ -100,8 +100,8 @@ tax_datatable <- function(physeq, #' @description #' #' -#' lifecycle-experimental -#' +#' lifecycle-experimental +#' #' For the moment refseq slot need to be not Null. #' #' @inheritParams clean_pq @@ -277,12 +277,12 @@ compare_pairs_pq <- function(physeq = NULL, ################################################################################ #' Create a visualization table to describe taxa distribution across a modality #' -#' @description +#' @description #' #' lifecycle-maturing #' #' Allow to visualize a table with graphical input. -#' +#' #' @inheritParams clean_pq #' @param modality (required) The name of a column present in the `@sam_data` slot #' of the physeq object. Must be a character vector or a factor. diff --git a/R/targets_misc.R b/R/targets_misc.R index 994e8f10..7d0139a4 100644 --- a/R/targets_misc.R +++ b/R/targets_misc.R @@ -6,7 +6,7 @@ #' lifecycle-maturing #' #' Useful for targets bioinformatic pipeline. -#' +#' #' @param path path to files (required) #' @param paired_end do you have paired_end files? (default TRUE) #' @param pattern a pattern to filter files (passed on to list.files function). @@ -61,7 +61,7 @@ list_fastq_files <- #' lifecycle-experimental #' #' Useful for targets bioinformatic pipeline. 
-#' +#' #' @inheritParams clean_pq #' @param names_of_samples (required) The new names of the samples #' @@ -223,7 +223,7 @@ filter_trim <- #' #' #' lifecycle-maturing -#' +#' #' Useful for targets bioinformatic pipeline. #' #' @param file_path (required) a path to the sample_data file @@ -265,7 +265,7 @@ sample_data_with_new_names <- function(file_path, #' lifecycle-maturing #' #' Useful for targets bioinformatic pipeline. -#' +#' #' @param phyloseq_component (required) one of otu_table or sam_data slot of a #' phyloseq-class object #' @param names_of_samples (required) A vector of samples names diff --git a/R/vsearch.R b/R/vsearch.R index 91e60e49..7d8f8ef3 100644 --- a/R/vsearch.R +++ b/R/vsearch.R @@ -2,12 +2,12 @@ #' Search for a list of sequence in a fasta file against physeq reference #' sequences using [vsearch](https://github.com/torognes/vsearch) #' -#' @description +#' @description #' #' lifecycle-maturing #' #' Use of VSEARCH software. -#' +#' #' @inheritParams clean_pq #' @param seq2search (required if path_to_fasta is NULL) Either (i) a DNAstringSet object #' or (ii) a character vector that will be convert to DNAstringSet using @@ -137,7 +137,7 @@ vs_search_global <- function(physeq, #' lifecycle-maturing #' #' A wrapper of SWARM software. -#' +#' #' @inheritParams clean_pq #' @param dna_seq NOT WORKING FOR THE MOMENT #' You may directly use a character vector of DNA sequences @@ -195,7 +195,7 @@ vs_search_global <- function(physeq, #' sequences_ex_swarm <- swarm_clustering( #' dna_seq = sequences_ex #' ) -#' @seealso [asv2otu()], [vsearch_clustering()] +#' @seealso [postcluster_pq()], [vsearch_clustering()] #' @references #' SWARM can be downloaded from #' \url{https://github.com/torognes/swarm}. @@ -343,7 +343,7 @@ swarm_clustering <- function(physeq = NULL, #' lifecycle-maturing #' #' A wrapper of VSEARCH software. 
-#' +#' #' @inheritParams clean_pq #' @param dna_seq You may directly use a character vector of DNA sequences #' in place of physeq args. When physeq is set, dna sequences take the value of @@ -370,7 +370,7 @@ swarm_clustering <- function(physeq = NULL, #' - cluster.fasta (centroid if method = "vsearch") #' - temp.uc (clusters if method = "vsearch") #' -#' @seealso [asv2otu()], [swarm_clustering()] +#' @seealso [postcluster_pq()], [swarm_clustering()] #' @details This function use the [merge_taxa_vec()] function to #' merge taxa into clusters. By default tax_adjust = 0. See the man page #' of [merge_taxa_vec()]. @@ -485,12 +485,12 @@ vsearch_clustering <- function(physeq = NULL, #' Search for a list of sequence in an object to remove chimera taxa #' using [vsearch](https://github.com/torognes/vsearch) #' -#' @description +#' @description #' #' lifecycle-experimental #' #' Use the VSEARCH software. -#' +#' #' @param object (required) A phyloseq-class object or one of dada, derep, #' data.frame or list coercible to sequences table using the #' function [dada2::makeSequenceTable()] @@ -614,12 +614,12 @@ chimera_removal_vs <- #' Detect for chimera taxa using [vsearch](https://github.com/torognes/vsearch) #' -#' @description +#' @description #' #' lifecycle-experimental #' #' Use the VSEARCH software. 
-#' +#' #' @param seq2search (required) a list of DNA sequences coercible by function #' [Biostrings::DNAStringSet()] #' @param nb_seq (required) a numeric vector giving the number of sequences for @@ -666,7 +666,7 @@ chimera_detection_vs <- function(seq2search, keep_temporary_files = FALSE) { dna_raw <- Biostrings::DNAStringSet(seq2search) names(dna_raw) <- paste0( - "ASV", seq(1, length(seq2search)), + "Taxa", seq(1, length(seq2search)), ";size=", nb_seq ) diff --git a/cran-comments.md b/cran-comments.md index 02c46a26..27ee0ffd 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,6 @@ -- There are 5 \dontrun in examples for functions relying on external software (namely blast, cutadapt, mumu and krona x2). +Problem raised by CRAN Team the 22/08/2024 : -- I spent long time to simplify data and example, but in total, the run time is still quite long. It is intrinsically due to the nature of metagenomic big data. \ No newline at end of file +'Packages which use Internet resources should fail gracefully with an informative message +if the resource is not available or has changed (and not give a check warning nor error).' + +Problem resolved in issue https://github.com/adrientaudiere/MiscMetabar/issues/97 \ No newline at end of file diff --git a/docs/404.html b/docs/404.html index bf8e713d..46be9e99 100644 --- a/docs/404.html +++ b/docs/404.html @@ -14,90 +14,75 @@ - - - + + + - Skip to contents - -