Skip to content

Commit

Permalink
Merge pull request #32 from adrientaudiere/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
adrientaudiere authored Oct 20, 2023
2 parents 6234de6 + c341b8d commit 2c4bce7
Show file tree
Hide file tree
Showing 28 changed files with 1,110 additions and 350 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Package: MiscMetabar
Type: Package
Title: Miscellaneous functions for metabarcoding analysis
Version: 0.42
Version: 0.5
Author: Adrien Taudière <[email protected]>
Maintainer: Adrien Taudière <[email protected]>
Description: Functions to help analyze and visualize metabarcoding data. Mainly build on the top of phyloseq, dada2 and targets R packages.
License: MIT + file LICENSE
License: GNU AGPLv3
Encoding: UTF-8
LazyData: true
Depends:
Expand Down
12 changes: 11 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# MiscMetabar 0.43 (in development)
# MiscMetabar 0.51 (in development)


# MiscMetabar 0.5

- Phyloseq object are converted in taxa_are_columns in the ggvenn_pq() thanks to issue #31

## BREAKING CHANGES

- Rename param `log_10` in function `biplot_pq()` into `log10trans`
- Rename param `log10trans` in function `circle_pq()` into `log10trans`

# MiscMetabar 0.42

- Add argument `one_plot` (default FALSE, same behavior than before) to `hill_pq` function in order to return an unique ggplot2 object with the four plots inside.
Expand Down
113 changes: 79 additions & 34 deletions R/plot_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,6 @@ accu_plot <-
if (is.null(step)) {
step <- round(max(tot) / 30, 0)
}
if (is.null(step)) {
step <- 1
}

n_max <- seq(1, max(tot), by = step)
out <- lapply(seq_len(nr), function(i) {
Expand Down Expand Up @@ -385,7 +382,7 @@ circle_pq <-
start_degree = NULL,
row_col = NULL,
grid_col = NULL,
log10trans = FALSE,
log10transform = FALSE,
...) {
if (!inherits(physeq, "phyloseq")) {
stop("physeq must be an object of class 'phyloseq'")
Expand Down Expand Up @@ -494,7 +491,7 @@ circle_pq <-
}
otu_table_ech <- o_t_e_interm

if (log10trans) {
if (log10transform) {
otu_table_ech <- apply(otu_table_ech, 2, function(x) {
log10(1 + x)
})
Expand Down Expand Up @@ -1014,6 +1011,15 @@ ggvenn_pq <- function(physeq = NULL,
physeq@sam_data[[fact]] <- as.factor(physeq@sam_data[[fact]])
}

physeq <- clean_pq(
physeq,
force_taxa_as_columns = TRUE,
remove_empty_samples = FALSE,
remove_empty_taxa = FALSE,
clean_samples_names = FALSE,
silent = TRUE
)

if (rarefy_before_merging) {
physeq <- rarefy_even_depth(physeq)
physeq <- clean_pq(physeq)
Expand Down Expand Up @@ -1168,20 +1174,22 @@ multiplot <-
#' Note that this function use a sqrt of the read numbers in the linear
#' model in order to correct for uneven sampling depth.
#' @inheritParams clean_pq
#' @param variable (required): The variable to test
#' @param variable (required): The variable to test. Must be present in
#' the `sam_data` slot of the physeq object.
#' @param color_fac (optional): The variable to color the barplot
#' @param letters (optional, default=FALSE): If set to TRUE, the plot
#' show letters based on p-values for comparison. Use the
#' \code{\link[multcompView]{multcompLetters}} function from the package
#' multcompLetters. BROKEN for the moment.
#' show letters based on p-values for comparison. Use the
#' \code{\link[multcompView]{multcompLetters}} function from the package
#' multcompLetters. BROKEN for the moment. Note that na values in Tthe
#' variable param need to be removed (see examples) to use letters.
#' @param add_points (logical): add jitter point on boxplot
#' @param add_info (logical, default TRUE) Do we add a subtitle with
#' information about the number of samples per modality.
#' @param one_plot (logical, default=FALSE) If true, return a unique
#' information about the number of samples per modality ?
#' @param one_plot (logical, default FALSE) If TRUE, return a unique
#' plot with the four plot inside using the patchwork package.
#' Note that if letters is TRUE, tuckey HSD results are discarded from
#' the unique plot. In that case, use one_plot = FALSE to see the tuckey
#' HSD results in the fourth plot of the resulting list.
#' Note that if letters and one_plot are both TRUE, tuckey HSD results
#' are discarded from the unique plot. In that case, use one_plot = FALSE
#' to see the tuckey HSD results in the fourth plot of the resulting list.
#' @param correction_for_sample_size (logical, default TRUE) This function
#' use a sqrt of the read numbers in the linear model in order to
#' correct for uneven sampling depth.
Expand Down Expand Up @@ -1310,7 +1318,6 @@ hill_pq <-
)

### HILL 1

data_h1 <-
p_var$data[grep("Hill Number 1", p_var$data[, 5]), ]
data_h1_pval <- data_h1$p.adj
Expand Down Expand Up @@ -1339,7 +1346,6 @@ hill_pq <-
)

### HILL 2

data_h2 <-
p_var$data[grep("Hill Number 2", p_var$data[, 5]), ]
data_h2_pval <- data_h2$p.adj
Expand Down Expand Up @@ -1641,10 +1647,10 @@ rotl_pq <- function(physeq,
#' Heat tree from `metacoder` package using `tax_table` slot
#' @description
#' `r lifecycle::badge("maturing")`
#'
#' Note that the number of ASV is store under the name `n_obs`
#'
#' Note that the number of ASV is store under the name `n_obs`
#' and the number of sequences under the name `nb_sequences`
#'
#'
#' @inheritParams clean_pq
#' @param taxonomic_level (default: NULL): a vector of selected
#' taxonomic level using
Expand All @@ -1657,7 +1663,6 @@ rotl_pq <- function(physeq,
#'
#' @examples
#' \dontrun{
#' library("metacoder")
#' data("GlobalPatterns")
#' GPsubset <- subset_taxa(
#' GlobalPatterns,
Expand All @@ -1680,26 +1685,26 @@ rotl_pq <- function(physeq,
#' node_size_trans = "log10 area"
#' )
#' }
#'
#' #' heat_tree_pq(GPsubset,
#'
#' heat_tree_pq(GPsubset,
#' node_size = n_seq,
#' node_color = n_obs,
#' node_label = taxon_names,
#' tree_label = taxon_names,
#' node_size_trans = "log10 area"
#' )
#' }
#'
heat_tree_pq <- function(physeq, taxonomic_level = NULL, ...) {
library("metacoder")
if (!is.null(taxonomic_level)) {
physeq@tax_table <- physeq@tax_table[, taxonomic_level]
}
data_metacoder <- metacoder::parse_phyloseq(physeq)

data_metacoder <- parse_phyloseq(physeq)
data_metacoder$data$taxon_counts <- calc_taxon_abund(data_metacoder, data = "otu_table")
data_metacoder$data$taxon_counts$nb_sequences <- rowSums(data_metacoder$data$taxon_counts[, -1])

p <- metacoder::heat_tree(data_metacoder, ...)
p <- heat_tree(data_metacoder, ...)

return(p)
}
Expand Down Expand Up @@ -1727,7 +1732,7 @@ heat_tree_pq <- function(physeq, taxonomic_level = NULL, ...) {
#' @param right_name_col Color for the right name
#' @param right_fill Fill fo the right sample.
#' @param right_col Color fo the right sample.
#' @param log_10 (logical) Does abundancy is log10 transformed ?
#' @param log10trans (logical) Does abundancy is log10 transformed ?
#' @param nudge_y A parameter to control the y position of abundancy values.
#' If a vector of two values are set. The first value is for the left side.
#' and the second value for the right one. If one value is set,
Expand Down Expand Up @@ -1767,7 +1772,7 @@ biplot_pq <- function(physeq,
right_name_col = "#1d2949",
right_fill = "#1d2949",
right_col = "#1d2949",
log_10 = TRUE,
log10trans = TRUE,
nudge_y = c(0.3, 0.3),
geom_label = FALSE,
text_size = 3,
Expand Down Expand Up @@ -1856,7 +1861,7 @@ biplot_pq <- function(physeq,
if (length(nudge_y) == 1) {
nudge_y <- c(nudge_y, nudge_y)
}
if (log_10) {
if (log10trans) {
mdf$Ab <- log10(mdf$Abundance + 1)
} else {
mdf$Ab <- mdf$Abundance
Expand Down Expand Up @@ -2060,7 +2065,7 @@ multi_biplot_pq <- function(physeq,


################################################################################
#' Plot taxonomic distribution in function of a factor.
#' Plot taxonomic distribution in function of a factor with stacked bar in %
#'
#' @description
#' `r lifecycle::badge("experimental")`
Expand Down Expand Up @@ -2094,6 +2099,7 @@ multi_biplot_pq <- function(physeq,
#' @return A ggplot2 graphic
#' @export
#' @author Adrien Taudière
#' @seealso [tax_bar_pq()] and [multitax_bar_pq()]
#' @examples
#' data(data_fungi_sp_known)
#' plot_tax_pq(data_fungi_sp_known,
Expand Down Expand Up @@ -2836,12 +2842,51 @@ upset_test_pq <-
#' - FALSE_if_not_all_TRUE
#' @param character_method : A method for character vector (and factor). One of :
#' - unique_or_na (default)
#' - more frequent
#' - more_frequent
#' - more_frequent_without_equality
#' @param ... other arguments passed on to the numeric function (ex. na.rm=TRUE)
#' @return a single value
#' @export
#'
#' @examples
#' diff_fct_diff_class(
#' data_fungi@sam_data$Sample_id,
#' numeric_fonction = sum,
#' na.rm = TRUE
#' )
#' diff_fct_diff_class(
#' data_fungi@sam_data$Time,
#' numeric_fonction = mean,
#' na.rm = TRUE
#' )
#' diff_fct_diff_class(
#' data_fungi@sam_data$Height == "Low",
#' logical_method = "TRUE_if_one"
#' )
#' diff_fct_diff_class(
#' data_fungi@sam_data$Height == "Low",
#' logical_method = "NA_if_not_all_TRUE"
#' )
#' diff_fct_diff_class(
#' data_fungi@sam_data$Height == "Low",
#' logical_method = "FALSE_if_not_all_TRUE"
#' )
#' diff_fct_diff_class(
#' data_fungi@sam_data$Height,
#' character_method = "unique_or_na"
#' )
#' diff_fct_diff_class(
#' c("IE", "IE"),
#' character_method = "unique_or_na"
#' )
#' diff_fct_diff_class(
#' c("IE", "IE", "TE", "TE"),
#' character_method = "more_frequent"
#' )
#' diff_fct_diff_class(
#' c("IE", "IE", "TE", "TE"),
#' character_method = "more_frequent_without_equality"
#' )
#' @author Adrien Taudière
diff_fct_diff_class <-
function(x,
Expand All @@ -2857,7 +2902,7 @@ diff_fct_diff_class <-
} else if (character_method == "more_frequent") {
return(names(sort(table(x), decreasing = TRUE)[1]))
} else if (character_method == "more_frequent_without_equality") {
if (names(sort(table(x), decreasing = TRUE)[1]) == names(sort(table(x), decreasing = TRUE)[2])) {
if (sort(table(x), decreasing = TRUE)[1] == sort(table(x), decreasing = TRUE)[2]) {
return(NA)
} else {
return(names(sort(table(x), decreasing = TRUE)[1]))
Expand Down Expand Up @@ -2901,7 +2946,7 @@ diff_fct_diff_class <-


################################################################################
#' iNterpolation and EXTrapolation of Hill numbers (with iNEXT)
#' Plot the distribution of sequences or ASV in one taxonomic levels
#' @description
#' `r lifecycle::badge("experimental")`
#'
Expand All @@ -2925,7 +2970,7 @@ diff_fct_diff_class <-
#' tax_bar_pq(data_fungi, taxa = "Class", percent_bar = TRUE)
#' tax_bar_pq(data_fungi, taxa = "Class", fact = "Time")
#' @author Adrien Taudière
#'
#' @seealso [plot_tax_pq()] and [multitax_bar_pq()]
#'
tax_bar_pq <- function(physeq, fact = "Sample", taxa = "Order", percent_bar = FALSE, nb_seq = TRUE) {
if (!nb_seq) {
Expand Down
Loading

0 comments on commit 2c4bce7

Please sign in to comment.