Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Balance parameter added to summary plot #4

Merged
merged 8 commits into from
Nov 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions R/maaslin3.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ args$max_pngs <- 30
args$cores <- 1
args$save_models <- FALSE
args$reference <- NULL
args$summary_plot_balanced <- FALSE

#### end ####

Expand Down Expand Up @@ -582,6 +583,19 @@ options <-
)
)

# Register the --summary_plot_balanced command-line flag.
# `dest` is the field name under which optparse stores the parsed value. It
# must be "summary_plot_balanced" because the script entry point reads
# current_args$summary_plot_balanced; the previous value
# ("make_summary_plot_balanced") left that lookup empty, so the flag given on
# the command line never reached the plotting code.
options <-
    optparse::add_option(
        options,
        c("--summary_plot_balanced"),
        type = "logical",
        dest = "summary_plot_balanced",
        default = args$summary_plot_balanced,
        help = paste(
            "If coef_plot_vars is selected this will",
            "select balanced top features [ Default: %default ]"
        )
    )

option_not_valid_error <- function(message, valid_options) {
logging::logerror(paste(message, ": %s"), toString(valid_options))
stop("Option not valid", call. = FALSE)
Expand Down Expand Up @@ -765,7 +779,8 @@ maaslin_log_arguments <- function(input_data,
max_pngs = 30,
cores = 1,
save_models = FALSE,
verbosity = 'FINEST') {
verbosity = 'FINEST',
summary_plot_balanced=FALSE) {
# Allow for lower case variables
normalization <- toupper(normalization)
transform <- toupper(transform)
Expand Down Expand Up @@ -873,6 +888,8 @@ maaslin_log_arguments <- function(input_data,
logging::logdebug("Augment: %s", augment)
logging::logdebug("Evaluate only: %s", evaluate_only)
logging::logdebug("Cores: %d", cores)
logging::logdebug("Balanced Summary plot: %s", summary_plot_balanced)


maaslin_check_arguments(
feature_specific_covariate,
Expand Down Expand Up @@ -2307,7 +2324,8 @@ maaslin_plot_results <- function(output,
coef_plot_vars = NULL,
heatmap_vars = NULL,
plot_associations = TRUE,
max_pngs = 30) {
max_pngs = 30,
balanced = FALSE) {
# create an output folder and figures folder if it does not exist
if (!file.exists(output)) {
logging::loginfo("Creating output folder")
Expand Down Expand Up @@ -2354,7 +2372,8 @@ maaslin_plot_results <- function(output,
coef_plot_vars = coef_plot_vars,
heatmap_vars = heatmap_vars,
median_comparison_abundance = median_comparison_abundance,
median_comparison_prevalence = median_comparison_prevalence
median_comparison_prevalence = median_comparison_prevalence,
balanced = balanced
)
}

Expand Down Expand Up @@ -2416,7 +2435,8 @@ maaslin_plot_results_from_output <- function(output,
coef_plot_vars = NULL,
heatmap_vars = NULL,
plot_associations = TRUE,
max_pngs = 30) {
max_pngs = 30,
balanced=FALSE) {

# create an output folder and figures folder if it does not exist
if (!file.exists(output)) {
Expand Down Expand Up @@ -2469,7 +2489,8 @@ maaslin_plot_results_from_output <- function(output,
coef_plot_vars = coef_plot_vars,
heatmap_vars = heatmap_vars,
median_comparison_abundance = median_comparison_abundance,
median_comparison_prevalence = median_comparison_prevalence
median_comparison_prevalence = median_comparison_prevalence,
balanced = balanced
)
}

Expand Down Expand Up @@ -2602,7 +2623,8 @@ maaslin3 <- function(input_data,
max_pngs = 30,
cores = 1,
save_models = FALSE,
verbosity = 'FINEST') {
verbosity = 'FINEST',
summary_plot_balanced=FALSE) {
logging::logReset()

# Allow for lower case variables
Expand Down Expand Up @@ -2659,7 +2681,8 @@ maaslin3 <- function(input_data,
max_pngs,
cores,
save_models,
verbosity
verbosity,
balanced
)

# Read data in
Expand Down Expand Up @@ -2798,7 +2821,8 @@ maaslin3 <- function(input_data,
coef_plot_vars,
heatmap_vars,
plot_associations,
max_pngs
max_pngs,
summary_plot_balanced
)
},
warning = function(w) {
Expand Down Expand Up @@ -2898,6 +2922,7 @@ if (identical(environment(), globalenv()) &&
augment = current_args$augment,
evaluate_only = current_args$evaluate_only,
reference = current_args$reference,
unscaled_abundance = current_args$unscaled_abundance
unscaled_abundance = current_args$unscaled_abundance,
summary_plot_balanced = current_args$summary_plot_balanced
)
}
52 changes: 44 additions & 8 deletions R/viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ make_coef_plot <- function(merged_results_sig,
max_significance,
median_comparison_prevalence,
median_comparison_abundance,
median_df) {
median_df,
plot_threshold = 10) {
coef_plot_data <-
merged_results_sig[merged_results_sig$full_metadata_name %in%
coef_plot_vars,]
Expand All @@ -111,9 +112,9 @@ make_coef_plot <- function(merged_results_sig,
quantile_df <- coef_plot_data %>%
dplyr::group_by(.data$full_metadata_name) %>%
dplyr::summarise(
lower_q = median(.data$coef) - 10 *
lower_q = median(.data$coef) - plot_threshold *
(median(.data$coef) - quantile(.data$coef, 0.25)),
upper_q = median(.data$coef) + 10 *
upper_q = median(.data$coef) + plot_threshold *
(quantile(.data$coef, 0.75) - median(.data$coef))
) %>%
data.frame()
Expand Down Expand Up @@ -470,7 +471,8 @@ maaslin3_summary_plot <-
coef_plot_vars = NULL,
heatmap_vars = NULL,
median_comparison_abundance = FALSE,
median_comparison_prevalence = FALSE) {
median_comparison_prevalence = FALSE,
balanced=FALSE) {
if (first_n > 200) {
logging::logerror(
paste(
Expand Down Expand Up @@ -533,15 +535,43 @@ maaslin3_summary_plot <-

# Subset associations for plotting
merged_results_joint_only <-
unique(merged_results[, c('feature', 'qval_joint')])
unique(merged_results[, c('feature', 'qval_joint', 'full_metadata_name')])
merged_results_joint_only <-
merged_results_joint_only[
order(merged_results_joint_only$qval_joint),]
if (length(unique(merged_results_joint_only$feature)) < first_n) {
first_n <- length(unique(merged_results_joint_only$feature))
signif_taxa <-
unique(merged_results_joint_only$feature)[seq(first_n)]
} else {
# If balanced is turned on but no coefs were chosen, error out
if (balanced){
if (is.null(coef_plot_vars)){
logging::logerror(
paste(
"Balanced plotting requires you set the variables you
want to plot using
the parameter coef_plot_vars"
)
)
return()
} else {
# grab the top N features for each variable, where
# N = ceiling(first_n / length(coef_plot_vars)), to plot the coef plot
first_n_per = first_n/length(coef_plot_vars)
signif_taxa <- merged_results_joint_only %>%
dplyr::group_by(.data$full_metadata_name) %>%
dplyr::arrange(desc(-.data$qval_joint), .by_group = T) %>%
dplyr::slice_head(n=ceiling(first_n_per)) %>%
dplyr::pull(feature) %>%
unique()
}
} else {
signif_taxa <-
unique(merged_results_joint_only$feature)[seq(first_n)]
}
}
signif_taxa <-
unique(merged_results_joint_only$feature)[seq(first_n)]


merged_results_sig <- merged_results %>%
dplyr::filter(.data$feature %in% signif_taxa)
Expand Down Expand Up @@ -588,12 +618,18 @@ maaslin3_summary_plot <-
if (length(coef_plot_vars) > 0 &
sum(merged_results_sig$full_metadata_name %in%
coef_plot_vars) >= 1) {
if (balanced) {
plot_thres = 5
} else {
plot_thres = 10
}
p1 <- make_coef_plot(merged_results_sig,
coef_plot_vars,
max_significance,
median_comparison_prevalence,
median_comparison_abundance,
median_df)
median_df,
plot_threshold = plot_thres)

} else {
p1 <- NULL
Expand Down
8 changes: 7 additions & 1 deletion man/maaslin3.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ maaslin3(input_data,
max_pngs = 30,
cores = 1,
save_models = FALSE,
verbosity = 'FINEST')
verbosity = 'FINEST',
summary_plot_balanced = FALSE)
}
\arguments{
\item{input_data}{A data frame of feature abundances or read counts or a
Expand Down Expand Up @@ -210,6 +211,11 @@ maaslin3(input_data,
\item{save_models}{Whether to return the fit models and save them to an
RData file.}
\item{verbosity}{The level of verbosity for the \code{logging} package.}
\item{summary_plot_balanced}{If set to \code{TRUE}, the summary plot will
show the top N features for each variable included in
\code{coef_plot_vars}, where N is equal to
\code{ceiling(summary_plot_first_n/length(coef_plot_vars))}. An error is
logged if \code{coef_plot_vars} is \code{NULL}.}
}
\value{
A list containing the following items:
Expand Down
8 changes: 7 additions & 1 deletion man/maaslin_log_arguments.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ maaslin_log_arguments(input_data,
max_pngs = 30,
cores = 1,
save_models = FALSE,
verbosity = 'FINEST')
verbosity = 'FINEST',
summary_plot_balanced = FALSE)
}
\arguments{
\item{input_data}{A data frame of feature abundances or read counts or a
Expand Down Expand Up @@ -185,6 +186,11 @@ maaslin_log_arguments(input_data,
\item{save_models}{Whether to return the fit models and save them to an
RData file.}
\item{verbosity}{The level of verbosity for the \code{logging} package.}
\item{summary_plot_balanced}{If set to \code{TRUE}, the summary plot will
show the top N features for each variable included in
\code{coef_plot_vars}, where N is equal to
\code{ceiling(summary_plot_first_n/length(coef_plot_vars))}. An error is
logged if \code{coef_plot_vars} is \code{NULL}.}
}
\value{
No value is returned, but a logger is opened with the parameters logged.
Expand Down
8 changes: 7 additions & 1 deletion man/maaslin_plot_results.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ maaslin_plot_results(output,
coef_plot_vars = NULL,
heatmap_vars = NULL,
plot_associations = TRUE,
max_pngs = 30)
max_pngs = 30,
balanced = FALSE)
}
\arguments{
\item{output}{The output folder to write results.}
Expand Down Expand Up @@ -95,6 +96,11 @@ maaslin_plot_results(output,
associations.}
\item{max_pngs}{The top \code{max_pngs} significant associations will be
plotted.}
\item{balanced}{If set to \code{TRUE}, the summary plot will
show the top N features for each variable included in
\code{coef_plot_vars}, where N is equal to
\code{ceiling(summary_plot_first_n/length(coef_plot_vars))}. An error is
logged if \code{coef_plot_vars} is \code{NULL}.}
}
\value{
Results will be written to the \code{figures} folder within the folder
Expand Down
8 changes: 7 additions & 1 deletion man/maaslin_plot_results_from_output.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ maaslin_plot_results_from_output(output,
coef_plot_vars = NULL,
heatmap_vars = NULL,
plot_associations = TRUE,
max_pngs = 30)
max_pngs = 30,
balanced = FALSE)
}
\arguments{
\item{output}{The output folder to write results.}
Expand Down Expand Up @@ -86,6 +87,11 @@ maaslin_plot_results_from_output(output,
associations.}
\item{max_pngs}{The top \code{max_pngs} significant associations will be
plotted.}
\item{balanced}{If set to \code{TRUE}, the summary plot will
show the top N features for each variable included in
\code{coef_plot_vars}, where N is equal to
\code{ceiling(summary_plot_first_n/length(coef_plot_vars))}. An error is
logged if \code{coef_plot_vars} is \code{NULL}.}
}

\value{
Expand Down
4 changes: 3 additions & 1 deletion tests/testthat/test_maaslin_log_arguments.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ maaslin_log_arguments(input_data = 'something1',
max_pngs = 8,
cores = 9,
save_models = FALSE,
verbosity = 'FINEST')
verbosity = 'FINEST',
summary_plot_balanced=FALSE)

lines_in <- readLines(file.path(output_tmp, 'maaslin3.log'))
lines_in <- sub('.*::', '', lines_in)
Expand Down Expand Up @@ -80,6 +81,7 @@ lines_to_compare <- c("Writing function arguments to log file",
"Augment: TRUE",
"Evaluate only:",
"Cores: 9",
"Balanced Summary plot: FALSE",
"Verifying options selected are valid")

expect_starts_with <- function(strings, prefixes) {
Expand Down
Loading