From 9d7297c63bb956fba02a10bf91e38685d0594824 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Mon, 18 Dec 2023 18:21:32 +0100 Subject: [PATCH 1/3] fix issue with argument p, and use updatePermutes inside propr() and propd() when p > 0. Also, updated README --- NAMESPACE | 1 + R/1-propr.R | 7 +++- R/2-propd.R | 8 ++-- R/3-shared-updateCutoffs.R | 2 +- R/3-shared-updatePermutes.R | 11 ++++++ README.Rmd | 68 +++++++++++++++++++++++---------- README.md | 76 +++++++++++++++++++++++++++---------- inst/CITATION | 2 +- man/propd.Rd | 4 +- man/propr.Rd | 4 ++ man/updatePermutes.Rd | 23 +++++++++++ 11 files changed, 157 insertions(+), 49 deletions(-) create mode 100644 man/updatePermutes.Rd diff --git a/NAMESPACE b/NAMESPACE index 1cdbcd2..13ac4a8 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(updateCutoffs) export(updateCutoffs.propd) export(updateCutoffs.propr) export(updateF) +export(updatePermutes) exportClasses(propd) exportClasses(propr) exportMethods(show) diff --git a/R/1-propr.R b/R/1-propr.R index ed03092..6256418 100755 --- a/R/1-propr.R +++ b/R/1-propr.R @@ -27,6 +27,8 @@ #' the output Propr matrix when the metric is "phi". If `TRUE`, the function #' will symmetrize the matrix; otherwise, it will return the original matrix. #' @param alpha The alpha parameter used in the alpha log-ratio transformation. +#' @param p The number of permutations to perform for calculating the false +#' discovery rate (FDR). The default is 0. #' @param ... Additional arguments passed to \code{corpcor::pcor.shrink}, #' if "pcor.shrink" metric is selected. #' @@ -64,6 +66,7 @@ propr <- function(counts, select = NA, symmetrize = FALSE, alpha = NA, + p = 0, ...) { ############################################################################## ### CLEAN UP ARGS @@ -170,11 +173,13 @@ propr <- function(counts, "Zeros" = ctzRcpp(counts) ) + # permute data + if (p > 0) result <- updatePermutes(result, p) + ############################################################################## ### GIVE HELPFUL MESSAGES TO USER ############################################################################## - message("Alert: Use 'updatePermutes' to set seed for FDR.") message("Alert: Use 'updateCutoffs' to calculate FDR.") return(result) diff --git a/R/2-propd.R b/R/2-propd.R index 1a9efad..3aaeab4 100644 --- a/R/2-propd.R +++ b/R/2-propd.R @@ -4,7 +4,7 @@ #' assignment of each count to different groups. #' @param alpha The alpha parameter used in the alpha log-ratio transformation. #' @param p The number of permutations to perform for calculating the false -#' discovery rate (FDR). The default is 100. +#' discovery rate (FDR). The default is 0. #' @param weighted A logical value indicating whether weighted calculations #' should be performed. If \code{TRUE}, the function will use limma-based #' weights for the calculations. @@ -32,7 +32,7 @@ propd <- function(counts, group, alpha = NA, - p = 100, + p = 0, weighted = FALSE) { ############################################################################## ### CLEAN UP ARGS @@ -103,12 +103,14 @@ propd <- function(counts, result@results$theta <- round(result@results$theta, 14) # round floats to 1 + # permute data + if (p > 0) result <- updatePermutes(result, p) + ############################################################################## ### GIVE HELPFUL MESSAGES TO USER ############################################################################## message("Alert: Use 'setActive' to select a theta type.") - message("Alert: Use 'updatePermutes' to set seed for FDR.") message("Alert: Use 'updateCutoffs' to calculate FDR.") message("Alert: Use 'updateF' to calculate F-stat.") diff --git a/R/3-shared-updateCutoffs.R b/R/3-shared-updateCutoffs.R index 9f4fef9..23ee0a4 100644 --- a/R/3-shared-updateCutoffs.R +++ b/R/3-shared-updateCutoffs.R @@ -44,6 +44,7 @@ updateCutoffs <- #' @export updateCutoffs.propr <- function(object, cutoff, ncores) { + metric_is_up <- function(metric) { metrics <- c("rho", "cor", "pcor", "pcor.shrink", "pcor.bshrink") return(metric %in% metrics) @@ -66,7 +67,6 @@ updateCutoffs.propr <- if (metric_is_up(object@metric)) { count_greater_than(pkt, cut) } else{ - # phi & phs count_less_than(pkt, cut) } }) diff --git a/R/3-shared-updatePermutes.R b/R/3-shared-updatePermutes.R index ac9727b..0113f1b 100644 --- a/R/3-shared-updatePermutes.R +++ b/R/3-shared-updatePermutes.R @@ -1,3 +1,14 @@ +#' Create permuted data +#' +#' This function creates p permuted data matrices +#' +#' This function wraps \code{updatePermutes.propr} and +#' \code{updatePermutes.propd}. +#' +#' @param object A \code{propr} or \code{propd} object. +#' @param p The number of permutations to perform. +#' @return A \code{propr} or \code{propd} object with the permutes slot updated. +#' @export updatePermutes <- function(object, p) { if (inherits(object, "propr")) { diff --git a/README.Rmd b/README.Rmd index 6bfd927..b0fa610 100755 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,7 @@ If you have used `propr` previously, you will notice some changes. In version 5. ## Introduction -The `propr` package provides an interface for 4 distinct approaches to compositional data analysis (CoDA): proportionality, differential proportionality, partial correlation, and ratio analysis. +The `propr` package provides an interface for 4 distinct approaches to compositional data analysis (CoDA): proportionality, differential proportionality, logratio partial correlation with basis shrinkage, and ratio analysis. If you use this software, please cite our work. We don't get paid to make software, but your citations help us to negotiate support for software maintenance and development. @@ -38,33 +38,62 @@ library(propr) There are a few proportionality statistics available. Select one with the 'metric' argument. ```{r, eval = FALSE} -pr <- propr(counts, # rows as samples, like it should be - metric = "rho", # or "phi", "phs", "cor", "vlr" - ivar = "clr", # or can use "iqlr" instead - alpha = NA, # use to handle zeros - p = 100) # used by updateCutoffs +pr <- propr( + counts, # rows as samples, like it should be + metric = "rho", # or "phi", "phs", "cor", "vlr" + ivar = "clr", # or can use "iqlr" instead + alpha = NA, # use to handle zeros + p = 100 # used for updateCutoffs + ) ``` You can determine the "signficance" of proportionality using a built-in permutation procedure. It tells estimates the false discovery rate (FDR) for any cutoff. This method can take a while to run, but is parallelizable. ```{r, eval = FALSE} -updateCutoffs(pr, - cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR - ncores = 1) # parallelize here +pr <- updateCutoffs( + pr, + cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR + ncores = 1 # parallelize here + ) ``` Choose the largest cutoff with an acceptable FDR. +## Logratio partial correlation with basis shrinkage + +There are many ways to calculate partial correlations, with or without shrinkage. The recommended one for datasets with p>>n and influenced by compositional bias is "pcor.bshrink". + +```{r, eval = FALSE} +pr <- propr( + counts, # rows as samples, like it should be + metric = "pcor.bshrink", # partial correlation without shrinkage "pcor" is also available + p = 100 # used for updateCutoffs + ) +``` + +You can also determine the "significance" of logratio partial correlations with the built-in permutation approach. + +```{r, eval = FALSE} +pr <- updateCutoffs( + pr, + cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR + ncores = 1 # parallelize here + ) +``` + + ## Differential Proportionality There are also a few differential proportionality statistics, but they all get calculated at once. ```{r, eval = FALSE} -pd <- propd(counts, - group, # a vector of 2 or more groups - alpha = NA, # whether to handle zeros - weighted = TRUE, # whether to weigh log-ratios - p = 100) # used by updateCutoffs +pd <- propd( + counts, + group, # a vector of 2 or more groups + alpha = NA, # whether to handle zeros + p = 100, # used for updateCutoffs + weighted = TRUE # whether to weight log-ratios + ) ``` You can switch between the "disjointed" and "emergent" statistics. @@ -80,9 +109,11 @@ setEmergent(pd) You can again permute an FDR with the `updateCutoffs` method. Alternatively, you can calculate an exact p-value for $\theta$ based on a F-test. This is handled by the `updateF` method. ```{r, eval = FALSE} -pd <- updateF(pd, - moderated = FALSE, # moderate stats with limma-voom - ivar = "clr") # used for moderation +pd <- updateF( + pd, + moderated = FALSE, # moderate stats with limma-voom + ivar = "clr" # used for moderation + ) ``` ## Getters @@ -97,9 +128,6 @@ Both functions return S4 objects. This package includes several helper functions Use `getResults` to pipe to `ggplot2` for visualization. -## Partial Correlation - -COMING SOON!! ## Ratio Methods diff --git a/README.md b/README.md index 1bf3fee..838c4a0 100755 --- a/README.md +++ b/README.md @@ -19,7 +19,8 @@ unchanged. The `propr` package provides an interface for 4 distinct approaches to compositional data analysis (CoDA): proportionality, differential -proportionality, partial correlation, and ratio analysis. +proportionality, logratio partial correlation with basis shrinkage, and +ratio analysis. If you use this software, please cite our work. We don’t get paid to make software, but your citations help us to negotiate support for @@ -32,6 +33,10 @@ citation("propr") ## ## To cite propr in publications use: ## + ## Jin S, Notredame C, Erb I (2023) Compositional covariance shrinkage + ## and regularised partial correlations. Statistics and Operations + ## Research Transactions 47(2). doi:10.57645/20.8080.02.8 + ## ## Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM ## (2019) A field guide for the compositional analysis of any-omics ## data. GigaScience 8(9). doi:10.1093/gigascience/giz107 @@ -78,11 +83,13 @@ There are a few proportionality statistics available. Select one with the ‘metric’ argument. ``` r -pr <- propr(counts, # rows as samples, like it should be - metric = "rho", # or "phi", "phs", "cor", "vlr" - ivar = "clr", # or can use "iqlr" instead - alpha = NA, # use to handle zeros - p = 100) # used by updateCutoffs +pr <- propr( + counts, # rows as samples, like it should be + metric = "rho", # or "phi", "phs", "cor", "vlr" + ivar = "clr", # or can use "iqlr" instead + alpha = NA, # use to handle zeros + p = 100 # used for updateCutoffs + ) ``` You can determine the “signficance” of proportionality using a built-in @@ -91,24 +98,53 @@ for any cutoff. This method can take a while to run, but is parallelizable. ``` r -updateCutoffs(pr, - cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR - ncores = 1) # parallelize here +pr <- updateCutoffs( + pr, + cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR + ncores = 1 # parallelize here + ) ``` Choose the largest cutoff with an acceptable FDR. +## Logratio partial correlation with basis shrinkage + +There are many ways to calculate partial correlations, with or without +shrinkage. The recommended one for datasets with p\>\>n and influenced +by compositional bias is “pcor.bshrink”. + +``` r +pr <- propr( + counts, # rows as samples, like it should be + metric = "pcor.bshrink", # partial correlation without shrinkage "pcor" is also available + p = 100 # used for updateCutoffs + ) +``` + +You can also determine the “significance” of logratio partial +correlations with the built-in permutation approach. + +``` r +pr <- updateCutoffs( + pr, + cutoff = seq(0, 1, .05), # cutoffs at which to estimate FDR + ncores = 1 # parallelize here + ) +``` + ## Differential Proportionality There are also a few differential proportionality statistics, but they all get calculated at once. ``` r -pd <- propd(counts, - group, # a vector of 2 or more groups - alpha = NA, # whether to handle zeros - weighted = TRUE, # whether to weigh log-ratios - p = 100) # used by updateCutoffs +pd <- propd( + counts, + group, # a vector of 2 or more groups + alpha = NA, # whether to handle zeros + p = 100, # used for updateCutoffs + weighted = TRUE # whether to weight log-ratios + ) ``` You can switch between the “disjointed” and “emergent” statistics. @@ -126,9 +162,11 @@ Alternatively, you can calculate an exact p-value for *θ* based on a F-test. This is handled by the `updateF` method. ``` r -pd <- updateF(pd, - moderated = FALSE, # moderate stats with limma-voom - ivar = "clr") # used for moderation +pd <- updateF( + pd, + moderated = FALSE, # moderate stats with limma-voom + ivar = "clr" # used for moderation + ) ``` ## Getters @@ -144,10 +182,6 @@ functions that work for both the `propr` and `propd` output. Use `getResults` to pipe to `ggplot2` for visualization. -## Partial Correlation - -COMING SOON!! - ## Ratio Methods COMING SOON!! diff --git a/inst/CITATION b/inst/CITATION index 8bdf206..054c879 100755 --- a/inst/CITATION +++ b/inst/CITATION @@ -4,7 +4,7 @@ citEntry(entry = "Article", title = "Compositional covariance shrinkage and regularised partial correlations", author = personList(as.person("Suzanne Jin"), as.person("Cedric Notredame"), - as.person("Ionas Erb")) + as.person("Ionas Erb")), journal = "Statistics and Operations Research Transactions", year = "2023", volume = "47", diff --git a/man/propd.Rd b/man/propd.Rd index 7061dcd..bc1b1ec 100755 --- a/man/propd.Rd +++ b/man/propd.Rd @@ -14,7 +14,7 @@ \usage{ \S4method{show}{propd}(object) -propd(counts, group, alpha = NA, p = 100, weighted = FALSE) +propd(counts, group, alpha = NA, p = 0, weighted = FALSE) setDisjointed(propd) @@ -36,7 +36,7 @@ assignment of each count to different groups.} \item{alpha}{The alpha parameter used in the alpha log-ratio transformation.} \item{p}{The number of permutations to perform for calculating the false -discovery rate (FDR). The default is 100.} +discovery rate (FDR). The default is 0.} \item{weighted}{A logical value indicating whether weighted calculations should be performed. If \code{TRUE}, the function will use limma-based diff --git a/man/propr.Rd b/man/propr.Rd index ade0a34..f418e67 100755 --- a/man/propr.Rd +++ b/man/propr.Rd @@ -16,6 +16,7 @@ propr( select = NA, symmetrize = FALSE, alpha = NA, + p = 0, ... ) } @@ -57,6 +58,9 @@ will symmetrize the matrix; otherwise, it will return the original matrix.} \item{alpha}{The alpha parameter used in the alpha log-ratio transformation.} +\item{p}{The number of permutations to perform for calculating the false +discovery rate (FDR). The default is 0.} + \item{...}{Additional arguments passed to \code{corpcor::pcor.shrink}, if "pcor.shrink" metric is selected.} } diff --git a/man/updatePermutes.Rd b/man/updatePermutes.Rd new file mode 100644 index 0000000..81d05c7 --- /dev/null +++ b/man/updatePermutes.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/3-shared-updatePermutes.R +\name{updatePermutes} +\alias{updatePermutes} +\title{Create permuted data} +\usage{ +updatePermutes(object, p) +} +\arguments{ +\item{object}{A \code{propr} or \code{propd} object.} + +\item{p}{The number of permutations to perform.} +} +\value{ +A \code{propr} or \code{propd} object with the permutes slot updated. +} +\description{ +This function creates p permuted data matrices +} +\details{ +This function wraps \code{updatePermutes.propr} and + \code{updatePermutes.propd}. +} From 1ee750cb69c784bd62c9df35dfcbe9c01a25adec Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Mon, 18 Dec 2023 18:22:16 +0100 Subject: [PATCH 2/3] Increment version number --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b1d60b7..74d8d4a 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: propr Title: Calculating Proportionality Between Vectors of Compositional Data -Version: 5.0.0 +Version: 5.0.1 URL: https://github.com/tpq/propr BugReports: https://github.com/tpq/propr/issues Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 0e183db..a1f6e36 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# propr 5.0.1 + ## propr 5.0.0 --------------------- * Merge pull request for new shrinkage method From 4d5b3e36ba9bf3f2a0da0849ed90af8b9a3977d8 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Mon, 18 Dec 2023 18:25:04 +0100 Subject: [PATCH 3/3] update NEWS --- NEWS.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a1f6e36..6bd5cf1 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,9 @@ -# propr 5.0.1 +## propr 5.0.1 +--------------------- +* Fix bug: change NA in alr partial correlation to 0 so that FDR can be computed +* Fix bug: implemented updatePermutes inside propr() and propd() +* Update README and CITATION + ## propr 5.0.0 ---------------------