Merge pull request #46 from suzannejin/master

[feature] Added accesory functions like getAdj and optimized graflex
tpq · Sep 10, 2024 · 0f0760e · 0f0760e
2 parents 81cc62c + 84eaeaf
commit 0f0760e
Show file tree

Hide file tree

Showing 75 changed files with 3,347 additions and 715 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,20 +1,31 @@
 # Generated by roxygen2: do not edit by hand
 
 export(aldex2propr)
-export(basis_shrinkage)
 export(calculate_theta)
 export(compute_iqlr)
+export(getAdjacencyFDR)
+export(getAdjacencyFDR.propd)
+export(getAdjacencyFDR.propr)
+export(getAdjacencyFstat)
+export(getCutoffFDR)
+export(getCutoffFDR.propd)
+export(getCutoffFDR.propr)
+export(getCutoffFstat)
 export(getMatrix)
 export(getRatios)
 export(getResults)
+export(getSignificantResultsFDR)
+export(getSignificantResultsFDR.propd)
+export(getSignificantResultsFDR.propr)
+export(getSignificantResultsFstat)
 export(index_reference)
 export(logratio)
 export(logratio_with_alpha)
 export(logratio_without_alpha)
+export(pcor.bshrink)
 export(propd)
 export(propr)
 export(ratios)
-export(runCutoff)
 export(runGraflex)
 export(runNormalization)
 export(runPostHoc)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+## propr 5.1.0
+---------------------
+* Allowed `updateCutoffs` function to compute the FDR values for negative cutoffs
+* Added `getCutoffFDR` to get a significant cutoff based on the permuted FDR
+* Changed `runCutoff` into `getCutoffFstat`
+* Added `getSignificantResultsFDR` and `getSignificantResultsFstat` to get the significant pairs as results table
+* Added `getAdjacencyFDR` and `getAdjacencyFstat` functions to get the adjacency matrix with the significant pairs
+* Optimized `graflex` related functions to Rcpp C++
+* Removed fixseed, since setting `set.seed()` before calling any function is enough to ensure reproducibility
+* Fix bug: when `ivar=NA`, the already preprocessed data given by the user are replaced when there are zeros, which should not
+* Replaced `ppcor::pcor` by `corpcor::cor2pcor` for coherence with the shrunk partial correlations
+* Parallelized `propd` when ncores > 1
+
 ## propr 5.0.4
 ---------------------
 * Added fixseed option for runGraflex

diff --git a/R/1-propr-OOP.R b/R/1-propr-OOP.R
@@ -6,7 +6,10 @@ setClass(
     counts = "data.frame",
     alpha = "numeric",
     metric = "character",
+    direct = "logical",
+    has_meaningful_negative_values = "logical",
     ivar = "ANY",
+    lambda = "ANY",
     logratio = "data.frame",
     matrix = "matrix",
     pairs = "numeric",

diff --git a/R/1-propr.R b/R/1-propr.R
@@ -18,8 +18,9 @@
 #'   - "clr" (default): Centered log-ratio transformation.
 #'   - "alr": Additive log-ratio transformation ("pcor.bshrink" metric only).
 #'   - "iqlr": Inter-quartile log-ratio transformation from ALDEx2.
-#'   - The explicit name(s) of variable(s) to use as a reference.
-#'  Use NA to skip log-ratio transformation.
+#'   - The explicit name(s) or index(es) of variable(s) to use as a reference.
+#'   - Use NA to skip log-ratio transformation and any other pre-processing, like
+#'  zero replacement. This is useful when the input data is already pre-processed.
 #' @param select A numeric vector representing the indices of features to be
 #'  used for computing the Propr matrix. This argument is optional. If
 #'  provided, it reduces the data size by using only the selected features.
@@ -29,9 +30,6 @@
 #' @param alpha The alpha parameter used in the alpha log-ratio transformation.
 #' @param p The number of permutations to perform for calculating the false
 #'  discovery rate (FDR). The default is 0.
-#' @param fixseed A logical value indicating whether to fix the random seed 
-#' for generating the pemurations. If `TRUE`, the function will fix the random
-#' The default is `FALSE`.
 #' @param ... Additional arguments passed to \code{corpcor::pcor.shrink},
 #'  if "pcor.shrink" metric is selected.
 #'
@@ -62,6 +60,7 @@ propr <- function(counts,
                              "phs",
                              "cor",
                              "vlr",
+                             "ppcor",
                              "pcor",
                              "pcor.shrink",
                              "pcor.bshrink"),
@@ -70,7 +69,6 @@ propr <- function(counts,
                   symmetrize = FALSE,
                   alpha = NA,
                   p = 0,
-                  fixseed = FALSE,
                   ...) {
   ##############################################################################
   ### CLEAN UP ARGS
@@ -82,24 +80,37 @@ propr <- function(counts,
 
   # Special handling for 'metric'
   metric <- metric[1]
+  ivar_pcor <- NA
   if (metric == "pcor.bshrink") {
+    if (length(ivar) != 1) {
+      stop("For 'pcor.bshrink', the 'ivar' argument must be a single value.")
+    }
     if (!ivar %in% c("clr", "alr")) {
       stop("For 'pcor.bshrink', the 'ivar' argument must be 'clr' or 'alr'.")
     }
     message("Alert: Log-ratio transform will be handled by 'bShrink'.")
     ivar_pcor <- ivar
-    ivar <-
-      NA # skips log-ratio transform as performed for all other metrics
+    ivar <- NA # skips log-ratio transform
+  } else {
+    if (length(ivar) == 1 && !is.na(ivar) && ivar == "alr") {
+      stop("Please give the index or name of the reference gene instead of 'alr'.")
+    } else if (length(ivar) > 1 && any(c("alr","clr","iqlr") %in% ivar)) {
+      stop("Please check the ivar argument is correct.")
+    }
   }
 
   ##############################################################################
   ### PERFORM ZERO REPLACEMENT AND LOG-RATIO TRANSFORM
   ##############################################################################
 
-  # NOTE: ct will never have zeros; counts may or may not have zeros!
+  # NOTE: counts are the original counts, while ct may have zeros replaced
   counts <- as_safe_matrix(counts)
-  ct <- simple_zero_replacement(counts)
-  lr <- logratio(counts, ivar, alpha)
+  if (length(ivar) == 1 && is.na(ivar) && is.na(ivar_pcor)) {
+    ct <- counts
+  } else {
+    ct <- simple_zero_replacement(counts)
+  }
+  lr <- logratio(ct, ivar, alpha)
 
   ##############################################################################
   ### OPTIONALLY REDUCE DATA SIZE BEFORE COMPUTING MATRIX
@@ -117,28 +128,50 @@ propr <- function(counts,
   ##############################################################################
 
   lrv <- lr2vlr(lr)
+  lambda <- NULL
+
   if (metric == "rho") {
     mat <- lr2rho(lr)
+
   } else if (metric == "phi") {
     mat <- lr2phi(lr)
     if (symmetrize)
       symRcpp(mat) # optionally force symmetry
+
   } else if (metric == "phs") {
     mat <- lr2phs(lr)
+
   } else if (metric == "cor") {
     mat <- stats::cor(lr)
+
   } else if (metric == "vlr") {
     mat <- lrv
-  } else if (metric == "pcor") {
+
+  } else if (metric == "ppcor") {
     packageCheck("ppcor")
     mat <- ppcor::pcor(lr)$estimate
+
+  } else if (metric == "pcor") {
+    packageCheck("corpcor")
+    cov <- cov(lr)
+    mat <- corpcor::cor2pcor(cov)
+    class(mat) <- "matrix"
+
   } else if (metric == "pcor.shrink") {
     packageCheck("corpcor")
-    mat <- corpcor::pcor.shrink(lr, ...)
+    cov <- corpcor::cov.shrink(lr)
+    mat <- corpcor::cor2pcor(cov)
+    mat <- matrix(mat, ncol=ncol(lr), nrow=ncol(lr))
     class(mat) <- "matrix"
+    lambda <- attr(cov, "lambda")
+
   } else if (metric == "pcor.bshrink") {
-    mat <- basis_shrinkage(ct, outtype = ivar_pcor)
-  } else{
+    with(pcor.bshrink(ct, outtype = ivar_pcor), {
+      mat <<- matrix
+      lambda <<- lambda
+    })
+
+  } else {
     stop("Provided 'metric' not recognized.")
   }
 
@@ -155,8 +188,11 @@ propr <- function(counts,
   result@logratio <- as.data.frame(lr)
   result@pairs <- vector("numeric")
   result@permutes <- list(NULL)
+  result@lambda <- lambda
+  result@direct <- ifelse(metric[1] %in% c("rho", "cor", "pcor", "pcor.shrink", "pcor.bshrink"), TRUE, FALSE)
+  result@has_meaningful_negative_values <- ifelse(metric[1] %in% c("cor", "pcor", "pcor.shrink", "pcor.bshrink"), TRUE, FALSE)  # metrics like proportionality has negative values that are difficult to interpret, whereas correlation metrics have a clear interpretation
 
-  # ivar should not be NA, otherwise updateCutoffs does not work
+  # ivar should not be NA for pcor.bshrink, otherwise updateCutoffs does not work
   if (metric == 'pcor.bshrink') result@ivar <- ivar_pcor
 
   # Clean row and column names
@@ -178,7 +214,7 @@ propr <- function(counts,
     )
 
   # permute data
-  if (p > 0) result <- updatePermutes(result, p, fixseed)
+  if (p > 0) result <- updatePermutes(result, p)
 
   ##############################################################################
   ### GIVE HELPFUL MESSAGES TO USER

diff --git a/R/1a-propr-backend.R b/R/1a-propr-backend.R
@@ -44,6 +44,12 @@ index_reference <- function(counts, ivar) {
     use <- sort(ivar) # use features given by number
   }
 
+  if (length(use) == 0) {
+      stop("No reference detected. Please provide a valid 'ivar' argument.")
+  } else if(length(use) > ncol(counts)) {
+    stop("Detected more reference variables than existing in the data. Please provide a valid 'ivar' argument.")
+  }
+
   return(use)
 }
 
@@ -76,30 +82,6 @@ compute_iqlr <- function(counts) {
   return(use)
 }
 
-#' Simple Zero Replacement in a Count Matrix
-#'
-#' This function replaces zeros with the next smallest non-zero value in the
-#'  input count matrix. If the matrix contains no zeros, it produces an
-#'  informational message indicating that no replacements were made.
-#'
-#' @inheritParams logratio_with_alpha
-#' @return A matrix with zero values replaced by the next smallest non-zero value.
-#' @examples
-#' # Sample input count data with zeros
-#' data <- matrix(c(0, 2, 3, 4, 5, 0), nrow = 2, byrow = TRUE)
-#'
-#' @export
-simple_zero_replacement <- function(ct) {
-  if (any(ct == 0)) {
-    zeros <- ct == 0
-    ct[zeros] <- min(ct[!zeros])
-  } else{
-    message("Alert: No 0s found that need replacement.")
-  }
-
-  return(ct)
-}
-
 #' Perform log-ratio transformation without alpha parameter
 #'
 #' This function applies a log-ratio transformation to a given data matrix
@@ -112,11 +94,14 @@ simple_zero_replacement <- function(ct) {
 #' # Sample input data
 #' data <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, byrow = TRUE)
 #'
-#' # Applying log-ratio transformation to rows using columns 2 and 3
+#' # Applying log-ratio transformation to rows using columns 2 and 3 as reference
 #' result <- logratio_without_alpha(data, use = c(2, 3))
 #'
 #' @export
 logratio_without_alpha <- function(ct, use) {
+  if (any(ct == 0)){
+    stop("Error: please first replace the zeros before running logratio.")
+  }
   # Use g(x) = Mean[log(x)] to log-ratio transform data
   logX <- log(ct)
   logSet <- logX[, use, drop = FALSE]
@@ -186,75 +171,80 @@ logratio_with_alpha <- function(ct, use, alpha) {
 #' @export
 logratio <- function(counts, ivar, alpha) {
   counts <- as_safe_matrix(counts)
-  if (is.na(ivar)) {
+  if (length(ivar) == 1 && is.na(ivar)) {
     message("Alert: Skipping built-in log-ratio transformation.")
-    ct <- simple_zero_replacement(counts)
-    lr <- ct
+    lr <- counts
   } else {
     use <- index_reference(counts, ivar) # Index reference
     if (is.na(alpha)) {
-      ct <- simple_zero_replacement(counts)
-      lr <- logratio_without_alpha(ct, use)
+      lr <- logratio_without_alpha(counts, use)
     } else {
       lr <- logratio_with_alpha(counts, use, alpha)
     }
   }
   return(lr)
 }
 
-#' Covariance Shrinkage and Partial Correlation Calculation
+#' Basis Covariance Shrinkage and Partial Correlation Calculation
 #'
-#' This function performs covariance shrinkage and calculates the partial
-#'  correlation matrix based on the input count data matrix. The function can
+#' This function performs covariance shrinkage on the basis matrix
+#'  and calculates the partial correlation matrix. The function can
 #'  output the results in two formats: centered log-ratio (clr) or
 #'  additive log-ratio (alr).
 #'
-#' @param M A data matrix representing the counts. Each row should represent
+#' @param ct A data matrix representing the counts. Each row should represent
 #'  observations, and each column should represent different variables.
 #' @param outtype A character vector specifying the output type. It can take
-#'  either "clr" (centered log-ratio) or "alr" (additive log-ratio).
+#'  either "clr" (centered log-ratio) or "alr" (additive log-ratio). Since
+#'  the reference does not affect the logratio partial correlation coefficients,
+#'  the index of the reference is not needed for "alr". Also, "clr" is recommended
+#'  because of the same reason, at the same time avoiding losing one dimension.
 #'
-#' @return A matrix representing the partial correlation matrix.
+#' @return A matrix representing the shrunk partial correlation matrix.
 #'
 #' @examples
 #' # Sample input count data
 #' data <- iris[,1:4]
 #'
 #' # Calculate partial correlation matrix using clr transformation
-#' result_clr <- basis_shrinkage(data, outtype = "clr")
+#' result_clr <- pcor.bshrink(data, outtype = "clr")
 #'
 #' # Calculate partial correlation matrix using alr transformation
-#' result_alr <- basis_shrinkage(data, outtype = "alr")
+#' result_alr <- pcor.bshrink(data, outtype = "alr")
 #'
 #' @export
-basis_shrinkage <- function(M, outtype = c("clr", "alr")) {
+pcor.bshrink <- function(ct, outtype = c("clr", "alr")) {
   packageCheck("corpcor")
   outtype <- match.arg(outtype)
 
   # transform counts to log proportions
-  P <- M / rowSums(M)
-  B <- log(P)
+  logP <- log( ct / rowSums(ct) )
 
   # covariance shrinkage
-  D  <- ncol(M)
-  Cb <- corpcor::cov.shrink(B, verbose = FALSE)
+  covB <- corpcor::cov.shrink(logP, verbose = FALSE)
+  lambda <- attr(covB, "lambda")
+
+  # convert basis covariance matrix to clr/alr covariance matrix
+  D  <- ncol(ct)
   if (outtype == "alr") {
     F   <- cbind(diag(rep(1, D - 1)), rep(-1, D - 1))
-    Cov <- F %*% Cb %*% t(F)
+    cov <- F %*% covB %*% t(F)
   } else if (outtype == "clr") {
     G   <- diag(rep(1, D)) - matrix(1 / D, D, D)
-    Cov <- G %*% Cb %*% G
+    cov <- G %*% covB %*% G
   }
 
   # partial correlation
-  PC <- corpcor::cor2pcor(Cov)
+  pcor <- corpcor::cor2pcor(cov)
 
   # make output to have same dimensions as input
+  # alr partial correlation has one less dimension,
+  # so here we add a row and a column of 0s
   if (outtype == "alr") {
-    PC <- cbind(PC, replicate(nrow(PC), 0))
-    PC <- rbind(PC, replicate(ncol(PC), 0))
-    PC[nrow(PC), ncol(PC)] <- 1
+    pcor <- cbind(pcor, 0)
+    pcor <- rbind(pcor, 0)
+    pcor[ncol(pcor), ncol(pcor)] <- 1
   }
 
-  return(PC)
+  return(list(matrix = pcor, lambda = lambda))
 }