From 25bf1183d64fa9341b094b3111832b7f9c67b9e0 Mon Sep 17 00:00:00 2001 From: Fair & Square Synthesis Date: Thu, 1 Feb 2024 11:34:43 +1100 Subject: [PATCH] Merge bits of graflex as runGraflex --- DESCRIPTION | 4 +- NAMESPACE | 1 + NEWS.md | 7 ++- R/2d-propd-graflex.R | 131 +++++++++++++++++++++++++++++++++++++++++++ man/binTab.Rd | 20 +++++++ man/calculateOR.Rd | 19 +++++++ man/getFDR.Rd | 23 ++++++++ man/getOR.Rd | 19 +++++++ man/permuteOR.Rd | 24 ++++++++ man/runGraflex.Rd | 26 +++++++++ 10 files changed, 270 insertions(+), 4 deletions(-) create mode 100644 R/2d-propd-graflex.R create mode 100644 man/binTab.Rd create mode 100644 man/calculateOR.Rd create mode 100644 man/getFDR.Rd create mode 100644 man/getOR.Rd create mode 100644 man/permuteOR.Rd create mode 100644 man/runGraflex.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 53a5916..2d3668f 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: propr Title: An R package to calculate proportionality and other measures for compositional data -Version: 5.0.1 +Version: 5.0.2 URL: https://github.com/tpq/propr BugReports: https://github.com/tpq/propr/issues Authors@R: c( @@ -25,7 +25,7 @@ Description: The bioinformatic evaluation of gene co-expression often begins wit License: GPL-2 LazyData: true VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Encoding: UTF-8 Depends: methods, diff --git a/NAMESPACE b/NAMESPACE index 13ac4a8..c56243b 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(propd) export(propr) export(ratios) export(runCutoff) +export(runGraflex) export(runNormalization) export(runPostHoc) export(search_tree) diff --git a/NEWS.md b/NEWS.md index b0d0730..c1a9a46 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,14 @@ +## propr 5.0.2 +--------------------- +* Merge parts of `graflex` package into `propr` + ## propr 5.0.1 --------------------- * Fix bug: change NA in alr partial correlation to 0 so that FDR can be computed * Fix bug: implemented updatePermutes inside propr() and propd() -* Added 'corpcor' and 'ppcor' in imports +* Added `corpcor` and `ppcor` in imports * Update README and CITATION - ## propr 5.0.0 --------------------- * Merge pull request for new shrinkage method diff --git a/R/2d-propd-graflex.R b/R/2d-propd-graflex.R new file mode 100644 index 0000000..30d4aa1 --- /dev/null +++ b/R/2d-propd-graflex.R @@ -0,0 +1,131 @@ +#' Permute Odds Ratio +#' +#' This function permutes \code{p} odds ratios for the +#' edge overlap between two non-random graphs. +#' It does this by randomly shuffling the rows and +#' columns of \code{A} (jointly, thus preserving +#' the degree distribution). +#' +#' Note that this function calculates overlap for the +#' lower-left triangle of the input matrices. +#' @param A,G An adjacency matrix. +#' @param p An integer. The number of overlaps to permute. +permuteOR <- function(A, G, p = 500) { + Gstar <- G[lower.tri(G)] + res <- lapply(1:p, function(i) { + # Shuffle the adjacency matrix + index <- sample(1:ncol(A)) + A <- A[index, index] + Astar <- A[lower.tri(A)] + getOR(Astar, Gstar) + }) + + do.call("rbind", res) +} + +#' Tabulate Overlap +#' +#' This function tabulates the overlap between +#' two vectors or two adjacency matrices. +#' It is a faster version of \code{table} that +#' only supports binary input. +#' @param A,G A vector or adjacency matrix. +#' @return A table of overlap. +binTab <- function(A, G) { + diff <- A != G + only1 <- A[diff] + b <- sum(only1) + c <- length(only1) - b + + same <- !diff + double1 <- A[same] + a <- sum(double1) + d <- length(double1) - a + + matrix(c(d, b, c, a), 2, 2) +} + +#' Calculate Odds Ratio +#' +#' This function calculates the overlap between +#' two vectors or two adjacency matrices. +#' It returns the OR as well as other metrics. +#' @inheritParams binTab +#' @return A \code{data.frame} of results. +getOR <- function(A, G) { + tab <- binTab(A, G) + or <- (tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1]) + data.frame( + "Neither" = tab[1, 1], + "G.only" = tab[1, 2], + "A.only" = tab[2, 1], + "Both" = tab[2, 2], + "Odds" = or, + "LogOR" = log(or) + ) +} + +#' Calculate Odds Ratio +#' +#' This function calculates an odds ratio for the +#' edge overlap between two non-random graphs. +#' +#' Note that this function calculates overlap for the +#' lower-left triangle of the input matrices. +#' @inheritParams permuteOR +calculateOR <- function(A, G) { + Astar <- A[lower.tri(A)] + Gstar <- G[lower.tri(G)] + getOR(Astar, Gstar) +} + +#' Calculate Odds Ratio FDR +#' +#' This function calculates the false discovery rate (FDR) +#' for over- and under-enrichment by counting the number of +#' times the actual OR was greater than +#' (or less than) a permuted OR. +#' @param actual A result from \code{\link{calculateOR}}. +#' @param permuted A result from \code{\link{permuteOR}}. +#' @return A \code{data.frame} of the FDRs for over- +#' and under- enrichment. +getFDR <- function(actual, permuted) { + actual$FDR.under <- + sum(permuted$Odds <= actual$Odds) / nrow(permuted) + actual$FDR.over <- + sum(permuted$Odds >= actual$Odds) / nrow(permuted) + actual +} + +#' Permute FDR for Multiple Concepts +#' +#' This function calls \code{\link{permuteOR}} for each +#' concept (i.e., column) in the database \code{K}. +#' +#' For each concept, this function calculates the +#' false discovery rate (FDR) by counting the number of +#' times the actual OR was greater than +#' (or less than) a permuted OR. +#' @inheritParams permuteOR +#' @param A An adjacency matrix. +#' @param K A knowledge database where each row is a graph node +#' and each column is a concept. +#' @export +runGraflex <- function(A, K, p = 500) { + if (nrow(A) != nrow(K)) + stop("'A' and 'K' must have identical rows.") + + numTicks <- 0 + res <- lapply(1:ncol(K), function(k) { + numTicks <<- progress(k, ncol(K), numTicks) + Gk <- K[, k] %*% t(K[, k]) + actual <- calculateOR(A, Gk) + permuted <- permuteOR(A, Gk, p = p) + actual <- getFDR(actual, permuted) + actual$Permutes <- p + actual$Concept <- colnames(K)[k] + actual + }) + + do.call("rbind", res) +} diff --git a/man/binTab.Rd b/man/binTab.Rd new file mode 100644 index 0000000..34396db --- /dev/null +++ b/man/binTab.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{binTab} +\alias{binTab} +\title{Tabulate Overlap} +\usage{ +binTab(A, G) +} +\arguments{ +\item{A, G}{A vector or adjacency matrix.} +} +\value{ +A table of overlap. +} +\description{ +This function tabulates the overlap between + two vectors or two adjacency matrices. + It is a faster version of \code{table} that + only supports binary input. +} diff --git a/man/calculateOR.Rd b/man/calculateOR.Rd new file mode 100644 index 0000000..4b4f4e7 --- /dev/null +++ b/man/calculateOR.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{calculateOR} +\alias{calculateOR} +\title{Calculate Odds Ratio} +\usage{ +calculateOR(A, G) +} +\arguments{ +\item{A, G}{An adjacency matrix.} +} +\description{ +This function calculates an odds ratio for the + edge overlap between two non-random graphs. +} +\details{ +Note that this function calculates overlap for the + lower-left triangle of the input matrices. +} diff --git a/man/getFDR.Rd b/man/getFDR.Rd new file mode 100644 index 0000000..7841325 --- /dev/null +++ b/man/getFDR.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{getFDR} +\alias{getFDR} +\title{Calculate Odds Ratio FDR} +\usage{ +getFDR(actual, permuted) +} +\arguments{ +\item{actual}{A result from \code{\link{calculateOR}}.} + +\item{permuted}{A result from \code{\link{permuteOR}}.} +} +\value{ +A \code{data.frame} of the FDRs for over- + and under- enrichment. +} +\description{ +This function calculates the false discovery rate (FDR) + for over- and under-enrichment by counting the number of + times the actual OR was greater than + (or less than) a permuted OR. +} diff --git a/man/getOR.Rd b/man/getOR.Rd new file mode 100644 index 0000000..a5e35c0 --- /dev/null +++ b/man/getOR.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{getOR} +\alias{getOR} +\title{Calculate Odds Ratio} +\usage{ +getOR(A, G) +} +\arguments{ +\item{A, G}{A vector or adjacency matrix.} +} +\value{ +A \code{data.frame} of results. +} +\description{ +This function calculates the overlap between + two vectors or two adjacency matrices. + It returns the OR as well as other metrics. +} diff --git a/man/permuteOR.Rd b/man/permuteOR.Rd new file mode 100644 index 0000000..71d4401 --- /dev/null +++ b/man/permuteOR.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{permuteOR} +\alias{permuteOR} +\title{Permute Odds Ratio} +\usage{ +permuteOR(A, G, p = 500) +} +\arguments{ +\item{A, G}{An adjacency matrix.} + +\item{p}{An integer. The number of overlaps to permute.} +} +\description{ +This function permutes \code{p} odds ratios for the + edge overlap between two non-random graphs. + It does this by randomly shuffling the rows and + columns of \code{A} (jointly, thus preserving + the degree distribution). +} +\details{ +Note that this function calculates overlap for the + lower-left triangle of the input matrices. +} diff --git a/man/runGraflex.Rd b/man/runGraflex.Rd new file mode 100644 index 0000000..648ac94 --- /dev/null +++ b/man/runGraflex.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/2d-propd-graflex.R +\name{runGraflex} +\alias{runGraflex} +\title{Permute FDR for Multiple Concepts} +\usage{ +runGraflex(A, K, p = 500) +} +\arguments{ +\item{A}{An adjacency matrix.} + +\item{K}{A knowledge database where each row is a graph node +and each column is a concept.} + +\item{p}{An integer. The number of overlaps to permute.} +} +\description{ +This function calls \code{\link{permuteOR}} for each + concept (i.e., column) in the database \code{K}. +} +\details{ +For each concept, this function calculates the + false discovery rate (FDR) by counting the number of + times the actual OR was greater than + (or less than) a permuted OR. +}