diff --git a/NEWS.md b/NEWS.md index b37c800..c9e7e89 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -## propr 5.1.1 +# propr 5.1.2 +--------------------- +* Restructured `graflex` to speed up + +# propr 5.1.1 --------------------- * Speed up `graflex` related functions diff --git a/R/3-shared-graflex.R b/R/3-shared-graflex.R index 01d7dc7..aa3de97 100644 --- a/R/3-shared-graflex.R +++ b/R/3-shared-graflex.R @@ -21,19 +21,15 @@ runGraflex <- function(A, K, p=100, ncores=1) { stop("'A' must be a square matrix.") if (ncores == 1){ - # for each knowledge network, calculate odds ratio and FDR res <- lapply(1:ncol(K), function(k) { - Gk <- K[, k] %*% t(K[, k]) # converts the k column into an adjacency matrix (genes x genes) - graflex(A, Gk, p=p) # this calls the graflex function implemented in Rcpp C++ + graflex(A, K[,k], p=p) # this calls the modified graflex function implemented in Rcpp C++ }) - - }else{ + } else { packageCheck("parallel") cl <- parallel::makeCluster(ncores) res <- parallel::parLapply(cl, 1:ncol(K), function(k) { - Gk <- K[, k] %*% t(K[, k]) - graflex(A, Gk, p=p) + graflex(A, K[,k], p=p) }) parallel::stopCluster(cl) } diff --git a/R/RcppExports.R b/R/RcppExports.R index 0a823c6..c4f3197 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -105,28 +105,28 @@ ctzRcpp <- function(X) { .Call(`_propr_ctzRcpp`, X) } -get_triangle <- function(mat) { - .Call(`_propr_get_triangle`, mat) -} - -get_triangle_from_index <- function(mat, index) { - .Call(`_propr_get_triangle_from_index`, mat, index) -} - getOR <- function(A, G) { .Call(`_propr_getOR`, A, G) } -permuteOR <- function(A, Gstar, p = 100L) { - .Call(`_propr_permuteOR`, A, Gstar, p) +getORperm <- function(A, G, perm) { + .Call(`_propr_getORperm`, A, G, perm) +} + +permuteOR <- function(A, G, p = 100L) { + .Call(`_propr_permuteOR`, A, G, p) } getFDR <- function(actual, permuted) { .Call(`_propr_getFDR`, actual, permuted) } -graflex <- function(A, G, p = 100L) { - .Call(`_propr_graflex`, A, G, p) +getG <- function(Gk) { + .Call(`_propr_getG`, Gk) +} + +graflex <- function(A, Gk, p = 100L) { + .Call(`_propr_graflex`, A, Gk, p) } lr2vlr <- function(lr) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index ab8c424..c4b173c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -317,51 +317,41 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// get_triangle -IntegerVector get_triangle(const IntegerMatrix& mat); -RcppExport SEXP _propr_get_triangle(SEXP matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const IntegerMatrix& >::type mat(matSEXP); - rcpp_result_gen = Rcpp::wrap(get_triangle(mat)); - return rcpp_result_gen; -END_RCPP -} -// get_triangle_from_index -IntegerVector get_triangle_from_index(const IntegerMatrix& mat, const IntegerVector& index); -RcppExport SEXP _propr_get_triangle_from_index(SEXP matSEXP, SEXP indexSEXP) { +// getOR +NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G); +RcppExport SEXP _propr_getOR(SEXP ASEXP, SEXP GSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const IntegerMatrix& >::type mat(matSEXP); - Rcpp::traits::input_parameter< const IntegerVector& >::type index(indexSEXP); - rcpp_result_gen = Rcpp::wrap(get_triangle_from_index(mat, index)); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP); + rcpp_result_gen = Rcpp::wrap(getOR(A, G)); return rcpp_result_gen; END_RCPP } -// getOR -NumericVector getOR(const IntegerVector& A, const IntegerVector& G); -RcppExport SEXP _propr_getOR(SEXP ASEXP, SEXP GSEXP) { +// getORperm +NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm); +RcppExport SEXP _propr_getORperm(SEXP ASEXP, SEXP GSEXP, SEXP permSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const IntegerVector& >::type A(ASEXP); - Rcpp::traits::input_parameter< const IntegerVector& >::type G(GSEXP); - rcpp_result_gen = Rcpp::wrap(getOR(A, G)); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type perm(permSEXP); + rcpp_result_gen = Rcpp::wrap(getORperm(A, G, perm)); return rcpp_result_gen; END_RCPP } // permuteOR -NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerVector& Gstar, int p); -RcppExport SEXP _propr_permuteOR(SEXP ASEXP, SEXP GstarSEXP, SEXP pSEXP) { +NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerMatrix& G, int p); +RcppExport SEXP _propr_permuteOR(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP); - Rcpp::traits::input_parameter< const IntegerVector& >::type Gstar(GstarSEXP); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP); Rcpp::traits::input_parameter< int >::type p(pSEXP); - rcpp_result_gen = Rcpp::wrap(permuteOR(A, Gstar, p)); + rcpp_result_gen = Rcpp::wrap(permuteOR(A, G, p)); return rcpp_result_gen; END_RCPP } @@ -377,16 +367,27 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// getG +IntegerMatrix getG(const IntegerVector& Gk); +RcppExport SEXP _propr_getG(SEXP GkSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP); + rcpp_result_gen = Rcpp::wrap(getG(Gk)); + return rcpp_result_gen; +END_RCPP +} // graflex -NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p); -RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) { +NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p); +RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GkSEXP, SEXP pSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP); - Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP); Rcpp::traits::input_parameter< int >::type p(pSEXP); - rcpp_result_gen = Rcpp::wrap(graflex(A, G, p)); + rcpp_result_gen = Rcpp::wrap(graflex(A, Gk, p)); return rcpp_result_gen; END_RCPP } @@ -516,11 +517,11 @@ static const R_CallMethodDef CallEntries[] = { {"_propr_count_less_equal_than", (DL_FUNC) &_propr_count_less_equal_than, 2}, {"_propr_count_greater_equal_than", (DL_FUNC) &_propr_count_greater_equal_than, 2}, {"_propr_ctzRcpp", (DL_FUNC) &_propr_ctzRcpp, 1}, - {"_propr_get_triangle", (DL_FUNC) &_propr_get_triangle, 1}, - {"_propr_get_triangle_from_index", (DL_FUNC) &_propr_get_triangle_from_index, 2}, {"_propr_getOR", (DL_FUNC) &_propr_getOR, 2}, + {"_propr_getORperm", (DL_FUNC) &_propr_getORperm, 3}, {"_propr_permuteOR", (DL_FUNC) &_propr_permuteOR, 3}, {"_propr_getFDR", (DL_FUNC) &_propr_getFDR, 2}, + {"_propr_getG", (DL_FUNC) &_propr_getG, 1}, {"_propr_graflex", (DL_FUNC) &_propr_graflex, 3}, {"_propr_lr2vlr", (DL_FUNC) &_propr_lr2vlr, 1}, {"_propr_lr2phi", (DL_FUNC) &_propr_lr2phi, 1}, diff --git a/src/graflex.cpp b/src/graflex.cpp index 04d0c28..19887ef 100644 --- a/src/graflex.cpp +++ b/src/graflex.cpp @@ -2,84 +2,70 @@ #include using namespace Rcpp; - -// Function to extract the triangle of a square and symmetric IntegerMatrix +// Optimized function to calculate the contingency table and the odds ratio // [[Rcpp::export]] -IntegerVector get_triangle(const IntegerMatrix& mat) { - int ncol = mat.ncol(); - int n = ncol * (ncol - 1) / 2; - - IntegerVector triangle(n); +NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G) { + int ncol = A.ncol(); + int a = 0, b = 0, c = 0, d = 0; - int k = 0; - for (int j = 0; j < ncol; ++j) { - for (int i = j+1; i < ncol; ++i) { - triangle[k++] = mat(i, j); + for (int j = 0; j < ncol - 1; ++j) { + for (int i = j + 1; i < ncol; ++i) { + int a_val = A(i, j), g_val = G(i, j); + a += (1 - a_val) * (1 - g_val); + b += (1 - a_val) * g_val; + c += a_val * (1 - g_val); + d += a_val * g_val; } } - return triangle; -} - -// Function to get the triangle of a matrix based on the given indeces -// [[Rcpp::export]] -IntegerVector get_triangle_from_index(const IntegerMatrix& mat, const IntegerVector& index) { - int ncol = mat.ncol(); - int n = ncol * (ncol - 1) / 2; - - IntegerVector triangle(n); - - int k = 0; - for (int j = 0; j < ncol; ++j) { - for (int i = j+1; i < ncol; ++i) { - triangle[k++] = mat(index[i], index[j]); - } - } + double odds_ratio = static_cast(a * d) / (b * c); + double log_odds_ratio = std::log(odds_ratio); - return triangle; + return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN); } -// Function to calculate the contingency table +// Optimized function to calculate the contingency table and the odds ratio, given a permuted index vector // [[Rcpp::export]] -NumericVector getOR(const IntegerVector& A, const IntegerVector& G) { - int n = A.size(); - - // calculate the contingency table +NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm) { + int ncol = A.ncol(); int a = 0, b = 0, c = 0, d = 0; - for (int i = 0; i < n; ++i) { - if (A[i] == 0) { - if (G[i] == 0) ++a; // not in A and not in G - else ++b; // not in A but in G - } else { - if (G[i] == 0) ++c; // in A but not in G - else ++d; // in A and in G + + for (int j = 0; j < ncol - 1; ++j) { + int pj = perm[j]; + for (int i = j + 1; i < ncol; ++i) { + int a_val = A(perm[i], pj), g_val = G(i, j); + a += (1 - a_val) * (1 - g_val); + b += (1 - a_val) * g_val; + c += a_val * (1 - g_val); + d += a_val * g_val; } } - - // calculate the odds ratio + double odds_ratio = static_cast(a * d) / (b * c); + double log_odds_ratio = std::log(odds_ratio); - return NumericVector::create( - a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN - ); + return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN); } // Function to calculate the odds ratio and other relevant info for each permutation // [[Rcpp::export]] -NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerVector& Gstar, int p = 100) { - int n = A.ncol(); +NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) { + int ncol = A.ncol(); NumericMatrix or_table(p, 8); - // calculate odds ratio for each permutation + // calculate the odds ratio for each permutation for (int i = 0; i < p; ++i) { - IntegerVector idx = sample(n, n, false) - 1; - IntegerVector Astar = get_triangle_from_index(A, idx); - or_table(i, _) = getOR(Astar, Gstar); + IntegerVector perm = sample(ncol, ncol, false) - 1; + or_table(i, _) = getORperm(A, G, perm); + // TODO should I downsample the pairs (up to a maximum) to be checked? + // So in this case, we would check how likely we get by chance an OR from the downsampled + // permuted data that is higher/lower than the OR on the downsampled empirical data } return(or_table); } +// Function to calculate the FDR, given the actual odds ratio and the permuted odds ratios // [[Rcpp::export]] List getFDR(double actual, const NumericVector& permuted) { int n = permuted.size(); @@ -104,17 +90,37 @@ List getFDR(double actual, const NumericVector& permuted) { ); } +// Function to calculate the G matrix from the Gk vector +// [[Rcpp::export]] +IntegerMatrix getG(const IntegerVector& Gk) { + int n = Gk.size(); + IntegerMatrix G(n, n); + + for (int i = 0; i < n; ++i) { + int gi = Gk[i]; + G(i, i) = gi * gi; + for (int j = 0; j < i; ++j) { + int value = gi * Gk[j]; + G(i, j) = value; + G(j, i) = value; + } + } + + return G; +} + // Function to calculate the odds ratio and FDR, given the adjacency matrix A and the knowledge graph G // [[Rcpp::export]] -NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) { +NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p = 100) { + + // Calculate Gk + IntegerMatrix G = getG(Gk); // get the actual odds ratio - IntegerVector Astar = get_triangle(A); - IntegerVector Gstar = get_triangle(G); - NumericVector actual = getOR(Astar, Gstar); + NumericVector actual = getOR(A, G); // get distribution of odds ratios on permuted data - NumericMatrix permuted = permuteOR(A, Gstar, p); + NumericMatrix permuted = permuteOR(A, G, p); // calculate the FDR List fdr = getFDR(actual(4), permuted(_, 4)); diff --git a/tests/testthat/test-GRAFLEX.R b/tests/testthat/test-GRAFLEX.R index efb85c4..5d9042e 100644 --- a/tests/testthat/test-GRAFLEX.R +++ b/tests/testthat/test-GRAFLEX.R @@ -2,6 +2,21 @@ library(testthat) library(propr) library(Rcpp) +# Create matrices of 0 and 1 +A <- matrix(c(1, 1, 0, 1, 0, + 1, 1, 1, 0, 1, + 0, 1, 1, 0, 1, + 1, 0, 0, 1, 1, + 0, 1, 1, 1, 1), + nrow = 5, byrow = TRUE) +K <- matrix(c(1, 0, + 0, 1, + 1, 0, + 0, 1, + 1, 1), + nrow = 5, byrow = TRUE) +colnames(K) <- c("C1", "C2") + # ===================== # # old code for graflex # # ===================== # @@ -16,8 +31,7 @@ cppFunction(" permuteOR_old <- function(A, G, p = 500) { Gstar <- G[lower.tri(G)] res <- lapply(1:p, function(i) { - # Shuffle the adjacency matrix - # index <- sample(1:ncol(A)) + # index <- sample(ncol(A), ncol(A), replace = FALSE) index <- sample_idx(ncol(A))+1 # use the same random sampling generator A <- A[index, index] Astar <- A[lower.tri(A)] @@ -90,37 +104,28 @@ runGraflex_old <- function(A, K, p = 500) { # run tests # # ===================== # -test_that("check if get_triangle works correctly", { - - n <- 50 - # Create a square and symmetric matrix - mat <- matrix(sample(0:1, n*n, replace = TRUE), nrow = n, byrow = TRUE) - # Expected lower triangle - expected <- mat[lower.tri(mat)] - # Compute the triangle - result <- propr:::get_triangle(mat) - # Check if the result matches the expected lower triangle - expect_equal(result, expected) -}) - -test_that("check if get_triangle_from_index works correctly", { - n <- 50 - - # Create a square and symmetric matrix - mat <- matrix(sample(0:1, n*n, replace = TRUE), nrow = n, byrow = TRUE) +test_that("check if odds ratio is computed correctly", { - # Create a random permutation of the indices - index <- sample(1:n) + G <- K[, 1] %*% t(K[, 1]) + A1 <- A[lower.tri(A)] + G1 <- G[lower.tri(G)] - # expected lower triangle - expected <- mat[index, index] - expected <- expected[lower.tri(expected)] + # Expected values + a <- length(A1[A1 == 0 & G1 == 0]) # not in A not in G + b <- length(A1[A1 == 0 & G1 == 1]) # not in A but in G + c <- length(A1[A1 == 1 & G1 == 0]) # in A but not in G + d <- length(A1[A1 == 1 & G1 == 1]) # in A in G + expected_odds_ratio <- (a * d) / (b * c) - # Compute the lower triangle - result <- propr:::get_triangle_from_index(mat, index-1) + # compute odds ratio table + or_table <- propr:::getOR(A, G) - # Check if the result matches the expected lower triangle - expect_equal(result, expected) + # check + expect_equal(a, as.integer(or_table[1])) + expect_equal(b, as.integer(or_table[2])) + expect_equal(c, as.integer(or_table[3])) + expect_equal(d, as.integer(or_table[4])) + expect_equal(expected_odds_ratio, or_table[5]) }) test_that("check that getFDR works properly", { @@ -137,59 +142,26 @@ test_that("check that getFDR works properly", { expect_equal(res$over, sum(x >= actual) / length(x)) }) -test_that("check if odds ratio is computed correctly", { - - # Create two vectors of 1 and 0 - Astar <- c(1, 0, 1, 1, 0, 1, 0, 0, 1, 0) - Gstar <- c(1, 0, 0, 1, 1, 0, 0, 1, 0, 1) +test_that("check if runGraflex produce the expected contingency table and odds ratio values", { - # Expected values - a <- 2 # not in A not in G - b <- 3 # not in A but in G - c <- 3 # in A but not in G - d <- 2 # in A in G - expected_odds_ratio <- (a * d) / (b * c) - - # compute odds ratio table - or_table <- propr:::getOR(Astar, Gstar) - - # check - expect_equal(a, as.integer(or_table[1])) - expect_equal(b, as.integer(or_table[2])) - expect_equal(c, as.integer(or_table[3])) - expect_equal(d, as.integer(or_table[4])) - expect_equal(expected_odds_ratio, or_table[5]) -}) + Astar <- A[lower.tri(A)] -test_that("check if runGraflex produce the expected results", { - - # Create matrices of 0 and 1 - A <- matrix(c(1, 1, 0, 1, 0, - 1, 1, 1, 0, 1, - 0, 1, 1, 0, 1, - 1, 0, 0, 1, 1, - 0, 1, 1, 1, 1), - nrow = 5, byrow = TRUE) - K <- matrix(c(1, 0, - 0, 1, - 1, 0, - 0, 1, - 1, 1), - nrow = 5, byrow = TRUE) - colnames(K) <- c("C1", "C2") - # Expected values for C1 - a1 <- 2 # not in A not in G - b1 <- 2 # not in A but in G - c1 <- 5 # in A but not in G - d1 <- 1 # in A in G + G1 <- K[, 1] %*% t(K[, 1]) + G1 <- G1[lower.tri(G1)] + a1 <- length(which(Astar == 0 & G1 == 0)) # not in A not in G + b1 <- length(which(Astar == 0 & G1 == 1)) # not in A but in G + c1 <- length(which(Astar == 1 & G1 == 0)) # in A but not in G + d1 <- length(which(Astar == 1 & G1 == 1)) # in A in G expected_odds_ratio1 <- (a1 * d1) / (b1 * c1) # Expected values for C2 - a2 <- 3 # not in A not in G - b2 <- 1 # not in A but in G - c2 <- 4 # in A but not in G - d2 <- 2 # in A in G + G2 <- K[, 2] %*% t(K[, 2]) + G2 <- G2[lower.tri(G2)] + a2 <- length(which(Astar == 0 & G2 == 0)) # not in A not in G + b2 <- length(which(Astar == 0 & G2 == 1)) # not in A but in G + c2 <- length(which(Astar == 1 & G2 == 0)) # in A but not in G + d2 <- length(which(Astar == 1 & G2 == 1)) # in A in G expected_odds_ratio2 <- (a2 * d2) / (b2 * c2) # compute graflex @@ -209,23 +181,7 @@ test_that("check if runGraflex produce the expected results", { }) test_that("check reproducibility seed works", { - - # Create a matrix of 0 and 1 - A <- matrix(c(1, 1, 0, 1, 0, - 1, 1, 1, 0, 1, - 0, 1, 1, 0, 1, - 1, 0, 0, 1, 1, - 0, 1, 1, 1, 1), - nrow = 5, byrow = TRUE) - K <- matrix(c(1, 0, - 0, 1, - 1, 0, - 0, 1, - 1, 1), - nrow = 5, byrow = TRUE) - colnames(K) <- c("C1", "C2") - - # compute graflex + set.seed(0) res1 <- propr:::runGraflex(A, K, p=100) set.seed(0) @@ -233,38 +189,22 @@ test_that("check reproducibility seed works", { set.seed(123) res3 <- propr:::runGraflex(A, K, p=100) - # check expect_equal(res1, res2) expect_false(isTRUE(all.equal(res1, res3))) }) test_that("check that permuteOR works as the old code", { - # Create matrices of 0 and 1 - A <- matrix(c(1, 1, 0, 1, 0, - 1, 1, 1, 0, 1, - 0, 1, 1, 0, 1, - 1, 0, 0, 1, 1, - 0, 1, 1, 1, 1), - nrow = 5, byrow = TRUE) - K <- matrix(c(1, 0, - 0, 1, - 1, 0, - 0, 1, - 1, 1), - nrow = 5, byrow = TRUE) - colnames(K) <- c("C1", "C2") - Gk <- K[, 1] %*% t(K[, 1]) - Gstar <- Gk[lower.tri(Gk)] + G <- K[, 1] %*% t(K[, 1]) # compute permuted odds ratios set.seed(0) - res1 <- propr:::permuteOR(A, Gstar, 10) + res1 <- propr:::permuteOR(A, G, 10) set.seed(0) - res2 <- permuteOR_old(A, Gk, 10) + res2 <- permuteOR_old(A, G, 10) # check - expect_true(all(res1 == res2)) + expect_true(all(res1[,-c(7,8)] == res2)) }) test_that("check if same results are obtained compared to old code", {