-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from bartongroup/development
v0.4.4
- Loading branch information
Showing
33 changed files
with
1,317 additions
and
907 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Type: Package | ||
Package: rats | ||
Version: 0.4.2 | ||
Date: 2017-04-05 | ||
Version: 0.4.4 | ||
Date: 2017-04-19 | ||
Title: Relative Abundance of Transcripts | ||
Encoding: UTF-8 | ||
Authors: c(person("Kimon Froussios", role=c("aut"), email="[email protected]"), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,12 +166,12 @@ parameters_are_good <- function(slo, annot, name_A, name_B, varname, COUNTS_COL, | |
if (minboots < 100) { | ||
warnmsg["toofewboots"] <- "Your quantifications have few bootstrap iterations, which reduces reproducibility of the calls." | ||
} | ||
if (qbootnum < 100) { | ||
if (qbootnum < 100 && qbootnum != 0) { | ||
warnmsg["toolowbootnum"] <- "The requested qbootnum is low, which reduces reproducibility of the calls." | ||
} | ||
bootcombos <- minboots^numsamples # Conservative estimate. | ||
if (qbootnum >= bootcombos/100) | ||
warnmsg["toomanyboots"] <- "The requested number of quantification bootstraps is very high, relative to the supplied data. Over 1% chance of duplicate iterations." | ||
warnmsg["toomanyboots"] <- "The requested number of quantification bootstraps is very high, relatively to the supplied data. Over 1% chance of duplicate iterations." | ||
if (qbootnum >= maxmatrix/dim(annot)[1]) | ||
return(list("error"=TRUE,"message"="The requested number of quantification bootstraps would exceed the maximum capacity of an R matrix.")) | ||
} # else it is probably count data and qboot will be auto-set to FALSE | ||
|
@@ -302,12 +302,10 @@ alloc_out <- function(annot, full){ | |
Genes <- data.table("parent_id"=as.vector(unique(annot$parent_id)), | ||
"elig"=NA, "sig"=NA, "elig_fx"=NA, "quant_reprod"=NA, "rep_reprod"=NA, "DTU"=NA, "transc_DTU"=NA, | ||
"known_transc"=NA_integer_, "detect_transc"=NA_integer_, "elig_transc"=NA_integer_, | ||
"pvalAB"=NA_real_, "pvalBA"=NA_real_, "pvalAB_corr"=NA_real_, "pvalBA_corr"=NA_real_, | ||
"quant_p_meanAB"=NA_real_, "quant_p_meanBA"=NA_real_, "quant_p_stdevAB"=NA_real_, "quant_p_stdevBA"=NA_real_, | ||
"quant_p_minAB"=NA_real_, "quant_p_minBA"=NA_real_, "quant_p_maxAB"=NA_real_, "quant_p_maxBA"=NA_real_, | ||
"pval"=NA_real_, "pval_corr"=NA_real_, | ||
"quant_p_mean"=NA_real_, "quant_p_stdev"=NA_real_, "quant_p_min"=NA_real_, "quant_p_max"=NA_real_, | ||
"quant_na_freq"=NA_real_, "quant_dtu_freq"=NA_real_, | ||
"rep_p_meanAB"=NA_real_, "rep_p_meanBA"=NA_real_, "rep_p_stdevAB"=NA_real_, "rep_p_stdevBA"=NA_real_, | ||
"rep_p_minAB"=NA_real_, "rep_p_minBA"=NA_real_, "rep_p_maxAB"=NA_real_, "rep_p_maxBA"=NA_real_, | ||
"rep_p_mean"=NA_real_, "rep_p_stdev"=NA_real_, "rep_p_min"=NA_real_, "rep_p_max"=NA_real_, | ||
"rep_na_freq"=NA_real_, "rep_dtu_freq"=NA_real_) | ||
Transcripts <- data.table("target_id"=annot$target_id, "parent_id"=annot$parent_id, | ||
"elig_xp"=NA, "elig"=NA, "sig"=NA, "elig_fx"=NA, "quant_reprod"=NA, "rep_reprod"=NA, "DTU"=NA, "gene_DTU"=NA, | ||
|
@@ -324,8 +322,7 @@ alloc_out <- function(annot, full){ | |
Parameters <- list("num_replic_A"=NA_integer_, "num_replic_B"=NA_integer_) | ||
Genes <- data.table("parent_id"=levels(as.factor(annot$parent_id)), "DTU"=NA, | ||
"elig_transc"=NA_integer_, "elig"=NA, "elig_fx"=NA, | ||
"pvalAB"=NA_real_, "pvalBA"=NA_real_, | ||
"pvalAB_corr"=NA_real_, "pvalBA_corr"=NA_real_, "sig"=NA) | ||
"pval"=NA_real_, "pval_corr"=NA_real_, "sig"=NA) | ||
Transcripts <- data.table("target_id"=annot$target_id, "parent_id"=annot$parent_id, "DTU"=NA, | ||
"sumA"=NA_real_, "sumB"=NA_real_, # sum across replicates of means across bootstraps | ||
"totalA"=NA_real_, "totalB"=NA_real_, # sum of all transcripts for that gene | ||
|
@@ -359,9 +356,9 @@ alloc_out <- function(annot, full){ | |
#' @param threads Number of threads (POSIX systems only). | ||
#' @return list | ||
#' | ||
#' @import utils | ||
#' @import parallel | ||
#' @import data.table | ||
#' @import stats | ||
#' | ||
calculate_DTU <- function(counts_A, counts_B, tx_filter, test_transc, test_genes, full, count_thresh, p_thresh, dprop_thresh, correction, threads= 1) { | ||
if (packageVersion("data.table") >= "1.9.8") # Ensure data.table complies. | ||
|
@@ -410,65 +407,94 @@ calculate_DTU <- function(counts_A, counts_B, tx_filter, test_transc, test_genes | |
|
||
#---------- TESTS | ||
|
||
# Proportion test. | ||
# Transcript-level test. | ||
if (test_transc) { | ||
Transcripts[(elig), pval := unlist( mclapply( as.data.frame(t(Transcripts[(elig), .(sumA, sumB, totalA, totalB)])), | ||
function(row) { prop.test(x = row[c(1, 2)], | ||
n = row[c(3, 4)], | ||
correct = TRUE)[["p.value"]] | ||
function(row) { | ||
return( g.test.2(obsx= c(row[1], row[3]-row[1]), obsy= c(row[2], row[4]-row[2])) ) | ||
}, mc.cores= threads, mc.allow.recursive= FALSE, mc.preschedule= TRUE) | ||
) ] | ||
Transcripts[(elig), pval_corr := p.adjust(pval, method=correction)] | ||
Transcripts[(elig), sig := pval_corr < p_thresh] | ||
Transcripts[(elig), DTU := sig & elig_fx] | ||
} | ||
|
||
# G test. | ||
# Gene-level test. | ||
if (test_genes) { | ||
Genes[(elig), c("pvalAB", "pvalBA") := t( as.data.frame( mclapply(Genes[(elig), parent_id], | ||
Genes[(elig), pval := t( as.data.frame( mclapply(Genes[(elig), parent_id], | ||
function(parent) { | ||
# Extract all relevant data to avoid repeated look ups in the large table. | ||
subdt <- Transcripts[parent, .(sumA, sumB, propA, propB)] # All isoforms, including not detected ones. | ||
pAB <- g.test(x = subdt[, sumA], p = subdt[, propB]) | ||
pBA <- g.test(x = subdt[, sumB], p = subdt[, propA]) | ||
return(c(pAB, pBA)) | ||
subdt <- Transcripts[parent, .(sumA, sumB)] # All isoforms, including not detected ones. | ||
return( g.test.2(obsx= subdt$sumA, obsy= subdt$sumB) ) | ||
}, mc.cores= threads, mc.preschedule= TRUE, mc.allow.recursive= FALSE) | ||
)) ] | ||
Genes[(elig), pvalAB_corr := p.adjust(pvalAB, method=correction)] | ||
Genes[(elig), pvalBA_corr := p.adjust(pvalBA, method=correction)] | ||
Genes[(elig), sig := pvalAB_corr < p_thresh & pvalBA_corr < p_thresh] | ||
Genes[(elig), pval_corr := p.adjust(pval, method=correction)] | ||
Genes[(elig), sig := pval_corr < p_thresh] | ||
Genes[(elig), DTU := sig & elig_fx] | ||
} | ||
}) | ||
return(resobj) | ||
} | ||
|
||
|
||
|
||
#================================================================================ | ||
#' Log-likelihood test of goodness of fit. | ||
#' | ||
#' @param x a numeric vector of positive numbers, with at least one non-zero value. | ||
#' @param p a vector of probabilities of the same length of x. | ||
#' | ||
#' Sourced and adapted from from: | ||
#' V3.3 Pete Hurd Sept 29 2001. [email protected] | ||
#' http://www.psych.ualberta.ca/~phurd/cruft/g.test.r | ||
#' @param obsx a numeric vector of finite positive counts, with at least one non-zero value. | ||
#' @param px a vector of probabilities of the same length as xobs. ( sum(px) <= 1 ) | ||
#' | ||
#' The order of values in the two vectors should be the same. | ||
#' If any value of xp is zero and the corresponding xobs is not, g.test.1 will always reject the hypothesis. | ||
#' No corrections are applied. | ||
#' No input checks are applied, as RATs needs to run this millions of times. | ||
#' | ||
#' @import stats | ||
#' @export | ||
#' | ||
g.test <- function(x, p = rep(1/length(x), length(x))) | ||
{ | ||
n <- sum(x) | ||
E <- n * p | ||
names(E) <- names(x) | ||
g <- 0 | ||
for (i in 1:length(x)){ | ||
if (x[i] != 0) g <- g + x[i] * log(x[i]/E[i]) | ||
} | ||
q <- 1 | ||
STATISTIC <- G <- 2*g/q | ||
PARAMETER <- length(x) - 1 | ||
PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) | ||
# | ||
g.test.1 <- function(obsx, px) { | ||
n = length(obsx) | ||
sx <- sum(obsx) | ||
ex <- sx * px # expected values | ||
G <- 2 * sum( sapply (seq.int(1, n, 1), | ||
function (i) { if (obsx[i] != 0) { return(obsx[i] * log(obsx[i]/ex[i])) } else { return(0) } }) ) | ||
return( pchisq(G, df= n - 1, lower.tail= FALSE) ) | ||
} | ||
|
||
#================================================================================ | ||
#' Log-likelihood test of independence. | ||
#' | ||
#' For two sets of observations. | ||
#' | ||
#' @param obsx a numeric vector of positive counts, with at least one non-zero value. | ||
#' @param obsy a numeric vector of positive counts of same length as obsx, with at least one non-zero value. | ||
#' | ||
#' The order of values in the two vectors should be the same. | ||
#' No corrections are applied. | ||
#' No input checks are applied, as RATs needs to run this millions of times. | ||
#' | ||
#' @import stats | ||
#' @export | ||
# | ||
g.test.2 <- function(obsx, obsy) { | ||
n = length(obsx) | ||
# Row and column sums. | ||
sx <- sum(obsx) | ||
sy <- sum(obsy) | ||
sv <- obsx + obsy | ||
st <- sx + sy | ||
# Marginal probabilities. | ||
mpx <- sx / st | ||
mpy <- sy / st | ||
mpv <- sapply(sv, function(v) {v/st}) | ||
# Expected values. | ||
ex <- mpx * mpv * st | ||
ey <- mpy * mpv * st | ||
# Statistic. | ||
G <- 2 * sum( sum( sapply (seq.int(1, n, 1), | ||
function (i) { if (obsx[i] != 0) { return(obsx[i] * log(obsx[i]/ex[i])) } else { return(0) } }) ), | ||
sum( sapply (seq.int(1, n, 1), | ||
function (i) { if (obsy[i] != 0) { return(obsy[i] * log(obsy[i]/ey[i])) } else { return(0) } }) ) | ||
) | ||
return( pchisq(G, df= n - 1, lower.tail= FALSE) ) | ||
} |
Oops, something went wrong.