diff --git a/R/dryclean.R b/R/dryclean.R index f481288..24e1a72 100644 --- a/R/dryclean.R +++ b/R/dryclean.R @@ -51,7 +51,7 @@ dryclean <- R6::R6Class("dryclean", #' #' @param cov character path to the granges coverage file to be drycleaned #' - #' @param centered boolean (default == FALSE) whether a coverage has already been centered by by dividing each bin by global mean signal of the sample + #' @param center boolean (default == TRUE) whether to center the coverage before drycleaning #' #' @param cbs boolean (default == FALSE) whether to perform cbs on the drycleaned coverage #' @@ -73,7 +73,7 @@ dryclean <- R6::R6Class("dryclean", #' #' @return Drycleaned coverage in GRanges format - clean = function(cov, centered = FALSE, cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = NA, germline.filter = FALSE, field = "reads.corrected", testing = FALSE){ + clean = function(cov, center = TRUE, cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = NA, germline.filter = FALSE, field = "reads.corrected", testing = FALSE){ message("Loading coverage") private$history <- rbindlist(list(private$history, data.table(action = paste("Loaded coverage from", cov), date = as.character(Sys.time())))) @@ -145,18 +145,17 @@ X")){ local.all.chr = all.chr cov = cov %Q% (seqnames %in% local.all.chr) cov = cov[, field] %>% gr2dt() %>% setnames(., field, "signal") + cov = cov %>% dt2gr() - - if(centered == FALSE){ - message("Centering the sample") + if(center == TRUE){ + + message("Median-centering the sample") private$history <- rbindlist(list(private$history, data.table(action = paste("Median-normalization of coverage"), date = as.character(Sys.time())))) - cov = cov %>% dt2gr() mcols(cov)[which(is.na(mcols(cov)[, "signal"])), "signal"] = 0 mcols(cov)[which(is.infinite(mcols(cov)[, "signal"])), "signal"] = NA values(cov)[, "signal"] = values(cov)[, "signal"] / mean(values(cov)[, "signal"], na.rm = TRUE) - }else{ - cov = cov %>% dt2gr() - } + } + cov = sortSeqlevels(cov) cov = sort(cov) @@ -263,7 +262,7 @@ X")){ suppressWarnings({ cov <- gUtils::gr.val(query = cov_template, target = cov, - val = c("background.log", + val =c("background.log", "foreground.log", "input.read.counts", "median.chr", @@ -526,12 +525,14 @@ pon <- R6::R6Class("pon", samp.final <- samp.final %>% mutate(file.available = file.exists(normal_cov)) + message("Checking for existence of files") samp.final = samp.final[file.available == TRUE] message(paste0(nrow(samp.final), " files present")) + mat.n = pbmcapply::pbmclapply(samp.final[, sample], function(nm, all.chr){ this.cov = tryCatch(readRDS(samp.final[sample == nm, normal_cov]), error = function(e) NULL) if (!is.null(this.cov)){ @@ -575,8 +576,8 @@ pon <- R6::R6Class("pon", mat.bind.t = matrix(unlist(mat.n), ncol = length(mat.n)) - print(nrow(mat.bind.t)) - print(ncol(mat.bind.t)) + #print(nrow(mat.bind.t)) + #print(ncol(mat.bind.t)) rm(mat.n) gc() diff --git a/README.html b/README.html index 5f839a0..3adb404 100644 --- a/README.html +++ b/README.html @@ -347,7 +347,11 @@
To make our life easier, we have created a Docker
container with the latest stable release of Dryclean and its dependencies. This can be found here. The latest updated version is 0.0.2
, so make sure to select the correct tag
.
Install mskilab R dependencies (gUtils)
devtools::install_github('mskilab/gUtils')
Install dryclean
-devtools::install_github('mskilab/dryclean')
+devtools::install_github('mskilab-org/dryclean')
(after installing R package) Add dryclean directory to PATH and test the executable
$ export PATH=${PATH}:$(Rscript -e 'cat(paste0(installed.packages()["dryclean", "LibPath"], "/dryclean/extdata/"))')
$ drcln -h ## to see the help message
@@ -817,7 +821,7 @@ FALSE
as it has already been centered.
blacklist_path
For ‘dryclean’ to work correctly, the lengths of each sequence on each chromosome in the coverage and PON (Panel of Normal) data must match. If you attempt to normalize the coverage with PON data of different sequence lengths, you will encounter an error. In the event of such an error, you can utilize the get_mismatch()
method to obtain a data table of all chromosomes with mismatched lengths. Additionally, you can use the get_history()
method to review all actions performed on the object with timestamps.
get_mismatch()
method to obtain a data table of all chromosomes with mismatched lengths.
+center=TRUE
. WARNING: If you used Fragcounter to correct the coverage, it has already been centered, therefore set center=FALSE
+Additionally, you can use the get_history()
method to review all actions performed on the object with timestamps.
dryclean
on tumor sample from command linedryclean
on tumor sample from c
-i INPUT, --input=INPUT
path to the coverage file in GRanges format saved as .rds
- -t CENTERED, --centered=CENTERED
- are the samples centered
+ -t CENTER, --center=CENTER
+ whether to center the coverage before drycleaning
-s CBS, --cbs=CBS
whether to perform cbs on the drycleaned coverage
@@ -1093,21 +1073,15 @@ dryclean
on tumor sample from c
-c CORES, --cores=CORES
number of cores to use
- -w WHOLEGENOME, --wholeGenome=WHOLEGENOME
- whether whole genome is being used
-
-b BLACKLIST, --blacklist=BLACKLIST
whether there are blacklisted makers
-l BLACKLIST_PATH, --blacklist_path=BLACKLIST_PATH
- if --blacklist == TRUE, path to a GRanges object marking if each marker is set to be excluded or not
+ if --blacklist == TRUE, path to a GRanges object marking if each marker is set to be excluded or it willuse a default mask
-g GERMLINE.FILTER, --germline.filter=GERMLINE.FILTER
if PON based germline filter is to be used for removing some common germline events, if set to TRUE, give path to germline annotated file
- -f GERMLINE.FILE, --germline.file=GERMLINE.FILE
- path to file annotated with germline calls, if germline.filter == TRUE
-
-m HUMAN, --human=HUMAN
whther the samples under consideration are human
diff --git a/README.md b/README.md
index c449603..f62b12c 100644
--- a/README.md
+++ b/README.md
@@ -320,7 +320,7 @@ The output has following metadata fields:
FALSE
as it has already been centered.get_mismatch()
method to obtain a data table of all chromosomes with mismatched lengths. Additionally, you can use the get_history()
method to review all actions performed on the object with timestamps.
+Prerequisites for 'dryclean' to work correctly:
+get_mismatch()
method to obtain a data table of all chromosomes with mismatched lengths.center=TRUE
. WARNING: If you used Fragcounter to correct the coverage, it has already been centered, therefore set center=FALSE
get_history()
method to review all actions performed on the object with timestamps.
@@ -505,8 +511,8 @@ Options:
-i INPUT, --input=INPUT
path to the coverage file in GRanges format saved as .rds
- -t CENTERED, --centered=CENTERED
- are the samples centered
+ -t CENTER, --center=CENTER
+ whether to center the coverage before drycleaning
-s CBS, --cbs=CBS
whether to perform cbs on the drycleaned coverage
diff --git a/drcln b/drcln
index 3ee39b4..90513db 100755
--- a/drcln
+++ b/drcln
@@ -26,7 +26,7 @@ if (!exists('opt'))
make_option(c("--mode"), type = "character", default = "coverage", help = "Mode of operation: 'pon' or 'coverage'. Set to 'pon' for PON generation and 'coverage' for normalizing a sample using existing PON"),
make_option(c("-p", "--pon"), type = "character", default = NULL, help = "path to the existing Panel Of Normal (PON) saved as .rds"),
make_option(c("-i", "--input"), type = "character", help = "path to the coverage file in GRanges format saved as .rds"),
- make_option(c("-t", "--centered"), type = "logical", default = FALSE, help = "are the samples centered"),
+ make_option(c("-C", "--center"), type = "logical", default = TRUE, help = "whether to center the coverage before drycleaning"),
make_option(c("-s", "--cbs"), type = "logical", default = FALSE, help = "whether to perform cbs on the drycleaned coverage"),
make_option(c("-n", "--cnsignif"), type = "integer", default = 1e-5, help = "the significance levels for the test to accept change-points in cbs"),
make_option(c("-c", "--cores"), type = "integer", default = 10, help = "number of cores to use"),
@@ -104,7 +104,7 @@ suppressWarnings(suppressPackageStartupMessages(library(dryclean)))
a <- dryclean_object$clean(
cov = opt$input,
- centered = opt$centered,
+ center = opt$center,
cbs = opt$cbs,
cnsignif = opt$cnsignif,
mc.cores = opt$cores,
diff --git a/inst/extdata/detergent_test.rds b/inst/extdata/detergent_test.rds
index 68a871e..45b6ab7 100644
Binary files a/inst/extdata/detergent_test.rds and b/inst/extdata/detergent_test.rds differ
diff --git a/inst/extdata/drcln b/inst/extdata/drcln
index 3ee39b4..90513db 100755
--- a/inst/extdata/drcln
+++ b/inst/extdata/drcln
@@ -26,7 +26,7 @@ if (!exists('opt'))
make_option(c("--mode"), type = "character", default = "coverage", help = "Mode of operation: 'pon' or 'coverage'. Set to 'pon' for PON generation and 'coverage' for normalizing a sample using existing PON"),
make_option(c("-p", "--pon"), type = "character", default = NULL, help = "path to the existing Panel Of Normal (PON) saved as .rds"),
make_option(c("-i", "--input"), type = "character", help = "path to the coverage file in GRanges format saved as .rds"),
- make_option(c("-t", "--centered"), type = "logical", default = FALSE, help = "are the samples centered"),
+ make_option(c("-C", "--center"), type = "logical", default = TRUE, help = "whether to center the coverage before drycleaning"),
make_option(c("-s", "--cbs"), type = "logical", default = FALSE, help = "whether to perform cbs on the drycleaned coverage"),
make_option(c("-n", "--cnsignif"), type = "integer", default = 1e-5, help = "the significance levels for the test to accept change-points in cbs"),
make_option(c("-c", "--cores"), type = "integer", default = 10, help = "number of cores to use"),
@@ -104,7 +104,7 @@ suppressWarnings(suppressPackageStartupMessages(library(dryclean)))
a <- dryclean_object$clean(
cov = opt$input,
- centered = opt$centered,
+ center = opt$center,
cbs = opt$cbs,
cnsignif = opt$cnsignif,
mc.cores = opt$cores,
diff --git a/man/dryclean.Rd b/man/dryclean.Rd
index 110ada3..8a55355 100644
--- a/man/dryclean.Rd
+++ b/man/dryclean.Rd
@@ -47,7 +47,7 @@ Function begins the online rPCA process. Use this function if you performed batc
\subsection{Usage}{
\if{html}{\out{