Skip to content

Commit

Permalink
Merge pull request #29 from mskilab-org/sb_dev
Browse files Browse the repository at this point in the history
changed centered to center
  • Loading branch information
sebastian-brylka authored Jan 18, 2024
2 parents b399bf2 + ab436f5 commit 9c4eb5f
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 87 deletions.
25 changes: 13 additions & 12 deletions R/dryclean.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ dryclean <- R6::R6Class("dryclean",
#'
#' @param cov character path to the granges coverage file to be drycleaned
#'
#' @param centered boolean (default == FALSE) whether a coverage has already been centered by by dividing each bin by global mean signal of the sample
#' @param center boolean (default == TRUE) whether to center the coverage before drycleaning
#'
#' @param cbs boolean (default == FALSE) whether to perform cbs on the drycleaned coverage
#'
Expand All @@ -73,7 +73,7 @@ dryclean <- R6::R6Class("dryclean",
#'
#' @return Drycleaned coverage in GRanges format

clean = function(cov, centered = FALSE, cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = NA, germline.filter = FALSE, field = "reads.corrected", testing = FALSE){
clean = function(cov, center = TRUE, cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = NA, germline.filter = FALSE, field = "reads.corrected", testing = FALSE){

message("Loading coverage")
private$history <- rbindlist(list(private$history, data.table(action = paste("Loaded coverage from", cov), date = as.character(Sys.time()))))
Expand Down Expand Up @@ -145,18 +145,17 @@ X")){
local.all.chr = all.chr
cov = cov %Q% (seqnames %in% local.all.chr)
cov = cov[, field] %>% gr2dt() %>% setnames(., field, "signal")
cov = cov %>% dt2gr()


if(centered == FALSE){
message("Centering the sample")
if(center == TRUE){
message("Median-centering the sample")
private$history <- rbindlist(list(private$history, data.table(action = paste("Median-normalization of coverage"), date = as.character(Sys.time()))))
cov = cov %>% dt2gr()
mcols(cov)[which(is.na(mcols(cov)[, "signal"])), "signal"] = 0
mcols(cov)[which(is.infinite(mcols(cov)[, "signal"])), "signal"] = NA
values(cov)[, "signal"] = values(cov)[, "signal"] / mean(values(cov)[, "signal"], na.rm = TRUE)
}else{
cov = cov %>% dt2gr()
}
}


cov = sortSeqlevels(cov)
cov = sort(cov)
Expand Down Expand Up @@ -263,7 +262,7 @@ X")){
suppressWarnings({
cov <- gUtils::gr.val(query = cov_template,
target = cov,
val = c("background.log",
val =c("background.log",
"foreground.log",
"input.read.counts",
"median.chr",
Expand Down Expand Up @@ -526,12 +525,14 @@ pon <- R6::R6Class("pon",
samp.final <- samp.final %>%
mutate(file.available = file.exists(normal_cov))


message("Checking for existence of files")

samp.final = samp.final[file.available == TRUE]

message(paste0(nrow(samp.final), " files present"))


mat.n = pbmcapply::pbmclapply(samp.final[, sample], function(nm, all.chr){
this.cov = tryCatch(readRDS(samp.final[sample == nm, normal_cov]), error = function(e) NULL)
if (!is.null(this.cov)){
Expand Down Expand Up @@ -575,8 +576,8 @@ pon <- R6::R6Class("pon",


mat.bind.t = matrix(unlist(mat.n), ncol = length(mat.n))
print(nrow(mat.bind.t))
print(ncol(mat.bind.t))
#print(nrow(mat.bind.t))
#print(ncol(mat.bind.t))
rm(mat.n)
gc()

Expand Down
74 changes: 24 additions & 50 deletions README.html

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ The output has following metadata fields:
<tr><td>4. median.chr: median chromosome signal</td></tr>
<tr><td>5. foreground: Foreground signal, that forms SCNAs (S vector) in read count/ratio space</td></tr>
<tr><td>6. background: This is the L low ranked vector after decomposition and represent the background noise separated by dryclean in read count/ratio space </td></tr>
<tr><td>7. log.reads: log of the mean-normalized count</td></tr>
<tr><td>7. log.reads: log of the median-normalized count</td></tr>
</tbody>
</table>

Expand All @@ -344,9 +344,9 @@ The parameters that can be used in clean() function:
<td style="border: 1px solid black; padding: 5px;">Field name in GRanges metadata of coverage to use for drycleaning</td>
</tr>
<tr>
<td style="border: 1px solid black; padding: 5px;">centered</td>
<td style="border: 1px solid black; padding: 5px;">FALSE</td>
<td style="border: 1px solid black; padding: 5px;">Whether a coverage has already been mean-normalized</td>
<td style="border: 1px solid black; padding: 5px;">center</td>
<td style="border: 1px solid black; padding: 5px;">TRUE</td>
<td style="border: 1px solid black; padding: 5px;">Whether to center a coverage. If you used the Fragcounter to correct the coverage, set to <code>FALSE</code> as it has already been centered.</td>
</tr>
<tr>
<td style="border: 1px solid black; padding: 5px;">cbs</td>
Expand Down Expand Up @@ -388,7 +388,13 @@ The parameters that can be used in clean() function:

<br>

For 'dryclean' to work correctly, the number of bins on each chromosome in the coverage and PON (Panel of Normal) data must match. If you attempt to normalize the coverage with PON data of different number of bins, you will encounter an error. In the event of such an error, you can utilize the <code>get_mismatch()</code> method to obtain a data table of all chromosomes with mismatched lengths. Additionally, you can use the <code>get_history()</code> method to review all actions performed on the object with timestamps.
Prerequisites for 'dryclean' to work correctly:
<ul>
<li>The number of bins on each chromosome in the coverage and PON (Panel of Normal) data must match. If you attempt to normalize the coverage with PON data of different number of bins, you will encounter an error. In the event of such an error, you can utilize the <code>get_mismatch()</code> method to obtain a data table of all chromosomes with mismatched lengths.</li>
<li>The coverage data has to be centered. If the coverage has not been centered, set <code>center=TRUE</code>. WARNING: If you used Fragcounter to correct the coverage, it has already been centered, therefore set <code>center=FALSE</code></li>
</ul>

Additionally, you can use the <code>get_history()</code> method to review all actions performed on the object with timestamps.



Expand Down Expand Up @@ -505,8 +511,8 @@ Options:
-i INPUT, --input=INPUT
path to the coverage file in GRanges format saved as .rds
-t CENTERED, --centered=CENTERED
are the samples centered
-t CENTER, --center=CENTER
whether to center the coverage before drycleaning
-s CBS, --cbs=CBS
whether to perform cbs on the drycleaned coverage
Expand Down
4 changes: 2 additions & 2 deletions drcln
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if (!exists('opt'))
make_option(c("--mode"), type = "character", default = "coverage", help = "Mode of operation: 'pon' or 'coverage'. Set to 'pon' for PON generation and 'coverage' for normalizing a sample using existing PON"),
make_option(c("-p", "--pon"), type = "character", default = NULL, help = "path to the existing Panel Of Normal (PON) saved as .rds"),
make_option(c("-i", "--input"), type = "character", help = "path to the coverage file in GRanges format saved as .rds"),
make_option(c("-t", "--centered"), type = "logical", default = FALSE, help = "are the samples centered"),
make_option(c("-C", "--center"), type = "logical", default = TRUE, help = "whether to center the coverage before drycleaning"),
make_option(c("-s", "--cbs"), type = "logical", default = FALSE, help = "whether to perform cbs on the drycleaned coverage"),
make_option(c("-n", "--cnsignif"), type = "integer", default = 1e-5, help = "the significance levels for the test to accept change-points in cbs"),
make_option(c("-c", "--cores"), type = "integer", default = 10, help = "number of cores to use"),
Expand Down Expand Up @@ -104,7 +104,7 @@ suppressWarnings(suppressPackageStartupMessages(library(dryclean)))

a <- dryclean_object$clean(
cov = opt$input,
centered = opt$centered,
center = opt$center,
cbs = opt$cbs,
cnsignif = opt$cnsignif,
mc.cores = opt$cores,
Expand Down
Binary file modified inst/extdata/detergent_test.rds
Binary file not shown.
4 changes: 2 additions & 2 deletions inst/extdata/drcln
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if (!exists('opt'))
make_option(c("--mode"), type = "character", default = "coverage", help = "Mode of operation: 'pon' or 'coverage'. Set to 'pon' for PON generation and 'coverage' for normalizing a sample using existing PON"),
make_option(c("-p", "--pon"), type = "character", default = NULL, help = "path to the existing Panel Of Normal (PON) saved as .rds"),
make_option(c("-i", "--input"), type = "character", help = "path to the coverage file in GRanges format saved as .rds"),
make_option(c("-t", "--centered"), type = "logical", default = FALSE, help = "are the samples centered"),
make_option(c("-C", "--center"), type = "logical", default = TRUE, help = "whether to center the coverage before drycleaning"),
make_option(c("-s", "--cbs"), type = "logical", default = FALSE, help = "whether to perform cbs on the drycleaned coverage"),
make_option(c("-n", "--cnsignif"), type = "integer", default = 1e-5, help = "the significance levels for the test to accept change-points in cbs"),
make_option(c("-c", "--cores"), type = "integer", default = 10, help = "number of cores to use"),
Expand Down Expand Up @@ -104,7 +104,7 @@ suppressWarnings(suppressPackageStartupMessages(library(dryclean)))

a <- dryclean_object$clean(
cov = opt$input,
centered = opt$centered,
center = opt$center,
cbs = opt$cbs,
cnsignif = opt$cnsignif,
mc.cores = opt$cores,
Expand Down
4 changes: 2 additions & 2 deletions man/dryclean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testthat/cbs_output.rds
Binary file not shown.
12 changes: 0 additions & 12 deletions tests/testthat/test_dryclean.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,18 +170,6 @@ test_that("cbs", {
})



test_that("centering", {
pon_object = pon$new(pon_path = detergent.path)

dryclean_object <- dryclean$new(pon = pon_object)

a <- dryclean_object$clean(cov = sample.1.path, testing = TRUE, centered = FALSE)

expect_equal(a$background.log[1], 0.0508, tolerance = 0.001)

})

test_that("clustering", {
expect_error(
pon_object = pon$new(
Expand Down

0 comments on commit 9c4eb5f

Please sign in to comment.