diff --git a/DESCRIPTION b/DESCRIPTION index 934cb62..f888213 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: easylift Title: An R package to perform genomic liftover -Version: 0.99.8 +Version: 0.99.9 Date: 2023-09-25 Authors@R: c( diff --git a/NEWS.md b/NEWS.md index bd04301..daf19ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# easylift 0.99.7 +# easylift 0.99.9 * Bug fixes and further improvements to the code. diff --git a/R/easylift.R b/R/easylift.R index dd86690..cde91a2 100644 --- a/R/easylift.R +++ b/R/easylift.R @@ -8,7 +8,7 @@ #' @param chain The path to the chain file containing the liftover mapping. #' Can be provided in gzipped or non-gzipped format. If omitted, the function #' will look in the default BiocFileCache for a properly named chain file. -#' @param bfc A BiocFileCache object, if not provided (most typically) +#' @param bfc A BiocFileCache object (optional); if not provided (most typically) #' the default location will be used. #' #' @return A GRanges object with lifted genomic coordinates. 
@@ -21,21 +21,31 @@ #' seqname = Rle(c("chr1", "chr2"), c(100000, 100000)), #' ranges = IRanges(start = 1, end = 200000) #' ) +#' # Here, "hg19" is the source genome #' genome(gr) <- "hg19" #' #' # Here, we use the `system.file()` function because the chain file is in the #' # package (however if you need to point to any other file on your machine, #' # just do 'chain <- "path/to/your/hg19ToHg38.over.chain.gz"'): #' chain <- system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") +#' +#' # Here, "hg38" is the target genome #' easylift(gr, "hg38", chain) #' #' \dontrun{ -#' # To use `BiocFileCache` for the chain file, you can add it to the cache as follows: +#' # To use `BiocFileCache` for the chain file, add it to the cache as follows: #' chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz" #' bfc <- BiocFileCache() -#' bfcadd(bfc, chain_file) -#' # Then, you can use it in `easylift` like this: +#' +#' # Add chain file to cache if not already available +#' if (nrow(bfcquery(bfc, basename(chain_file))) == 0) +#' bfcadd(bfc, chain_file) +#' +#' # Then, use it in `easylift` like this: +#' #' easylift(gr, "hg38") +#' # or +#' gr |> easylift("hg38") #' } #' @import GenomicRanges #' @import GenomeInfoDb @@ -71,14 +81,7 @@ easylift <- function(x, to, chain, bfc) { if (missing(bfc)) { bfc <- BiocFileCache() } - capTo <- paste0(toupper(substr(to, 1, 1)), substr(to, 2, nchar(to))) - trychainfile <- paste0(unique_genomes, "To", capTo, ".over.chain") - q <- bfcquery(bfc, trychainfile) - if (nrow(q) >= 1) { - chain <- bfc[[q$rid[1]]] - } else { - stop(trychainfile, " file not found!") - } + chain <- .get_chain_from_BiocFileCache(unique_genomes, to, bfc) } # Check if the chain file is gzipped and unzip if needed @@ -113,3 +116,19 @@ easylift <- function(x, to, chain, bfc) { return(cur) } + +### 'from' and 'to' are single strings containing UCSC genome names, and 'bfc' is a BiocFileCache object +.get_chain_from_BiocFileCache <- function(from, to, bfc) { + capTo 
<- paste0(toupper(substr(to, 1, 1)), substr(to, 2, nchar(to))) + trychainfile <- paste0(from, "To", capTo, ".over.chain") + q <- bfcquery(bfc, trychainfile) + if (nrow(q) == 0) { + stop( + "Chain file not specified and filename with ", + trychainfile, + " pattern not found in BiocFileCache default location." + ) + } + chain <- bfc[[q$rid[1]]] + return(chain) +} diff --git a/README.Rmd b/README.Rmd index 85aecb3..34e7679 100644 --- a/README.Rmd +++ b/README.Rmd @@ -47,23 +47,31 @@ gr <- GRanges( end = 200000 ) ) +# Here, "hg19" is the source genome genome(gr) <- "hg19" -to <- "hg38" chain <- "hg19ToHg38.over.chain.gz" -easylift(gr, to, chain) +# Here, "hg38" is the target genome +easylift(gr, "hg38", chain) ``` -To use `BiocFileCache` for the chain file, you can add it to the cache as follows: +### BiocFileCache + +To use `BiocFileCache` for the chain file, add it to the cache as follows: ```{r example2, eval = FALSE} chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz" bfc <- BiocFileCache() -bfcadd(bfc, chain_file) + +# Add chain file to cache if not already available +if (nrow(bfcquery(bfc, basename(chain_file))) == 0) + bfcadd(bfc, chain_file) ``` -Then, you can use it in `easylift` like this: +Then, use it in `easylift` like this: ```{r example3, eval = FALSE} -easylift(gr, "hg38") +easylift(gr, "hg38") +# or +gr |> easylift("hg38") ``` ## Citation @@ -71,7 +79,7 @@ easylift(gr, "hg38") To cite package `easylift` in publications use: Al Nahid A, Love M (2023). _easylift: An R package to perform - genomic liftover_. R package version 0.99.7, + genomic liftover_. R package version 0.99.9, <https://github.com/nahid18/easylift>. 
@@ -82,7 +90,7 @@ A BibTeX entry for LaTeX users is title = {easylift: An R package to perform genomic liftover}, author = {Abdullah Al Nahid and Michael Love}, year = {2023}, - note = {R package version 0.99.7}, + note = {R package version 0.99.9}, url = {https://github.com/nahid18/easylift}, } ``` diff --git a/README.md b/README.md index 6eec2aa..cfb8733 100644 --- a/README.md +++ b/README.md @@ -42,26 +42,34 @@ gr <- GRanges( end = 200000 ) ) +# Here, "hg19" is the source genome genome(gr) <- "hg19" -to <- "hg38" chain <- "hg19ToHg38.over.chain.gz" -easylift(gr, to, chain) +# Here, "hg38" is the target genome +easylift(gr, "hg38", chain) ``` -To use `BiocFileCache` for the chain file, you can add it to the cache -as follows: +### BiocFileCache + +To use `BiocFileCache` for the chain file, add it to the cache as +follows: ``` r chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz" bfc <- BiocFileCache() -bfcadd(bfc, chain_file) + +# Add chain file to cache if not already available +if (nrow(bfcquery(bfc, basename(chain_file))) == 0) + bfcadd(bfc, chain_file) ``` -Then, you can use it in `easylift` like this: +Then, use it in `easylift` like this: ``` r -easylift(gr, "hg38") +easylift(gr, "hg38") +# or +gr |> easylift("hg38") ``` ## Citation @@ -69,7 +77,7 @@ easylift(gr, "hg38") To cite package `easylift` in publications use: Al Nahid A, Love M (2023). *easylift: An R package to perform genomic -liftover*. R package version 0.99.7, +liftover*. R package version 0.99.9, <https://github.com/nahid18/easylift>. 
A BibTeX entry for LaTeX users is @@ -78,7 +86,7 @@ A BibTeX entry for LaTeX users is title = {easylift: An R package to perform genomic liftover}, author = {Abdullah Al Nahid and Michael Love}, year = {2023}, - note = {R package version 0.99.7}, + note = {R package version 0.99.9}, url = {https://github.com/nahid18/easylift}, } diff --git a/man/easylift.Rd b/man/easylift.Rd index c4a3e88..fb7343a 100644 --- a/man/easylift.Rd +++ b/man/easylift.Rd @@ -15,7 +15,7 @@ easylift(x, to, chain, bfc) Can be provided in gzipped or non-gzipped format. If omitted, the function will look in the default BiocFileCache for a properly named chain file.} -\item{bfc}{A BiocFileCache object, if not provided (most typically) +\item{bfc}{A BiocFileCache object (optional); if not provided (most typically) the default location will be used.} } \value{ @@ -33,21 +33,31 @@ gr <- GRanges( seqname = Rle(c("chr1", "chr2"), c(100000, 100000)), ranges = IRanges(start = 1, end = 200000) ) +# Here, "hg19" is the source genome genome(gr) <- "hg19" # Here, we use the `system.file()` function because the chain file is in the # package (however if you need to point to any other file on your machine, # just do 'chain <- "path/to/your/hg19ToHg38.over.chain.gz"'): chain <- system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") + +# Here, "hg38" is the target genome easylift(gr, "hg38", chain) \dontrun{ -# To use `BiocFileCache` for the chain file, you can add it to the cache as follows: +# To use `BiocFileCache` for the chain file, add it to the cache as follows: chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz" bfc <- BiocFileCache() -bfcadd(bfc, chain_file) -# Then, you can use it in `easylift` like this: + +# Add chain file to cache if not already available +if (nrow(bfcquery(bfc, basename(chain_file))) == 0) + bfcadd(bfc, chain_file) + +# Then, use it in `easylift` like this: + easylift(gr, "hg38") +# or +gr |> easylift("hg38") } } \seealso{ diff --git 
a/tests/testthat/test-easylift.R b/tests/testthat/test-easylift.R index f5d0dea..52c2164 100644 --- a/tests/testthat/test-easylift.R +++ b/tests/testthat/test-easylift.R @@ -2,35 +2,35 @@ library(GenomicRanges) library(IRanges) test_that("easylift function tests with valid chain files", { - # Test 1: Test with a valid chain gzipped file - chain_path_gz <- system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") + chain_path_gz <- + system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") gr <- GenomicRanges::GRanges( seqnames = "chr1", ranges = IRanges::IRanges(start = 100, end = 200), strand = "+" ) genome(gr) <- "hg19" - expect_type(easylift(gr, to = "hg38", chain = chain_path_gz), "S4") - expect_no_error(easylift(gr, to = "hg38", chain = chain_path_gz)) + expect_type(easylift(x = gr, to = "hg38", chain = chain_path_gz), "S4") + expect_no_error(easylift(x = gr, to = "hg38", chain = chain_path_gz)) # Test 2 and 3: Test with a valid chain file - chain_path <- system.file("extdata", "hg19ToHg38.over.chain", package = "easylift") + chain_path <- + system.file("extdata", "hg19ToHg38.over.chain", package = "easylift") gr2 <- GenomicRanges::GRanges( seqnames = "chr2", ranges = IRanges::IRanges(start = 200, end = 300), strand = "+" ) genome(gr2) <- "hg19" - expect_type(easylift(gr2, to = "hg38", chain = chain_path), "S4") - expect_no_error(easylift(gr2, to = "hg38", chain = chain_path)) + expect_type(easylift(x = gr2, to = "hg38", chain = chain_path), "S4") + expect_no_error(easylift(x = gr2, to = "hg38", chain = chain_path)) }) test_that("easylift function tests with error cases", { - # Test 4: Test with an empty GRanges object gr3 <- GenomicRanges::GRanges() - expect_error(easylift(gr3, to = "hg38", chain = chain_path)) + expect_error(easylift(x = gr3, to = "hg38", chain = chain_path)) # Test 5: Test with missing genome information gr4 <- GenomicRanges::GRanges( @@ -38,7 +38,7 @@ test_that("easylift function tests with error cases", { 
ranges = IRanges::IRanges(start = 400, end = 500), strand = "+" ) - expect_error(easylift(gr4, to = "hg38", chain = chain_path)) + expect_error(easylift(x = gr4, to = "hg38", chain = chain_path)) }) test_that("easylift succeeds with BiocFileCache", { @@ -46,7 +46,8 @@ test_that("easylift succeeds with BiocFileCache", { library("easylift") # Create a test chain file in the temporary directory - chain_file <- system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") + chain_file <- + system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift") # Test 6: Check if the chain file exists expect_true(file.exists(chain_file), "Chain file should exist.") @@ -65,21 +66,20 @@ test_that("easylift succeeds with BiocFileCache", { expect_true(nrow(q) > 0, "Chain file should exist in cache.") # Create a test GRanges object - gr <- GenomicRanges::GRanges( - seqname = Rle(c("chr1", "chr2"), c(100000, 100000)), - ranges = IRanges::IRanges(start = 1, end = 200000) - ) + gr <- GenomicRanges::GRanges(seqname = Rle(c("chr1", "chr2"), c(100000, 100000)), + ranges = IRanges::IRanges(start = 1, end = 200000)) genome(gr) <- "hg19" - # Perform easylift with the target assembly + # Perform liftover with the target genome tryCatch({ - result <- easylift(x=gr, to="hg38", bfc=bfc) + result <- easylift(x = gr, to = "hg38", bfc = bfc) }, error = function(e) { cat("Error message:", conditionMessage(e), "\n") stop("easylift encountered an error.") }) # Test 8: Check if easylift succeeded without error - expect_true(!is(result, "try-error"), "easylift should succeed without error.") + expect_true(!is(result, "try-error"), + "easylift should succeed without error.") }) diff --git a/vignettes/easylift.Rmd b/vignettes/easylift.Rmd index 15f4ab6..453ba81 100644 --- a/vignettes/easylift.Rmd +++ b/vignettes/easylift.Rmd @@ -56,8 +56,9 @@ gr <- GRanges( start = 1, end = 200000 ) ) +# Here, "hg19" is the source genome genome(gr) <- "hg19" -to <- "hg38" + # Here, we use the 
`system.file()` function because the chain file is in the # package (however if you need to point to any other file on your machine, # just do 'chain <- "path/to/your/hg19ToHg38.over.chain.gz"'): @@ -68,24 +69,31 @@ gr ## Run +Provide the `GenomicRanges` object, the target genome (e.g. `hg38`), and the chain file. + ```{r run} -easylift(gr, to, chain) +# Here, "hg38" is the target genome +easylift(gr, "hg38", chain) ``` -## BiocFileCache +## Run with BiocFileCache -To use `BiocFileCache` for the chain file, you can add it to the cache -as follows: +To use `BiocFileCache` for the chain file, add it to the cache as follows: ```{r bioCache, eval=FALSE} chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz" bfc <- BiocFileCache() -bfcadd(bfc, chain_file) + +# Add chain file to cache if not already available +if (nrow(bfcquery(bfc, basename(chain_file))) == 0) + bfcadd(bfc, chain_file) ``` -Then, you can use it in `easylift` like this: +Then, use it in `easylift` like this: ```{r bioCache2, eval=FALSE} +easylift(gr, "hg38") +# or gr |> easylift("hg38") ```