diff --git a/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf b/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf index 78e0b04..0ad864e 100644 Binary files a/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf and b/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf differ diff --git a/docs/devel/index.html b/docs/devel/index.html index 8f40d3e..46044aa 100644 --- a/docs/devel/index.html +++ b/docs/devel/index.html @@ -301,7 +301,7 @@

Orchestrating Hi-C analysis with Bioconductor

-

Package: OHCA
Authors: Jacques Serizay [aut, cre]
Compiled: 2023-11-07
Package version: 0.98.1
R version: R Under development (unstable) (2023-11-02 r85465)
BioC version: 3.19
License: MIT + file LICENSE

+

Package: OHCA
Authors: Jacques Serizay [aut, cre]
Compiled: 2023-11-07
Package version: 0.99.0
R version: R Under development (unstable) (2023-11-02 r85465)
BioC version: 3.19
License: MIT + file LICENSE

Welcome

This is the landing page of the “Orchestrating Hi-C analysis with Bioconductor” book. The primary aim of this book is to introduce the R user to Hi-C analysis. This book starts with key concepts important for the analysis of chromatin conformation capture and then presents Bioconductor tools that can be leveraged to process, analyze, explore and visualize Hi-C data.

Table of contents

@@ -431,6 +431,7 @@

Orchestrating Hi-C analysis with Bioconductor

Building book

The OHCA book has been rendered in R thanks to a number of packages, including but not limited to:

    +
  • BiocBook
  • devtools
  • quarto
  • rebook
  • @@ -618,7 +619,6 @@

    Orchestrating Hi-C analysis with Bioconductor

    ## HiContacts 1.5.0 2023-10-24 [2] Bioconductor ## HiContactsData 1.5.0 2023-10-31 [2] Bioconductor ## HiCool 1.3.0 2023-10-24 [2] Bioconductor -## hicrep 1.12.2 2023-11-07 [2] Github (TaoYang-dev/hicrep@e485dfa) ## highr 0.10 2022-12-22 [2] CRAN (R 4.4.0) ## Hmisc 5.1-1 2023-09-12 [2] CRAN (R 4.4.0) ## hms 1.1.3 2023-03-21 [2] CRAN (R 4.4.0) @@ -668,7 +668,7 @@

    Orchestrating Hi-C analysis with Bioconductor

    ## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.4.0) ## nlme 3.1-163 2023-08-09 [3] CRAN (R 4.4.0) ## nnet 7.3-19 2023-05-03 [3] CRAN (R 4.4.0) -## OHCA 0.98.1 2023-11-07 [1] Bioconductor +## OHCA 0.99.0 2023-11-07 [1] local ## openssl 2.1.1 2023-09-25 [2] CRAN (R 4.4.0) ## OrganismDbi 1.45.0 2023-10-25 [2] Bioconductor ## packrat 0.9.2 2023-09-05 [2] CRAN (R 4.4.0) @@ -798,7 +798,7 @@

    Orchestrating Hi-C analysis with Bioconductor

    ## zip 2.3.0 2023-04-17 [2] CRAN (R 4.4.0) ## zlibbioc 1.49.0 2023-10-24 [2] Bioconductor ## -## [1] /tmp/RtmpixmP1F/Rinst529f91dd +## [1] /tmp/RtmpEEUcH8/Rinst555f2f89bc ## [2] /usr/local/lib/R/site-library ## [3] /usr/local/lib/R/library ## diff --git a/docs/devel/pages/data-representation.html b/docs/devel/pages/data-representation.html index 58e5bea..0b46e4d 100644 --- a/docs/devel/pages/data-representation.html +++ b/docs/devel/pages/data-representation.html @@ -1571,8 +1571,8 @@

    This fetches files from the cloud, downloads them locally and returns the path of the local file.

    coolf
    -##                                          EH7702 
    -##  "/root/.cache/R/ExperimentHub/1701a09a86_7752"
    +## EH7702 +## "/root/.cache/R/ExperimentHub/174688ce76a_7752"

    Similarly, example files are available for other file formats:

    @@ -1642,7 +1642,7 @@

    # ----- This creates a connection to a `.(m)cool` file (path stored in `coolf`) CoolFile(coolf) ## CoolFile object -## .mcool file: /root/.cache/R/ExperimentHub/1701a09a86_7752 +## .mcool file: /root/.cache/R/ExperimentHub/174688ce76a_7752 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1650,7 +1650,7 @@

    # ----- This creates a connection to a `.hic` file (path stored in `hicf`) HicFile(hicf) ## HicFile object -## .hic file: /root/.cache/R/ExperimentHub/1702b65c0b7_7836 +## .hic file: /root/.cache/R/ExperimentHub/17460f12195_7836 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1659,8 +1659,8 @@

    HicproFile(hicpromatrixf, hicproregionsf) ## HicproFile object ## HiC-Pro files: -## $ matrix: /root/.cache/R/ExperimentHub/1707968a6f0_7837 -## $ regions: /root/.cache/R/ExperimentHub/17073053091_7838 +## $ matrix: /root/.cache/R/ExperimentHub/1745c1383e1_7837 +## $ regions: /root/.cache/R/ExperimentHub/17439b9b892_7838 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1668,7 +1668,7 @@

    # ----- This creates a connection to a pairs file PairsFile(pairsf) ## PairsFile object -## resource: /root/.cache/R/ExperimentHub/1702dcdfa3b_7753

    +## resource: /root/.cache/R/ExperimentHub/174ff8a7b2_7753

2.3.3 ContactFile slots

@@ -1684,7 +1684,7 @@

cf <- CoolFile(coolf)
 cf
 ##  CoolFile object
-##  .mcool file: /root/.cache/R/ExperimentHub/1701a09a86_7752 
+##  .mcool file: /root/.cache/R/ExperimentHub/174688ce76a_7752 
 ##  resolution: 1000 
 ##  pairs file: 
 ##  metadata(0):
@@ -1782,7 +1782,7 @@ 

hic ## `HiCExperiment` object with 8,757,906 contacts over 12,079 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1701a09a86_7752" +## fileName: "/root/.cache/R/ExperimentHub/174688ce76a_7752" ## focus: "whole genome" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 @@ -1814,7 +1814,7 @@

These pieces of information are called slots. They can be directly accessed using getter functions, bearing the same name as the slot.

fileName(hic)
-##  [1] "/root/.cache/R/ExperimentHub/1701a09a86_7752"
+##  [1] "/root/.cache/R/ExperimentHub/174688ce76a_7752"
 
 focus(hic)
 ##  NULL
@@ -1881,7 +1881,7 @@ 

hic ## `HiCExperiment` object with 13,681,280 contacts over 12,165 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1702b65c0b7_7836" +## fileName: "/root/.cache/R/ExperimentHub/17460f12195_7836" ## focus: "whole genome" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 @@ -2321,14 +2321,14 @@

yeast_hic
 ##  `HiCExperiment` object with 8,757,906 contacts over 763 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1701a09a86_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/174688ce76a_7752" 
 ##  focus: "whole genome" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 16000 
 ##  interactions: 267709 
 ##  scores(2): count balanced 
 ##  topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) 
-##  pairsFile: /root/.cache/R/ExperimentHub/1702dcdfa3b_7753 
+##  pairsFile: /root/.cache/R/ExperimentHub/174ff8a7b2_7753 
 ##  metadata(3): ID org date

@@ -2620,8 +2620,8 @@

pairsFile(yeast_hic) <- pairsf
 
 pairsFile(yeast_hic)
-##                                           EH7703 
-##  "/root/.cache/R/ExperimentHub/1702dcdfa3b_7753"
+##                                          EH7703 
+##  "/root/.cache/R/ExperimentHub/174ff8a7b2_7753"
 
 readLines(pairsFile(yeast_hic), 25)
 ##   [1] "## pairs format v1.0"                                                             
diff --git a/docs/devel/pages/interactions-centric.html b/docs/devel/pages/interactions-centric.html
index c2fec98..8baf145 100644
--- a/docs/devel/pages/interactions-centric.html
+++ b/docs/devel/pages/interactions-centric.html
@@ -386,7 +386,7 @@ 

hic
 ##  `HiCExperiment` object with 471,364 contacts over 407 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1701a09a86_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/174688ce76a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -414,7 +414,7 @@ 

pf
 ##  PairsFile object
-##  resource: /root/.cache/R/ExperimentHub/1702dcdfa3b_7753
+## resource: /root/.cache/R/ExperimentHub/174ff8a7b2_7753

If needed, PairsFile connections can be imported directly into a GInteractions object with import().

@@ -454,7 +454,7 @@

library(HiContacts)
 ps <- distanceLaw(pf, by_chr = TRUE) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/1702dcdfa3b_7753 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/174ff8a7b2_7753 in memory. This may take a while...
 ps
 ##  # A tibble: 115 × 6
 ##    chr   binned_distance          p     norm_p norm_p_unity slope
@@ -496,7 +496,7 @@ 

eco1_ps <- distanceLaw(eco1_pf, by_chr = TRUE) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/f9346a45c35_7755 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/fae489c8d53_7755 in memory. This may take a while...
 eco1_ps
 ##  # A tibble: 115 × 6
 ##    chr   binned_distance          p     norm_p norm_p_unity slope
@@ -657,7 +657,7 @@ 

pairsFile(hic) <- pairsf
 scalo <- scalogram(hic) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/1702dcdfa3b_7753 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/174ff8a7b2_7753 in memory. This may take a while...
 plotScalogram(scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5))
@@ -681,7 +681,7 @@

## loading from cache pairsFile(eco1_hic) <- eco1_pairsf eco1_scalo <- scalogram(eco1_hic) -## Importing pairs file /root/.cache/R/ExperimentHub/f9346a45c35_7755 in memory. This may take a while... +## Importing pairs file /root/.cache/R/ExperimentHub/fae489c8d53_7755 in memory. This may take a while... merged_scalo <- rbind( scalo |> mutate(sample = 'WT'), eco1_scalo |> mutate(sample = 'eco1') diff --git a/docs/devel/pages/interoperability.html b/docs/devel/pages/interoperability.html index 71c467b..cb248af 100644 --- a/docs/devel/pages/interoperability.html +++ b/docs/devel/pages/interoperability.html @@ -283,10 +283,9 @@

@@ -319,8 +318,9 @@

This notebook illustrates how to use a range of popular Hi-C—related R packages with HiCExperiment objects. Conversion to the data structures supported by the following packages is illustrated here:

    -
  • diffHic
  • -
  • hicrep
  • +
  • +diffHic +
  • multiHiCcompare
  • TopDom
  • @@ -461,104 +461,12 @@

-

-9.2 HiCrep

-

hicrep is a popular package to compute stratum-adjusted correlations between Hi-C datasets (Yang et al. (2017)). “Stratum” refers to the distance from the main diagonal: with increase distance from the main diagonal, interactions of the DNA polymer are bound to decrease. hicrep computes a “per-stratum” correlation score and computes a weighted average correlation for entire chromosomes.

-
-
-
- -
-
-Installing hicrep -
-
-
-

hicrep package has been available from Bioconductor for many years but has been withdrawn from their repositories at some point. You can always install hicrep directly from its GitHub repository as follows:

-
-
remotes::install_github('TaoYang-dev/hicrep')
-
-
-
-

In order to use hicrep, we first need to create two HiCExperiment objects.

-
-
# ---- This downloads example `.mcool` files and caches them locally 
-coolf_eco1 <- HiContactsData('yeast_eco1', 'mcool')
-
-
-
hic_wt <- import(coolf_wt, format = 'cool')
-hic_eco1 <- import(coolf_eco1, format = 'cool')
-
-

We can now run the main get.scc function from hicrep. The documentation for this function is available from the console by typing ?hicrep::get.scc. More information is also available from the GitHub page. It informs the end user that the input for this function should be two intra-chromosomal Hi-C raw count matrices in square (optionally sparse) format.

-
-
hic_wt
-##  `HiCExperiment` object with 8,757,906 contacts over 12,079 regions 
-##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1701a09a86_7752" 
-##  focus: "whole genome" 
-##  resolutions(5): 1000 2000 4000 8000 16000
-##  active resolution: 1000 
-##  interactions: 2945692 
-##  scores(2): count balanced 
-##  topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) 
-##  pairsFile: N/A 
-##  metadata(0):
-
-as.matrix(hic_wt["IV"], use.scores = 'count')[1:10, 1:10]
-##  10 x 10 sparse Matrix of class "dgTMatrix"
-##                           
-##   [1,] . 1 . . 1 . . . . .
-##   [2,] 1 . . . . . . . . .
-##   [3,] . . . . . . . . . .
-##   [4,] . . . . . . . . . .
-##   [5,] 1 . . . . . . . 1 .
-##   [6,] . . . . . . . . . .
-##   [7,] . . . . . . . . . .
-##   [8,] . . . . . . . . 1 .
-##   [9,] . . . . 1 . . 1 . .
-##  [10,] . . . . . . . . . .
-
-library(hicrep)
-scc <- get.scc(
-    as.matrix(hic_wt["IV"], use.scores = 'count'), 
-    as.matrix(hic_eco1["IV"], use.scores = 'count'), 
-    resol = 1000, h = 25, lbr = 5000, ubr = 50000
-)
-scc
-##  $corr
-##   [1] 0.9412784 0.9410680 0.9408082 0.9404796 0.9404544 0.9402584 0.9400710
-##   [8] 0.9398965 0.9397935 0.9397027 0.9396112 0.9393001 0.9393180 0.9390608
-##  [15] 0.9391645 0.9394670 0.9395147 0.9396798 0.9397547 0.9398291 0.9401371
-##  [22] 0.9402369 0.9402251 0.9404188 0.9404327 0.9403101 0.9402634 0.9401683
-##  [29] 0.9401746 0.9394978 0.9391277 0.9381969 0.9371561 0.9357012 0.9342620
-##  [36] 0.9324366 0.9302835 0.9277556 0.9247008 0.9208466 0.9166648 0.9120206
-##  [43] 0.9060828 0.9002430 0.8931754 0.8847777
-##  
-##  $wei
-##   [1] 123.2500 123.1667 123.0833 123.0000 122.9167 122.8333 122.7500 122.6667
-##   [9] 122.5833 122.5000 122.4167 122.3333 122.2500 122.1667 122.0833 122.0000
-##  [17] 121.9167 121.8333 121.7500 121.6667 121.5833 121.5000 121.4167 121.3333
-##  [25] 121.2500 121.1667 121.0833 121.0000 120.9167 120.8333 120.7500 120.6667
-##  [33] 120.5833 120.5000 120.4167 120.3333 120.2500 120.1667 120.0833 120.0000
-##  [41] 119.9167 119.8333 119.7500 119.6667 119.5833 119.5000
-##  
-##  $scc
-##            [,1]
-##  [1,] 0.9334303
-##  
-##  $std
-##  [1] 0.001994845
-
-scc$scc
-##            [,1]
-##  [1,] 0.9334303
-
-

-9.3 multiHiCcompare

+

+9.2 multiHiCcompare

The multiHiCcompare package provides functions for joint normalization and difference detection in multiple Hi-C datasets (Stansfield et al. (2019)). According to its excerpt, to perform differential interaction analysis, it requires a list of raw counts for different samples/replicates, stored in data frames with four columns (chr, start1, start2, count).
Manipulating a HiCExperiment object to coerce it into such a structure is straightforward.

-
library(dplyr)
+
library(dplyr)
 library(tidyr)
 library(purrr)
 hics <- list(
@@ -582,7 +490,7 @@ 

Once this list is generated, the classical multiHiCcompare workflow can be applied: first run make_hicexp(), followed by cyclic_loess(), then hic_exactTest() and finally results():

-
DI <- hics_list |> 
+
DI <- hics_list |> 
     make_hicexp(
         data_list = hics_list, 
         groups = factor(c(1, 2))
@@ -604,12 +512,12 @@ 

## 22640: 1 665001 665001 0 -0.3110054 10.013750 0.60075706 1.0000000 ## 22641: 1 665001 666001 1 -0.4989794 7.750157 0.41481212 1.0000000

-

-9.4 TopDom

+

+9.3 TopDom

The TopDom method is widely used to annotate topological domains in genomes from Hi-C data (Shin et al. (2015)). The TopDom package was created to implement this method in R (Bengtsson et al. (2020)).

Unfortunately, the format of the input to TopDom is rather tricky (see ?TopDom::readHiC). The following chunk of code shows how to coerce a HiCExperiment object into a TopDom-compatible object.

-
library(TopDom)
+
library(TopDom)
 hic <- import(coolf_wt, format = 'cool')
 HiCExperiment2TopDom <- function(hic, chr) {
     data <- list()
@@ -640,7 +548,7 @@ 

Now that we have coerced a HiCExperiment object into a TopDom-compatible object, we can use the main TopDom function to annotate topological domains.

-
domains <- TopDom::TopDom(hic_topdom, window.size = 5)
+
domains <- TopDom::TopDom(hic_topdom, window.size = 5)
 domains
 ##  TopDom:
 ##  Parameters:
@@ -673,7 +581,7 @@ 

The resulting domains object can be used to extract annotated domains, store them in topologicalFeatures of the original HiCExperiment, and optionally write a bed file to export them in text.

-
topologicalFeatures(hic, 'domain') <- domains$bed |> 
+
topologicalFeatures(hic, 'domain') <- domains$bed |> 
     mutate(chromStart = chromStart + 1) |> 
     filter(name == 'domain') |> 
     makeGRangesFromDataFrame()
@@ -697,8 +605,8 @@ 

rtracklayer::export(topologicalFeatures(hic, 'domain'), 'hic_domains.bed')

-

-9.5 GOTHiC

+

+9.4 GOTHiC

GOTHiC relies on a cumulative binomial test to detect interactions between distal genomic loci that have significantly more reads than expected by chance in Hi-C experiments (Mifsud et al. (2017)).

@@ -724,7 +632,7 @@

Based on these facts, we can simplify the binomial test function provided by GOTHiC so that it can directly use binned interactions imported as a HiCExperiment object in R.

-
Show the code for GOTHiC_binomial function
GOTHiC_binomial <- function(x) {
+
Show the code for GOTHiC_binomial function
GOTHiC_binomial <- function(x) {
 
     if (length(trans(x)) != 0) stop("Only `cis` interactions can be used here.")
     ints <- interactions(x) |>
@@ -785,11 +693,11 @@ 

-
res <- GOTHiC_binomial(hic["II"])
+
res <- GOTHiC_binomial(hic["II"])
 res
 ##  `HiCExperiment` object with 471,364 contacts over 802 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1701a09a86_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/174688ce76a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 1000 
@@ -864,9 +772,6 @@ 

Stansfield, J. C., Cresswell, K. G., & Dozmorov, M. G. (2019). multiHiCcompare: Joint normalization and comparative analysis of complex hi-c experiments. Bioinformatics, 35(17), 2916–2923. https://doi.org/10.1093/bioinformatics/btz048
-
-Yang, T., Zhang, F., Yardımcı, G. G., Song, F., Hardison, R. C., Noble, W. S., Yue, F., & Li, Q. (2017). HiCRep: Assessing the reproducibility of hi-c data using a stratum-adjusted correlation coefficient. Genome Research, 27(11), 1939–1949. https://doi.org/10.1101/gr.220640.117 -

Back to top