-
Notifications
You must be signed in to change notification settings - Fork 1
/
scalogram.R
64 lines (61 loc) · 2.09 KB
/
scalogram.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#' Compute a scalogram of contacts
#'
#' @name scalogram
#'
#' @param x A `HiCExperiment` object
#' @param dist_min Minimum distance for interactions to be considered.
#' @param nbins Number of bins to divide each chromosome
#' @param probs Quantiles of interactions
#' @return a tibble
#'
#' @return a tibble
#'
#' @importFrom tibble tibble
#' @importFrom tidyr drop_na
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr group_by
#' @importFrom dplyr group_modify
#'
#' @examples
#' contacts_yeast <- HiCExperiment::contacts_yeast()
#' pairsFile(contacts_yeast) <- HiContactsData::HiContactsData(
#' 'yeast_wt', format = 'pairs.gz'
#' )
#' scalo <- scalogram(contacts_yeast['II'])
#' scalo
NULL
#' @rdname scalogram
#' @export
scalogram <- function(x, dist_min = 0, nbins = 250, probs = c(0.25, 0.5, 0.75)) {
pairsFile <- HiCExperiment::pairsFile(x)
if (is.null(pairsFile)) {
stop("No PairsFile is associated with the provided HiCExperiment object.")
}
message("Importing pairs file ", pairsFile, " in memory. This may take a while...")
pairs <- BiocIO::import(pairsFile, format = 'pairs')
cispairs <- pairs[InteractionSet::intrachr(pairs)]
an_ <- HiCExperiment::anchors(cispairs)
scalo <- tibble::tibble(
dist = cispairs$distance,
chr = as.vector(GenomicRanges::seqnames(an_[[1]])),
pos1 = GenomicRanges::start(an_[[1]]),
pos2 = GenomicRanges::start(an_[[2]])
) |>
tidyr::pivot_longer(-c(chr, dist), names_to = 'pos_', values_to = 'pos') |>
dplyr::select(-pos_) |>
dplyr::filter(dist > dist_min, pos > 1)
seq_ <- seq(1, max(scalo$pos, na.rm = TRUE), length.out = nbins + 1)
df <- scalo |>
dplyr::mutate(binned_pos = seq_[findInterval(pos, vec = seq_)]) |>
dplyr::group_by(chr, binned_pos) |>
dplyr::group_modify(~ {
tibble::tibble(
prob = factor(probs, rev(probs)),
dist_quantile = quantile(.x$dist, probs = probs)
)
})
return(df)
}