-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcool2matrix.R
65 lines (60 loc) · 2.34 KB
/
cool2matrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#' Convert a cooler (.cool) file to a Hi-C contact matrix
#'
#' Reads the pixel table and the chromosome offset index from a cooler file
#' and builds the dense, symmetric intrachromosomal contact matrix for one
#' chromosome.
#'
#' The chromosome is located by its position in the file's chromosome offset
#' index: numeric names map to their number, and "X", "Y" and "M" map to 23,
#' 24 and 25 (a leading "chr" is ignored). This presumes the file lists its
#' chromosomes in that order -- TODO confirm for non-human assemblies.
#'
#' @param file the cooler file to be converted.
#' @param chr specify which chromosome to extract. Default is "chr1".
#' @return a square Hi-C contact matrix, returned as a data.frame, that can
#'   be recognized by get.scc.
#' @references HiCRep: assessing the reproducibility of Hi-C data using a
#' stratum-adjusted correlation coefficient. Tao Yang, Feipeng Zhang, Galip
#' Gurkan Yardimci, Fan Song, Ross C Hardison, William Stafford Noble,
#' Feng Yue, Qunhua Li. Genome Research 2017. doi: 10.1101/gr.220640.117
#' @export
#' @examples
#' \dontrun{
#' # Read a .cool file, extract intrachromosomal interactions on
#' # chr18 and convert it into a square matrix
#' mat <- cool2matrix("hic.cool", chr = "chr18")
#' }
cool2matrix <- function(file, chr = 'chr1') {
  if (length(chr) != 1L || is.na(chr)) {
    stop("'chr' must be a single chromosome name, e.g. \"chr1\"",
         call. = FALSE)
  }

  # Release HDF5 handles on every exit path, including errors raised below.
  on.exit(H5close(), add = TRUE)
  pixels <- h5read(file, 'pixels')
  chrom.offset <- as.vector(h5read(file, 'indexes/chrom_offset'))

  # Translate a chromosome name into its 1-based position in chrom.offset.
  # Returns NA (without a coercion warning) for names it cannot interpret.
  chr2index <- function(chr) {
    name <- sub('^chr', '', as.character(chr))
    special <- c(X = 23L, Y = 24L, M = 25L)
    if (name %in% names(special)) {
      special[[name]]
    } else {
      suppressWarnings(as.integer(name))
    }
  }

  chrom.index <- chr2index(chr)
  # chrom.offset holds one start offset per chromosome plus a final end
  # offset, so it indexes length(chrom.offset) - 1 chromosomes.
  n.chroms <- length(chrom.offset) - 1L
  if (is.na(chrom.index) || chrom.index < 1L || chrom.index > n.chroms) {
    stop("chromosome '", chr, "' not found: the file indexes ", n.chroms,
         " chromosome(s)", call. = FALSE)
  }

  # Bin ids of the first and last bin of the chromosome, in the same
  # numbering as pixels$bin1_id / pixels$bin2_id.
  first.bin <- chrom.offset[chrom.index]
  last.bin <- chrom.offset[chrom.index + 1L] - 1
  n.rows <- last.bin - first.bin + 1

  # Keep only pixels with both ends on the requested chromosome.
  bin1 <- as.vector(pixels$bin1_id)
  bin2 <- as.vector(pixels$bin2_id)
  keep <- which(bin1 >= first.bin & bin1 <= last.bin &
                  bin2 >= first.bin & bin2 <= last.bin)

  mat <- matrix(0, nrow = n.rows, ncol = n.rows)
  if (length(keep) > 0L) {
    row.idx <- bin1[keep] - first.bin + 1
    col.idx <- bin2[keep] - first.bin + 1
    counts <- as.vector(pixels$count)[keep]
    # Write each pixel to (row, col) and its mirror (col, row). The two
    # targets are interleaved pixel by pixel so that, should a cell be
    # addressed more than once, values are written in the same order as a
    # sequential per-pixel fill.
    mat[cbind(as.vector(rbind(row.idx, col.idx)),
              as.vector(rbind(col.idx, row.idx)))] <- rep(counts, each = 2L)
  }

  as.data.frame(mat)
}