-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathGaps.jl
61 lines (51 loc) · 1.83 KB
/
Gaps.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Pairwise Gap Percentage
# =======================
"""
It calculates the gap intersection as percentage from a table of `Frequencies`.
"""
function gap_intersection_percentage(nxy::Frequencies{T,2,GappedAlphabet}) where {T}
T(100.0) * gettablearray(nxy)[21, 21] / gettotal(nxy)
end
"""
It calculates the gap union as percentage from a table of `Frequencies`.
"""
function gap_union_percentage(nxy::Frequencies{T,2,GappedAlphabet}) where {T}
marginals = getmarginalsarray(nxy)
T(100.0) * (marginals[21, 1] + marginals[21, 2] - gettablearray(nxy)[21, 21]) /
gettotal(nxy)
end
# MIToS Pairwise Gap Percentage
# =============================
"""
It takes a MSA or a file and a `FileFormat` as first arguments. It calculates the percentage
of gaps on columns pairs (union and intersection) using sequence clustering (Hobohm I).
Argument, type, default value and descriptions:
```
- clustering Bool true Sequence clustering (Hobohm I)
- threshold 62 Percent identity threshold for sequence clustering (Hobohm I)
```
This function returns:
```
- pairwise gap union as percentage
- pairwise gap intersection as percentage
```
"""
function pairwisegapfraction(
aln::AbstractMatrix{Residue};
clustering::Bool = true,
threshold = 62,
)
clusters = clustering ? hobohmI(aln, threshold) : NoClustering()
table = Frequencies(ContingencyTable(Float64, Val{2}, GappedAlphabet()))
gu = mapcolpairfreq!(gap_union_percentage, aln, table, weights = clusters)
gi = mapcolpairfreq!(gap_intersection_percentage, aln, table, weights = clusters)
gu, gi
end
function pairwisegapfraction(
filename::String,
format::Type{T};
kargs...,
) where {T<:FileFormat}
aln = read_file(filename, T, AnnotatedMultipleSequenceAlignment, generatemapping = true)
pairwisegapfraction(aln; kargs...)
end