-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathMSAStats.jl
104 lines (90 loc) · 3.32 KB
/
MSAStats.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Counting Gaps and Coverage
# --------------------------
"""
It calculates the fraction of gaps on the `Array` (alignment, sequence, column, etc.). This
function can take an extra `dimension` argument for calculation of the gap fraction over the
given dimension.
"""
function gapfraction(x::AbstractArray{Residue})
counter = 0
len = 0
for res in x
counter += ifelse(res == GAP, 1, 0)
len += 1
end
float(counter) / float(len)
end
function gapfraction(x::AbstractArray{Residue}, dimension::Int)
mapslices(gapfraction, x, dims = dimension)
end
"""
It calculates the fraction of residues (no gaps) on the `Array`
(alignment, sequence, column, etc.). This function can take an extra `dimension` argument
for calculation of the residue fraction over the given dimension
"""
residuefraction(x::AbstractArray{Residue}) = 1.0 - gapfraction(x)
function residuefraction(x::AbstractArray{Residue}, dimension::Int)
mapslices(residuefraction, x, dims = dimension)
end
macro keep_names_dimension(functions)
function_names = functions.args
n = length(function_names)
definitions = Array{Any}(undef, n)
for i = 1:n
f = esc(function_names[i])
definitions[i] = quote
function ($f)(msa::NamedResidueMatrix{T}, dimension::Int) where {T}
result = ($f)(getarray(msa), dimension)
if dimension == 1
name_list = names(msa, 2)
N = length(name_list)
NamedArray(
result,
(
OrderedDict{String,Int}(
Utils._get_function_name(string($f)) => 1,
),
OrderedDict{String,Int}(name_list[i] => i for i = 1:N),
),
("Function", "Col"),
)
elseif dimension == 2
name_list = names(msa, 1)
N = length(name_list)
NamedArray(
result,
(
OrderedDict{String,Int}(name_list[i] => i for i = 1:N),
OrderedDict{String,Int}(
Utils._get_function_name(string($f)) => 1,
),
),
("Seq", "Function"),
)
else
throw(ArgumentError("Dimension must be 1 or 2."))
end
end
($f)(a::AbstractResidueMatrix, dimension::Int) =
($f)(namedmatrix(a), dimension)
end
end
return Expr(:block, definitions...)
end
@keep_names_dimension([gapfraction, residuefraction])
"""
Coverage of the sequences with respect of the number of positions on the MSA
"""
function coverage(msa::AbstractMatrix{Residue})
result = residuefraction(msa, 2)
if isa(result, NamedArray) && ndims(result) == 2
setnames!(result, ["coverage"], 2)
end
result
end
coverage(msa::AbstractAlignedObject) = coverage(namedmatrix(msa))
"""
Fraction of gaps per column/position on the MSA
"""
columngapfraction(msa::AbstractMatrix{Residue}) = gapfraction(msa, 1)
columngapfraction(msa::AbstractAlignedObject) = columngapfraction(namedmatrix(msa))