-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathGeneralUtils.jl
167 lines (156 loc) · 4.54 KB
/
GeneralUtils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
    All

Singleton marker type that replaces the `"all"` and `"*"` values of MIToS 1.0.
Being a type rather than a string, it can be used for method dispatch.
"""
struct All
end
# Return the part of `str` that follows its last '.' (all of `str` if it has no dot).
function _get_function_name(str::String)::String
    pieces = split(str, '.')
    return last(pieces)
end
"""
    _find_next_space_or_tab(line, start_pos::Int)

Return the index range covered by the first run of consecutive spaces and/or tabs
found in `line` at or after index `start_pos`, or the empty range `0:-1` when there is
no such run (including when `start_pos` is greater than `lastindex(line)`).

This function performs the same operation as
`something(findnext(r"[ \t]+", line, start_pos), 0:-1)` but it is faster.

`start_pos` is expected to be a valid character index of `line`. The scan moves from
one character to the next with `nextind`, so characters that span several code units
before or between the delimiters do not cause indexing errors.
"""
function _find_next_space_or_tab(line, start_pos::Int)
    stop = lastindex(line)
    i = start_pos
    while i <= stop
        char = line[i]
        if char == ' ' || char == '\t'
            # Extend the run up to the first character that is neither space nor tab.
            j = nextind(line, i)
            while j <= stop && (line[j] == ' ' || line[j] == '\t')
                j = nextind(line, j)
            end
            return i:prevind(line, j)
        end
        # `i + 1` could point inside a multi-byte character; `nextind` never does.
        i = nextind(line, i)
    end
    return 0:-1
end
"""
    get_n_words(line::String, n::Int)

It returns a `Vector{String}` with the first `n` (possible) words/fields of `line`
(delimited by runs of spaces or tabs). If there are more than `n` words, the last word
returned contains the remaining words and their delimiters. The length of the
returned vector is `n` or less (if the number of words is less than `n`); the vector is
empty when `line` is empty or `n` is zero.
This is used for parsing the Stockholm format.

```jldoctest
julia> using MIToS.Utils

julia> get_n_words("#=GR O31698/18-71 SS CCCHHHHHHHHHHHHHHHEEEEEEEEEEEEEEEEHHH", 3)
3-element Vector{String}:
 "#=GR"
 "O31698/18-71"
 "SS CCCHHHHHHHHHHHHHHHEEEEEEEEEEEEEEEEHHH"
```
"""
function get_n_words(line::String, n::Int)
    # `n == 0` must stop here: the loop below always stores at least one word, which
    # would be a write past the end of a zero-length vector.
    if isempty(line) || n == 0
        return String[]
    end
    words = Vector{String}(undef, n) # the constructor rejects a negative `n`
    N = 1 # position of the word being filled; never exceeds `n`
    last_spaces = 0:0
    while true
        word_start = last(last_spaces) + 1
        if N == n
            # Last requested slot: it takes the rest of the line, delimiters included.
            words[N] = line[word_start:end]
            break
        end
        spaces = _find_next_space_or_tab(line, word_start)
        if first(spaces) == 0
            # No more delimiters: the rest of the line is the final word.
            words[N] = line[word_start:end]
            break
        end
        # `prevind` rather than `- 1`: the character just before the delimiter may
        # span several code units, and a slice must end on a valid index.
        words[N] = line[word_start:prevind(line, first(spaces))]
        last_spaces = spaces
        N += 1
    end
    if N != n
        resize!(words, N)
    end
    return words
end
"""
    hascoordinates(id)

Return `true` when `id` (a sequence name) is written as **UniProt/start-end**,
for example `O83071/192-246`: word characters, a slash, and two runs of digits joined
by a hyphen.
"""
function hascoordinates(id)
    pattern = r"^\w+/\d+-\d+$"
    return occursin(pattern, id)
end
"""
    select_element(vector::Array{T,1}, element_name::String = "element")

Return the first element of `vector`; handy for unpacking one-element vectors.
An `ErrorException` is thrown if `vector` is empty, and a warning is logged if it
holds more than one element. `element_name` (*element* by default) is the noun used
in both messages.
"""
function select_element(vector::Array{T,1}, element_name::String = "element") where {T}
    n = length(vector)
    n == 0 && throw(ErrorException("There is not $element_name"))
    if n > 1
        @warn("There are more than one ($n) $element_name using the first.")
    end
    return first(vector)
end
"""
    matrix2list(mat::AbstractMatrix; part = "upper", diagonal::Bool = false)

Collect one triangle of the square matrix `mat` into a vector. `part` selects the
triangle (`"upper"` or `"lower"`); the diagonal is skipped unless `diagonal` is `true`.
The upper triangle is read row by row and the lower triangle column by column.
An `ErrorException` is thrown if `mat` is not square or `part` has another value.
"""
function matrix2list(
    mat::AbstractMatrix{T};
    part = "upper",
    diagonal::Bool = false,
) where {T}
    nrow, ncol = size(mat)
    nrow == ncol || throw(ErrorException("Should be a square matrix"))
    # Distance from the diagonal at which each row/column of the triangle starts.
    offset = diagonal ? 0 : 1
    # Number of cells in one triangle, with or without the diagonal.
    total = diagonal ? div((ncol * ncol) + ncol, 2) : div((ncol * ncol) - ncol, 2)
    list = Vector{T}(undef, total)
    pos = 0
    if part == "upper"
        for i in 1:(ncol-offset), j in (i+offset):ncol
            pos += 1
            list[pos] = mat[i, j]
        end
    elseif part == "lower"
        for j in 1:(ncol-offset), i in (j+offset):ncol
            pos += 1
            list[pos] = mat[i, j]
        end
    else
        throw(ErrorException("part should be \"upper\" or \"lower\""))
    end
    return list
end
"""
    list2matrix(vec::AbstractVector, side::Int; diagonal::Bool = false)

Build a `side` by `side` symmetric matrix from the values of `vec`. The values are
consumed in order to fill the upper triangle row by row, and each one is mirrored
into the lower triangle. Set `diagonal` to `true` when `vec` also carries the
diagonal elements; otherwise the diagonal is left at zero.
"""
function list2matrix(vec::AbstractVector{T}, side::Int; diagonal::Bool = false) where {T}
    offset = diagonal ? 0 : 1
    mat = zeros(T, side, side)
    pos = 0
    for i in 1:(side-offset), j in (i+offset):side
        pos += 1
        # Write the same value on both sides of the diagonal.
        mat[i, j] = mat[j, i] = vec[pos]
    end
    return mat
end
"""
    check_pdbcode(pdbcode::String)

Return `true` if `pdbcode` consists of exactly four word characters, which is the
format accepted here for a PDB code.
"""
function check_pdbcode(pdbcode::String)
    return occursin(r"^\w{4}$", pdbcode)
end
"""
    getarray(x::NamedArray)

Return the `array` field of the `NamedArray` `x`.
"""
function getarray(x::NamedArray)
    return x.array
end