# This file is a part of Julia. License is MIT: https://julialang.org/license

"""
Linear algebra module. Provides array arithmetic,
matrix factorizations and other linear algebra related
functionality.
"""
module LinearAlgebra

import Base: \, /, *, ^, +, -, ==
import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech,
    asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy,
    copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor,
    getindex, hcat, getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle,
    kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims,
    power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin,
    sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat,
    vec, view, zero
using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix,
    @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
    splat
using Base.Broadcast: Broadcasted, broadcasted
using Base.PermutedDimsArrays: CommutativeOps
using OpenBLAS_jll
using libblastrampoline_jll
import Libdl

export
# Modules
    LAPACK,
    BLAS,

# Types
    Adjoint,
    Transpose,
    SymTridiagonal,
    Tridiagonal,
    Bidiagonal,
    Factorization,
    BunchKaufman,
    Cholesky,
    CholeskyPivoted,
    ColumnNorm,
    Eigen,
    GeneralizedEigen,
    GeneralizedSVD,
    GeneralizedSchur,
    Hessenberg,
    LU,
    LDLt,
    NoPivot,
    RowNonZero,
    QR,
    QRPivoted,
    LQ,
    Schur,
    SVD,
    Hermitian,
    RowMaximum,
    Symmetric,
    LowerTriangular,
    UpperTriangular,
    UnitLowerTriangular,
    UnitUpperTriangular,
    UpperHessenberg,
    Diagonal,
    UniformScaling,

# Functions
    axpy!,
    axpby!,
    bunchkaufman,
    bunchkaufman!,
    cholesky,
    cholesky!,
    cond,
    condskeel,
    copyto!,
    copy_transpose!,
    cross,
    adjoint,
    adjoint!,
    det,
    diag,
    diagind,
    diagm,
    dot,
    eigen,
    eigen!,
    eigmax,
    eigmin,
    eigvals,
    eigvals!,
    eigvecs,
    factorize,
    givens,
    hermitianpart,
    hermitianpart!,
    hessenberg,
    hessenberg!,
    isdiag,
    ishermitian,
    isposdef,
    isposdef!,
    issuccess,
    issymmetric,
    istril,
    istriu,
    kron,
    kron!,
    ldiv!,
    ldlt!,
    ldlt,
    logabsdet,
    logdet,
    lowrankdowndate,
    lowrankdowndate!,
    lowrankupdate,
    lowrankupdate!,
    lu,
    lu!,
    lyap,
    mul!,
    lmul!,
    rmul!,
    norm,
    normalize,
    normalize!,
    nullspace,
    ordschur!,
    ordschur,
    pinv,
    qr,
    qr!,
    lq,
    lq!,
    opnorm,
    rank,
    rdiv!,
    reflect!,
    rotate!,
    schur,
    schur!,
    svd,
    svd!,
    svdvals!,
    svdvals,
    sylvester,
    tr,
    transpose,
    transpose!,
    tril,
    triu,
    tril!,
    triu!,

# Operators
    \,
    /,

# Constants
    I

const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32}
const BlasReal = Union{Float64,Float32}
const BlasComplex = Union{ComplexF64,ComplexF32}

if USE_BLAS64
    const BlasInt = Int64
else
    const BlasInt = Int32
end


abstract type Algorithm end
struct DivideAndConquer <: Algorithm end
struct QRIteration <: Algorithm end

abstract type PivotingStrategy end
struct NoPivot <: PivotingStrategy end
struct RowNonZero <: PivotingStrategy end
struct RowMaximum <: PivotingStrategy end
struct ColumnNorm <: PivotingStrategy end

# Check that stride of matrix/vector is 1
# Writing like this to avoid splatting penalty when called with multiple arguments,
# see PR 16416
"""
    stride1(A) -> Int

Return the distance between successive array elements
in dimension 1 in units of element size.

# Examples
```jldoctest
julia> A = [1,2,3,4]
4-element Vector{Int64}:
 1
 2
 3
 4

julia> LinearAlgebra.stride1(A)
1

julia> B = view(A, 2:2:4)
2-element view(::Vector{Int64}, 2:2:4) with eltype Int64:
 2
 4

julia> LinearAlgebra.stride1(B)
2
```
"""
stride1(x) = stride(x,1)
stride1(x::Array) = 1
stride1(x::DenseArray) = stride(x, 1)::Int

@inline chkstride1(A...) = _chkstride1(true, A...)
@noinline _chkstride1(ok::Bool) = ok || error("matrix does not have contiguous columns")
@inline _chkstride1(ok::Bool, A, B...) = _chkstride1(ok & (stride1(A) == 1), B...)
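
# For example (illustrative): `chkstride1(A, B)` errors unless both
# `stride(A, 1) == 1` and `stride(B, 1) == 1`; a column slice such as
# `view(M, :, 2:3)` of a `Matrix` passes, while `view(M, 2:2:4, :)` does not.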

"""
    LinearAlgebra.checksquare(A)

Check that a matrix is square, then return its common dimension.
For multiple arguments, return a vector.

# Examples
```jldoctest
julia> A = fill(1, (4,4)); B = fill(1, (5,5));

julia> LinearAlgebra.checksquare(A, B)
2-element Vector{Int64}:
 4
 5
```
"""
function checksquare(A)
    m,n = size(A)
    m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))"))
    m
end

function checksquare(A...)
    sizes = Int[]
    for a in A
        size(a,1)==size(a,2) || throw(DimensionMismatch("matrix is not square: dimensions are $(size(a))"))
        push!(sizes, size(a,1))
    end
    return sizes
end

function char_uplo(uplo::Symbol)
    if uplo === :U
        return 'U'
    elseif uplo === :L
        return 'L'
    else
        throw_uplo()
    end
end

function sym_uplo(uplo::Char)
    if uplo == 'U'
        return :U
    elseif uplo == 'L'
        return :L
    else
        throw_uplo()
    end
end

@noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)"))
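
# For example, `char_uplo(:U) == 'U'` and `sym_uplo('L') == :L`; any other
# input throws an `ArgumentError` via `throw_uplo`.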

"""
    ldiv!(Y, A, B) -> Y

Compute `A \\ B` in-place and store the result in `Y`, returning the result.

The argument `A` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in-place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `ldiv!` usually also require fine-grained
control over the factorization of `A`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.

# Examples
```jldoctest
julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2];

julia> X = [1; 2.5; 3];

julia> Y = zero(X);

julia> ldiv!(Y, qr(A), X);

julia> Y
3-element Vector{Float64}:
  0.7128099173553719
 -0.051652892561983674
  0.10020661157024757

julia> A\\X
3-element Vector{Float64}:
  0.7128099173553719
 -0.05165289256198333
  0.10020661157024785
```
"""
ldiv!(Y, A, B)

"""
    ldiv!(A, B)

Compute `A \\ B` in-place, overwriting `B` to store the result.

The argument `A` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in-place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `ldiv!` usually also require fine-grained
control over the factorization of `A`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.

# Examples
```jldoctest
julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2];

julia> X = [1; 2.5; 3];

julia> Y = copy(X);

julia> ldiv!(qr(A), X);

julia> X
3-element Vector{Float64}:
  0.7128099173553719
 -0.051652892561983674
  0.10020661157024757

julia> A\\Y
3-element Vector{Float64}:
  0.7128099173553719
 -0.05165289256198333
  0.10020661157024785
```
"""
ldiv!(A, B)


"""
    rdiv!(A, B)

Compute `A / B` in-place, overwriting `A` to store the result.

The argument `B` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in-place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `rdiv!` usually also require fine-grained
control over the factorization of `B`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.
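
# Examples

A minimal sketch using a `Diagonal` divisor, which is already in factorized
form (output shown for illustration):

```julia-repl
julia> A = [1.0 2.0; 3.0 4.0];

julia> rdiv!(A, Diagonal([1.0, 2.0]));

julia> A
2×2 Matrix{Float64}:
 1.0  1.0
 3.0  2.0
```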
"""
rdiv!(A, B)

"""
    copy_oftype(A, T)

Creates a copy of `A` with eltype `T`. No assertions about mutability of the result are
made. When `eltype(A) == T`, then this calls `copy(A)` which may be overloaded for custom
array types. Otherwise, this calls `convert(AbstractArray{T}, A)`.
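
For instance (illustrative; custom array types may display differently):

```julia-repl
julia> A = [1 2; 3 4];

julia> LinearAlgebra.copy_oftype(A, Float64)
2×2 Matrix{Float64}:
 1.0  2.0
 3.0  4.0
```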
"""
copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A)
copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A)

"""
    copymutable_oftype(A, T)

Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`.

The resulting matrix typically has similar algebraic structure to `A`. For
example, supplying a tridiagonal matrix results in another tridiagonal matrix.
In general, the type of the output corresponds to that of `similar(A, T)`.

In LinearAlgebra, mutable copies (of some desired eltype) are created to be passed
to in-place algorithms (such as `ldiv!`, `rdiv!`, `lu!` and so on). If the specific
algorithm is known to preserve the algebraic structure, use `copymutable_oftype`.
If the algorithm is known to return a dense matrix (or some wrapper backed by a dense
matrix), then use `copy_similar`.

See also: `Base.copymutable`, `copy_similar`.
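
For instance, structure is preserved for a `Tridiagonal` (illustrative output):

```julia-repl
julia> M = Tridiagonal([1, 2], [3, 4, 5], [6, 7]);

julia> LinearAlgebra.copymutable_oftype(M, Float64)
3×3 Tridiagonal{Float64, Vector{Float64}}:
 3.0  6.0   ⋅
 1.0  4.0  7.0
  ⋅   2.0  5.0
```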
"""
copymutable_oftype(A::AbstractArray, ::Type{S}) where {S} = copyto!(similar(A, S), A)

"""
    copy_similar(A, T)

Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`.

Compared to `copymutable_oftype`, the result can be more flexible. In general, the type
of the output corresponds to that of the three-argument method `similar(A, T, size(A))`.

See also: `copymutable_oftype`.
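
For instance, a `Tridiagonal` input yields a dense result, since the
three-argument `similar` drops the banded structure (illustrative output):

```julia-repl
julia> M = Tridiagonal([1, 2], [3, 4, 5], [6, 7]);

julia> LinearAlgebra.copy_similar(M, Float64)
3×3 Matrix{Float64}:
 3.0  6.0  0.0
 1.0  4.0  7.0
 0.0  2.0  5.0
```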
"""
copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)


include("adjtrans.jl")
include("transpose.jl")

include("exceptions.jl")
include("generic.jl")

include("blas.jl")
include("matmul.jl")
include("lapack.jl")

include("dense.jl")
include("tridiag.jl")
include("triangular.jl")

include("factorization.jl")
include("eigen.jl")
include("svd.jl")
include("symmetric.jl")
include("cholesky.jl")
include("lu.jl")
include("bunchkaufman.jl")
include("diagonal.jl")
include("symmetriceigen.jl")
include("bidiag.jl")
include("uniformscaling.jl")
include("qr.jl")
include("lq.jl")
include("hessenberg.jl")
include("abstractq.jl")
include("givens.jl")
include("special.jl")
include("bitarray.jl")
include("ldlt.jl")
include("schur.jl")
include("structuredbroadcast.jl")
include("deprecated.jl")

const ⋅ = dot
const × = cross
export ⋅, ×

wrapper_char(::AbstractArray) = 'N'
wrapper_char(::Adjoint) = 'C'
wrapper_char(::Adjoint{<:Real}) = 'T'
wrapper_char(::Transpose) = 'T'
wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h'
wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's'
wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's'

Base.@constprop :aggressive function wrap(A::AbstractVecOrMat, tA::AbstractChar)
    if tA == 'N'
        return A
    elseif tA == 'T'
        return transpose(A)
    elseif tA == 'C'
        return adjoint(A)
    elseif tA == 'H'
        return Hermitian(A, :U)
    elseif tA == 'h'
        return Hermitian(A, :L)
    elseif tA == 'S'
        return Symmetric(A, :U)
    else # tA == 's'
        return Symmetric(A, :L)
    end
end
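
# For example, `wrap(A, 'T')` returns `transpose(A)` and `wrap(A, 'S')` returns
# `Symmetric(A, :U)`, undoing the tags produced by `wrapper_char` above.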

_unwrap(A::AbstractVecOrMat) = A

## convenience methods
## return only the solution of a least squares problem while avoiding promoting
## vectors to matrices.
_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X

# SymTridiagonal ev can be the same length as dv, but the last element is
# ignored. However, some methods can fail if they read the entire ev
# rather than just the meaningful elements. This is a helper function
# for getting only the meaningful elements of ev. See #41089
_evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2]
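
# For example, for `S = SymTridiagonal(dv, ev)` with `length(ev) == length(dv)`,
# `_evview(S)` covers only the first `length(dv) - 1` elements of `ev`.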

## append right hand side with zeros if necessary
_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))

# convert to Vector, if necessary
_makevector(x::Vector) = x
_makevector(x::AbstractVector) = Vector(x)

# append a zero element / drop the last element
_pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B)
_droplast!(A) = deleteat!(A, lastindex(A))

# some trait like this would be cool
# onedefined(::Type{T}) where {T} = hasmethod(one, (T,))
# but we are actually asking for oneunit(T), which is defined for generic T as
# `T(one(T))`, so the question is equivalent to whether one(T) is defined
onedefined(::Type) = false
onedefined(::Type{<:Number}) = true

# initialize return array for op(A, B)
_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} =
    (onedefined(TA) && onedefined(TB)) ?
        typeof(matprod(oneunit(TA), oneunit(TB))) :
        promote_op(matprod, TA, TB)
_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
    (onedefined(TA) && onedefined(TB)) ?
        typeof(op(oneunit(TA), oneunit(TB))) :
        promote_op(op, TA, TB)
_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
    similar(C, _init_eltype(op, TA, TB), size(C))
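
# For example (illustrative): `_init_eltype(*, Int, Float64)` evaluates to
# `Float64`, so `_initarray(*, Int, Float64, C)` allocates a `Float64` array
# with the shape of `C`.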

# General fallback definition for handling under- and overdetermined systems as well as square problems
# While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
# which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence,
# we restrict this method to only the LAPACK factorizations in LinearAlgebra.
# The definition is put here since it explicitly references all the Factorization structs so it has
# to be located after all the files that define the structs.
const LAPACKFactorizations{T,S} = Union{
    BunchKaufman{T,S},
    Cholesky{T,S},
    LQ{T,S},
    LU{T,S},
    QR{T,S},
    QRCompactWY{T,S},
    QRPivoted{T,S},
    SVD{T,<:Real,S}}

(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B)
(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B)
(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B)

function ldiv(F::Factorization, B::AbstractVecOrMat)
    require_one_based_indexing(B)
    m, n = size(F)
    if m != size(B, 1)
        throw(DimensionMismatch("arguments must have the same number of rows"))
    end

    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
    FF = Factorization{TFB}(F)

    # For wide problem we (often) compute a minimum norm solution. The solution
    # is larger than the right hand side so we use size(F, 2).
    BB = _zeros(TFB, B, n)

    if n > size(B, 1)
        # Underdetermined
        copyto!(view(BB, 1:m, :), B)
    else
        copyto!(BB, B)
    end

    ldiv!(FF, BB)

    # For tall problems, we compute a least squares solution so only part
    # of the rhs should be returned from \ while ldiv! uses (and returns)
    # the complete rhs
    return _cut_B(BB, 1:n)
end
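
# For example (illustrative): for a tall 5×3 problem, `qr(A) \ b` returns the
# 3-element least-squares solution, while for a wide problem `lq(A) \ b`
# returns the minimum-norm solution of length `size(A, 2)`.
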
# disambiguate
(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}})
(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    ldiv(F, B)
(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    ldiv(F, B)

"""
    LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)

`peakflops` computes the peak flop rate of the computer by using double precision
[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it
multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using
multiple threads, higher flop rates are realized. The number of BLAS threads can be set with
[`BLAS.set_num_threads(n)`](@ref).

If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements
of type `eltype` for calculating the peak flop rate.

By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument
is provided, `peakflops` will use that many trials for picking the best timing.

If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all
the worker processors. The flop rate of the entire parallel computer is returned. When
running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size
of the problem that is solved on each processor.

!!! compat "Julia 1.1"
    This function requires at least Julia 1.1. In Julia 1.0 it is available from
    the standard library `InteractiveUtils`.
"""
function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
    t = zeros(Float64, ntrials)
    for i=1:ntrials
        a = ones(eltype,n,n)
        t[i] = @elapsed a2 = a*a
        @assert a2[1,1] == n
    end

    if parallel
        let Distributed = Base.require(Base.PkgId(
                Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
            nworkers = @invokelatest Distributed.nworkers()
            results = @invokelatest Distributed.pmap(peakflops, fill(n, nworkers))
            return sum(results)
        end
    else
        return 2*Float64(n)^3 / minimum(t)
    end
end
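
# For example (illustrative; results vary by machine and BLAS threading):
# `LinearAlgebra.peakflops(2048; ntrials=5)` times five 2048×2048 multiplies
# and reports the best rate observed.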

function versioninfo(io::IO=stdout)
    indent = "  "
    config = BLAS.get_config()
    build_flags = join(string.(config.build_flags), ", ")
    println(io, "BLAS: ", BLAS.libblastrampoline, " (", build_flags, ")")
    for lib in config.loaded_libs
        interface = uppercase(string(lib.interface))
        println(io, indent, "--> ", lib.libname, " (", interface, ")")
    end
    println(io, "Threading:")
    println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize())
    println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid())
    println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads())
    println(io, "Relevant environment variables:")
    env_var_names = [
        "JULIA_NUM_THREADS",
        "MKL_DYNAMIC",
        "MKL_NUM_THREADS",
        # OpenBLAS has a hierarchy of environment variables for setting the
        # number of threads, see
        # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
        ("OPENBLAS_NUM_THREADS", "GOTO_NUM_THREADS", "OMP_NUM_THREADS"),
    ]
    printed_at_least_one_env_var = false
    print_var(io, indent, name) = println(io, indent, name, " = ", ENV[name])
    for name in env_var_names
        if name isa Tuple
            # If `name` is a Tuple, then find the first environment variable which is
            # defined, and disregard the following ones.
            for nm in name
                if haskey(ENV, nm)
                    print_var(io, indent, nm)
                    printed_at_least_one_env_var = true
                    break
                end
            end
        else
            if haskey(ENV, name)
                print_var(io, indent, name)
                printed_at_least_one_env_var = true
            end
        end
    end
    if !printed_at_least_one_env_var
        println(io, indent, "[none]")
    end
    return nothing
end

function __init__()
    try
        BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true)
        BLAS.check()
    catch ex
        Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
    end
    # register a hook to disable BLAS threading
    Base.at_disable_library_threading(() -> BLAS.set_num_threads(1))

    # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
    if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
        @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
            BLAS.set_num_threads(max(1, Sys.CPU_THREADS))
        else
            BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2))
        end
    end
end

end # module LinearAlgebra