# This file is a part of Julia. License is MIT: https://julialang.org/license

"""
Linear algebra module. Provides array arithmetic,
matrix factorizations and other linear algebra related
functionality.
"""
module LinearAlgebra

import Base: \, /, *, ^, +, -, ==
import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech,
    asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy,
    copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor,
    getindex, hcat, getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle,
    kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims,
    power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin,
    sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat,
    vec, view, zero
using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix,
    @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
    splat
using Base.Broadcast: Broadcasted, broadcasted
using Base.PermutedDimsArrays: CommutativeOps
using OpenBLAS_jll
using libblastrampoline_jll
import Libdl

export
# Modules
    LAPACK,
    BLAS,

# Types
    Adjoint,
    Transpose,
    SymTridiagonal,
    Tridiagonal,
    Bidiagonal,
    Factorization,
    BunchKaufman,
    Cholesky,
    CholeskyPivoted,
    ColumnNorm,
    Eigen,
    GeneralizedEigen,
    GeneralizedSVD,
    GeneralizedSchur,
    Hessenberg,
    LU,
    LDLt,
    NoPivot,
    RowNonZero,
    QR,
    QRPivoted,
    LQ,
    Schur,
    SVD,
    Hermitian,
    RowMaximum,
    Symmetric,
    LowerTriangular,
    UpperTriangular,
    UnitLowerTriangular,
    UnitUpperTriangular,
    UpperHessenberg,
    Diagonal,
    UniformScaling,

# Functions
    axpy!,
    axpby!,
    bunchkaufman,
    bunchkaufman!,
    cholesky,
    cholesky!,
    cond,
    condskeel,
    copyto!,
    copy_transpose!,
    cross,
    adjoint,
    adjoint!,
    det,
    diag,
    diagind,
    diagm,
    dot,
    eigen,
    eigen!,
    eigmax,
    eigmin,
    eigvals,
    eigvals!,
    eigvecs,
    factorize,
    givens,
    hermitianpart,
    hermitianpart!,
    hessenberg,
    hessenberg!,
    isdiag,
    ishermitian,
    isposdef,
    isposdef!,
    issuccess,
    issymmetric,
    istril,
    istriu,
    kron,
    kron!,
    ldiv!,
    ldlt!,
    ldlt,
    logabsdet,
    logdet,
    lowrankdowndate,
    lowrankdowndate!,
    lowrankupdate,
    lowrankupdate!,
    lu,
    lu!,
    lyap,
    mul!,
    lmul!,
    rmul!,
    norm,
    normalize,
    normalize!,
    nullspace,
    ordschur!,
    ordschur,
    pinv,
    qr,
    qr!,
    lq,
    lq!,
    opnorm,
    rank,
    rdiv!,
    reflect!,
    rotate!,
    schur,
    schur!,
    svd,
    svd!,
    svdvals!,
    svdvals,
    sylvester,
    tr,
    transpose,
    transpose!,
    tril,
    triu,
    tril!,
    triu!,

# Operators
    \,
    /,

# Constants
    I

const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32}
const BlasReal = Union{Float64,Float32}
const BlasComplex = Union{ComplexF64,ComplexF32}

if USE_BLAS64
    const BlasInt = Int64
else
    const BlasInt = Int32
end


abstract type Algorithm end
struct DivideAndConquer <: Algorithm end
struct QRIteration <: Algorithm end

abstract type PivotingStrategy end
struct NoPivot <: PivotingStrategy end
struct RowNonZero <: PivotingStrategy end
struct RowMaximum <: PivotingStrategy end
struct ColumnNorm <: PivotingStrategy end

# Check that stride of matrix/vector is 1
# Writing like this to avoid splatting penalty when called with multiple arguments,
# see PR 16416
"""
    stride1(A) -> Int

Return the distance between successive array elements
in dimension 1 in units of element size.

# Examples
```jldoctest
julia> A = [1,2,3,4]
4-element Vector{Int64}:
 1
 2
 3
 4

julia> LinearAlgebra.stride1(A)
1

julia> B = view(A, 2:2:4)
2-element view(::Vector{Int64}, 2:2:4) with eltype Int64:
 2
 4

julia> LinearAlgebra.stride1(B)
2
```
"""
stride1(x) = stride(x,1)
stride1(x::Array) = 1
stride1(x::DenseArray) = stride(x, 1)::Int

@inline chkstride1(A...) = _chkstride1(true, A...)
@noinline _chkstride1(ok::Bool) = ok || error("matrix does not have contiguous columns")
@inline _chkstride1(ok::Bool, A, B...) = _chkstride1(ok & (stride1(A) == 1), B...)
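
# Illustrative sketch (comments only, not executed): `chkstride1` returns `true`
# when every argument's first stride is 1 and errors otherwise, e.g.
#
#   A = rand(4, 4)
#   chkstride1(A)                  # true
#   chkstride1(view(A, 1:2:3, :))  # error: matrix does not have contiguous columns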

"""
    LinearAlgebra.checksquare(A)

Check that a matrix is square, then return its common dimension.
For multiple arguments, return a vector.

# Examples
```jldoctest
julia> A = fill(1, (4,4)); B = fill(1, (5,5));

julia> LinearAlgebra.checksquare(A, B)
2-element Vector{Int64}:
 4
 5
```
"""
function checksquare(A)
    m,n = size(A)
    m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))"))
    m
end

function checksquare(A...)
    sizes = Int[]
    for a in A
        size(a,1)==size(a,2) || throw(DimensionMismatch("matrix is not square: dimensions are $(size(a))"))
        push!(sizes, size(a,1))
    end
    return sizes
end

function char_uplo(uplo::Symbol)
    if uplo === :U
        return 'U'
    elseif uplo === :L
        return 'L'
    else
        throw_uplo()
    end
end

function sym_uplo(uplo::Char)
    if uplo == 'U'
        return :U
    elseif uplo == 'L'
        return :L
    else
        throw_uplo()
    end
end

@noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)"))

"""
    ldiv!(Y, A, B) -> Y

Compute `A \\ B` in place and store the result in `Y`, returning the result.

The argument `A` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `ldiv!` usually also require fine-grained
control over the factorization of `A`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.
291 | |||
292 | # Examples | ||
293 | ```jldoctest | ||
294 | julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2]; | ||
295 | |||
296 | julia> X = [1; 2.5; 3]; | ||
297 | |||
298 | julia> Y = zero(X); | ||
299 | |||
300 | julia> ldiv!(Y, qr(A), X); | ||
301 | |||
302 | julia> Y | ||
303 | 3-element Vector{Float64}: | ||
304 | 0.7128099173553719 | ||
305 | -0.051652892561983674 | ||
306 | 0.10020661157024757 | ||
307 | |||
308 | julia> A\\X | ||
309 | 3-element Vector{Float64}: | ||
310 | 0.7128099173553719 | ||
311 | -0.05165289256198333 | ||
312 | 0.10020661157024785 | ||
313 | ``` | ||
314 | """ | ||
315 | ldiv!(Y, A, B) | ||
316 | |||
317 | """ | ||
318 | ldiv!(A, B) | ||
319 | |||
Compute `A \\ B` in place, overwriting `B` to store the result.

The argument `A` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `ldiv!` usually also require fine-grained
control over the factorization of `A`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.

# Examples
```jldoctest
julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2];

julia> X = [1; 2.5; 3];

julia> Y = copy(X);

julia> ldiv!(qr(A), X);

julia> X
3-element Vector{Float64}:
  0.7128099173553719
 -0.051652892561983674
  0.10020661157024757

julia> A\\Y
3-element Vector{Float64}:
  0.7128099173553719
 -0.05165289256198333
  0.10020661157024785
```
"""
ldiv!(A, B)


"""
    rdiv!(A, B)

Compute `A / B` in place, overwriting `A` to store the result.

The argument `B` should *not* be a matrix. Rather, it should be a
factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
The reason for this is that factorization itself is both expensive and typically allocates memory
(although it can also be done in place via, e.g., [`lu!`](@ref)),
and performance-critical situations requiring `rdiv!` usually also require fine-grained
control over the factorization of `B`.

!!! note
    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
    these are already in a factorized form.
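
# Examples

A small illustration with a `Diagonal` right-hand side, shown as plain code
rather than a doctest (`rdiv!(A, Diagonal(d))` divides column `j` of `A` by `d[j]`):

```julia
A = [1.0 2.0; 3.0 4.0]
rdiv!(A, Diagonal([2.0, 4.0]))
# A == [0.5 0.5; 1.5 1.0]
```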
374 | """ | ||
375 | rdiv!(A, B) | ||
376 | |||
377 | """ | ||
378 | copy_oftype(A, T) | ||
379 | |||
380 | Creates a copy of `A` with eltype `T`. No assertions about mutability of the result are | ||
381 | made. When `eltype(A) == T`, then this calls `copy(A)` which may be overloaded for custom | ||
382 | array types. Otherwise, this calls `convert(AbstractArray{T}, A)`. | ||
383 | """ | ||
384 | copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A) | ||
385 | copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A) | ||
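
# Illustrative sketch: `copy_oftype([1, 2], Int)` hits the first method and is
# simply `copy([1, 2])`, while `copy_oftype([1, 2], Float64)` takes the
# `convert` path and yields `[1.0, 2.0]`.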

"""
    copymutable_oftype(A, T)

Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`.

The resulting matrix typically has similar algebraic structure to `A`. For
example, supplying a tridiagonal matrix results in another tridiagonal matrix.
In general, the type of the output corresponds to that of `similar(A, T)`.

In LinearAlgebra, mutable copies (of some desired eltype) are created to be passed
to in-place algorithms (such as `ldiv!`, `rdiv!`, `lu!` and so on). If the specific
algorithm is known to preserve the algebraic structure, use `copymutable_oftype`.
If the algorithm is known to return a dense matrix (or some wrapper backed by a dense
matrix), then use `copy_similar`.

See also: `Base.copymutable`, `copy_similar`.
"""
copymutable_oftype(A::AbstractArray, ::Type{S}) where {S} = copyto!(similar(A, S), A)

"""
    copy_similar(A, T)

Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`.

Compared to `copymutable_oftype`, the result can be more flexible. In general, the type
of the output corresponds to that of the three-argument method `similar(A, T, size(A))`.

See also: `copymutable_oftype`.
"""
copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)
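
# Sketch of the difference: for a `Tridiagonal` matrix `T`,
# `copymutable_oftype(T, Float64)` goes through `similar(T, Float64)` and yields
# another `Tridiagonal`, whereas `copy_similar(T, Float64)` goes through
# `similar(T, Float64, size(T))` and yields a dense `Matrix{Float64}`.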

include("adjtrans.jl")
include("transpose.jl")

include("exceptions.jl")
include("generic.jl")

include("blas.jl")
include("matmul.jl")
include("lapack.jl")

include("dense.jl")
include("tridiag.jl")
include("triangular.jl")

include("factorization.jl")
include("eigen.jl")
include("svd.jl")
include("symmetric.jl")
include("cholesky.jl")
include("lu.jl")
include("bunchkaufman.jl")
include("diagonal.jl")
include("symmetriceigen.jl")
include("bidiag.jl")
include("uniformscaling.jl")
include("qr.jl")
include("lq.jl")
include("hessenberg.jl")
include("abstractq.jl")
include("givens.jl")
include("special.jl")
include("bitarray.jl")
include("ldlt.jl")
include("schur.jl")
include("structuredbroadcast.jl")
include("deprecated.jl")

const ⋅ = dot
const × = cross
export ⋅, ×

wrapper_char(::AbstractArray) = 'N'
wrapper_char(::Adjoint) = 'C'
wrapper_char(::Adjoint{<:Real}) = 'T'
wrapper_char(::Transpose) = 'T'
wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h'
wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's'
wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's'

Base.@constprop :aggressive function wrap(A::AbstractVecOrMat, tA::AbstractChar)
    if tA == 'N'
        return A
    elseif tA == 'T'
        return transpose(A)
    elseif tA == 'C'
        return adjoint(A)
    elseif tA == 'H'
        return Hermitian(A, :U)
    elseif tA == 'h'
        return Hermitian(A, :L)
    elseif tA == 'S'
        return Symmetric(A, :U)
    else # tA == 's'
        return Symmetric(A, :L)
    end
end
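
# Round-trip sketch: `wrap` undoes `wrapper_char`. E.g. for
# `At = transpose(rand(2, 2))`:
#
#   wrapper_char(At)                    # 'T'
#   wrap(parent(At), wrapper_char(At))  # transpose(parent(At)), i.e. At again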

_unwrap(A::AbstractVecOrMat) = A

## convenience methods
## return only the solution of a least squares problem while avoiding promoting
## vectors to matrices.
_cut_B(x::AbstractVector, r::UnitRange) = length(x) > length(r) ? x[r] : x
_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X

# SymTridiagonal ev can be the same length as dv, but the last element is
# ignored. However, some methods can fail if they read the entire ev
# rather than just the meaningful elements. This is a helper function
# for getting only the meaningful elements of ev. See #41089
_evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2]
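# E.g. if `S.dv` has length 3, `_evview(S)` is a view of the first two entries
# of `S.ev`, whether `ev` has the usual length 2 or the padded length 3.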

## append right hand side with zeros if necessary
_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))

# convert to Vector, if necessary
_makevector(x::Vector) = x
_makevector(x::AbstractVector) = Vector(x)

# append a zero element / drop the last element
_pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B)
_droplast!(A) = deleteat!(A, lastindex(A))

# some trait like this would be cool
# onedefined(::Type{T}) where {T} = hasmethod(one, (T,))
# but we are actually asking for oneunit(T), which is, however, defined for generic T as
# `T(one(T))`, so the question is equivalent to whether one(T) is defined
onedefined(::Type) = false
onedefined(::Type{<:Number}) = true

# initialize return array for op(A, B)
_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} =
    (onedefined(TA) && onedefined(TB)) ?
        typeof(matprod(oneunit(TA), oneunit(TB))) :
        promote_op(matprod, TA, TB)
_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
    (onedefined(TA) && onedefined(TB)) ?
        typeof(op(oneunit(TA), oneunit(TB))) :
        promote_op(op, TA, TB)
_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
    similar(C, _init_eltype(op, TA, TB), size(C))
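
# Sketch: the `oneunit` branch computes the result type directly, e.g.
#   _init_eltype(*, Int, Float64) == Float64   # via typeof(matprod(1, 1.0))
# while `promote_op` is the inference-based fallback for element types
# without `one`/`oneunit` (`matprod` is the `x*y + x*y` kernel from matmul.jl).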

# General fallback definition for handling under- and overdetermined systems as well as square problems
# While this definition is pretty general, it does, e.g., promote to a common element type of lhs and rhs,
# which is required by LAPACK but not by SuiteSparse, which allows real-complex solves in some cases. Hence,
# we restrict this method to only the LAPACK factorizations in LinearAlgebra.
# The definition is put here since it explicitly references all the Factorization structs so it has
# to be located after all the files that define the structs.
const LAPACKFactorizations{T,S} = Union{
    BunchKaufman{T,S},
    Cholesky{T,S},
    LQ{T,S},
    LU{T,S},
    QR{T,S},
    QRCompactWY{T,S},
    QRPivoted{T,S},
    SVD{T,<:Real,S}}

(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B)
(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B)
(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B)

function ldiv(F::Factorization, B::AbstractVecOrMat)
    require_one_based_indexing(B)
    m, n = size(F)
    if m != size(B, 1)
        throw(DimensionMismatch("arguments must have the same number of rows"))
    end

    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
    FF = Factorization{TFB}(F)

    # For wide problems we (often) compute a minimum norm solution. The solution
    # is larger than the right hand side so we use size(F, 2).
    BB = _zeros(TFB, B, n)

    if n > size(B, 1)
        # Underdetermined
        copyto!(view(BB, 1:m, :), B)
    else
        copyto!(BB, B)
    end

    ldiv!(FF, BB)

    # For tall problems, we compute a least squares solution so only part
    # of the rhs should be returned from \ while ldiv! uses (and returns)
    # the complete rhs
    return _cut_B(BB, 1:n)
end
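
# Illustrative behavior of the two branches (sketch, not executed):
#   qr(rand(5, 3)) \ rand(5)  # tall: least-squares solution of length 3 (rhs cut by _cut_B)
#   lq(rand(3, 5)) \ rand(3)  # wide: rhs zero-padded to length 5 by _zeros, minimum norm solution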
# disambiguate
(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}})
(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    ldiv(F, B)
(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
    ldiv(F, B)

587 | """ | ||
588 | LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) | ||
589 | |||
590 | `peakflops` computes the peak flop rate of the computer by using double precision | ||
591 | [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it | ||
592 | multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using | ||
593 | multiple threads, higher flop rates are realized. The number of BLAS threads can be set with | ||
594 | [`BLAS.set_num_threads(n)`](@ref). | ||
595 | |||
596 | If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements | ||
597 | of type `eltype` for calculating the peak flop rate. | ||
598 | |||
599 | By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument | ||
600 | is provided, `peakflops` will use those many trials for picking the best timing. | ||
601 | |||
602 | If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all | ||
603 | the worker processors. The flop rate of the entire parallel computer is returned. When | ||
604 | running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size | ||
605 | of the problem that is solved on each processor. | ||
606 | |||
607 | !!! compat "Julia 1.1" | ||
608 | This function requires at least Julia 1.1. In Julia 1.0 it is available from | ||
609 | the standard library `InteractiveUtils`. | ||
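
# Examples

Results are machine-dependent, so they are shown as plain code rather than a
doctest:

```julia
julia> LinearAlgebra.peakflops()  # Float64 gemm with n = 4096; returns flops/second

julia> LinearAlgebra.peakflops(2048; eltype = Float32, ntrials = 5)
```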
610 | """ | ||
611 | function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) | ||
612 | t = zeros(Float64, ntrials) | ||
613 | for i=1:ntrials | ||
614 | a = ones(eltype,n,n) | ||
615 | t[i] = @elapsed a2 = a*a | ||
616 | @assert a2[1,1] == n | ||
617 | end | ||
618 | |||
619 | if parallel | ||
620 | let Distributed = Base.require(Base.PkgId( | ||
621 | Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) | ||
622 | nworkers = @invokelatest Distributed.nworkers() | ||
623 | results = @invokelatest Distributed.pmap(peakflops, fill(n, nworkers)) | ||
624 | return sum(results) | ||
625 | end | ||
626 | else | ||
627 | return 2*Float64(n)^3 / minimum(t) | ||
628 | end | ||
629 | end | ||


function versioninfo(io::IO=stdout)
    indent = "  "
    config = BLAS.get_config()
    build_flags = join(string.(config.build_flags), ", ")
    println(io, "BLAS: ", BLAS.libblastrampoline, " (", build_flags, ")")
    for lib in config.loaded_libs
        interface = uppercase(string(lib.interface))
        println(io, indent, "--> ", lib.libname, " (", interface, ")")
    end
    println(io, "Threading:")
    println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize())
    println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid())
    println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads())
    println(io, "Relevant environment variables:")
    env_var_names = [
        "JULIA_NUM_THREADS",
        "MKL_DYNAMIC",
        "MKL_NUM_THREADS",
        # OpenBLAS has a hierarchy of environment variables for setting the
        # number of threads, see
        # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
        ("OPENBLAS_NUM_THREADS", "GOTO_NUM_THREADS", "OMP_NUM_THREADS"),
    ]
    printed_at_least_one_env_var = false
    print_var(io, indent, name) = println(io, indent, name, " = ", ENV[name])
    for name in env_var_names
        if name isa Tuple
            # If `name` is a Tuple, then find the first environment variable which is
            # defined, and disregard the following ones.
            for nm in name
                if haskey(ENV, nm)
                    print_var(io, indent, nm)
                    printed_at_least_one_env_var = true
                    break
                end
            end
        else
            if haskey(ENV, name)
                print_var(io, indent, name)
                printed_at_least_one_env_var = true
            end
        end
    end
    if !printed_at_least_one_env_var
        println(io, indent, "[none]")
    end
    return nothing
end

function __init__()
    try
        BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true)
        BLAS.check()
    catch ex
        Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
    end
    # register a hook to disable BLAS threading
    Base.at_disable_library_threading(() -> BLAS.set_num_threads(1))

    # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
    if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
        @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
            BLAS.set_num_threads(max(1, Sys.CPU_THREADS))
        else
            BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2))
        end
    end
end

end # module LinearAlgebra