Line | Exclusive | Inclusive | Code |
---|---|---|---|
1 | # This file is a part of Julia. License is MIT: https://julialang.org/license | ||
2 | |||
3 | """ | ||
4 | Interface to BLAS subroutines. | ||
5 | """ | ||
6 | module BLAS | ||
7 | |||
8 | import Base: copyto! | ||
9 | using Base: require_one_based_indexing, USE_BLAS64 | ||
10 | |||
11 | export | ||
12 | # Note: `xFUNC_NAME` is a placeholder for not exported BLAS functions | ||
13 | # ref: http://www.netlib.org/blas/blasqr.pdf | ||
14 | # Level 1 | ||
15 | # xROTG | ||
16 | # xROTMG | ||
17 | rot!, | ||
18 | # xROTM | ||
19 | # xSWAP | ||
20 | scal!, | ||
21 | scal, | ||
22 | blascopy!, | ||
23 | # xAXPY!, | ||
24 | # xAXPBY!, | ||
25 | # xDOT | ||
26 | dotc, | ||
27 | dotu, | ||
28 | # xxDOT | ||
29 | nrm2, | ||
30 | asum, | ||
31 | iamax, | ||
32 | # Level 2 | ||
33 | gemv!, | ||
34 | gemv, | ||
35 | gbmv!, | ||
36 | gbmv, | ||
37 | hemv!, | ||
38 | hemv, | ||
39 | # xHBMV | ||
40 | hpmv!, | ||
41 | symv!, | ||
42 | symv, | ||
43 | sbmv!, | ||
44 | sbmv, | ||
45 | spmv!, | ||
46 | trmv!, | ||
47 | trmv, | ||
48 | # xTBMV | ||
49 | # xTPMV | ||
50 | trsv!, | ||
51 | trsv, | ||
52 | # xTBSV | ||
53 | # xTPSV | ||
54 | ger!, | ||
55 | # xGERU | ||
56 | # xGERC | ||
57 | her!, | ||
58 | # xHPR | ||
59 | # xHER2 | ||
60 | # xHPR2 | ||
61 | syr!, | ||
62 | spr!, | ||
63 | # xSYR2 | ||
64 | # xSPR2 | ||
65 | # Level 3 | ||
66 | gemm!, | ||
67 | gemm, | ||
68 | symm!, | ||
69 | symm, | ||
70 | hemm!, | ||
71 | hemm, | ||
72 | syrk!, | ||
73 | syrk, | ||
74 | herk!, | ||
75 | herk, | ||
76 | syr2k!, | ||
77 | syr2k, | ||
78 | her2k!, | ||
79 | her2k, | ||
80 | trmm!, | ||
81 | trmm, | ||
82 | trsm!, | ||
83 | trsm | ||
84 | |||
85 | using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1 | ||
86 | |||
87 | include("lbt.jl") | ||
88 | |||
89 | # Legacy bindings that some packages (such as NNlib.jl) use. | ||
90 | # We maintain these for backwards-compatibility but new packages | ||
91 | # should not look at these, instead preferring to parse the output | ||
92 | # of BLAS.get_config() | ||
93 | const libblas = libblastrampoline | ||
94 | const liblapack = libblastrampoline | ||
95 | |||
96 | vendor() = :lbt | ||
97 | |||
98 | """ | ||
99 | get_config() | ||
100 | |||
101 | Return an object representing the current `libblastrampoline` configuration. | ||
102 | |||
103 | !!! compat "Julia 1.7" | ||
104 | `get_config()` requires at least Julia 1.7. | ||
105 | """ | ||
106 | get_config() = lbt_get_config() | ||
107 | |||
108 | if USE_BLAS64 | ||
109 | macro blasfunc(x) | ||
110 | return Expr(:quote, Symbol(x, "64_")) | ||
111 | end | ||
112 | else | ||
113 | macro blasfunc(x) | ||
114 | return Expr(:quote, x) | ||
115 | end | ||
116 | end | ||
117 | |||
118 | _tryparse_env_int(key) = tryparse(Int, get(ENV, key, "")) | ||
119 | |||
120 | |||
121 | """ | ||
122 | set_num_threads(n::Integer) | ||
123 | set_num_threads(::Nothing) | ||
124 | |||
125 | Set the number of threads the BLAS library should use equal to `n::Integer`. | ||
126 | |||
127 | Also accepts `nothing`, in which case julia tries to guess the default number of threads. | ||
128 | Passing `nothing` is discouraged and mainly exists for historical reasons. | ||
129 | """ | ||
130 | set_num_threads(nt::Integer)::Nothing = lbt_set_num_threads(Int32(nt)) | ||
131 | function set_num_threads(::Nothing) | ||
132 | nt = something( | ||
133 | _tryparse_env_int("OPENBLAS_NUM_THREADS"), | ||
134 | _tryparse_env_int("OMP_NUM_THREADS"), | ||
135 | _tryparse_env_int("VECLIB_MAXIMUM_THREADS"), | ||
136 | max(1, Sys.CPU_THREADS รท 2), | ||
137 | ) | ||
138 | return set_num_threads(nt) | ||
139 | end | ||
140 | |||
141 | """ | ||
142 | get_num_threads() | ||
143 | |||
144 | Get the number of threads the BLAS library is using. | ||
145 | |||
146 | !!! compat "Julia 1.6" | ||
147 | `get_num_threads` requires at least Julia 1.6. | ||
148 | """ | ||
149 | get_num_threads()::Int = lbt_get_num_threads() | ||
150 | |||
151 | function check() | ||
152 | # TODO: once we have bitfields of the BLAS functions that are actually forwarded, | ||
153 | # ensure that we have a complete set here (warning on an incomplete BLAS implementation) | ||
154 | config = get_config() | ||
155 | |||
156 | # Ensure that one of our loaded libraries satisfies our interface requirement | ||
157 | interface = USE_BLAS64 ? :ilp64 : :lp64 | ||
158 | if !any(lib.interface == interface for lib in config.loaded_libs) | ||
159 | interfacestr = uppercase(string(interface)) | ||
160 | @error("No loaded BLAS libraries were built with $(interfacestr) support") | ||
161 | println("Quitting.") | ||
162 | exit() | ||
163 | end | ||
164 | end | ||
165 | |||
166 | "Check that upper/lower (for special matrices) is correctly specified" | ||
167 | function chkuplo(uplo::AbstractChar) | ||
168 | if !(uplo == 'U' || uplo == 'L') | ||
169 | throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo")) | ||
170 | end | ||
171 | uplo | ||
172 | end | ||
173 | |||
174 | # Level 1 | ||
175 | # A help function to pick the pointer and inc for 1d like inputs. | ||
176 | @inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing) | ||
177 | Base._checkcontiguous(Bool, x) && return pointer(x), 1 # simplify runtime check when possible | ||
178 | st, ptr = checkedstride(x), pointer(x) | ||
179 | isnothing(stride0check) || (st == 0 && throw(stride0check)) | ||
180 | ptr += min(st, 0) * sizeof(eltype(x)) * (length(x) - 1) | ||
181 | ptr, st | ||
182 | end | ||
183 | function checkedstride(x::AbstractArray) | ||
184 | szs::Dims = size(x) | ||
185 | sts::Dims = strides(x) | ||
186 | _, st, n = Base.merge_adjacent_dim(szs, sts) | ||
187 | n === ndims(x) && return st | ||
188 | throw(ArgumentError("only support vector like inputs")) | ||
189 | end | ||
190 | ## copy | ||
191 | |||
192 | """ | ||
193 | blascopy!(n, X, incx, Y, incy) | ||
194 | |||
195 | Copy `n` elements of array `X` with stride `incx` to array `Y` with stride `incy`. Returns `Y`. | ||
196 | """ | ||
197 | function blascopy! end | ||
198 | |||
199 | for (fname, elty) in ((:dcopy_,:Float64), | ||
200 | (:scopy_,:Float32), | ||
201 | (:zcopy_,:ComplexF64), | ||
202 | (:ccopy_,:ComplexF32)) | ||
203 | @eval begin | ||
204 | # SUBROUTINE DCOPY(N,DX,INCX,DY,INCY) | ||
205 | function blascopy!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) | ||
206 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
207 | (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
208 | n, DX, incx, DY, incy) | ||
209 | DY | ||
210 | end | ||
211 | end | ||
212 | end | ||
213 | |||
214 | |||
215 | ## rot | ||
216 | |||
217 | """ | ||
218 | rot!(n, X, incx, Y, incy, c, s) | ||
219 | |||
220 | Overwrite `X` with `c*X + s*Y` and `Y` with `-conj(s)*X + c*Y` for the first `n` elements of array `X` with stride `incx` and | ||
221 | first `n` elements of array `Y` with stride `incy`. Returns `X` and `Y`. | ||
222 | |||
223 | !!! compat "Julia 1.5" | ||
224 | `rot!` requires at least Julia 1.5. | ||
225 | """ | ||
226 | function rot! end | ||
227 | |||
228 | for (fname, elty, cty, sty, lib) in ((:drot_, :Float64, :Float64, :Float64, libblastrampoline), | ||
229 | (:srot_, :Float32, :Float32, :Float32, libblastrampoline), | ||
230 | (:zdrot_, :ComplexF64, :Float64, :Float64, libblastrampoline), | ||
231 | (:csrot_, :ComplexF32, :Float32, :Float32, libblastrampoline), | ||
232 | (:zrot_, :ComplexF64, :Float64, :ComplexF64, libblastrampoline), | ||
233 | (:crot_, :ComplexF32, :Float32, :ComplexF32, libblastrampoline)) | ||
234 | @eval begin | ||
235 | # SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) | ||
236 | function rot!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer, C::$cty, S::$sty) | ||
237 | ccall((@blasfunc($fname), $lib), Cvoid, | ||
238 | (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$cty}, Ref{$sty}), | ||
239 | n, DX, incx, DY, incy, C, S) | ||
240 | DX, DY | ||
241 | end | ||
242 | end | ||
243 | end | ||
244 | |||
245 | ## scal | ||
246 | |||
247 | """ | ||
248 | scal!(n, a, X, incx) | ||
249 | scal!(a, X) | ||
250 | |||
251 | Overwrite `X` with `a*X` for the first `n` elements of array `X` with stride `incx`. Returns `X`. | ||
252 | |||
253 | If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used. | ||
254 | """ | ||
255 | function scal! end | ||
256 | |||
257 | """ | ||
258 | scal(n, a, X, incx) | ||
259 | scal(a, X) | ||
260 | |||
261 | Return `X` scaled by `a` for the first `n` elements of array `X` with stride `incx`. | ||
262 | |||
263 | If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used. | ||
264 | """ | ||
265 | function scal end | ||
266 | |||
267 | for (fname, elty) in ((:dscal_,:Float64), | ||
268 | (:sscal_,:Float32), | ||
269 | (:zscal_,:ComplexF64), | ||
270 | (:cscal_,:ComplexF32)) | ||
271 | @eval begin | ||
272 | # SUBROUTINE DSCAL(N,DA,DX,INCX) | ||
273 | function scal!(n::Integer, DA::$elty, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) | ||
274 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
275 | (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}), | ||
276 | n, DA, DX, incx) | ||
277 | DX | ||
278 | end | ||
279 | |||
280 | function scal!(DA::$elty, DX::AbstractArray{$elty}) | ||
281 | p, st = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed")) | ||
282 | GC.@preserve DX scal!(length(DX), DA, p, abs(st)) | ||
283 | DX | ||
284 | end | ||
285 | end | ||
286 | end | ||
287 | scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx) | ||
288 | scal(DA, DX) = scal!(DA, copy(DX)) | ||
289 | |||
290 | ## dot | ||
291 | |||
292 | """ | ||
293 | dot(n, X, incx, Y, incy) | ||
294 | |||
295 | Dot product of two vectors consisting of `n` elements of array `X` with stride `incx` and | ||
296 | `n` elements of array `Y` with stride `incy`. | ||
297 | |||
298 | # Examples | ||
299 | ```jldoctest | ||
300 | julia> BLAS.dot(10, fill(1.0, 10), 1, fill(1.0, 20), 2) | ||
301 | 10.0 | ||
302 | ``` | ||
303 | """ | ||
304 | function dot end | ||
305 | |||
306 | """ | ||
307 | dotc(n, X, incx, U, incy) | ||
308 | |||
309 | Dot function for two complex vectors, consisting of `n` elements of array `X` | ||
310 | with stride `incx` and `n` elements of array `U` with stride `incy`, | ||
311 | conjugating the first vector. | ||
312 | |||
313 | # Examples | ||
314 | ```jldoctest | ||
315 | julia> BLAS.dotc(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2) | ||
316 | 10.0 - 10.0im | ||
317 | ``` | ||
318 | """ | ||
319 | function dotc end | ||
320 | |||
321 | """ | ||
322 | dotu(n, X, incx, Y, incy) | ||
323 | |||
324 | Dot function for two complex vectors consisting of `n` elements of array `X` | ||
325 | with stride `incx` and `n` elements of array `Y` with stride `incy`. | ||
326 | |||
327 | # Examples | ||
328 | ```jldoctest | ||
329 | julia> BLAS.dotu(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2) | ||
330 | -10.0 + 10.0im | ||
331 | ``` | ||
332 | """ | ||
333 | function dotu end | ||
334 | |||
335 | for (fname, elty) in ((:cblas_ddot,:Float64), | ||
336 | (:cblas_sdot,:Float32)) | ||
337 | @eval begin | ||
338 | # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) | ||
339 | # * .. Scalar Arguments .. | ||
340 | # INTEGER INCX,INCY,N | ||
341 | # * .. | ||
342 | # * .. Array Arguments .. | ||
343 | # DOUBLE PRECISION DX(*),DY(*) | ||
344 | function dot(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) | ||
345 | ccall((@blasfunc($fname), libblastrampoline), $elty, | ||
346 | (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt), | ||
347 | n, DX, incx, DY, incy) | ||
348 | end | ||
349 | end | ||
350 | end | ||
351 | for (fname, elty) in ((:cblas_zdotc_sub,:ComplexF64), | ||
352 | (:cblas_cdotc_sub,:ComplexF32)) | ||
353 | @eval begin | ||
354 | # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) | ||
355 | # * .. Scalar Arguments .. | ||
356 | # INTEGER INCX,INCY,N | ||
357 | # * .. | ||
358 | # * .. Array Arguments .. | ||
359 | # DOUBLE PRECISION DX(*),DY(*) | ||
360 | function dotc(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) | ||
361 | result = Ref{$elty}() | ||
362 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
363 | (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}), | ||
364 | n, DX, incx, DY, incy, result) | ||
365 | result[] | ||
366 | end | ||
367 | end | ||
368 | end | ||
369 | for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64), | ||
370 | (:cblas_cdotu_sub,:ComplexF32)) | ||
371 | @eval begin | ||
372 | # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) | ||
373 | # * .. Scalar Arguments .. | ||
374 | # INTEGER INCX,INCY,N | ||
375 | # * .. | ||
376 | # * .. Array Arguments .. | ||
377 | # DOUBLE PRECISION DX(*),DY(*) | ||
378 | function dotu(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) | ||
379 | result = Ref{$elty}() | ||
380 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
381 | (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}), | ||
382 | n, DX, incx, DY, incy, result) | ||
383 | result[] | ||
384 | end | ||
385 | end | ||
386 | end | ||
387 | |||
388 | for (elty, f) in ((Float32, :dot), (Float64, :dot), | ||
389 | (ComplexF32, :dotc), (ComplexF64, :dotc), | ||
390 | (ComplexF32, :dotu), (ComplexF64, :dotu)) | ||
391 | @eval begin | ||
392 | function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty}) | ||
393 | n, m = length(x), length(y) | ||
394 | n == m || throw(DimensionMismatch(lazy"dot product arguments have lengths $n and $m")) | ||
395 | GC.@preserve x y $f(n, vec_pointer_stride(x)..., vec_pointer_stride(y)...) | ||
396 | end | ||
397 | end | ||
398 | end | ||
399 | |||
400 | ## nrm2 | ||
401 | |||
402 | """ | ||
403 | nrm2(n, X, incx) | ||
404 | |||
405 | 2-norm of a vector consisting of `n` elements of array `X` with stride `incx`. | ||
406 | |||
407 | # Examples | ||
408 | ```jldoctest | ||
409 | julia> BLAS.nrm2(4, fill(1.0, 8), 2) | ||
410 | 2.0 | ||
411 | |||
412 | julia> BLAS.nrm2(1, fill(1.0, 8), 2) | ||
413 | 1.0 | ||
414 | ``` | ||
415 | """ | ||
416 | function nrm2 end | ||
417 | |||
418 | for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64), | ||
419 | (:snrm2_,:Float32,:Float32), | ||
420 | (:dznrm2_,:ComplexF64,:Float64), | ||
421 | (:scnrm2_,:ComplexF32,:Float32)) | ||
422 | @eval begin | ||
423 | # SUBROUTINE DNRM2(N,X,INCX) | ||
424 | function nrm2(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) | ||
425 | ccall((@blasfunc($fname), libblastrampoline), $ret_type, | ||
426 | (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
427 | n, X, incx) | ||
428 | end | ||
429 | end | ||
430 | end | ||
431 | # openblas returns 0 for negative stride | ||
432 | function nrm2(x::AbstractArray) | ||
433 | p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
434 | GC.@preserve x nrm2(length(x), p, abs(st)) | ||
435 | end | ||
436 | |||
437 | ## asum | ||
438 | |||
439 | """ | ||
440 | asum(n, X, incx) | ||
441 | |||
442 | Sum of the magnitudes of the first `n` elements of array `X` with stride `incx`. | ||
443 | |||
444 | For a real array, the magnitude is the absolute value. For a complex array, the | ||
445 | magnitude is the sum of the absolute value of the real part and the absolute value | ||
446 | of the imaginary part. | ||
447 | |||
448 | # Examples | ||
449 | ```jldoctest | ||
450 | julia> BLAS.asum(5, fill(1.0im, 10), 2) | ||
451 | 5.0 | ||
452 | |||
453 | julia> BLAS.asum(2, fill(1.0im, 10), 5) | ||
454 | 2.0 | ||
455 | ``` | ||
456 | """ | ||
457 | function asum end | ||
458 | |||
459 | for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64), | ||
460 | (:sasum_,:Float32,:Float32), | ||
461 | (:dzasum_,:ComplexF64,:Float64), | ||
462 | (:scasum_,:ComplexF32,:Float32)) | ||
463 | @eval begin | ||
464 | # SUBROUTINE ASUM(N, X, INCX) | ||
465 | function asum(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) | ||
466 | ccall((@blasfunc($fname), libblastrampoline), $ret_type, | ||
467 | (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
468 | n, X, incx) | ||
469 | end | ||
470 | end | ||
471 | end | ||
472 | function asum(x::AbstractArray) | ||
473 | p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
474 | GC.@preserve x asum(length(x), p, abs(st)) | ||
475 | end | ||
476 | |||
477 | ## axpy | ||
478 | |||
479 | """ | ||
480 | axpy!(a, X, Y) | ||
481 | |||
482 | Overwrite `Y` with `X*a + Y`, where `a` is a scalar. Return `Y`. | ||
483 | |||
484 | # Examples | ||
485 | ```jldoctest | ||
486 | julia> x = [1.; 2; 3]; | ||
487 | |||
488 | julia> y = [4. ;; 5 ;; 6]; | ||
489 | |||
490 | julia> BLAS.axpy!(2, x, y) | ||
491 | 1ร3 Matrix{Float64}: | ||
492 | 6.0 9.0 12.0 | ||
493 | ``` | ||
494 | """ | ||
495 | function axpy! end | ||
496 | |||
497 | for (fname, elty) in ((:daxpy_,:Float64), | ||
498 | (:saxpy_,:Float32), | ||
499 | (:zaxpy_,:ComplexF64), | ||
500 | (:caxpy_,:ComplexF32)) | ||
501 | @eval begin | ||
502 | # SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) | ||
503 | # DY <- DA*DX + DY | ||
504 | #* .. Scalar Arguments .. | ||
505 | # DOUBLE PRECISION DA | ||
506 | # INTEGER INCX,INCY,N | ||
507 | #* .. Array Arguments .. | ||
508 | # DOUBLE PRECISION DX(*),DY(*) | ||
509 | function axpy!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer, dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer) | ||
510 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
511 | (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
512 | n, alpha, dx, incx, dy, incy) | ||
513 | dy | ||
514 | end | ||
515 | end | ||
516 | end | ||
517 | |||
518 | function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat | ||
519 | if length(x) != length(y) | ||
520 | throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) | ||
521 | end | ||
522 | GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)..., | ||
523 | vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) | ||
524 | y | ||
525 | end | ||
526 | |||
527 | function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti}, | ||
528 | y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} | ||
529 | if length(rx) != length(ry) | ||
530 | throw(DimensionMismatch("ranges of differing lengths")) | ||
531 | end | ||
532 | if minimum(rx) < 1 || maximum(rx) > length(x) | ||
533 | throw(ArgumentError(lazy"range out of bounds for x, of length $(length(x))")) | ||
534 | end | ||
535 | if minimum(ry) < 1 || maximum(ry) > length(y) | ||
536 | throw(ArgumentError(lazy"range out of bounds for y, of length $(length(y))")) | ||
537 | end | ||
538 | GC.@preserve x y axpy!( | ||
539 | length(rx), | ||
540 | T(alpha), | ||
541 | pointer(x, minimum(rx)), | ||
542 | step(rx), | ||
543 | pointer(y, minimum(ry)), | ||
544 | step(ry)) | ||
545 | |||
546 | return y | ||
547 | end | ||
548 | |||
549 | """ | ||
550 | axpby!(a, X, b, Y) | ||
551 | |||
552 | Overwrite `Y` with `X*a + Y*b`, where `a` and `b` are scalars. Return `Y`. | ||
553 | |||
554 | # Examples | ||
555 | ```jldoctest | ||
556 | julia> x = [1., 2, 3]; | ||
557 | |||
558 | julia> y = [4., 5, 6]; | ||
559 | |||
560 | julia> BLAS.axpby!(2., x, 3., y) | ||
561 | 3-element Vector{Float64}: | ||
562 | 14.0 | ||
563 | 19.0 | ||
564 | 24.0 | ||
565 | ``` | ||
566 | """ | ||
567 | function axpby! end | ||
568 | |||
569 | for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32), | ||
570 | (:zaxpby_,:ComplexF64), (:caxpby_,:ComplexF32)) | ||
571 | @eval begin | ||
572 | # SUBROUTINE DAXPBY(N,DA,DX,INCX,DB,DY,INCY) | ||
573 | # DY <- DA*DX + DB*DY | ||
574 | #* .. Scalar Arguments .. | ||
575 | # DOUBLE PRECISION DA,DB | ||
576 | # INTEGER INCX,INCY,N | ||
577 | #* .. Array Arguments .. | ||
578 | # DOUBLE PRECISION DX(*),DY(*) | ||
579 | function axpby!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, | ||
580 | AbstractArray{$elty}}, incx::Integer, beta::($elty), | ||
581 | dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer) | ||
582 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
583 | Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}), | ||
584 | n, alpha, dx, incx, beta, dy, incy) | ||
585 | dy | ||
586 | end | ||
587 | end | ||
588 | end | ||
589 | |||
590 | function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat | ||
591 | require_one_based_indexing(x, y) | ||
592 | if length(x) != length(y) | ||
593 | throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) | ||
594 | end | ||
595 | GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta), | ||
596 | vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) | ||
597 | y | ||
598 | end | ||
599 | |||
600 | ## iamax | ||
601 | for (fname, elty) in ((:idamax_,:Float64), | ||
602 | (:isamax_,:Float32), | ||
603 | (:izamax_,:ComplexF64), | ||
604 | (:icamax_,:ComplexF32)) | ||
605 | @eval begin | ||
606 | function iamax(n::Integer, dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer) | ||
607 | ccall((@blasfunc($fname), libblastrampoline),BlasInt, | ||
608 | (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
609 | n, dx, incx) | ||
610 | end | ||
611 | end | ||
612 | end | ||
613 | function iamax(dx::AbstractArray) | ||
614 | p, st = vec_pointer_stride(dx) | ||
615 | st <= 0 && return BlasInt(0) | ||
616 | iamax(length(dx), p, st) | ||
617 | end | ||
618 | |||
619 | """ | ||
620 | iamax(n, dx, incx) | ||
621 | iamax(dx) | ||
622 | |||
623 | Find the index of the element of `dx` with the maximum absolute value. `n` is the length of `dx`, and `incx` is the | ||
624 | stride. If `n` and `incx` are not provided, they assume default values of `n=length(dx)` and `incx=stride1(dx)`. | ||
625 | """ | ||
626 | iamax | ||
627 | |||
628 | # Level 2 | ||
629 | ## mv | ||
630 | ### gemv | ||
631 | for (fname, elty) in ((:dgemv_,:Float64), | ||
632 | (:sgemv_,:Float32), | ||
633 | (:zgemv_,:ComplexF64), | ||
634 | (:cgemv_,:ComplexF32)) | ||
635 | @eval begin | ||
636 | #SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | ||
637 | #* .. Scalar Arguments .. | ||
638 | # DOUBLE PRECISION ALPHA,BETA | ||
639 | # INTEGER INCX,INCY,LDA,M,N | ||
640 | # CHARACTER TRANS | ||
641 | #* .. Array Arguments .. | ||
642 | # DOUBLE PRECISION A(LDA,*),X(*),Y(*) | ||
643 | function gemv!(trans::AbstractChar, alpha::Union{($elty), Bool}, | ||
644 | A::AbstractVecOrMat{$elty}, X::AbstractVector{$elty}, | ||
645 | beta::Union{($elty), Bool}, Y::AbstractVector{$elty}) | ||
646 | require_one_based_indexing(A, X, Y) | ||
647 | m,n = size(A,1),size(A,2) | ||
648 | if trans == 'N' && (length(X) != n || length(Y) != m) | ||
649 | throw(DimensionMismatch(lazy"A has dimensions $(size(A)), X has length $(length(X)) and Y has length $(length(Y))")) | ||
650 | elseif trans == 'C' && (length(X) != m || length(Y) != n) | ||
651 | throw(DimensionMismatch(lazy"the adjoint of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))")) | ||
652 | elseif trans == 'T' && (length(X) != m || length(Y) != n) | ||
653 | throw(DimensionMismatch(lazy"the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))")) | ||
654 | end | ||
655 | chkstride1(A) | ||
656 | lda = stride(A,2) | ||
657 | pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed")) | ||
658 | pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
659 | pA = pointer(A) | ||
660 | if lda < 0 | ||
661 | pA += (size(A, 2) - 1) * lda * sizeof($elty) | ||
662 | lda = -lda | ||
663 | trans == 'N' ? (sX = -sX) : (sY = -sY) | ||
664 | end | ||
665 | lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`") | ||
666 | lda = max(1, size(A,1), lda) | ||
667 | GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
668 | (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, | ||
669 | Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
670 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), | ||
671 | trans, size(A,1), size(A,2), alpha, | ||
672 | pA, lda, pX, sX, | ||
673 | beta, pY, sY, 1) | ||
674 | Y | ||
675 | end | ||
676 | function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty}) | ||
677 | gemv!(trans, alpha, A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2)))) | ||
678 | end | ||
679 | function gemv(trans::AbstractChar, A::AbstractMatrix{$elty}, X::AbstractVector{$elty}) | ||
680 | gemv!(trans, one($elty), A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2)))) | ||
681 | end | ||
682 | end | ||
683 | end | ||
684 | |||
685 | """ | ||
686 | gemv!(tA, alpha, A, x, beta, y) | ||
687 | |||
688 | Update the vector `y` as `alpha*A*x + beta*y` or `alpha*A'x + beta*y` | ||
689 | according to [`tA`](@ref stdlib-blas-trans). | ||
690 | `alpha` and `beta` are scalars. Return the updated `y`. | ||
691 | """ | ||
692 | gemv! | ||
693 | |||
694 | """ | ||
695 | gemv(tA, alpha, A, x) | ||
696 | |||
697 | Return `alpha*A*x` or `alpha*A'x` according to [`tA`](@ref stdlib-blas-trans). | ||
698 | `alpha` is a scalar. | ||
699 | """ | ||
700 | gemv(tA, alpha, A, x) | ||
701 | |||
702 | """ | ||
703 | gemv(tA, A, x) | ||
704 | |||
705 | Return `A*x` or `A'x` according to [`tA`](@ref stdlib-blas-trans). | ||
706 | """ | ||
707 | gemv(tA, A, x) | ||
708 | |||
709 | ### (GB) general banded matrix-vector multiplication | ||
710 | |||
711 | """ | ||
712 | gbmv!(trans, m, kl, ku, alpha, A, x, beta, y) | ||
713 | |||
714 | Update vector `y` as `alpha*A*x + beta*y` or `alpha*A'*x + beta*y` according to [`trans`](@ref stdlib-blas-trans). | ||
715 | The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl` | ||
716 | sub-diagonals and `ku` super-diagonals. `alpha` and `beta` are scalars. Return the updated `y`. | ||
717 | """ | ||
718 | function gbmv! end | ||
719 | |||
720 | """ | ||
721 | gbmv(trans, m, kl, ku, alpha, A, x) | ||
722 | |||
723 | Return `alpha*A*x` or `alpha*A'*x` according to [`trans`](@ref stdlib-blas-trans). | ||
724 | The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl` sub-diagonals and `ku` | ||
725 | super-diagonals, and `alpha` is a scalar. | ||
726 | """ | ||
727 | function gbmv end | ||
728 | |||
729 | for (fname, elty) in ((:dgbmv_,:Float64), | ||
730 | (:sgbmv_,:Float32), | ||
731 | (:zgbmv_,:ComplexF64), | ||
732 | (:cgbmv_,:ComplexF32)) | ||
733 | @eval begin | ||
734 | # SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | ||
735 | # * .. Scalar Arguments .. | ||
736 | # DOUBLE PRECISION ALPHA,BETA | ||
737 | # INTEGER INCX,INCY,KL,KU,LDA,M,N | ||
738 | # CHARACTER TRANS | ||
739 | # * .. Array Arguments .. | ||
740 | # DOUBLE PRECISION A(LDA,*),X(*),Y(*) | ||
741 | function gbmv!(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, | ||
742 | alpha::Union{($elty), Bool}, A::AbstractMatrix{$elty}, | ||
743 | x::AbstractVector{$elty}, beta::Union{($elty), Bool}, | ||
744 | y::AbstractVector{$elty}) | ||
745 | require_one_based_indexing(A, x, y) | ||
746 | chkstride1(A) | ||
747 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
748 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
749 | GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
750 | (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, | ||
751 | Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, | ||
752 | Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
753 | Ref{BlasInt}, Clong), | ||
754 | trans, m, size(A,2), kl, | ||
755 | ku, alpha, A, max(1,stride(A,2)), | ||
756 | px, stx, beta, py, sty, 1) | ||
757 | y | ||
758 | end | ||
759 | function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
760 | n = size(A,2) | ||
761 | leny = trans == 'N' ? m : n | ||
762 | gbmv!(trans, m, kl, ku, alpha, A, x, zero($elty), similar(x, $elty, leny)) | ||
763 | end | ||
764 | function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
765 | gbmv(trans, m, kl, ku, one($elty), A, x) | ||
766 | end | ||
767 | end | ||
768 | end | ||
769 | |||
770 | ### symv | ||
771 | |||
772 | """ | ||
773 | symv!(ul, alpha, A, x, beta, y) | ||
774 | |||
775 | Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be symmetric. | ||
776 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
777 | `alpha` and `beta` are scalars. Return the updated `y`. | ||
778 | """ | ||
779 | function symv! end | ||
780 | |||
781 | for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline), | ||
782 | (:ssymv_,:Float32,libblastrampoline), | ||
783 | (:zsymv_,:ComplexF64,libblastrampoline), | ||
784 | (:csymv_,:ComplexF32,libblastrampoline)) | ||
785 | # Note that the complex symv are not BLAS but auiliary functions in LAPACK | ||
786 | @eval begin | ||
787 | # SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | ||
788 | # .. Scalar Arguments .. | ||
789 | # DOUBLE PRECISION ALPHA,BETA | ||
790 | # INTEGER INCX,INCY,LDA,N | ||
791 | # CHARACTER UPLO | ||
792 | # .. Array Arguments .. | ||
793 | # DOUBLE PRECISION A(LDA,*),X(*),Y(*) | ||
794 | function symv!(uplo::AbstractChar, alpha::Union{($elty), Bool}, | ||
795 | A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, | ||
796 | beta::Union{($elty), Bool}, y::AbstractVector{$elty}) | ||
797 | chkuplo(uplo) | ||
798 | require_one_based_indexing(A, x, y) | ||
799 | m, n = size(A) | ||
800 | if m != n | ||
801 | throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square")) | ||
802 | end | ||
803 | if n != length(x) | ||
804 | throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))")) | ||
805 | end | ||
806 | if m != length(y) | ||
807 | throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))")) | ||
808 | end | ||
809 | chkstride1(A) | ||
810 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
811 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
812 | GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid, | ||
813 | (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
814 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, | ||
815 | Ptr{$elty}, Ref{BlasInt}, Clong), | ||
816 | uplo, n, alpha, A, | ||
817 | max(1,stride(A,2)), px, stx, beta, | ||
818 | py, sty, 1) | ||
819 | y | ||
820 | end | ||
821 | function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
822 | symv!(uplo, alpha, A, x, zero($elty), similar(x)) | ||
823 | end | ||
824 | function symv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
825 | symv(uplo, one($elty), A, x) | ||
826 | end | ||
827 | end | ||
828 | end | ||
829 | |||
830 | """ | ||
831 | symv(ul, alpha, A, x) | ||
832 | |||
833 | Return `alpha*A*x`. `A` is assumed to be symmetric. | ||
834 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
835 | `alpha` is a scalar. | ||
836 | """ | ||
837 | symv(ul, alpha, A, x) | ||
838 | |||
839 | """ | ||
840 | symv(ul, A, x) | ||
841 | |||
842 | Return `A*x`. `A` is assumed to be symmetric. | ||
843 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
844 | """ | ||
845 | symv(ul, A, x) | ||
846 | |||
847 | ### hemv | ||
848 | """ | ||
849 | hemv!(ul, alpha, A, x, beta, y) | ||
850 | |||
851 | Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be Hermitian. | ||
852 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
853 | `alpha` and `beta` are scalars. Return the updated `y`. | ||
854 | """ | ||
855 | function hemv! end | ||
856 | |||
857 | for (fname, elty) in ((:zhemv_,:ComplexF64), | ||
858 | (:chemv_,:ComplexF32)) | ||
859 | @eval begin | ||
860 | function hemv!(uplo::AbstractChar, ฮฑ::Union{$elty, Bool}, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, ฮฒ::Union{$elty, Bool}, y::AbstractVector{$elty}) | ||
861 | chkuplo(uplo) | ||
862 | require_one_based_indexing(A, x, y) | ||
863 | m, n = size(A) | ||
864 | if m != n | ||
865 | throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square")) | ||
866 | end | ||
867 | if n != length(x) | ||
868 | throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))")) | ||
869 | end | ||
870 | if m != length(y) | ||
871 | throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))")) | ||
872 | end | ||
873 | chkstride1(A) | ||
874 | lda = max(1, stride(A, 2)) | ||
875 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
876 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
877 | GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
878 | (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
879 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, | ||
880 | Ptr{$elty}, Ref{BlasInt}, Clong), | ||
881 | uplo, n, ฮฑ, A, | ||
882 | lda, px, stx, ฮฒ, | ||
883 | py, sty, 1) | ||
884 | y | ||
885 | end | ||
886 | function hemv(uplo::AbstractChar, ฮฑ::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
887 | hemv!(uplo, ฮฑ, A, x, zero($elty), similar(x)) | ||
888 | end | ||
889 | function hemv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
890 | hemv(uplo, one($elty), A, x) | ||
891 | end | ||
892 | end | ||
893 | end | ||
894 | |||
895 | """ | ||
896 | hemv(ul, alpha, A, x) | ||
897 | |||
898 | Return `alpha*A*x`. `A` is assumed to be Hermitian. | ||
899 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
900 | `alpha` is a scalar. | ||
901 | """ | ||
902 | hemv(ul, alpha, A, x) | ||
903 | |||
904 | """ | ||
905 | hemv(ul, A, x) | ||
906 | |||
907 | Return `A*x`. `A` is assumed to be Hermitian. | ||
908 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
909 | """ | ||
910 | hemv(ul, A, x) | ||
911 | |||
912 | ### hpmv!, (HP) Hermitian packed matrix-vector operation defined as y := alpha*A*x + beta*y. | ||
913 | for (fname, elty) in ((:zhpmv_, :ComplexF64), | ||
914 | (:chpmv_, :ComplexF32)) | ||
915 | @eval begin | ||
916 | # SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) | ||
917 | # Y <- ALPHA*AP*X + BETA*Y | ||
918 | # * .. Scalar Arguments .. | ||
919 | # DOUBLE PRECISION ALPHA,BETA | ||
920 | # INTEGER INCX,INCY,N | ||
921 | # CHARACTER UPLO | ||
922 | # * .. Array Arguments .. | ||
923 | # DOUBLE PRECISION A(N,N),X(N),Y(N) | ||
924 | function hpmv!(uplo::AbstractChar, | ||
925 | n::Integer, | ||
926 | ฮฑ::$elty, | ||
927 | AP::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
928 | x::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
929 | incx::Integer, | ||
930 | ฮฒ::$elty, | ||
931 | y::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
932 | incy::Integer) | ||
933 | |||
934 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
935 | (Ref{UInt8}, # uplo, | ||
936 | Ref{BlasInt}, # n, | ||
937 | Ref{$elty}, # ฮฑ, | ||
938 | Ptr{$elty}, # AP, | ||
939 | Ptr{$elty}, # x, | ||
940 | Ref{BlasInt}, # incx, | ||
941 | Ref{$elty}, # ฮฒ, | ||
942 | Ptr{$elty}, # y, output | ||
943 | Ref{BlasInt}, # incy | ||
944 | Clong), # length of uplo | ||
945 | uplo, | ||
946 | n, | ||
947 | ฮฑ, | ||
948 | AP, | ||
949 | x, | ||
950 | incx, | ||
951 | ฮฒ, | ||
952 | y, | ||
953 | incy, | ||
954 | 1) | ||
955 | return y | ||
956 | end | ||
957 | end | ||
958 | end | ||
959 | |||
960 | function hpmv!(uplo::AbstractChar, | ||
961 | ฮฑ::Number, AP::AbstractArray{T}, x::AbstractArray{T}, | ||
962 | ฮฒ::Number, y::AbstractArray{T}) where {T <: BlasComplex} | ||
963 | require_one_based_indexing(AP, x, y) | ||
964 | N = length(x) | ||
965 | if N != length(y) | ||
966 | throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) | ||
967 | end | ||
968 | if 2*length(AP) < N*(N + 1) | ||
969 | throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N).")) | ||
970 | end | ||
971 | chkstride1(AP) | ||
972 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
973 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
974 | GC.@preserve x y hpmv!(uplo, N, T(ฮฑ), AP, px, stx, T(ฮฒ), py, sty) | ||
975 | y | ||
976 | end | ||
977 | |||
978 | """ | ||
979 | hpmv!(uplo, ฮฑ, AP, x, ฮฒ, y) | ||
980 | |||
981 | Update vector `y` as `ฮฑ*A*x + ฮฒ*y`, where `A` is a Hermitian matrix provided | ||
982 | in packed format `AP`. | ||
983 | |||
984 | With `uplo = 'U'`, the array AP must contain the upper triangular part of the | ||
985 | Hermitian matrix packed sequentially, column by column, so that `AP[1]` | ||
986 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` | ||
987 | respectively, and so on. | ||
988 | |||
989 | With `uplo = 'L'`, the array AP must contain the lower triangular part of the | ||
990 | Hermitian matrix packed sequentially, column by column, so that `AP[1]` | ||
991 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` | ||
992 | respectively, and so on. | ||
993 | |||
994 | The scalar inputs `ฮฑ` and `ฮฒ` must be complex or real numbers. | ||
995 | |||
996 | The array inputs `x`, `y` and `AP` must all be of `ComplexF32` or `ComplexF64` type. | ||
997 | |||
998 | Return the updated `y`. | ||
999 | |||
1000 | !!! compat "Julia 1.5" | ||
1001 | `hpmv!` requires at least Julia 1.5. | ||
1002 | """ | ||
1003 | hpmv! | ||
1004 | |||
1005 | ### sbmv, (SB) symmetric banded matrix-vector multiplication | ||
1006 | for (fname, elty) in ((:dsbmv_,:Float64), | ||
1007 | (:ssbmv_,:Float32)) | ||
1008 | @eval begin | ||
1009 | # SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | ||
1010 | # * .. Scalar Arguments .. | ||
1011 | # DOUBLE PRECISION ALPHA,BETA | ||
1012 | # INTEGER INCX,INCY,K,LDA,N | ||
1013 | # CHARACTER UPLO | ||
1014 | # * .. Array Arguments .. | ||
1015 | # DOUBLE PRECISION A(LDA,*),X(*),Y(*) | ||
1016 | function sbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty}) | ||
1017 | chkuplo(uplo) | ||
1018 | require_one_based_indexing(A, x, y) | ||
1019 | chkstride1(A) | ||
1020 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1021 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
1022 | GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1023 | (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, | ||
1024 | Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
1025 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), | ||
1026 | uplo, size(A,2), k, alpha, | ||
1027 | A, max(1,stride(A,2)), px, stx, | ||
1028 | beta, py, sty, 1) | ||
1029 | y | ||
1030 | end | ||
1031 | function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1032 | n = size(A,2) | ||
1033 | sbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n)) | ||
1034 | end | ||
1035 | function sbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1036 | sbmv(uplo, k, one($elty), A, x) | ||
1037 | end | ||
1038 | end | ||
1039 | end | ||
1040 | |||
1041 | """ | ||
1042 | sbmv(uplo, k, alpha, A, x) | ||
1043 | |||
1044 | Return `alpha*A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k` | ||
1045 | super-diagonals stored in the argument `A`. | ||
1046 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1047 | """ | ||
1048 | sbmv(uplo, k, alpha, A, x) | ||
1049 | |||
1050 | """ | ||
1051 | sbmv(uplo, k, A, x) | ||
1052 | |||
1053 | Return `A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k` | ||
1054 | super-diagonals stored in the argument `A`. | ||
1055 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1056 | """ | ||
1057 | sbmv(uplo, k, A, x) | ||
1058 | |||
1059 | """ | ||
1060 | sbmv!(uplo, k, alpha, A, x, beta, y) | ||
1061 | |||
1062 | Update vector `y` as `alpha*A*x + beta*y` where `A` is a symmetric band matrix of order | ||
1063 | `size(A,2)` with `k` super-diagonals stored in the argument `A`. The storage layout for `A` | ||
1064 | is described the reference BLAS module, level-2 BLAS at | ||
1065 | <http://www.netlib.org/lapack/explore-html/>. | ||
1066 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1067 | |||
1068 | Return the updated `y`. | ||
1069 | """ | ||
1070 | sbmv! | ||
1071 | |||
1072 | ### spmv!, (SP) symmetric packed matrix-vector operation defined as y := alpha*A*x + beta*y. | ||
1073 | for (fname, elty) in ((:dspmv_, :Float64), | ||
1074 | (:sspmv_, :Float32)) | ||
1075 | @eval begin | ||
1076 | # SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) | ||
1077 | # Y <- ALPHA*AP*X + BETA*Y | ||
1078 | # * .. Scalar Arguments .. | ||
1079 | # DOUBLE PRECISION ALPHA,BETA | ||
1080 | # INTEGER INCX,INCY,N | ||
1081 | # CHARACTER UPLO | ||
1082 | # * .. Array Arguments .. | ||
1083 | # DOUBLE PRECISION A(N,N),X(N),Y(N) | ||
1084 | function spmv!(uplo::AbstractChar, | ||
1085 | n::Integer, | ||
1086 | ฮฑ::$elty, | ||
1087 | AP::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
1088 | x::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
1089 | incx::Integer, | ||
1090 | ฮฒ::$elty, | ||
1091 | y::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
1092 | incy::Integer) | ||
1093 | |||
1094 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1095 | (Ref{UInt8}, # uplo, | ||
1096 | Ref{BlasInt}, # n, | ||
1097 | Ref{$elty}, # ฮฑ, | ||
1098 | Ptr{$elty}, # AP, | ||
1099 | Ptr{$elty}, # x, | ||
1100 | Ref{BlasInt}, # incx, | ||
1101 | Ref{$elty}, # ฮฒ, | ||
1102 | Ptr{$elty}, # y, out | ||
1103 | Ref{BlasInt}, # incy | ||
1104 | Clong), # length of uplo | ||
1105 | uplo, | ||
1106 | n, | ||
1107 | ฮฑ, | ||
1108 | AP, | ||
1109 | x, | ||
1110 | incx, | ||
1111 | ฮฒ, | ||
1112 | y, | ||
1113 | incy, | ||
1114 | 1) | ||
1115 | return y | ||
1116 | end | ||
1117 | end | ||
1118 | end | ||
1119 | |||
1120 | function spmv!(uplo::AbstractChar, | ||
1121 | ฮฑ::Real, AP::AbstractArray{T}, x::AbstractArray{T}, | ||
1122 | ฮฒ::Real, y::AbstractArray{T}) where {T <: BlasReal} | ||
1123 | require_one_based_indexing(AP, x, y) | ||
1124 | N = length(x) | ||
1125 | if N != length(y) | ||
1126 | throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) | ||
1127 | end | ||
1128 | if 2*length(AP) < N*(N + 1) | ||
1129 | throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) | ||
1130 | end | ||
1131 | chkstride1(AP) | ||
1132 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1133 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
1134 | GC.@preserve x y spmv!(uplo, N, T(ฮฑ), AP, px, stx, T(ฮฒ), py, sty) | ||
1135 | y | ||
1136 | end | ||
1137 | |||
1138 | """ | ||
1139 | spmv!(uplo, ฮฑ, AP, x, ฮฒ, y) | ||
1140 | |||
1141 | Update vector `y` as `ฮฑ*A*x + ฮฒ*y`, where `A` is a symmetric matrix provided | ||
1142 | in packed format `AP`. | ||
1143 | |||
1144 | With `uplo = 'U'`, the array AP must contain the upper triangular part of the | ||
1145 | symmetric matrix packed sequentially, column by column, so that `AP[1]` | ||
1146 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` | ||
1147 | respectively, and so on. | ||
1148 | |||
1149 | With `uplo = 'L'`, the array AP must contain the lower triangular part of the | ||
1150 | symmetric matrix packed sequentially, column by column, so that `AP[1]` | ||
1151 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` | ||
1152 | respectively, and so on. | ||
1153 | |||
1154 | The scalar inputs `ฮฑ` and `ฮฒ` must be real. | ||
1155 | |||
1156 | The array inputs `x`, `y` and `AP` must all be of `Float32` or `Float64` type. | ||
1157 | |||
1158 | Return the updated `y`. | ||
1159 | |||
1160 | !!! compat "Julia 1.5" | ||
1161 | `spmv!` requires at least Julia 1.5. | ||
1162 | """ | ||
1163 | spmv! | ||
1164 | |||
1165 | ### spr!, (SP) symmetric packed matrix-vector operation defined as A := alpha*x*x' + A | ||
1166 | for (fname, elty) in ((:dspr_, :Float64), | ||
1167 | (:sspr_, :Float32)) | ||
1168 | @eval begin | ||
1169 | function spr!(uplo::AbstractChar, | ||
1170 | n::Integer, | ||
1171 | ฮฑ::$elty, | ||
1172 | x::Union{Ptr{$elty}, AbstractArray{$elty}}, | ||
1173 | incx::Integer, | ||
1174 | AP::Union{Ptr{$elty}, AbstractArray{$elty}}) | ||
1175 | |||
1176 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1177 | (Ref{UInt8}, # uplo, | ||
1178 | Ref{BlasInt}, # n, | ||
1179 | Ref{$elty}, # ฮฑ, | ||
1180 | Ptr{$elty}, # x, | ||
1181 | Ref{BlasInt}, # incx, | ||
1182 | Ptr{$elty}, # AP, | ||
1183 | Clong), # length of uplo | ||
1184 | uplo, | ||
1185 | n, | ||
1186 | ฮฑ, | ||
1187 | x, | ||
1188 | incx, | ||
1189 | AP, | ||
1190 | 1) | ||
1191 | return AP | ||
1192 | end | ||
1193 | end | ||
1194 | end | ||
1195 | |||
1196 | function spr!(uplo::AbstractChar, | ||
1197 | ฮฑ::Real, x::AbstractArray{T}, | ||
1198 | AP::AbstractArray{T}) where {T <: BlasReal} | ||
1199 | chkuplo(uplo) | ||
1200 | require_one_based_indexing(AP, x) | ||
1201 | N = length(x) | ||
1202 | if 2*length(AP) < N*(N + 1) | ||
1203 | throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) | ||
1204 | end | ||
1205 | chkstride1(AP) | ||
1206 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1207 | return GC.@preserve x spr!(uplo, N, T(ฮฑ), px, stx , AP) | ||
1208 | end | ||
1209 | |||
1210 | """ | ||
1211 | spr!(uplo, ฮฑ, x, AP) | ||
1212 | |||
1213 | Update matrix `A` as `A+ฮฑ*x*x'`, where `A` is a symmetric matrix provided | ||
1214 | in packed format `AP` and `x` is a vector. | ||
1215 | |||
1216 | With `uplo = 'U'`, the array AP must contain the upper triangular part of the | ||
1217 | symmetric matrix packed sequentially, column by column, so that `AP[1]` | ||
1218 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` | ||
1219 | respectively, and so on. | ||
1220 | |||
1221 | With `uplo = 'L'`, the array AP must contain the lower triangular part of the | ||
1222 | symmetric matrix packed sequentially, column by column, so that `AP[1]` | ||
1223 | contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` | ||
1224 | respectively, and so on. | ||
1225 | |||
1226 | The scalar input `ฮฑ` must be real. | ||
1227 | |||
1228 | The array inputs `x` and `AP` must all be of `Float32` or `Float64` type. | ||
1229 | Return the updated `AP`. | ||
1230 | |||
1231 | !!! compat "Julia 1.8" | ||
1232 | `spr!` requires at least Julia 1.8. | ||
1233 | """ | ||
1234 | spr! | ||
1235 | |||
1236 | ### hbmv, (HB) Hermitian banded matrix-vector multiplication | ||
1237 | for (fname, elty) in ((:zhbmv_,:ComplexF64), | ||
1238 | (:chbmv_,:ComplexF32)) | ||
1239 | @eval begin | ||
1240 | # SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | ||
1241 | # * .. Scalar Arguments .. | ||
1242 | # DOUBLE PRECISION ALPHA,BETA | ||
1243 | # INTEGER INCX,INCY,K,LDA,N | ||
1244 | # CHARACTER UPLO | ||
1245 | # * .. Array Arguments .. | ||
1246 | # DOUBLE PRECISION A(LDA,*),X(*),Y(*) | ||
1247 | function hbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty}) | ||
1248 | chkuplo(uplo) | ||
1249 | require_one_based_indexing(A, x, y) | ||
1250 | chkstride1(A) | ||
1251 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1252 | py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) | ||
1253 | GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1254 | (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, | ||
1255 | Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
1256 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), | ||
1257 | uplo, size(A,2), k, alpha, | ||
1258 | A, max(1,stride(A,2)), px, stx, | ||
1259 | beta, py, sty, 1) | ||
1260 | y | ||
1261 | end | ||
1262 | function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1263 | n = size(A,2) | ||
1264 | hbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n)) | ||
1265 | end | ||
1266 | function hbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1267 | hbmv(uplo, k, one($elty), A, x) | ||
1268 | end | ||
1269 | end | ||
1270 | end | ||
1271 | |||
1272 | ### trmv, Triangular matrix-vector multiplication | ||
1273 | |||
1274 | """ | ||
1275 | trmv(ul, tA, dA, A, b) | ||
1276 | |||
1277 | Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans). | ||
1278 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1279 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
1280 | are assumed to be all ones. | ||
1281 | """ | ||
1282 | function trmv end | ||
1283 | |||
1284 | """ | ||
1285 | trmv!(ul, tA, dA, A, b) | ||
1286 | |||
1287 | Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans). | ||
1288 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1289 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
1290 | are assumed to be all ones. | ||
1291 | The multiplication occurs in-place on `b`. | ||
1292 | """ | ||
1293 | function trmv! end | ||
1294 | |||
1295 | for (fname, elty) in ((:dtrmv_,:Float64), | ||
1296 | (:strmv_,:Float32), | ||
1297 | (:ztrmv_,:ComplexF64), | ||
1298 | (:ctrmv_,:ComplexF32)) | ||
1299 | @eval begin | ||
1300 | # SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) | ||
1301 | # * .. Scalar Arguments .. | ||
1302 | # INTEGER INCX,LDA,N | ||
1303 | # CHARACTER DIAG,TRANS,UPLO | ||
1304 | # * .. Array Arguments .. | ||
1305 | # DOUBLE PRECISION A(LDA,*),X(*) | ||
1306 | function trmv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1307 | chkuplo(uplo) | ||
1308 | require_one_based_indexing(A, x) | ||
1309 | n = checksquare(A) | ||
1310 | if n != length(x) | ||
1311 | throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) | ||
1312 | end | ||
1313 | chkstride1(A) | ||
1314 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1315 | GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1316 | (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, | ||
1317 | Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
1318 | Clong, Clong, Clong), | ||
1319 | uplo, trans, diag, n, | ||
1320 | A, max(1,stride(A,2)), px, stx, 1, 1, 1) | ||
1321 | x | ||
1322 | end | ||
1323 | function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1324 | trmv!(uplo, trans, diag, A, copy(x)) | ||
1325 | end | ||
1326 | end | ||
1327 | end | ||
1328 | |||
1329 | ### trsv, Triangular matrix-vector solve | ||
1330 | |||
1331 | """ | ||
1332 | trsv!(ul, tA, dA, A, b) | ||
1333 | |||
1334 | Overwrite `b` with the solution to `A*x = b` or one of the other two variants determined by | ||
1335 | [`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo). | ||
1336 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
1337 | are assumed to be all ones. | ||
1338 | Return the updated `b`. | ||
1339 | """ | ||
1340 | function trsv! end | ||
1341 | |||
1342 | """ | ||
1343 | trsv(ul, tA, dA, A, b) | ||
1344 | |||
1345 | Return the solution to `A*x = b` or one of the other two variants determined by | ||
1346 | [`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo). | ||
1347 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
1348 | are assumed to be all ones. | ||
1349 | """ | ||
1350 | function trsv end | ||
1351 | |||
1352 | for (fname, elty) in ((:dtrsv_,:Float64), | ||
1353 | (:strsv_,:Float32), | ||
1354 | (:ztrsv_,:ComplexF64), | ||
1355 | (:ctrsv_,:ComplexF32)) | ||
1356 | @eval begin | ||
1357 | # SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) | ||
1358 | # .. Scalar Arguments .. | ||
1359 | # INTEGER INCX,LDA,N | ||
1360 | # CHARACTER DIAG,TRANS,UPLO | ||
1361 | # .. Array Arguments .. | ||
1362 | # DOUBLE PRECISION A(LDA,*),X(*) | ||
1363 | function trsv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1364 | chkuplo(uplo) | ||
1365 | require_one_based_indexing(A, x) | ||
1366 | n = checksquare(A) | ||
1367 | if n != length(x) | ||
1368 | throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))")) | ||
1369 | end | ||
1370 | chkstride1(A) | ||
1371 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1372 | GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1373 | (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, | ||
1374 | Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
1375 | Clong, Clong, Clong), | ||
1376 | uplo, trans, diag, n, | ||
1377 | A, max(1,stride(A,2)), px, stx, 1, 1, 1) | ||
1378 | x | ||
1379 | end | ||
1380 | function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) | ||
1381 | trsv!(uplo, trans, diag, A, copy(x)) | ||
1382 | end | ||
1383 | end | ||
1384 | end | ||
1385 | |||
1386 | ### ger | ||
1387 | |||
1388 | """ | ||
1389 | ger!(alpha, x, y, A) | ||
1390 | |||
1391 | Rank-1 update of the matrix `A` with vectors `x` and `y` as `alpha*x*y' + A`. | ||
1392 | """ | ||
1393 | function ger! end | ||
1394 | |||
1395 | for (fname, elty) in ((:dger_,:Float64), | ||
1396 | (:sger_,:Float32), | ||
1397 | (:zgerc_,:ComplexF64), | ||
1398 | (:cgerc_,:ComplexF32)) | ||
1399 | @eval begin | ||
1400 | function ger!(ฮฑ::$elty, x::AbstractVector{$elty}, y::AbstractVector{$elty}, A::AbstractMatrix{$elty}) | ||
1401 | require_one_based_indexing(A, x, y) | ||
1402 | m, n = size(A) | ||
1403 | if m != length(x) || n != length(y) | ||
1404 | throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))")) | ||
1405 | end | ||
1406 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1407 | py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed")) | ||
1408 | GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1409 | (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
1410 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, | ||
1411 | Ref{BlasInt}), | ||
1412 | m, n, ฮฑ, px, stx, py, sty, A, max(1,stride(A,2))) | ||
1413 | A | ||
1414 | end | ||
1415 | end | ||
1416 | end | ||
1417 | |||
1418 | ### syr | ||
1419 | |||
1420 | """ | ||
1421 | syr!(uplo, alpha, x, A) | ||
1422 | |||
1423 | Rank-1 update of the symmetric matrix `A` with vector `x` as `alpha*x*transpose(x) + A`. | ||
1424 | [`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`. | ||
1425 | """ | ||
1426 | function syr! end | ||
1427 | |||
1428 | for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline), | ||
1429 | (:ssyr_,:Float32,libblastrampoline), | ||
1430 | (:zsyr_,:ComplexF64,libblastrampoline), | ||
1431 | (:csyr_,:ComplexF32,libblastrampoline)) | ||
1432 | @eval begin | ||
1433 | function syr!(uplo::AbstractChar, ฮฑ::$elty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty}) | ||
1434 | chkuplo(uplo) | ||
1435 | require_one_based_indexing(A, x) | ||
1436 | n = checksquare(A) | ||
1437 | if length(x) != n | ||
1438 | throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) | ||
1439 | end | ||
1440 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1441 | GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid, | ||
1442 | (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
1443 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), | ||
1444 | uplo, n, ฮฑ, px, stx, A, max(1,stride(A, 2))) | ||
1445 | A | ||
1446 | end | ||
1447 | end | ||
1448 | end | ||
1449 | |||
1450 | ### her | ||
1451 | |||
1452 | """ | ||
1453 | her!(uplo, alpha, x, A) | ||
1454 | |||
1455 | Methods for complex arrays only. Rank-1 update of the Hermitian matrix `A` with vector `x` | ||
1456 | as `alpha*x*x' + A`. | ||
1457 | [`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`. | ||
1458 | """ | ||
1459 | function her! end | ||
1460 | |||
1461 | for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64), | ||
1462 | (:cher_,:ComplexF32, :Float32)) | ||
1463 | @eval begin | ||
1464 | function her!(uplo::AbstractChar, ฮฑ::$relty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty}) | ||
1465 | chkuplo(uplo) | ||
1466 | require_one_based_indexing(A, x) | ||
1467 | n = checksquare(A) | ||
1468 | if length(x) != n | ||
1469 | throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) | ||
1470 | end | ||
1471 | px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) | ||
1472 | GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1473 | (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, | ||
1474 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong), | ||
1475 | uplo, n, ฮฑ, px, stx, A, max(1,stride(A,2)), 1) | ||
1476 | A | ||
1477 | end | ||
1478 | end | ||
1479 | end | ||
1480 | |||
1481 | # Level 3 | ||
1482 | ## (GE) general matrix-matrix multiplication | ||
1483 | |||
1484 | """ | ||
1485 | gemm!(tA, tB, alpha, A, B, beta, C) | ||
1486 | |||
1487 | Update `C` as `alpha*A*B + beta*C` or the other three variants according to | ||
1488 | [`tA`](@ref stdlib-blas-trans) and `tB`. Return the updated `C`. | ||
1489 | """ | ||
1490 | function gemm! end | ||
1491 | |||
1492 | for (gemm, elty) in | ||
1493 | ((:dgemm_,:Float64), | ||
1494 | (:sgemm_,:Float32), | ||
1495 | (:zgemm_,:ComplexF64), | ||
1496 | (:cgemm_,:ComplexF32)) | ||
1497 | @eval begin | ||
1498 | # SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||
1499 | # * .. Scalar Arguments .. | ||
1500 | # DOUBLE PRECISION ALPHA,BETA | ||
1501 | # INTEGER K,LDA,LDB,LDC,M,N | ||
1502 | # CHARACTER TRANSA,TRANSB | ||
1503 | # * .. Array Arguments .. | ||
1504 | # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||
1505 | function gemm!(transA::AbstractChar, transB::AbstractChar, | ||
1506 | alpha::Union{($elty), Bool}, | ||
1507 | A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty}, | ||
1508 | beta::Union{($elty), Bool}, | ||
1509 | C::AbstractVecOrMat{$elty}) | ||
1510 | # if any([stride(A,1), stride(B,1), stride(C,1)] .!= 1) | ||
1511 | # error("gemm!: BLAS module requires contiguous matrix columns") | ||
1512 | # end # should this be checked on every call? | ||
1513 | require_one_based_indexing(A, B, C) | ||
1514 | m = size(A, transA == 'N' ? 1 : 2) | ||
1515 | ka = size(A, transA == 'N' ? 2 : 1) | ||
1516 | kb = size(B, transB == 'N' ? 1 : 2) | ||
1517 | n = size(B, transB == 'N' ? 2 : 1) | ||
1518 | if ka != kb || m != size(C,1) || n != size(C,2) | ||
1519 | throw(DimensionMismatch(lazy"A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))")) | ||
1520 | end | ||
1521 | chkstride1(A) | ||
1522 | chkstride1(B) | ||
1523 | chkstride1(C) | ||
1524 | ccall((@blasfunc($gemm), libblastrampoline), Cvoid, | ||
1525 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1526 | Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, | ||
1527 | Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
1528 | Ref{BlasInt}, Clong, Clong), | ||
1529 | transA, transB, m, n, | ||
1530 | ka, alpha, A, max(1,stride(A,2)), | ||
1531 | B, max(1,stride(B,2)), beta, C, | ||
1532 | max(1,stride(C,2)), 1, 1) | ||
1533 | C | ||
1534 | end | ||
1535 | function gemm(transA::AbstractChar, transB::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1536 | gemm!(transA, transB, alpha, A, B, zero($elty), similar(B, $elty, (size(A, transA == 'N' ? 1 : 2), size(B, transB == 'N' ? 2 : 1)))) | ||
1537 | end | ||
1538 | function gemm(transA::AbstractChar, transB::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1539 | gemm(transA, transB, one($elty), A, B) | ||
1540 | end | ||
1541 | end | ||
1542 | end | ||
1543 | |||
1544 | """ | ||
1545 | gemm(tA, tB, alpha, A, B) | ||
1546 | |||
1547 | Return `alpha*A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. | ||
1548 | """ | ||
1549 | gemm(tA, tB, alpha, A, B) | ||
1550 | |||
1551 | """ | ||
1552 | gemm(tA, tB, A, B) | ||
1553 | |||
1554 | Return `A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. | ||
1555 | """ | ||
1556 | gemm(tA, tB, A, B) | ||
1557 | |||
1558 | |||
1559 | ## (SY) symmetric matrix-matrix and matrix-vector multiplication | ||
1560 | for (mfname, elty) in ((:dsymm_,:Float64), | ||
1561 | (:ssymm_,:Float32), | ||
1562 | (:zsymm_,:ComplexF64), | ||
1563 | (:csymm_,:ComplexF32)) | ||
1564 | @eval begin | ||
1565 | # SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||
1566 | # .. Scalar Arguments .. | ||
1567 | # DOUBLE PRECISION ALPHA,BETA | ||
1568 | # INTEGER LDA,LDB,LDC,M,N | ||
1569 | # CHARACTER SIDE,UPLO | ||
1570 | # .. Array Arguments .. | ||
1571 | # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||
1572 | function symm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool}, | ||
1573 | A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}, | ||
1574 | beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) | ||
1575 | chkuplo(uplo) | ||
1576 | require_one_based_indexing(A, B, C) | ||
1577 | m, n = size(C) | ||
1578 | j = checksquare(A) | ||
1579 | M, N = size(B) | ||
1580 | if side == 'L' | ||
1581 | if j != m | ||
1582 | throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) | ||
1583 | end | ||
1584 | if N != n | ||
1585 | throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) | ||
1586 | end | ||
1587 | if j != M | ||
1588 | throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) | ||
1589 | end | ||
1590 | else | ||
1591 | if j != n | ||
1592 | throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) | ||
1593 | end | ||
1594 | if N != j | ||
1595 | throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) | ||
1596 | end | ||
1597 | if M != m | ||
1598 | throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) | ||
1599 | end | ||
1600 | end | ||
1601 | chkstride1(A) | ||
1602 | chkstride1(B) | ||
1603 | chkstride1(C) | ||
1604 | ccall((@blasfunc($mfname), libblastrampoline), Cvoid, | ||
1605 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1606 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, | ||
1607 | Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, | ||
1608 | Clong, Clong), | ||
1609 | side, uplo, m, n, | ||
1610 | alpha, A, max(1,stride(A,2)), B, | ||
1611 | max(1,stride(B,2)), beta, C, max(1,stride(C,2)), | ||
1612 | 1, 1) | ||
1613 | C | ||
1614 | end | ||
1615 | function symm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1616 | symm!(side, uplo, alpha, A, B, zero($elty), similar(B)) | ||
1617 | end | ||
1618 | function symm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1619 | symm(side, uplo, one($elty), A, B) | ||
1620 | end | ||
1621 | end | ||
1622 | end | ||
1623 | |||
1624 | """ | ||
1625 | symm(side, ul, alpha, A, B) | ||
1626 | |||
1627 | Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side). | ||
1628 | `A` is assumed to be symmetric. Only | ||
1629 | the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1630 | """ | ||
1631 | symm(side, ul, alpha, A, B) | ||
1632 | |||
1633 | """ | ||
1634 | symm(side, ul, A, B) | ||
1635 | |||
1636 | Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side). | ||
1637 | `A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo) | ||
1638 | triangle of `A` is used. | ||
1639 | """ | ||
1640 | symm(side, ul, A, B) | ||
1641 | |||
1642 | """ | ||
1643 | symm!(side, ul, alpha, A, B, beta, C) | ||
1644 | |||
1645 | Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to [`side`](@ref stdlib-blas-side). | ||
1646 | `A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo) triangle of | ||
1647 | `A` is used. Return the updated `C`. | ||
1648 | """ | ||
1649 | symm! | ||
1650 | |||
1651 | ## (HE) Hermitian matrix-matrix and matrix-vector multiplication | ||
1652 | for (mfname, elty) in ((:zhemm_,:ComplexF64), | ||
1653 | (:chemm_,:ComplexF32)) | ||
1654 | @eval begin | ||
1655 | # SUBROUTINE DHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||
1656 | # .. Scalar Arguments .. | ||
1657 | # DOUBLE PRECISION ALPHA,BETA | ||
1658 | # INTEGER LDA,LDB,LDC,M,N | ||
1659 | # CHARACTER SIDE,UPLO | ||
1660 | # .. Array Arguments .. | ||
1661 | # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||
1662 | function hemm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool}, | ||
1663 | A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}, | ||
1664 | beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) | ||
1665 | chkuplo(uplo) | ||
1666 | require_one_based_indexing(A, B, C) | ||
1667 | m, n = size(C) | ||
1668 | j = checksquare(A) | ||
1669 | M, N = size(B) | ||
1670 | if side == 'L' | ||
1671 | if j != m | ||
1672 | throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) | ||
1673 | end | ||
1674 | if N != n | ||
1675 | throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) | ||
1676 | end | ||
1677 | if j != M | ||
1678 | throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) | ||
1679 | end | ||
1680 | else | ||
1681 | if j != n | ||
1682 | throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) | ||
1683 | end | ||
1684 | if N != j | ||
1685 | throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) | ||
1686 | end | ||
1687 | if M != m | ||
1688 | throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) | ||
1689 | end | ||
1690 | end | ||
1691 | chkstride1(A) | ||
1692 | chkstride1(B) | ||
1693 | chkstride1(C) | ||
1694 | ccall((@blasfunc($mfname), libblastrampoline), Cvoid, | ||
1695 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1696 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, | ||
1697 | Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, | ||
1698 | Clong, Clong), | ||
1699 | side, uplo, m, n, | ||
1700 | alpha, A, max(1,stride(A,2)), B, | ||
1701 | max(1,stride(B,2)), beta, C, max(1,stride(C,2)), | ||
1702 | 1, 1) | ||
1703 | C | ||
1704 | end | ||
1705 | function hemm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1706 | hemm!(side, uplo, alpha, A, B, zero($elty), similar(B)) | ||
1707 | end | ||
1708 | function hemm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
1709 | hemm(side, uplo, one($elty), A, B) | ||
1710 | end | ||
1711 | end | ||
1712 | end | ||
1713 | |||
1714 | """ | ||
1715 | hemm(side, ul, alpha, A, B) | ||
1716 | |||
1717 | Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side). | ||
1718 | `A` is assumed to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle | ||
1719 | of `A` is used. | ||
1720 | """ | ||
1721 | hemm(side, ul, alpha, A, B) | ||
1722 | |||
1723 | """ | ||
1724 | hemm(side, ul, A, B) | ||
1725 | |||
1726 | Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side). `A` is assumed | ||
1727 | to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
1728 | """ | ||
1729 | hemm(side, ul, A, B) | ||
1730 | |||
1731 | """ | ||
1732 | hemm!(side, ul, alpha, A, B, beta, C) | ||
1733 | |||
1734 | Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to | ||
1735 | [`side`](@ref stdlib-blas-side). `A` is assumed to be Hermitian. Only the | ||
1736 | [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. Return the updated `C`. | ||
1737 | """ | ||
1738 | hemm! | ||
1739 | |||
1740 | ## syrk | ||
1741 | |||
1742 | """ | ||
1743 | syrk!(uplo, trans, alpha, A, beta, C) | ||
1744 | |||
1745 | Rank-k update of the symmetric matrix `C` as `alpha*A*transpose(A) + beta*C` or | ||
1746 | `alpha*transpose(A)*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). | ||
1747 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. | ||
1748 | """ | ||
1749 | function syrk! end | ||
1750 | |||
1751 | """ | ||
1752 | syrk(uplo, trans, alpha, A) | ||
1753 | |||
1754 | Return either the upper triangle or the lower triangle of `A`, | ||
1755 | according to [`uplo`](@ref stdlib-blas-uplo), | ||
1756 | of `alpha*A*transpose(A)` or `alpha*transpose(A)*A`, | ||
1757 | according to [`trans`](@ref stdlib-blas-trans). | ||
1758 | """ | ||
1759 | function syrk end | ||
1760 | |||
1761 | for (fname, elty) in ((:dsyrk_,:Float64), | ||
1762 | (:ssyrk_,:Float32), | ||
1763 | (:zsyrk_,:ComplexF64), | ||
1764 | (:csyrk_,:ComplexF32)) | ||
1765 | @eval begin | ||
1766 | # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) | ||
1767 | # * .. Scalar Arguments .. | ||
1768 | # REAL ALPHA,BETA | ||
1769 | # INTEGER K,LDA,LDC,N | ||
1770 | # CHARACTER TRANS,UPLO | ||
1771 | # * .. Array Arguments .. | ||
1772 | # REAL A(LDA,*),C(LDC,*) | ||
1773 |
13 (5 %)
samples spent in syrk!
function syrk!(uplo::AbstractChar, trans::AbstractChar,
13 (100 %) (ex.), 13 (100 %) (incl.) when called from syrk_wrapper! line 512 |
||
1774 | alpha::Union{($elty), Bool}, A::AbstractVecOrMat{$elty}, | ||
1775 | beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) | ||
1776 | chkuplo(uplo) | ||
1777 | require_one_based_indexing(A, C) | ||
1778 | n = checksquare(C) | ||
1779 | nn = size(A, trans == 'N' ? 1 : 2) | ||
1780 | if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end | ||
1781 | k = size(A, trans == 'N' ? 2 : 1) | ||
1782 | chkstride1(A) | ||
1783 | chkstride1(C) | ||
1784 | 13 (5 %) | 13 (5 %) | ccall((@blasfunc($fname), libblastrampoline), Cvoid, |
1785 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1786 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, | ||
1787 | Ptr{$elty}, Ref{BlasInt}, Clong, Clong), | ||
1788 | uplo, trans, n, k, | ||
1789 | alpha, A, max(1,stride(A,2)), beta, | ||
1790 | C, max(1,stride(C,2)), 1, 1) | ||
1791 | C | ||
1792 | end | ||
1793 | end | ||
1794 | end | ||
1795 | function syrk(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat) | ||
1796 | T = eltype(A) | ||
1797 | n = size(A, trans == 'N' ? 1 : 2) | ||
1798 | syrk!(uplo, trans, convert(T,alpha), A, zero(T), similar(A, T, (n, n))) | ||
1799 | end | ||
1800 | syrk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat) = syrk(uplo, trans, one(eltype(A)), A) | ||
1801 | |||
1802 | """ | ||
1803 | herk!(uplo, trans, alpha, A, beta, C) | ||
1804 | |||
1805 | Methods for complex arrays only. Rank-k update of the Hermitian matrix `C` as | ||
1806 | `alpha*A*A' + beta*C` or `alpha*A'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). | ||
1807 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is updated. Returns `C`. | ||
1808 | """ | ||
1809 | function herk! end | ||
1810 | |||
1811 | """ | ||
1812 | herk(uplo, trans, alpha, A) | ||
1813 | |||
1814 | Methods for complex arrays only. Returns the [`uplo`](@ref stdlib-blas-uplo) | ||
1815 | triangle of `alpha*A*A'` or `alpha*A'*A`, according to [`trans`](@ref stdlib-blas-trans). | ||
1816 | """ | ||
1817 | function herk end | ||
1818 | |||
1819 | for (fname, elty, relty) in ((:zherk_, :ComplexF64, :Float64), | ||
1820 | (:cherk_, :ComplexF32, :Float32)) | ||
1821 | @eval begin | ||
1822 | # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) | ||
1823 | # * .. Scalar Arguments .. | ||
1824 | # REAL ALPHA,BETA | ||
1825 | # INTEGER K,LDA,LDC,N | ||
1826 | # CHARACTER TRANS,UPLO | ||
1827 | # * .. | ||
1828 | # * .. Array Arguments .. | ||
1829 | # COMPLEX A(LDA,*),C(LDC,*) | ||
1830 | function herk!(uplo::AbstractChar, trans::AbstractChar, | ||
1831 | ฮฑ::Union{$relty, Bool}, A::AbstractVecOrMat{$elty}, | ||
1832 | ฮฒ::Union{$relty, Bool}, C::AbstractMatrix{$elty}) | ||
1833 | chkuplo(uplo) | ||
1834 | require_one_based_indexing(A, C) | ||
1835 | n = checksquare(C) | ||
1836 | nn = size(A, trans == 'N' ? 1 : 2) | ||
1837 | if nn != n | ||
1838 | throw(DimensionMismatch(lazy"the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))")) | ||
1839 | end | ||
1840 | chkstride1(A) | ||
1841 | chkstride1(C) | ||
1842 | k = size(A, trans == 'N' ? 2 : 1) | ||
1843 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1844 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1845 | Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, | ||
1846 | Ptr{$elty}, Ref{BlasInt}, Clong, Clong), | ||
1847 | uplo, trans, n, k, | ||
1848 | ฮฑ, A, max(1,stride(A,2)), ฮฒ, | ||
1849 | C, max(1,stride(C,2)), 1, 1) | ||
1850 | C | ||
1851 | end | ||
1852 | function herk(uplo::AbstractChar, trans::AbstractChar, ฮฑ::$relty, A::AbstractVecOrMat{$elty}) | ||
1853 | n = size(A, trans == 'N' ? 1 : 2) | ||
1854 | herk!(uplo, trans, ฮฑ, A, zero($relty), similar(A, (n,n))) | ||
1855 | end | ||
1856 | herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A) | ||
1857 | end | ||
1858 | end | ||
1859 | |||
1860 | ## syr2k | ||
1861 | for (fname, elty) in ((:dsyr2k_,:Float64), | ||
1862 | (:ssyr2k_,:Float32), | ||
1863 | (:zsyr2k_,:ComplexF64), | ||
1864 | (:csyr2k_,:ComplexF32)) | ||
1865 | @eval begin | ||
1866 | # SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||
1867 | # | ||
1868 | # .. Scalar Arguments .. | ||
1869 | # REAL PRECISION ALPHA,BETA | ||
1870 | # INTEGER K,LDA,LDB,LDC,N | ||
1871 | # CHARACTER TRANS,UPLO | ||
1872 | # .. | ||
1873 | # .. Array Arguments .. | ||
1874 | # REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||
1875 | function syr2k!(uplo::AbstractChar, trans::AbstractChar, | ||
1876 | alpha::($elty), A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty}, | ||
1877 | beta::($elty), C::AbstractMatrix{$elty}) | ||
1878 | chkuplo(uplo) | ||
1879 | require_one_based_indexing(A, B, C) | ||
1880 | n = checksquare(C) | ||
1881 | nn = size(A, trans == 'N' ? 1 : 2) | ||
1882 | if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end | ||
1883 | k = size(A, trans == 'N' ? 2 : 1) | ||
1884 | chkstride1(A) | ||
1885 | chkstride1(B) | ||
1886 | chkstride1(C) | ||
1887 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1888 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1889 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, | ||
1890 | Ptr{$elty}, Ref{BlasInt}, Clong, Clong), | ||
1891 | uplo, trans, n, k, | ||
1892 | alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), beta, | ||
1893 | C, max(1,stride(C,2)), 1, 1) | ||
1894 | C | ||
1895 | end | ||
1896 | end | ||
1897 | end | ||
1898 | |||
1899 | """ | ||
1900 | syr2k!(uplo, trans, alpha, A, B, beta, C) | ||
1901 | |||
1902 | Rank-2k update of the symmetric matrix `C` as | ||
1903 | `alpha*A*transpose(B) + alpha*B*transpose(A) + beta*C` or | ||
1904 | `alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C` | ||
1905 | according to [`trans`](@ref stdlib-blas-trans). | ||
1906 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. | ||
1907 | """ | ||
1908 | function syr2k! end | ||
1909 | |||
1910 | """ | ||
1911 | syr2k(uplo, trans, alpha, A, B) | ||
1912 | |||
1913 | Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of | ||
1914 | `alpha*A*transpose(B) + alpha*B*transpose(A)` or | ||
1915 | `alpha*transpose(A)*B + alpha*transpose(B)*A`, | ||
1916 | according to [`trans`](@ref stdlib-blas-trans). | ||
1917 | """ | ||
1918 | function syr2k(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat, B::AbstractVecOrMat) | ||
1919 | T = eltype(A) | ||
1920 | n = size(A, trans == 'N' ? 1 : 2) | ||
1921 | syr2k!(uplo, trans, convert(T,alpha), A, B, zero(T), similar(A, T, (n, n))) | ||
1922 | end | ||
1923 | """ | ||
1924 | syr2k(uplo, trans, A, B) | ||
1925 | |||
1926 | Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` | ||
1927 | or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-trans). | ||
1928 | """ | ||
1929 | syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B) | ||
1930 | |||
1931 | for (fname, elty1, elty2) in ((:zher2k_,:ComplexF64,:Float64), (:cher2k_,:ComplexF32,:Float32)) | ||
1932 | @eval begin | ||
1933 | # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||
1934 | # | ||
1935 | # .. Scalar Arguments .. | ||
1936 | # COMPLEX ALPHA | ||
1937 | # REAL BETA | ||
1938 | # INTEGER K,LDA,LDB,LDC,N | ||
1939 | # CHARACTER TRANS,UPLO | ||
1940 | # .. | ||
1941 | # .. Array Arguments .. | ||
1942 | # COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) | ||
1943 | function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), | ||
1944 | A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}, | ||
1945 | beta::($elty2), C::AbstractMatrix{$elty1}) | ||
1946 | chkuplo(uplo) | ||
1947 | require_one_based_indexing(A, B, C) | ||
1948 | n = checksquare(C) | ||
1949 | nn = size(A, trans == 'N' ? 1 : 2) | ||
1950 | if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end | ||
1951 | chkstride1(A) | ||
1952 | chkstride1(B) | ||
1953 | chkstride1(C) | ||
1954 | k = size(A, trans == 'N' ? 2 : 1) | ||
1955 | ccall((@blasfunc($fname), libblastrampoline), Cvoid, | ||
1956 | (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
1957 | Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt}, | ||
1958 | Ref{$elty2}, Ptr{$elty1}, Ref{BlasInt}, Clong, Clong), | ||
1959 | uplo, trans, n, k, | ||
1960 | alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), | ||
1961 | beta, C, max(1,stride(C,2)), 1, 1) | ||
1962 | C | ||
1963 | end | ||
1964 | function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) | ||
1965 | n = size(A, trans == 'N' ? 1 : 2) | ||
1966 | her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n))) | ||
1967 | end | ||
1968 | her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) = | ||
1969 | her2k(uplo, trans, one($elty1), A, B) | ||
1970 | end | ||
1971 | end | ||
1972 | |||
1973 | """ | ||
1974 | her2k!(uplo, trans, alpha, A, B, beta, C) | ||
1975 | |||
1976 | Rank-2k update of the Hermitian matrix `C` as | ||
1977 | `alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C` | ||
1978 | according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real. | ||
1979 | Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. | ||
1980 | """ | ||
1981 | function her2k! end | ||
1982 | |||
1983 | """ | ||
1984 | her2k(uplo, trans, alpha, A, B) | ||
1985 | |||
1986 | Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` | ||
1987 | or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans). | ||
1988 | """ | ||
1989 | her2k(uplo, trans, alpha, A, B) | ||
1990 | |||
1991 | """ | ||
1992 | her2k(uplo, trans, A, B) | ||
1993 | |||
1994 | Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` | ||
1995 | or `A'*B + B'*A`, according to [`trans`](@ref stdlib-blas-trans). | ||
1996 | """ | ||
1997 | her2k(uplo, trans, A, B) | ||
1998 | |||
1999 | ## (TR) Triangular matrix and vector multiplication and solution | ||
2000 | |||
2001 | """ | ||
2002 | trmm!(side, ul, tA, dA, alpha, A, B) | ||
2003 | |||
2004 | Update `B` as `alpha*A*B` or one of the other three variants determined by | ||
2005 | [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). | ||
2006 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
2007 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
2008 | are assumed to be all ones. | ||
2009 | Return the updated `B`. | ||
2010 | """ | ||
2011 | function trmm! end | ||
2012 | |||
2013 | """ | ||
2014 | trmm(side, ul, tA, dA, alpha, A, B) | ||
2015 | |||
2016 | Return `alpha*A*B` or one of the other three variants determined by | ||
2017 | [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). | ||
2018 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
2019 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
2020 | are assumed to be all ones. | ||
2021 | """ | ||
2022 | function trmm end | ||
2023 | |||
2024 | """ | ||
2025 | trsm!(side, ul, tA, dA, alpha, A, B) | ||
2026 | |||
2027 | Overwrite `B` with the solution to `A*X = alpha*B` or one of the other three variants | ||
2028 | determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). | ||
2029 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
2030 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
2031 | are assumed to be all ones. | ||
2032 | Returns the updated `B`. | ||
2033 | """ | ||
2034 | function trsm! end | ||
2035 | |||
2036 | """ | ||
2037 | trsm(side, ul, tA, dA, alpha, A, B) | ||
2038 | |||
2039 | Return the solution to `A*X = alpha*B` or one of the other three variants determined by | ||
2040 | determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). | ||
2041 | Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. | ||
2042 | [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or | ||
2043 | are assumed to be all ones. | ||
2044 | """ | ||
2045 | function trsm end | ||
2046 | |||
2047 | for (mmname, smname, elty) in | ||
2048 | ((:dtrmm_,:dtrsm_,:Float64), | ||
2049 | (:strmm_,:strsm_,:Float32), | ||
2050 | (:ztrmm_,:ztrsm_,:ComplexF64), | ||
2051 | (:ctrmm_,:ctrsm_,:ComplexF32)) | ||
2052 | @eval begin | ||
2053 | # SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) | ||
2054 | # * .. Scalar Arguments .. | ||
2055 | # DOUBLE PRECISION ALPHA | ||
2056 | # INTEGER LDA,LDB,M,N | ||
2057 | # CHARACTER DIAG,SIDE,TRANSA,UPLO | ||
2058 | # * .. Array Arguments .. | ||
2059 | # DOUBLE PRECISION A(LDA,*),B(LDB,*) | ||
2060 | function trmm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::Number, | ||
2061 | A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
2062 | chkuplo(uplo) | ||
2063 | require_one_based_indexing(A, B) | ||
2064 | m, n = size(B) | ||
2065 | nA = checksquare(A) | ||
2066 | if nA != (side == 'L' ? m : n) | ||
2067 | throw(DimensionMismatch(lazy"size of A, $(size(A)), doesn't match $side size of B with dims, $(size(B))")) | ||
2068 | end | ||
2069 | chkstride1(A) | ||
2070 | chkstride1(B) | ||
2071 | ccall((@blasfunc($mmname), libblastrampoline), Cvoid, | ||
2072 | (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, | ||
2073 | Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
2074 | Clong, Clong, Clong, Clong), | ||
2075 | side, uplo, transa, diag, m, n, | ||
2076 | alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), | ||
2077 | 1, 1, 1, 1) | ||
2078 | B | ||
2079 | end | ||
2080 | function trmm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, | ||
2081 | alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
2082 | trmm!(side, uplo, transa, diag, alpha, A, copy(B)) | ||
2083 | end | ||
2084 | # SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) | ||
2085 | # * .. Scalar Arguments .. | ||
2086 | # DOUBLE PRECISION ALPHA | ||
2087 | # INTEGER LDA,LDB,M,N | ||
2088 | # CHARACTER DIAG,SIDE,TRANSA,UPLO | ||
2089 | # * .. Array Arguments .. | ||
2090 | # DOUBLE PRECISION A(LDA,*),B(LDB,*) | ||
2091 | function trsm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, | ||
2092 | alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
2093 | chkuplo(uplo) | ||
2094 | require_one_based_indexing(A, B) | ||
2095 | m, n = size(B) | ||
2096 | k = checksquare(A) | ||
2097 | if k != (side == 'L' ? m : n) | ||
2098 | throw(DimensionMismatch(lazy"size of A is ($k,$k), size of B is ($m,$n), side is $side, and transa='$transa'")) | ||
2099 | end | ||
2100 | chkstride1(A) | ||
2101 | chkstride1(B) | ||
2102 | ccall((@blasfunc($smname), libblastrampoline), Cvoid, | ||
2103 | (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, | ||
2104 | Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, | ||
2105 | Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, | ||
2106 | Clong, Clong, Clong, Clong), | ||
2107 | side, uplo, transa, diag, | ||
2108 | m, n, alpha, A, | ||
2109 | max(1,stride(A,2)), B, max(1,stride(B,2)), | ||
2110 | 1, 1, 1, 1) | ||
2111 | B | ||
2112 | end | ||
2113 | function trsm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) | ||
2114 | trsm!(side, uplo, transa, diag, alpha, A, copy(B)) | ||
2115 | end | ||
2116 | end | ||
2117 | end | ||
2118 | |||
2119 | end # module | ||
2120 | |||
2121 | function copyto!(dest::Array{T}, rdest::AbstractRange{Ti}, | ||
2122 | src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} | ||
2123 | if minimum(rdest) < 1 || maximum(rdest) > length(dest) | ||
2124 | throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))")) | ||
2125 | end | ||
2126 | if minimum(rsrc) < 1 || maximum(rsrc) > length(src) | ||
2127 | throw(ArgumentError(lazy"range out of bounds for src, of length $(length(src))")) | ||
2128 | end | ||
2129 | if length(rdest) != length(rsrc) | ||
2130 | throw(DimensionMismatch(lazy"ranges must be of the same length")) | ||
2131 | end | ||
2132 | GC.@preserve src dest BLAS.blascopy!( | ||
2133 | length(rsrc), | ||
2134 | pointer(src, minimum(rsrc)), | ||
2135 | step(rsrc), | ||
2136 | pointer(dest, minimum(rdest)), | ||
2137 | step(rdest)) | ||
2138 | |||
2139 | return dest | ||
2140 | end |