From 6409a8a8ebe8a6d648ad1739e7ac589721646e2d Mon Sep 17 00:00:00 2001 From: N5N3 <2642243996@qq.com> Date: Sun, 20 Feb 2022 18:20:01 +0800 Subject: [PATCH] Add more negative stride support to BLAS Level 1/2 functions (#42957) --- stdlib/LinearAlgebra/src/blas.jl | 253 ++++++++--------- stdlib/LinearAlgebra/src/matmul.jl | 12 +- stdlib/LinearAlgebra/test/blas.jl | 426 +++++++++++++++------------- stdlib/LinearAlgebra/test/matmul.jl | 9 + 4 files changed, 358 insertions(+), 342 deletions(-) diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl index e8d44c1ae1533..ed7546ac244ed 100644 --- a/stdlib/LinearAlgebra/src/blas.jl +++ b/stdlib/LinearAlgebra/src/blas.jl @@ -161,12 +161,26 @@ end "Check that upper/lower (for special matrices) is correctly specified" function chkuplo(uplo::AbstractChar) if !(uplo == 'U' || uplo == 'L') - throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo")) + throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo")) end uplo end # Level 1 +# A help function to pick the pointer and inc for 1d like inputs. +@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing) + isdense(x) && return pointer(x), 1 # simpify runtime check when possibe + ndims(x) == 1 || strides(x) == Base.size_to_strides(stride(x, 1), size(x)...) || + throw(ArgumentError("only support vector like inputs")) + st = stride(x, 1) + isnothing(stride0check) || (st == 0 && throw(stride0check)) + ptr = st > 0 ? pointer(x) : pointer(x, lastindex(x)) + ptr, st +end +isdense(x) = x isa DenseArray +isdense(x::Base.FastContiguousSubArray) = isdense(parent(x)) +isdense(x::Base.ReshapedArray) = isdense(parent(x)) +isdense(x::Base.ReinterpretArray) = isdense(parent(x)) ## copy """ @@ -257,7 +271,11 @@ for (fname, elty) in ((:dscal_,:Float64), DX end - scal!(DA::$elty, DX::AbstractArray{$elty}) = scal!(length(DX),DA,DX,stride(DX,1)) + function scal!(DA::$elty, DX::AbstractArray{$elty}) + p, st = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve DX scal!(length(DX), DA, p, abs(st)) + DX + end end end scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx) @@ -361,73 +379,16 @@ for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64), end end -@inline function _dot_length_check(x,y) - n = length(x) - if n != length(y) - throw(DimensionMismatch("dot product arguments have lengths $(length(x)) and $(length(y))")) - end - n -end - for (elty, f) in ((Float32, :dot), (Float64, :dot), (ComplexF32, :dotc), (ComplexF64, :dotc), (ComplexF32, :dotu), (ComplexF64, :dotu)) @eval begin - function $f(x::DenseArray{$elty}, y::DenseArray{$elty}) - n = _dot_length_check(x,y) - $f(n, x, 1, y, 1) - end - - function $f(x::StridedVector{$elty}, y::DenseArray{$elty}) - n = _dot_length_check(x,y) - xstride = stride(x,1) - ystride = stride(y,1) - x_delta = xstride < 0 ? n : 1 - GC.@preserve x $f(n, pointer(x, x_delta), xstride, y, ystride) + function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty}) + n, m = length(x), length(y) + n == m || throw(DimensionMismatch(lazy"dot product arguments have lengths $n and $m")) + GC.@preserve x y $f(n, vec_pointer_stride(x)..., vec_pointer_stride(y)...) end - - function $f(x::DenseArray{$elty}, y::StridedVector{$elty}) - n = _dot_length_check(x,y) - xstride = stride(x,1) - ystride = stride(y,1) - y_delta = ystride < 0 ? n : 1 - GC.@preserve y $f(n, x, xstride, pointer(y, y_delta), ystride) - end - - function $f(x::StridedVector{$elty}, y::StridedVector{$elty}) - n = _dot_length_check(x,y) - xstride = stride(x,1) - ystride = stride(y,1) - x_delta = xstride < 0 ? n : 1 - y_delta = ystride < 0 ? n : 1 - GC.@preserve x y $f(n, pointer(x, x_delta), xstride, pointer(y, y_delta), ystride) - end - end -end - -function dot(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasReal - require_one_based_indexing(DX, DY) - n = length(DX) - if n != length(DY) - throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))")) - end - return dot(n, DX, stride(DX, 1), DY, stride(DY, 1)) -end -function dotc(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex - require_one_based_indexing(DX, DY) - n = length(DX) - if n != length(DY) - throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))")) end - return dotc(n, DX, stride(DX, 1), DY, stride(DY, 1)) -end -function dotu(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex - require_one_based_indexing(DX, DY) - n = length(DX) - if n != length(DY) - throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))")) - end - return dotu(n, DX, stride(DX, 1), DY, stride(DY, 1)) end ## nrm2 @@ -461,7 +422,11 @@ for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64), end end end -nrm2(x::Union{AbstractVector,DenseArray}) = nrm2(length(x), x, stride1(x)) +# openblas returns 0 for negative stride +function nrm2(x::AbstractArray) + p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x nrm2(length(x), p, abs(st)) +end ## asum @@ -498,7 +463,10 @@ for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64), end end end -asum(x::Union{AbstractVector,DenseArray}) = asum(length(x), x, stride1(x)) +function asum(x::AbstractArray) + p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x asum(length(x), p, abs(st)) +end ## axpy @@ -542,15 +510,17 @@ for (fname, elty) in ((:daxpy_,:Float64), end end end -function axpy!(alpha::Number, x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where T<:BlasFloat +function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat if length(x) != length(y) throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) end - return axpy!(length(x), convert(T,alpha), x, stride(x, 1), y, stride(y, 1)) + GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)..., + vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) + y end -function axpy!(alpha::Number, x::Array{T}, rx::Union{UnitRange{Ti},AbstractRange{Ti}}, - y::Array{T}, ry::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer} +function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti}, + y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} if length(rx) != length(ry) throw(DimensionMismatch("ranges of differing lengths")) end @@ -562,10 +532,10 @@ function axpy!(alpha::Number, x::Array{T}, rx::Union{UnitRange{Ti},AbstractRange end GC.@preserve x y axpy!( length(rx), - convert(T, alpha), - pointer(x) + (first(rx) - 1)*sizeof(T), + T(alpha), + pointer(x, minimum(rx)), step(rx), - pointer(y) + (first(ry) - 1)*sizeof(T), + pointer(y, minimum(ry)), step(ry)) return y @@ -612,12 +582,14 @@ for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32), end end -function axpby!(alpha::Number, x::Union{DenseArray{T},AbstractVector{T}}, beta::Number, y::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasFloat +function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat require_one_based_indexing(x, y) if length(x) != length(y) throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) end - return axpby!(length(x), convert(T, alpha), x, stride(x, 1), convert(T, beta), y, stride(y, 1)) + GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta), + vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) + y end ## iamax @@ -633,7 +605,11 @@ for (fname, elty) in ((:idamax_,:Float64), end end end -iamax(dx::Union{AbstractVector,DenseArray}) = iamax(length(dx), dx, stride1(dx)) +function iamax(dx::AbstractArray) + p, st = vec_pointer_stride(dx) + st <= 0 && return BlasInt(0) + iamax(length(dx), p, st) +end """ iamax(n, dx, incx) @@ -673,20 +649,16 @@ for (fname, elty) in ((:dgemv_,:Float64), end chkstride1(A) lda = stride(A,2) - sX = stride(X,1) - sY = stride(Y,1) + pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed")) + pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed")) + pA = pointer(A) if lda < 0 - colindex = lastindex(A, 2) + pA += (size(A, 2) - 1) * lda * sizeof($elty) lda = -lda trans == 'N' ? (sX = -sX) : (sY = -sY) - else - colindex = firstindex(A, 2) end lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`") lda = max(1, size(A,1), lda) - pA = pointer(A, Base._sub2ind(A, 1, colindex)) - pX = pointer(X, stride(X,1) > 0 ? firstindex(X) : lastindex(X)) - pY = pointer(Y, stride(Y,1) > 0 ? firstindex(Y) : lastindex(Y)) GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, @@ -767,14 +739,16 @@ for (fname, elty) in ((:dgbmv_,:Float64), y::AbstractVector{$elty}) require_one_based_indexing(A, x, y) chkstride1(A) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), trans, m, size(A,2), kl, ku, alpha, A, max(1,stride(A,2)), - x, stride(x,1), beta, y, stride(y,1), 1) + px, stx, beta, py, sty, 1) y end function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -828,13 +802,15 @@ for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline), throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))")) end chkstride1(A) - ccall((@blasfunc($fname), $lib), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), uplo, n, alpha, A, - max(1,stride(A,2)), x, stride(x,1), beta, - y, stride(y,1), 1) + max(1,stride(A,2)), px, stx, beta, + py, sty, 1) y end function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -891,15 +867,15 @@ for (fname, elty) in ((:zhemv_,:ComplexF64), end chkstride1(A) lda = max(1, stride(A, 2)) - incx = stride(x, 1) - incy = stride(y, 1) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), uplo, n, α, A, - lda, x, incx, β, - y, incy, 1) + lda, px, stx, β, + py, sty, 1) y end function hemv(uplo::AbstractChar, α::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -977,19 +953,21 @@ for (fname, elty) in ((:zhpmv_, :ComplexF64), end function hpmv!(uplo::AbstractChar, - α::Number, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}}, - β::Number, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasComplex} - chkuplo(uplo) + α::Number, AP::AbstractArray{T}, x::AbstractArray{T}, + β::Number, y::AbstractArray{T}) where {T <: BlasComplex} require_one_based_indexing(AP, x, y) N = length(x) if N != length(y) - throw(DimensionMismatch("x has length $(N), but y has length $(length(y))")) + throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) end if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch("Packed Hermitian matrix A has size smaller than length(x) = $(N).")) + throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N).")) end chkstride1(AP) - return hpmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1)) + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y hpmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty) + y end """ @@ -1031,13 +1009,15 @@ for (fname, elty) in ((:dsbmv_,:Float64), chkuplo(uplo) require_one_based_indexing(A, x, y) chkstride1(A) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), uplo, size(A,2), k, alpha, - A, max(1,stride(A,2)), x, stride(x,1), - beta, y, stride(y,1), 1) + A, max(1,stride(A,2)), px, stx, + beta, py, sty, 1) y end function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -1130,19 +1110,21 @@ for (fname, elty) in ((:dspmv_, :Float64), end function spmv!(uplo::AbstractChar, - α::Real, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}}, - β::Real, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal} - chkuplo(uplo) + α::Real, AP::AbstractArray{T}, x::AbstractArray{T}, + β::Real, y::AbstractArray{T}) where {T <: BlasReal} require_one_based_indexing(AP, x, y) N = length(x) if N != length(y) - throw(DimensionMismatch("x has length $(N), but y has length $(length(y))")) + throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) end if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N).")) + throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) end chkstride1(AP) - return spmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1)) + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y spmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty) + y end """ @@ -1201,16 +1183,17 @@ for (fname, elty) in ((:dspr_, :Float64), end function spr!(uplo::AbstractChar, - α::Real, x::Union{DenseArray{T}, AbstractVector{T}}, - AP::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal} + α::Real, x::AbstractArray{T}, + AP::AbstractArray{T}) where {T <: BlasReal} chkuplo(uplo) require_one_based_indexing(AP, x) N = length(x) if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N).")) + throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) end chkstride1(AP) - return spr!(uplo, N, convert(T, α), x, stride(x, 1), AP) + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + return GC.@preserve x spr!(uplo, N, T(α), px, stx , AP) end """ @@ -1251,13 +1234,15 @@ for (fname, elty) in ((:zhbmv_,:ComplexF64), chkuplo(uplo) require_one_based_indexing(A, x, y) chkstride1(A) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), uplo, size(A,2), k, alpha, - A, max(1,stride(A,2)), x, stride(x,1), - beta, y, stride(y,1), 1) + A, max(1,stride(A,2)), px, stx, + beta, py, sty, 1) y end function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -1312,12 +1297,13 @@ for (fname, elty) in ((:dtrmv_,:Float64), throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) end chkstride1(A) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong, Clong, Clong), uplo, trans, diag, n, - A, max(1,stride(A,2)), x, max(1,stride(x, 1)), 1, 1, 1) + A, max(1,stride(A,2)), px, stx, 1, 1, 1) x end function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -1368,12 +1354,13 @@ for (fname, elty) in ((:dtrsv_,:Float64), throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))")) end chkstride1(A) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong, Clong, Clong), uplo, trans, diag, n, - A, max(1,stride(A,2)), x, stride(x, 1), 1, 1, 1) + A, max(1,stride(A,2)), px, stx, 1, 1, 1) x end function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) @@ -1402,13 +1389,13 @@ for (fname, elty) in ((:dger_,:Float64), if m != length(x) || n != length(y) throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))")) end - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - m, n, α, x, - stride(x, 1), y, stride(y, 1), A, - max(1,stride(A,2))) + m, n, α, px, stx, py, sty, A, max(1,stride(A,2))) A end end @@ -1436,11 +1423,11 @@ for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline), if length(x) != n throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) end - ccall((@blasfunc($fname), $lib), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - uplo, n, α, x, - stride(x, 1), A, max(1,stride(A, 2))) + uplo, n, α, px, stx, A, max(1,stride(A, 2))) A end end @@ -1467,11 +1454,11 @@ for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64), if length(x) != n throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) end - ccall((@blasfunc($fname), libblastrampoline), Cvoid, + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, n, α, x, - stride(x, 1), A, max(1,stride(A,2)), 1) + uplo, n, α, px, stx, A, max(1,stride(A,2)), 1) A end end @@ -2085,8 +2072,8 @@ end end # module -function copyto!(dest::Array{T}, rdest::Union{UnitRange{Ti},AbstractRange{Ti}}, - src::Array{T}, rsrc::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer} +function copyto!(dest::Array{T}, rdest::AbstractRange{Ti}, + src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} if minimum(rdest) < 1 || maximum(rdest) > length(dest) throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))")) end @@ -2098,9 +2085,9 @@ function copyto!(dest::Array{T}, rdest::Union{UnitRange{Ti},AbstractRange{Ti}}, end GC.@preserve src dest BLAS.blascopy!( length(rsrc), - pointer(src) + (first(rsrc) - 1) * sizeof(T), + pointer(src, minimum(rsrc)), step(rsrc), - pointer(dest) + (first(rdest) - 1) * sizeof(T), + pointer(dest, minimum(rdest)), step(rdest)) return dest diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index f27a3a768b866..80d9872fdca6e 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -498,7 +498,8 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x:: nA == 0 && return _rmul_or_fill!(y, β) alpha, beta = promote(α, β, zero(T)) if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && - stride(A, 1) == 1 && stride(A, 2) >= size(A, 1) + stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && + !iszero(stride(x, 1)) # We only check input's stride here. return BLAS.gemv!(tA, alpha, A, x, beta, y) else return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) @@ -516,8 +517,9 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa nA == 0 && return _rmul_or_fill!(y, β) alpha, beta = promote(α, β, zero(T)) if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && - stride(A, 1) == 1 && stride(A, 2) >= size(A, 1) && - stride(y, 1) == 1 && tA == 'N' # reinterpret-based optimization is valid only for contiguous `y` + stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && + stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y` + !iszero(stride(x, 1)) BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y)) return y else @@ -535,7 +537,9 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa mA == 0 && return y nA == 0 && return _rmul_or_fill!(y, β) alpha, beta = promote(α, β, zero(T)) - @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && stride(A, 2) >= size(A, 1) + @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && + stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && + !iszero(stride(x, 1)) xfl = reinterpret(reshape, T, x) # Use reshape here. yfl = reinterpret(reshape, T, y) BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :]) diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl index 54b227bca7685..d39f7c45ba205 100644 --- a/stdlib/LinearAlgebra/test/blas.jl +++ b/stdlib/LinearAlgebra/test/blas.jl @@ -4,20 +4,31 @@ module TestBLAS using Test, LinearAlgebra, Random using LinearAlgebra: BlasReal, BlasComplex +fabs(x::Real) = abs(x) +fabs(x::Complex) = abs(real(x)) + abs(imag(x)) + +# help function to build packed storage +function pack(A, uplo) + AP = eltype(A)[] + n = size(A, 1) + for j in 1:n, i in (uplo==:L ? (j:n) : (1:j)) + push!(AP, A[i,j]) + end + return AP +end +@testset "vec_pointer_stride" begin + a = zeros(4,4,4) + @test BLAS.asum(view(a,1:2:4,:,:)) == 0 # vector like + @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # non-vector like +end Random.seed!(100) ## BLAS tests - testing the interface code to BLAS routines @testset for elty in [Float32, Float64, ComplexF32, ComplexF64] @testset "syr2k!" begin - U = randn(5,2) - V = randn(5,2) - if elty == ComplexF32 || elty == ComplexF64 - U = complex.(U, U) - V = complex.(V, V) - end - U = convert(Array{elty, 2}, U) - V = convert(Array{elty, 2}, V) + U = randn(elty, 5, 2) + V = randn(elty, 5, 2) @test tril(LinearAlgebra.BLAS.syr2k('L','N',U,V)) ≈ tril(U*transpose(V) + V*transpose(U)) @test triu(LinearAlgebra.BLAS.syr2k('U','N',U,V)) ≈ triu(U*transpose(V) + V*transpose(U)) @test tril(LinearAlgebra.BLAS.syr2k('L','T',U,V)) ≈ tril(transpose(U)*V + transpose(V)*U) @@ -26,12 +37,8 @@ Random.seed!(100) if elty in (ComplexF32, ComplexF64) @testset "her2k!" begin - U = randn(5,2) - V = randn(5,2) - U = complex.(U, U) - V = complex.(V, V) - U = convert(Array{elty, 2}, U) - V = convert(Array{elty, 2}, V) + U = randn(elty, 5, 2) + V = randn(elty, 5, 2) @test tril(LinearAlgebra.BLAS.her2k('L','N',U,V)) ≈ tril(U*V' + V*U') @test triu(LinearAlgebra.BLAS.her2k('U','N',U,V)) ≈ triu(U*V' + V*U') @test tril(LinearAlgebra.BLAS.her2k('L','C',U,V)) ≈ tril(U'*V + V'*U) @@ -48,21 +55,21 @@ Random.seed!(100) U4 = triu(fill(elty(1), 4,4)) Z4 = zeros(elty, (4,4)) - elm1 = convert(elty, -1) - el2 = convert(elty, 2) - v14 = convert(Vector{elty}, [1:4;]) - v41 = convert(Vector{elty}, [4:-1:1;]) + elm1 = elty(-1) + el2 = elty(2) + v14 = elty[1:4;] + v41 = elty[4:-1:1;] let n = 10 @testset "dot products" begin if elty <: Real - x1 = convert(Vector{elty}, randn(n)) - x2 = convert(Vector{elty}, randn(n)) + x1 = randn(elty, n) + x2 = randn(elty, n) @test BLAS.dot(x1,x2) ≈ sum(x1.*x2) @test_throws DimensionMismatch BLAS.dot(x1,rand(elty, n + 1)) else - z1 = convert(Vector{elty}, complex.(randn(n),randn(n))) - z2 = convert(Vector{elty}, complex.(randn(n),randn(n))) + z1 = randn(elty, n) + z2 = randn(elty, n) @test BLAS.dotc(z1,z2) ≈ sum(conj(z1).*z2) @test BLAS.dotu(z1,z2) ≈ sum(z1.*z2) @test_throws DimensionMismatch BLAS.dotc(z1,rand(elty, n + 1)) @@ -70,92 +77,60 @@ Random.seed!(100) end end @testset "iamax" begin - if elty <: Real - x = convert(Vector{elty}, randn(n)) - @test BLAS.iamax(x) == argmax(abs.(x)) - else - z = convert(Vector{elty}, complex.(randn(n),randn(n))) - @test BLAS.iamax(z) == argmax(map(x -> abs(real(x)) + abs(imag(x)), z)) - end + x = randn(elty, n) + @test BLAS.iamax(x) == findmax(fabs, x)[2] end @testset "rot!" begin - if elty <: Real - x = convert(Vector{elty}, randn(n)) - y = convert(Vector{elty}, randn(n)) - c = rand(elty) - s = rand(elty) + x = randn(elty, n) + y = randn(elty, n) + c = rand(real(elty)) + for sty in unique!([real(elty), elty]) + s = rand(sty) x2 = copy(x) y2 = copy(y) BLAS.rot!(n, x, 1, y, 1, c, s) @test x ≈ c*x2 + s*y2 - @test y ≈ -s*x2 + c*y2 - else - x = convert(Vector{elty}, complex.(randn(n),rand(n))) - y = convert(Vector{elty}, complex.(randn(n),rand(n))) - cty = (elty == ComplexF32) ? Float32 : Float64 - c = rand(cty) - for sty in [cty, elty] - s = rand(sty) - x2 = copy(x) - y2 = copy(y) - BLAS.rot!(n, x, 1, y, 1, c, s) - @test x ≈ c*x2 + s*y2 - @test y ≈ -conj(s)*x2 + c*y2 - end + @test y ≈ -conj(s)*x2 + c*y2 end end @testset "axp(b)y" begin - if elty <: Real - x1 = convert(Vector{elty}, randn(n)) - x2 = convert(Vector{elty}, randn(n)) - α = rand(elty) - β = rand(elty) - @test BLAS.axpy!(α,copy(x1),copy(x2)) ≈ α*x1 + x2 - @test BLAS.axpby!(α,copy(x1),β,copy(x2)) ≈ α*x1 + β*x2 - @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n) - @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1)) - @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1)) - @test BLAS.axpy!(α,copy(x1),1:n,copy(x2),1:n) ≈ x2 + α*x1 - else - z1 = convert(Vector{elty}, complex.(randn(n), randn(n))) - z2 = convert(Vector{elty}, complex.(randn(n), randn(n))) - α = rand(elty) - @test BLAS.axpy!(α, copy(z1), copy(z2)) ≈ z2 + α * z1 - @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), 1:div(n, 2), copy(z2), 1:(div(n, 2) + 1)) - @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 0:div(n,2), copy(z2), 1:(div(n, 2) + 1)) - @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 1:div(n,2), copy(z2), 0:(div(n, 2) - 1)) - @test BLAS.axpy!(α,copy(z1),1:n,copy(z2),1:n) ≈ z2 + α*z1 + x1 = randn(elty, n) + x2 = randn(elty, n) + α = rand(elty) + β = rand(elty) + for X1 in (x1, view(x1,n:-1:1)), X2 in (x2, view(x2, n:-1:1)) + @test BLAS.axpy!(α,deepcopy(X1),deepcopy(X2)) ≈ α*X1 + X2 + @test BLAS.axpby!(α,deepcopy(X1),β,deepcopy(X2)) ≈ α*X1 + β*X2 end + for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1) + @test BLAS.axpy!(α,copy(x1),ind1,copy(x2),ind2) ≈ x2 + α*(ind1 == ind2 ? x1 : reverse(x1)) + end + @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1)) + @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1)) + @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n) + @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1)) + @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1)) end @testset "nrm2, iamax, and asum for StridedVectors" begin a = rand(elty,n) - b = view(a,2:2:n,1) - @test BLAS.nrm2(b) ≈ norm(b) - if elty <: Real - @test BLAS.asum(b) ≈ sum(abs.(b)) - @test BLAS.iamax(b) ≈ argmax(abs.(b)) - else - @test BLAS.asum(b) ≈ sum(abs.(real(b))) + sum(abs.(imag(b))) - @test BLAS.iamax(b) == argmax(map(x -> abs(real(x)) + abs(imag(x)), b)) + for ind in (2:2:n, n:-2:2) + b = view(a, ind, 1) + @test BLAS.nrm2(b) ≈ sqrt(sum(abs2, b)) + @test BLAS.asum(b) ≈ sum(fabs, b) + @test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0) end end - # scal - α = rand(elty) - a = rand(elty,n) - @test BLAS.scal(n,α,a,1) ≈ α * a - - @testset "trsv" begin - A = triu(rand(elty,n,n)) - @testset "Vector and SubVector" for x in (rand(elty, n), view(rand(elty,2n),1:2:2n)) - @test A\x ≈ BLAS.trsv('U','N','N',A,x) - @test_throws DimensionMismatch BLAS.trsv('U','N','N',A,Vector{elty}(undef,n+1)) + @testset "scal" begin + α = rand(elty) + a = rand(elty,n) + @test BLAS.scal(n,α,a,1) ≈ α * a + for v in (a, view(a, n:-1:1)) + @test BLAS.scal!(α, deepcopy(v)) ≈ α * v end end - @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n)), - y in (rand(elty,n), view(rand(elty,3n), 1:3:3n)) + + @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)), + y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2)) A = rand(elty,n,n) α = rand(elty) @@ -178,32 +153,66 @@ Random.seed!(100) end end @testset "copy" begin - x1 = convert(Vector{elty}, randn(n)) - x2 = convert(Vector{elty}, randn(n)) - BLAS.copyto!(x2, 1:n, x1, 1:n) - @test x2 == x1 + x1 = randn(elty, n) + x2 = randn(elty, n) + for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1) + @test x2 === BLAS.copyto!(x2, ind1, x1, ind2) == (ind1 == ind2 ? x1 : reverse(x1)) + end @test_throws DimensionMismatch BLAS.copyto!(x2, 1:n, x1, 1:(n - 1)) @test_throws ArgumentError BLAS.copyto!(x1, 0:div(n, 2), x2, 1:(div(n, 2) + 1)) @test_throws ArgumentError BLAS.copyto!(x1, 1:(div(n, 2) + 1), x2, 0:div(n, 2)) end - # trmv - A = triu(rand(elty,n,n)) - x = rand(elty,n) - @test BLAS.trmv('U','N','N',A,x) ≈ A*x + @testset "trmv and trsv" begin + A = rand(elty,n,n) + x = rand(elty,n) + xerr = Vector{elty}(undef,n+1) + for uplo in ('U', 'L'), diag in ('U','N'), trans in ('N', 'T', 'C') + Wrapper = if uplo == 'U' + diag == 'U' ? UnitUpperTriangular : UpperTriangular + else + diag == 'U' ? UnitLowerTriangular : LowerTriangular + end + fun = trans == 'N' ? identity : trans == 'T' ? transpose : adjoint + fullA = collect(fun(Wrapper(A))) + @testset "trmv" begin + @test BLAS.trmv(uplo,trans,diag,A,x) ≈ fullA * x + @test_throws DimensionMismatch BLAS.trmv(uplo,trans,diag,A,xerr) + for xx in (x, view(x, n:-1:1)) + @test BLAS.trmv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA * xx + end + end + @testset "trsv" begin + @test BLAS.trsv(uplo,trans,diag,A,x) ≈ fullA \ x + @test_throws DimensionMismatch BLAS.trsv(uplo,trans,diag,A,xerr) + for xx in (x, view(x, n:-1:1)) + @test BLAS.trsv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA \ xx + end + end + end + end @testset "symmetric/Hermitian multiplication" begin x = rand(elty,n) A = rand(elty,n,n) + y = rand(elty, n) + α = randn(elty) + β = randn(elty) Aherm = A + A' Asymm = A + transpose(A) - @testset "symv and hemv" begin - @test BLAS.symv('U',Asymm,x) ≈ Asymm*x - offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1))) - @test_throws DimensionMismatch BLAS.symv!('U',one(elty),Asymm,x,one(elty),offsizevec) - @test_throws DimensionMismatch BLAS.symv('U',offsizemat,x) + offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1))) + @testset "symv and hemv" for uplo in ('U', 'L') + @test BLAS.symv(uplo,Asymm,x) ≈ Asymm*x + for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) + @test BLAS.symv!(uplo,α,Asymm,xx,β,deepcopy(yy)) ≈ α * Asymm * xx + β * yy + end + @test_throws DimensionMismatch BLAS.symv!(uplo,α,Asymm,x,β,offsizevec) + @test_throws DimensionMismatch BLAS.symv(uplo,offsizemat,x) if elty <: BlasComplex - @test BLAS.hemv('U',Aherm,x) ≈ Aherm*x - @test_throws DimensionMismatch BLAS.hemv('U',offsizemat,x) - @test_throws DimensionMismatch BLAS.hemv!('U',one(elty),Aherm,x,one(elty),offsizevec) + @test BLAS.hemv(uplo,Aherm,x) ≈ Aherm*x + for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) + @test BLAS.hemv!(uplo,α,Aherm,xx,β,deepcopy(yy)) ≈ α * Aherm * xx + β * yy + end + @test_throws DimensionMismatch BLAS.hemv(uplo,offsizemat,x) + @test_throws DimensionMismatch BLAS.hemv!(uplo,one(elty),Aherm,x,one(elty),offsizevec) end end @@ -233,40 +242,24 @@ Random.seed!(100) # Both matrix dimensions n coincide, as we have Hermitian matrices. # Define the inputs and outputs of hpmv!, y = α*A*x+β*y α = rand(elty) - M = rand(elty, n, n) - AL = Hermitian(M, :L) - AU = Hermitian(M, :U) + A = rand(elty, n, n) x = rand(elty, n) β = rand(elty) y = rand(elty, n) - - y_result_julia_lower = α*AL*x + β*y - - # Create lower triangular packing of AL - AP = typeof(AL[1,1])[] - for j in 1:n - for i in j:n - push!(AP, AL[i,j]) + for uplo in (:L, :U) + Cuplo = String(uplo)[1] + AH = Hermitian(A, uplo) + # Create lower/upper triangular packing of AL + AP = pack(AH, uplo) + for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1)) + @test BLAS.hpmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AH*xx + β*yy end + AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1)) + @test_throws ErrorException BLAS.hpmv!(Cuplo, α, AP′, x, β, y) + AP′ = view(AP, 1:length(AP′) - 1) + @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, y) + @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, view(y,1:n-1)) end - - y_result_blas_lower = copy(y) - BLAS.hpmv!('L', α, AP, x, β, y_result_blas_lower) - @test y_result_julia_lower ≈ y_result_blas_lower - - y_result_julia_upper = α*AU*x + β*y - - # Create upper triangular packing of AU - AP = typeof(AU[1,1])[] - for j in 1:n - for i in 1:j - push!(AP, AU[i,j]) - end - end - - y_result_blas_upper = copy(y) - BLAS.hpmv!('U', α, AP, x, β, y_result_blas_upper) - @test y_result_julia_upper ≈ y_result_blas_upper end end @@ -276,41 +269,24 @@ Random.seed!(100) # Both matrix dimensions n coincide, as we have symmetric matrices. # Define the inputs and outputs of spmv!, y = α*A*x+β*y α = rand(elty) - M = rand(elty, n, n) - AL = Symmetric(M, :L) - AU = Symmetric(M, :U) + A = rand(elty, n, n) x = rand(elty, n) β = rand(elty) y = rand(elty, n) - - y_result_julia_lower = α*AL*x + β*y - - # Create lower triangular packing of AL - AP = typeof(M[1,1])[] - for j in 1:n - for i in j:n - push!(AP, AL[i,j]) - end - end - - y_result_blas_lower = copy(y) - BLAS.spmv!('L', α, AP, x, β, y_result_blas_lower) - @test y_result_julia_lower ≈ y_result_blas_lower - - - y_result_julia_upper = α*AU*x + β*y - - # Create upper triangular packing of AU - AP = typeof(M[1,1])[] - for j in 1:n - for i in 1:j - push!(AP, AU[i,j]) + for uplo in (:L, :U) + Cuplo = String(uplo)[1] + AS = Symmetric(A, uplo) + # Create lower/upper triangular packing of AL + AP = pack(AS, uplo) + for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1)) + @test BLAS.spmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AS*xx + β*yy end + AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1)) + @test_throws ErrorException BLAS.spmv!(Cuplo, α, AP′, x, β, y) + AP′ = view(AP, 1:length(AP′) - 1) + @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, y) + @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, view(y,1:n-1)) end - - y_result_blas_upper = copy(y) - BLAS.spmv!('U', α, AP, x, β, y_result_blas_upper) - @test y_result_julia_upper ≈ y_result_blas_upper end end @@ -321,39 +297,29 @@ Random.seed!(100) M = rand(elty, n, n) AL = Symmetric(M, :L) AU = Symmetric(M, :U) - x = rand(elty, n) - - function pack(A, uplo) - AP = elty[] - for j in 1:n - for i in (uplo==:L ? (j:n) : (1:j)) - push!(AP, A[i,j]) - end - end - return AP + for x in (rand(elty, n), view(rand(elty, n), n:-1:1)) + ALP_result_julia_lower = pack(α*x*x' + AL, :L) + ALP_result_blas_lower = pack(AL, :L) + BLAS.spr!('L', α, x, ALP_result_blas_lower) + @test ALP_result_julia_lower ≈ ALP_result_blas_lower + ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10)) + BLAS.spr!('L', α, x, ALP_result_blas_lower) + @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10] + ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1) + BLAS.spr!('L', α, x, ALP_result_blas_lower) + @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower) + + AUP_result_julia_upper = pack(α*x*x' + AU, :U) + AUP_result_blas_upper = pack(AU, :U) + BLAS.spr!('U', α, x, AUP_result_blas_upper) + @test AUP_result_julia_upper ≈ AUP_result_blas_upper + AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10)) + BLAS.spr!('U', α, x, AUP_result_blas_upper) + @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10] + AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1) + BLAS.spr!('U', α, x, AUP_result_blas_upper) + @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper) end - - ALP_result_julia_lower = pack(α*x*x' + AL, :L) - ALP_result_blas_lower = pack(AL, :L) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ ALP_result_blas_lower - ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10)) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10] - ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower) - - AUP_result_julia_upper = pack(α*x*x' + AU, :U) - AUP_result_blas_upper = pack(AU, :U) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ AUP_result_blas_upper - AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10)) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10] - AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper) end end @@ -365,33 +331,51 @@ Random.seed!(100) #will work for SymTridiagonal,Tridiagonal,Bidiagonal! @testset "banded matrix mv" begin @testset "gbmv" begin - TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1)) - x = rand(elty,n) + TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1)) + x = rand(elty, n) #put TD into the BLAS format! fTD = zeros(elty,3,n) fTD[1,2:n] = TD.du fTD[2,:] = TD.d fTD[3,1:n-1] = TD.dl @test BLAS.gbmv('N',n,1,1,fTD,x) ≈ TD*x + y = rand(elty, n) + α = randn(elty) + β = randn(elty) + for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) + @test BLAS.gbmv!('N',n,1,1,α,fTD,xx,β,deepcopy(yy)) ≈ α * TD * xx + β * yy + end end #will work for SymTridiagonal only! - @testset "sbmv" begin + @testset "sbmv and hbmv" begin + x = rand(elty,n) if elty <: BlasReal ST = SymTridiagonal(rand(elty,n),rand(elty,n-1)) - x = rand(elty,n) #put TD into the BLAS format! fST = zeros(elty,2,n) fST[1,2:n] = ST.ev fST[2,:] = ST.dv @test BLAS.sbmv('U',1,fST,x) ≈ ST*x + y = rand(elty, n) + α = randn(elty) + β = randn(elty) + for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) + @test BLAS.sbmv!('U',1,α,fST,xx,β,deepcopy(yy)) ≈ α * ST * xx + β * yy + end else - dv = real(rand(elty,n)) + dv = rand(real(elty),n) ev = rand(elty,n-1) bH = zeros(elty,2,n) bH[1,2:n] = ev bH[2,:] = dv fullH = diagm(0 => dv, -1 => conj(ev), 1 => ev) @test BLAS.hbmv('U',1,bH,x) ≈ fullH*x + y = rand(elty, n) + α = randn(elty) + β = randn(elty) + for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) + @test BLAS.hbmv!('U',1,α,bH,xx,β,deepcopy(yy)) ≈ α * fullH * xx + β * yy + end end end end @@ -595,8 +579,8 @@ end @test BLAS.iamax(x) == 2 M = fill(elty(1.0), 3, 3) - BLAS.scal!(elty(2), view(M,:,2)) - BLAS.scal!(elty(3), view(M,3,:)) + @test BLAS.scal!(elty(2), view(M,:,2)) === view(M,:,2) + @test BLAS.scal!(elty(3), view(M,3,:)) === view(M,3,:) @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.] # Level 2 A = WrappedArray(elty[1 2; 3 4]) @@ -688,4 +672,36 @@ end @test LinearAlgebra.BLAS.libblas == "libblastrampoline" @test LinearAlgebra.BLAS.liblapack == "libblastrampoline" +@testset "test for 0-strides" for elty in (Float32, Float64, ComplexF32, ComplexF64) + A = randn(elty, 10, 10); + a = view([randn(elty)], 1 .+ 0(1:10)) + b = view([randn(elty)], 1 .+ 0(1:10)) + α, β = randn(elty), randn(elty) + @testset "dot/dotc/dotu" begin + if elty <: Real + @test BLAS.dot(a,b) ≈ sum(a.*b) + else + @test BLAS.dotc(a,b) ≈ sum(conj(a).*b) + @test BLAS.dotu(a,b) ≈ sum(a.*b) + end + end + @testset "axp(b)y!" begin + @test BLAS.axpy!(α,a,copy(b)) ≈ α*a + b + @test BLAS.axpby!(α,a,β,copy(b)) ≈ α*a + β*b + @test_throws "dest" BLAS.axpy!(α,a,b) + @test_throws "dest" BLAS.axpby!(α,a,β,b) + end + @test BLAS.iamax(a) == 0 + @test_throws "dest" BLAS.scal!(b[1], a) + @testset "nrm2/asum" begin # OpenBLAS allways return 0.0 + @test_throws "input" BLAS.nrm2(a) + @test_throws "input" BLAS.asum(a) + end + # All level2 reject 0-stride array. + @testset "gemv!" begin + @test_throws "input" BLAS.gemv!('N', true, A, a, false, copy(b)) + @test_throws "dest" BLAS.gemv!('N', true, A, copy(a), false, b) + end +end + end # module TestBLAS diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index 1c482f8cae97a..ea73814a2848b 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -297,6 +297,15 @@ end end end +@testset "matrix x vector with negative lda or 0 stride" for T in (Float32, Float64) + for TA in (T, complex(T)), TB in (T, complex(T)) + A = view(randn(TA, 10, 10), 1:10, 10:-1:1) # negative lda + v = view([randn(TB)], 1 .+ 0(1:10)) # 0 stride + Ad, vd = copy(A), copy(v) + @test Ad * vd ≈ A * vd ≈ Ad * v ≈ A * v + end +end + @testset "issue #15286" begin A = reshape(map(Float64, 1:20), 5, 4) C = zeros(8, 8)