Skip to content

Commit

Permalink
Less restrictive copyto! signature for triangular matrices (#54649)
Browse files Browse the repository at this point in the history
On nightly, `copyto!(A::T, B::T) where {T<:UpperTriangular}` checks for
the types to be identical. This is overly restrictive, as we only need
to check that they are both `UpperTriangular`. This PR relaxes this,
which provides a significant performance boost for mismatched types.
  
```julia
julia> using LinearAlgebra

julia> A = UpperTriangular(rand(200,200)); B = UpperTriangular(view(rand(200,200),:,:));

julia> @Btime copyto!($A, $B);
  44.878 μs (0 allocations: 0 bytes) # nightly v"1.12.0-DEV.641" 
  5.658 μs (0 allocations: 0 bytes) # this PR
```

This PR also changes the behavior when the source and the destination
don't have the same size, in which case, `copyto!` should carry out a
linear copy and not a Cartesian one, as per its docstring. The previous
behavior may be obtained by calling
```julia
copyto!(A, CartesianIndices(B), B, CartesianIndices(B))
```
This change would mean that certain operations that used to work would
error now, e.g.:
```julia
julia> A = UpperTriangular(zeros(3,3)); B = UpperTriangular(rand(2,2));

julia> copyto!(A, B)
ERROR: ArgumentError: cannot set index in the upper triangular part (3, 1) of an UpperTriangular matrix to a nonzero value (0.6898709830945821)
```
whereas this used to carry out a Cartesian copy previously.

---------

Co-authored-by: Dilum Aluthge <[email protected]>
  • Loading branch information
jishnub and DilumAluthge authored Jun 4, 2024
1 parent 5034e87 commit b8e714d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 16 deletions.
47 changes: 32 additions & 15 deletions stdlib/LinearAlgebra/src/triangular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,11 @@ Base.isstored(A::UpperTriangular, i::Int, j::Int) =
@propagate_inbounds getindex(A::UpperTriangular, i::Int, j::Int) =
i <= j ? A.data[i,j] : _zero(A.data,j,i)

_zero_triangular_half_str(::Type{<:UpperOrUnitUpperTriangular}) = "lower"
_zero_triangular_half_str(::Type{<:LowerOrUnitLowerTriangular}) = "upper"

@noinline function throw_nonzeroerror(T, @nospecialize(x), i, j)
_upper_lower_str(::Type{<:UpperOrUnitUpperTriangular}) = "upper"
_upper_lower_str(::Type{<:LowerOrUnitLowerTriangular}) = "lower"
Ts = _upper_lower_str(T)
Ts = _zero_triangular_half_str(T)
Tn = nameof(T)
throw(ArgumentError(
lazy"cannot set index in the $Ts triangular part ($i, $j) of an $Tn matrix to a nonzero value ($x)"))
Expand Down Expand Up @@ -301,9 +302,7 @@ end
end

@noinline function throw_setindex_structuralzero_error(T, @nospecialize(x))
_struct_zero_half_str(::Type{<:UpperTriangular}) = "lower"
_struct_zero_half_str(::Type{<:LowerTriangular}) = "upper"
Ts = _struct_zero_half_str(T)
Ts = _zero_triangular_half_str(T)
Tn = nameof(T)
throw(ArgumentError(
lazy"cannot set indices in the $Ts triangular part of an $Tn matrix to a nonzero value ($x)"))
Expand Down Expand Up @@ -510,24 +509,40 @@ tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A))
tr(A::UpperTriangular) = tr(A.data)
tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))

@propagate_inbounds function copyto!(dest::UpperOrLowerTriangular, U::UpperOrLowerTriangular)
if axes(dest) != axes(U)
@invoke copyto!(dest::AbstractArray, U::AbstractArray)
else
_copyto!(dest, U)
end
return dest
end

# copy and scale
function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
for T in (:UpperTriangular, :LowerTriangular)
@eval @inline function _copyto!(A::$T, B::$T)
@boundscheck checkbounds(A, axes(B)...)
copytrito!(parent(A), parent(B), uplo_char(A))
return A
end
end
@inline function _copyto!(A::UnitUpperTriangular, B::UnitUpperTriangular)
@boundscheck checkbounds(A, axes(B)...)
n = size(B,1)
B2 = Base.unalias(A, B)
for j = 1:n
for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
for i = 1:j-1
@inbounds A[i,j] = B2[i,j]
end
end
return A
end
function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
@inline function _copyto!(A::UnitLowerTriangular, B::UnitLowerTriangular)
@boundscheck checkbounds(A, axes(B)...)
n = size(B,1)
B2 = Base.unalias(A, B)
for j = 1:n
for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
for i = j+1:n
@inbounds A[i,j] = B2[i,j]
end
end
Expand All @@ -537,27 +552,28 @@ end
_triangularize!(::UpperOrUnitUpperTriangular) = triu!
_triangularize!(::LowerOrUnitLowerTriangular) = tril!

function copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
@propagate_inbounds function copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
if axes(dest) != axes(U)
@invoke copyto!(dest::StridedMatrix, U::AbstractArray)
else
_copyto!(dest, U)
end
return dest
end
function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
copytrito!(dest, parent(U), U isa UpperOrUnitUpperTriangular ? 'U' : 'L')
copytrito!(dest, U, U isa UpperOrUnitUpperTriangular ? 'L' : 'U')
return dest
end
function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular{<:Any, <:StridedMatrix})
@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular{<:Any, <:StridedMatrix})
U2 = Base.unalias(dest, U)
copyto_unaliased!(dest, U2)
return dest
end
# for strided matrices, we explicitly loop over the arrays to improve cache locality
# This fuses the copytrito! for the two halves
function copyto_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix})
@inline function copyto_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix})
@boundscheck checkbounds(dest, axes(U)...)
isunit = U isa UnitUpperTriangular
for col in axes(dest,2)
for row in 1:col-isunit
Expand All @@ -569,7 +585,8 @@ function copyto_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:
end
return dest
end
function copyto_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix})
@inline function copyto_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix})
@boundscheck checkbounds(dest, axes(L)...)
isunit = L isa UnitLowerTriangular
for col in axes(dest,2)
for row in 1:col-!isunit
Expand Down
21 changes: 20 additions & 1 deletion stdlib/LinearAlgebra/test/triangular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1083,14 +1083,33 @@ end

@testset "copyto! with aliasing (#39460)" begin
M = Matrix(reshape(1:36, 6, 6))
@testset for T in (UpperTriangular, LowerTriangular)
@testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
A = T(view(M, 1:5, 1:5))
A2 = copy(A)
B = T(view(M, 2:6, 2:6))
@test copyto!(B, A) == A2
end
end

@testset "copyto! with different sizes" begin
Ap = zeros(3,3)
Bp = rand(2,2)
@testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
A = T(Ap)
B = T(Bp)
@test_throws ArgumentError copyto!(A, B)
end
@testset "error message" begin
A = UpperTriangular(Ap)
B = UpperTriangular(Bp)
@test_throws "cannot set index in the lower triangular part" copyto!(A, B)

A = LowerTriangular(Ap)
B = LowerTriangular(Bp)
@test_throws "cannot set index in the upper triangular part" copyto!(A, B)
end
end

@testset "getindex with Integers" begin
M = reshape(1:4,2,2)
for Ttype in (UpperTriangular, UnitUpperTriangular)
Expand Down

7 comments on commit b8e714d

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The package evaluation job you requested has completed - possible new issues were detected.
The full report is available.

@vtjnash
Copy link
Member

@vtjnash vtjnash commented on b8e714d Jun 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your job failed.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like there was some process stuff left behind by that last OOM kill, so I just rebooted the machines to clear away any accumulating junk

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like all basic complex arithmetic is 3x slower, which is odd

Please sign in to comment.