From 2afab2d42dde85929054bf91bf4daf53dc8d458d Mon Sep 17 00:00:00 2001 From: Martin Holters Date: Fri, 16 Dec 2016 08:42:28 +0100 Subject: [PATCH] Remove explicit dependence of sparse broadcast on type inference Instead of determining the output element type beforehand by querying inference, the element type is deduced from the actually computed output values (similar to broadcast over Array, but taking into account the output for the all-inputs-zero case). For the type-unstable case, performance is sub-optimal, but at least it gives the correct result. Closes #19595. --- base/sparse/sparsematrix.jl | 30 ++++++++++++++++++++---------- test/sparse/sparse.jl | 13 +++++++++++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index 31dda9535f436..bd200aa724b46 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -1413,7 +1413,7 @@ function map{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = typeof(fofzeros) indextypeC = _promote_indtype(A, Bs...) Ccolptr = Vector{indextypeC}(A.n + 1) Crowval = Vector{indextypeC}(maxnnzC) @@ -1438,7 +1438,7 @@ function broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) indextypeC = _promote_indtype(A, Bs...) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = typeof(fofzeros) Cm, Cn = Base.to_shape(Base.Broadcast.broadcast_indices(A, Bs...)) maxnnzC = fpreszeros ? _checked_maxnnzbcres(Cm, Cn, A, Bs...) : (Cm * Cn) Ccolptr = Vector{indextypeC}(Cn + 1) @@ -1464,13 +1464,19 @@ _maxnnzfrom(Cm, Cn, A) = nnz(A) * div(Cm, A.m) * div(Cn, A.n) @inline _maxnnzfrom_each(Cm, Cn, As) = (_maxnnzfrom(Cm, Cn, first(As)), _maxnnzfrom_each(Cm, Cn, tail(As))...) @inline _unchecked_maxnnzbcres(Cm, Cn, As) = min(Cm * Cn, sum(_maxnnzfrom_each(Cm, Cn, As))) @inline _checked_maxnnzbcres(Cm, Cn, As...) = Cm != 0 && Cn != 0 ? _unchecked_maxnnzbcres(Cm, Cn, As) : 0 -_broadcast_type(f, As...) = Base._promote_op(f, Base.Broadcast.typestuple(As...)) +@inline _update_nzval!{T}(nzval::Vector{T}, k, x::T) = (nzval[k] = x; nzval) +@inline function _update_nzval!{T,Tx}(nzval::Vector{T}, k, x::Tx) + nzval = convert(Vector{typejoin(Tx, T)}, nzval) + nzval[k] = x + return nzval +end # _map_zeropres!/_map_notzeropres! specialized for a single sparse matrix "Stores only the nonzero entries of `map(f, Matrix(A))` in `C`." function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) spaceC = min(length(C.rowval), length(C.nzval)) Ck = 1 + nzval = C.nzval @inbounds for j in 1:C.n C.colptr[j] = Ck for Ak in nzrange(A, j) @@ -1478,14 +1484,14 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, Ck + nnz(A) - (Ak - 1))) C.rowval[Ck] = A.rowval[Ak] - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end end @inbounds C.colptr[C.n + 1] = Ck _trimstorage!(C, Ck - 1) - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end """ Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and @@ -1496,13 +1502,14 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa _densestructure!(C) # Populate values fill!(C.nzval, fillvalue) + nzval = C.nzval @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)), Ak in nzrange(A, j) Cx = f(A.nzval[Ak]) - Cx != fillvalue && (C.nzval[jo + A.rowval[Ak]] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + A.rowval[Ak], Cx)) end # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over / # nonsequential access of C.nzval does not appear to improve performance. - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # helper functions for these methods and some of those below function _expandstorage!(X::SparseMatrixCSC, maxstored) @@ -1533,6 +1540,7 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp spaceC = min(length(C.rowval), length(C.nzval)) rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) + nzval = C.nzval Ck = 1 @inbounds for j in 1:C.n C.colptr[j] = Ck @@ -1562,12 +1570,13 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1)))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -1578,6 +1587,7 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa fill!(C.nzval, fillvalue) # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over / # nonsequential access of C.nzval does not appear to improve performance. + nzval = C.nzval rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) @@ -1598,10 +1608,10 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse matrices function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index e9b1e3fbcaca2..d1a33fbf8f217 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -1828,3 +1828,16 @@ let @test_throws DimensionMismatch broadcast(+, A, B, speye(N)) @test_throws DimensionMismatch broadcast!(+, X, A, B, speye(N)) end + +# Issue #19595 - broadcasting over sparse matrices with abstract eltype +let x = sparse(eye(Real,3,3)) + @test eltype(x) === Real + @test eltype(x + x) <: Real + @test eltype(x .+ x) <: Real + @test eltype(map(+, x, x)) <: Real + @test eltype(broadcast(+, x, x)) <: Real + @test eltype(x + x + x) <: Real + @test eltype(x .+ x .+ x) <: Real + @test eltype(map(+, map(+, x, x), x)) <: Real + @test eltype(broadcast(+, broadcast(+, x, x), x)) <: Real +end