From 6fb5dfabf5d9a2de9055983b8d9304c4c0dcddb6 Mon Sep 17 00:00:00 2001 From: Sacha Verweij Date: Tue, 7 Mar 2017 12:12:15 -0800 Subject: [PATCH 1/2] Fix shape corner cases in zero-preserving sparse broadcast!(f, C, A, B). Specifically, fix zero-preserving sparse broadcast!(f, C, A, B) where C's shape does not match the broadcast shape of A and B. Strengthen associated tests. --- base/sparse/higherorderfns.jl | 40 ++++++++++++++++++++++++----------- test/sparse/higherorderfns.jl | 11 +++++----- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/base/sparse/higherorderfns.jl b/base/sparse/higherorderfns.jl index 1c73c3c957e77..c86602a219bef 100644 --- a/base/sparse/higherorderfns.jl +++ b/base/sparse/higherorderfns.jl @@ -515,16 +515,15 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) rowsentinelA = convert(indtype(A), numrows(C) + 1) rowsentinelB = convert(indtype(B), numrows(C) + 1) - # A and B cannot have the same shape, as we directed that case to map in broadcast's - # entry point; here we need efficiently handle only heterogeneous combinations of matrices - # with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns" - # and "rows"), and two singleton dimensions ("scalars"). Cases involving scalars should - # be rare and optimizing that case complicates the code appreciably, so we largely - # ignore that case's performance below. + # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's + # entry point; here we need efficiently handle only heterogeneous combinations of mats/vecs + # with no singleton dimensions, one singleton dimension, and two singleton dimensions. + # Cases involving objects with two singleton dimensions should be rare and optimizing + # that case complicates the code appreciably, so we largely ignore that case's + # performance below. # - # We first divide the cases into two groups: those in which neither argument expands - # vertically (matrix-column combinations) and those in which an argument expands - # vertically (matrix-row and column-row combinations). + # We first divide the cases into two groups: those in which neither input argument + # expands vertically, and those in which at least one argument expands vertically. # # NOTE: Placing the loops over columns outside the conditional chain segregating # argument shape combinations eliminates some code replication but unfortunately @@ -532,7 +531,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B # # Cases without vertical expansion Ck = 1 - if numrows(A) == numrows(B) + if numrows(A) == numrows(B) == numrows(C) @inbounds for j in columns(C) setcolptr!(C, j, Ck) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) @@ -575,7 +574,24 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B end end # Cases with vertical expansion - elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument + elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A)) + Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B)) + Cx = f(Ax, Bx) + if !_iszero(Cx) + for Ci::indtype(C) in 1:numrows(C) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1 , vertically expand only A @inbounds for j in columns(C) setcolptr!(C, j, Ck) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) @@ -616,7 +632,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B end end end - elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument + else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B @inbounds for j in columns(C) setcolptr!(C, j, Ck) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) diff --git a/test/sparse/higherorderfns.jl b/test/sparse/higherorderfns.jl index 79a382699ffe1..61f8c3b6ce3c2 100644 --- a/test/sparse/higherorderfns.jl +++ b/test/sparse/higherorderfns.jl @@ -164,6 +164,7 @@ end mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1)) vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1)) tens = (mats..., vecs...) + fZ = Array(first(mats)) for Xo in tens X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo) # use different types to check internal type stability via allocation tests below @@ -182,20 +183,20 @@ end @test_throws DimensionMismatch broadcast(+, spzeros((shapeX .- 1)...), Y) end # --> test broadcast! entry point / +-like zero-preserving op - fZ = broadcast(+, fX, fY); Z = sparse(fZ) + broadcast!(+, fZ, fX, fY); Z = sparse(fZ) broadcast!(+, Z, X, Y); Z = sparse(fZ) # warmup for @allocated @test (@allocated broadcast!(+, Z, X, Y)) == 0 @test broadcast!(+, Z, X, Y) == sparse(broadcast!(+, fZ, fX, fY)) # --> test broadcast! entry point / *-like zero-preserving op - fZ = broadcast(*, fX, fY); Z = sparse(fZ) + broadcast!(*, fZ, fX, fY); Z = sparse(fZ) broadcast!(*, Z, X, Y); Z = sparse(fZ) # warmup for @allocated @test (@allocated broadcast!(*, Z, X, Y)) == 0 @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY)) # --> test broadcast! entry point / not zero-preserving op - fZ = broadcast(f, fX, fY); Z = sparse(fZ) - broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated + fZo = broadcast(f, fX, fY); Z = sparse(fZo) + broadcast!(f, Z, X, Y); Z = sparse(fZo) # warmup for @allocated @test (@allocated broadcast!(f, Z, X, Y)) == 0 - @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY)) + @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZo, fX, fY)) # --> test shape checks for both broadcast and broadcast! entry points # TODO strengthen this test, avoiding dependence on checking whether # broadcast_indices throws to determine whether sparse broadcast should throw From cfbca08aa59ab134eea2f7c7f9544843538c4aa0 Mon Sep 17 00:00:00 2001 From: Sacha Verweij Date: Tue, 7 Mar 2017 13:14:16 -0800 Subject: [PATCH 2/2] Fix shape corner cases in not-zero-preserving sparse broadcast!(f, C, A, B). Specifically, fix not-zero-preserving sparse broadcast!(f, C, A, B) where C's shape does not match the broadcast shape of A and B. Strengthen associated tests. --- base/sparse/higherorderfns.jl | 19 ++++++++++++++++--- test/sparse/higherorderfns.jl | 6 +++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/base/sparse/higherorderfns.jl b/base/sparse/higherorderfns.jl index c86602a219bef..6ef9e4270063d 100644 --- a/base/sparse/higherorderfns.jl +++ b/base/sparse/higherorderfns.jl @@ -687,7 +687,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa rowsentinelA = convert(indtype(A), numrows(C) + 1) rowsentinelB = convert(indtype(B), numrows(C) + 1) # Cases without vertical expansion - if numrows(A) == numrows(B) + if numrows(A) == numrows(B) == numrows(C) @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) @@ -711,7 +711,20 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa end end # Cases with vertical expansion - elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument + elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A)) + Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B)) + Cx = f(Ax, Bx) + if Cx != fillvalue + for Ck::Int in (jo + 1):(jo + numrows(C)) + storedvals(C)[Ck] = Cx + end + end + end + elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1, vertically expand only A @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) @@ -736,7 +749,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa end end end - elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument + else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) diff --git a/test/sparse/higherorderfns.jl b/test/sparse/higherorderfns.jl index 61f8c3b6ce3c2..492a477ee5b38 100644 --- a/test/sparse/higherorderfns.jl +++ b/test/sparse/higherorderfns.jl @@ -193,10 +193,10 @@ end @test (@allocated broadcast!(*, Z, X, Y)) == 0 @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY)) # --> test broadcast! entry point / not zero-preserving op - fZo = broadcast(f, fX, fY); Z = sparse(fZo) - broadcast!(f, Z, X, Y); Z = sparse(fZo) # warmup for @allocated + broadcast!(f, fZ, fX, fY); Z = sparse(fZ) + broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated @test (@allocated broadcast!(f, Z, X, Y)) == 0 - @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZo, fX, fY)) + @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY)) # --> test shape checks for both broadcast and broadcast! entry points # TODO strengthen this test, avoiding dependence on checking whether # broadcast_indices throws to determine whether sparse broadcast should throw