From 6fb5dfabf5d9a2de9055983b8d9304c4c0dcddb6 Mon Sep 17 00:00:00 2001
From: Sacha Verweij <sacha@stanford.edu>
Date: Tue, 7 Mar 2017 12:12:15 -0800
Subject: [PATCH 1/2] Fix shape corner cases in zero-preserving sparse
 broadcast!(f, C, A, B).

Specifically, fix zero-preserving sparse broadcast!(f, C, A, B) where C's shape does not match the broadcast shape of A and B.
Strengthen associated tests.
---
 base/sparse/higherorderfns.jl | 40 ++++++++++++++++++++++++-----------
 test/sparse/higherorderfns.jl | 11 +++++-----
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/base/sparse/higherorderfns.jl b/base/sparse/higherorderfns.jl
index 1c73c3c957e77..c86602a219bef 100644
--- a/base/sparse/higherorderfns.jl
+++ b/base/sparse/higherorderfns.jl
@@ -515,16 +515,15 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
     spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
-    # A and B cannot have the same shape, as we directed that case to map in broadcast's
-    # entry point; here we need efficiently handle only heterogeneous combinations of matrices
-    # with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns"
-    # and "rows"), and two singleton dimensions ("scalars"). Cases involving scalars should
-    # be rare and optimizing that case complicates the code appreciably, so we largely
-    # ignore that case's performance below.
+    # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
+    # entry point; here we need efficiently handle only heterogeneous combinations of mats/vecs
+    # with no singleton dimensions, one singleton dimension, and two singleton dimensions.
+    # Cases involving objects with two singleton dimensions should be rare and optimizing
+    # that case complicates the code appreciably, so we largely ignore that case's
+    # performance below.
     #
-    # We first divide the cases into two groups: those in which neither argument expands
-    # vertically (matrix-column combinations) and those in which an argument expands
-    # vertically (matrix-row and column-row combinations).
+    # We first divide the cases into two groups: those in which neither input argument
+    # expands vertically, and those in which at least one argument expands vertically.
     #
     # NOTE: Placing the loops over columns outside the conditional chain segregating
     # argument shape combinations eliminates some code replication but unfortunately
@@ -532,7 +531,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
     #
     # Cases without vertical expansion
     Ck = 1
-    if numrows(A) == numrows(B)
+    if numrows(A) == numrows(B) == numrows(C)
         @inbounds for j in columns(C)
             setcolptr!(C, j, Ck)
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
@@ -575,7 +574,24 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
             end
         end
     # Cases with vertical expansion
-    elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument
+    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
+        @inbounds for j in columns(C)
+            setcolptr!(C, j, Ck)
+            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
+            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
+            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
+            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
+            Cx = f(Ax, Bx)
+            if !_iszero(Cx)
+                for Ci::indtype(C) in 1:numrows(C)
+                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
+                    storedinds(C)[Ck] = Ci
+                    storedvals(C)[Ck] = Cx
+                    Ck += 1
+                end
+            end
+        end
+    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1 , vertically expand only A
         @inbounds for j in columns(C)
             setcolptr!(C, j, Ck)
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
@@ -616,7 +632,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
                 end
             end
         end
-    elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument
+    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
         @inbounds for j in columns(C)
             setcolptr!(C, j, Ck)
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
diff --git a/test/sparse/higherorderfns.jl b/test/sparse/higherorderfns.jl
index 79a382699ffe1..61f8c3b6ce3c2 100644
--- a/test/sparse/higherorderfns.jl
+++ b/test/sparse/higherorderfns.jl
@@ -164,6 +164,7 @@ end
     mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1))
     vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1))
     tens = (mats..., vecs...)
+    fZ = Array(first(mats))
     for Xo in tens
         X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo)
         # use different types to check internal type stability via allocation tests below
@@ -182,20 +183,20 @@ end
                 @test_throws DimensionMismatch broadcast(+, spzeros((shapeX .- 1)...), Y)
             end
             # --> test broadcast! entry point / +-like zero-preserving op
-            fZ = broadcast(+, fX, fY); Z = sparse(fZ)
+            broadcast!(+, fZ, fX, fY); Z = sparse(fZ)
             broadcast!(+, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
             @test (@allocated broadcast!(+, Z, X, Y)) == 0
             @test broadcast!(+, Z, X, Y) == sparse(broadcast!(+, fZ, fX, fY))
             # --> test broadcast! entry point / *-like zero-preserving op
-            fZ = broadcast(*, fX, fY); Z = sparse(fZ)
+            broadcast!(*, fZ, fX, fY); Z = sparse(fZ)
             broadcast!(*, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
             @test (@allocated broadcast!(*, Z, X, Y)) == 0
             @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY))
             # --> test broadcast! entry point / not zero-preserving op
-            fZ = broadcast(f, fX, fY); Z = sparse(fZ)
-            broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
+            fZo = broadcast(f, fX, fY); Z = sparse(fZo)
+            broadcast!(f, Z, X, Y); Z = sparse(fZo) # warmup for @allocated
             @test (@allocated broadcast!(f, Z, X, Y)) == 0
-            @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY))
+            @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZo, fX, fY))
             # --> test shape checks for both broadcast and broadcast! entry points
             # TODO strengthen this test, avoiding dependence on checking whether
             # broadcast_indices throws to determine whether sparse broadcast should throw

From cfbca08aa59ab134eea2f7c7f9544843538c4aa0 Mon Sep 17 00:00:00 2001
From: Sacha Verweij <sacha@stanford.edu>
Date: Tue, 7 Mar 2017 13:14:16 -0800
Subject: [PATCH 2/2] Fix shape corner cases in not-zero-preserving sparse
 broadcast!(f, C, A, B).

Specifically, fix not-zero-preserving sparse broadcast!(f, C, A, B) where C's shape does not match the broadcast shape of A and B.
Strengthen associated tests.
---
 base/sparse/higherorderfns.jl | 19 ++++++++++++++++---
 test/sparse/higherorderfns.jl |  6 +++---
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/base/sparse/higherorderfns.jl b/base/sparse/higherorderfns.jl
index c86602a219bef..6ef9e4270063d 100644
--- a/base/sparse/higherorderfns.jl
+++ b/base/sparse/higherorderfns.jl
@@ -687,7 +687,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
     # Cases without vertical expansion
-    if numrows(A) == numrows(B)
+    if numrows(A) == numrows(B) == numrows(C)
         @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
             Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
@@ -711,7 +711,20 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
             end
         end
     # Cases with vertical expansion
-    elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument
+    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
+        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
+            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
+            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
+            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
+            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
+            Cx = f(Ax, Bx)
+            if Cx != fillvalue
+                for Ck::Int in (jo + 1):(jo + numrows(C))
+                    storedvals(C)[Ck] = Cx
+                end
+            end
+        end
+    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1, vertically expand only A
         @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
             Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
@@ -736,7 +749,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
                 end
             end
         end
-    elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument
+    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
         @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
             Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
             Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
diff --git a/test/sparse/higherorderfns.jl b/test/sparse/higherorderfns.jl
index 61f8c3b6ce3c2..492a477ee5b38 100644
--- a/test/sparse/higherorderfns.jl
+++ b/test/sparse/higherorderfns.jl
@@ -193,10 +193,10 @@ end
             @test (@allocated broadcast!(*, Z, X, Y)) == 0
             @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY))
             # --> test broadcast! entry point / not zero-preserving op
-            fZo = broadcast(f, fX, fY); Z = sparse(fZo)
-            broadcast!(f, Z, X, Y); Z = sparse(fZo) # warmup for @allocated
+            broadcast!(f, fZ, fX, fY); Z = sparse(fZ)
+            broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
             @test (@allocated broadcast!(f, Z, X, Y)) == 0
-            @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZo, fX, fY))
+            @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY))
             # --> test shape checks for both broadcast and broadcast! entry points
             # TODO strengthen this test, avoiding dependence on checking whether
             # broadcast_indices throws to determine whether sparse broadcast should throw