Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix shape corner cases in sparse broadcast!(f, C, A, B) #20937

Merged
merged 2 commits into from
Mar 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 44 additions & 15 deletions base/sparse/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -515,24 +515,23 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
# A and B cannot have the same shape, as we directed that case to map in broadcast's
# entry point; here we need efficiently handle only heterogeneous combinations of matrices
# with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns"
# and "rows"), and two singleton dimensions ("scalars"). Cases involving scalars should
# be rare and optimizing that case complicates the code appreciably, so we largely
# ignore that case's performance below.
# C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
# entry point; here we need efficiently handle only heterogeneous combinations of mats/vecs
# with no singleton dimensions, one singleton dimension, and two singleton dimensions.
# Cases involving objects with two singleton dimensions should be rare and optimizing
# that case complicates the code appreciably, so we largely ignore that case's
# performance below.
#
# We first divide the cases into two groups: those in which neither argument expands
# vertically (matrix-column combinations) and those in which an argument expands
# vertically (matrix-row and column-row combinations).
# We first divide the cases into two groups: those in which neither input argument
# expands vertically, and those in which at least one argument expands vertically.
#
# NOTE: Placing the loops over columns outside the conditional chain segregating
# argument shape combinations eliminates some code replication but unfortunately
# hurts performance appreciably in some cases.
#
# Cases without vertical expansion
Ck = 1
if numrows(A) == numrows(B)
if numrows(A) == numrows(B) == numrows(C)
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Expand Down Expand Up @@ -575,7 +574,24 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
end
end
# Cases with vertical expansion
elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument
elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
Cx = f(Ax, Bx)
if !_iszero(Cx)
for Ci::indtype(C) in 1:numrows(C)
Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
storedinds(C)[Ck] = Ci
storedvals(C)[Ck] = Cx
Ck += 1
end
end
end
elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1 , vertically expand only A
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Expand Down Expand Up @@ -616,7 +632,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B
end
end
end
elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument
else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Expand Down Expand Up @@ -671,7 +687,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
# Cases without vertical expansion
if numrows(A) == numrows(B)
if numrows(A) == numrows(B) == numrows(C)
@inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
Expand All @@ -695,7 +711,20 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
end
end
# Cases with vertical expansion
elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument
elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
@inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
Cx = f(Ax, Bx)
if Cx != fillvalue
for Ck::Int in (jo + 1):(jo + numrows(C))
storedvals(C)[Ck] = Cx
end
end
end
elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1, vertically expand only A
@inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
Expand All @@ -720,7 +749,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::Spa
end
end
end
elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument
else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
@inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
Expand Down
7 changes: 4 additions & 3 deletions test/sparse/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ end
mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1))
vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1))
tens = (mats..., vecs...)
fZ = Array(first(mats))
for Xo in tens
X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo)
# use different types to check internal type stability via allocation tests below
Expand All @@ -182,17 +183,17 @@ end
@test_throws DimensionMismatch broadcast(+, spzeros((shapeX .- 1)...), Y)
end
# --> test broadcast! entry point / +-like zero-preserving op
fZ = broadcast(+, fX, fY); Z = sparse(fZ)
broadcast!(+, fZ, fX, fY); Z = sparse(fZ)
broadcast!(+, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
@test (@allocated broadcast!(+, Z, X, Y)) == 0
@test broadcast!(+, Z, X, Y) == sparse(broadcast!(+, fZ, fX, fY))
# --> test broadcast! entry point / *-like zero-preserving op
fZ = broadcast(*, fX, fY); Z = sparse(fZ)
broadcast!(*, fZ, fX, fY); Z = sparse(fZ)
broadcast!(*, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
@test (@allocated broadcast!(*, Z, X, Y)) == 0
@test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY))
# --> test broadcast! entry point / not zero-preserving op
fZ = broadcast(f, fX, fY); Z = sparse(fZ)
broadcast!(f, fZ, fX, fY); Z = sparse(fZ)
broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
@test (@allocated broadcast!(f, Z, X, Y)) == 0
@test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY))
Expand Down