Skip to content

Commit

Permalink
Improved sparse-dense multiplication kernels.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sacha0 committed Oct 8, 2017
1 parent c3d2444 commit 0a5af3c
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 9 deletions.
132 changes: 124 additions & 8 deletions base/sparse/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ function sppromote(A::SparseMatrixCSC{TvA,TiA}, B::SparseMatrixCSC{TvB,TiB}) whe
A, B
end


### sparse-dense matrix multiplication: A[c|t]_mul_B[c|t][!]([dense,] sparse, dense)

### gemm-like sparse-dense matrix multipliation: A[c|t]_mul_B[!](α, sparseA, denseB, β, denseC)
# In matrix-vector multiplication, the correct orientation of the vector is assumed.
for (f, op, transp) in ((:A_mul_B, :identity, false),
(:Ac_mul_B, :adjoint, true),
Expand Down Expand Up @@ -94,11 +92,129 @@ for (f, op, transp) in ((:A_mul_B, :identity, false),
end
end

# For compatibility with dense multiplication API. Should be deleted when dense multiplication
# API is updated to follow BLAS API.
A_mul_B!(C::StridedVecOrMat, A::SparseMatrixCSC, B::StridedVecOrMat) = A_mul_B!(one(eltype(B)), A, B, zero(eltype(C)), C)
Ac_mul_B!(C::StridedVecOrMat, A::SparseMatrixCSC, B::StridedVecOrMat) = Ac_mul_B!(one(eltype(B)), A, B, zero(eltype(C)), C)
At_mul_B!(C::StridedVecOrMat, A::SparseMatrixCSC, B::StridedVecOrMat) = At_mul_B!(one(eltype(B)), A, B, zero(eltype(C)), C)

### sparse-dense matrix multiplication: A[c|t]_mul_B[c|t][!]([dense,] sparse, dense)

# */A_mul_B[!]([dense,] sparse, dense)
function *(A::SparseMatrixCSC, B::StridedMatrix)
@boundscheck size(A, 2) == size(B, 1) || throw(DimensionMismatch())
C = zeros(promote_type(eltype(A), eltype(B)), size(A, 1), size(B, 2))
return unchecked_A_mul_B!(C, A, B)
end
function A_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix)
@boundscheck size(A, 2) == size(B, 1) || throw(DimensionMismatch())
@boundscheck size(C, 1) == size(A, 1) || throw(DimensionMismatch())
@boundscheck size(C, 2) == size(B, 2) || throw(DimensionMismatch())
return unchecked_A_mul_B!(C, A, B)
end
function unchecked_A_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix)
nB = size(B, 2)
mB = size(B, 1)
fill!(C, zero(eltype(C)))
@inbounds for jB in 1:nB
for iB in 1:mB # iB == jA
xB = B[iB, jB]
@simd for kA in A.colptr[iB]:(A.colptr[iB+1] - 1)
iA = A.rowval[kA]
xA = A.nzval[kA]
C[iA, jB] = muladd(xA, xB, C[iA, jB])
end
end
end
return C
end

# A_mul_B(c|t)[!]([dense,] sparse, dense)
@propagate_inbounds A_mul_Bt(A::SparseMatrixCSC, B::StridedMatrix) = _A_mul_Bq(A, B, identity)
@propagate_inbounds A_mul_Bc(A::SparseMatrixCSC, B::StridedMatrix) = _A_mul_Bq(A, B, conj)
function _A_mul_Bq(A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 2) == size(B, 2) || throw(DimensionMismatch())
C = zeros(promote_type(eltype(A), eltype(B)), size(A, 1), size(B, 1))
return unchecked_A_mul_Bq!(C, A, B, op)
end
@propagate_inbounds A_mul_Bt!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _A_mul_Bq!(C, A, B, identity)
@propagate_inbounds A_mul_Bc!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _A_mul_Bq!(C, A, B, conj)
function _A_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 2) == size(B, 2) || throw(DimensionMismatch())
@boundscheck size(C, 1) == size(A, 1) || throw(DimensionMismatch())
@boundscheck size(C, 2) == size(B, 1) || throw(DimensionMismatch())
return unchecked_A_mul_Bq!(C, A, B, op)
end
function unchecked_A_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
# Without some additional storage into which to reorder data prior to performing multiplication,
# memory access patterns for this operation aren't particularly happy. For now, perform the
# operation in two steps --- an explicit adjoint/transpose followed by an efficient multiplication.
# For the future, test the various possible access patterns to determine whether any best this
# approach all around, and if so replace this implementation.
return twostep_A_mul_Bq!(C, A, B, op)
end
twostep_A_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, ::typeof(identity)) =
unchecked_A_mul_B!(C, A, transpose(B))
twostep_A_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, ::typeof(conj)) =
unchecked_A_mul_B!(C, A, adjoint(B))

# A(c|t)_mul_B[!]([dense,] sparse, dense)
@propagate_inbounds At_mul_B(A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_B(A, B, identity)
@propagate_inbounds Ac_mul_B(A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_B(A, B, conj)
function _Aq_mul_B(A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 1) == size(B, 1) || throw(DimensionMismatch())
C = zeros(promote_type(eltype(A), eltype(B)), size(A, 2), size(B, 2))
return unchecked_Aq_mul_B!(C, A, B, op)
end
@propagate_inbounds At_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_B!(C, A, B, identity)
@propagate_inbounds Ac_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_B!(C, A, B, conj)
function _Aq_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 1) == size(B, 1) || throw(DimensionMismatch())
@boundscheck size(C, 1) == size(A, 2) || throw(DimensionMismatch())
@boundscheck size(C, 2) == size(B, 2) || throw(DimensionMismatch())
return unchecked_Aq_mul_B!(C, A, B, op)
end
function unchecked_Aq_mul_B!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
nB = size(B, 2)
nA = size(A, 2)
fill!(C, zero(eltype(C)))
@inbounds for jB in 1:nB
for jA in 1:nA
CjAjB::eltype(C) = zero(eltype(C))
@simd for kA in A.colptr[jA]:(A.colptr[jA+1] - 1)
iA = A.rowval[kA]
qxA = op(A.nzval[kA])
CjAjB = muladd(qxA, B[iA, jB], CjAjB)
end
C[jA, jB] = CjAjB
end
end
return C
end

# A(t|c)_mul_B(t|c)[!]([dense,] sparse, dense)
@propagate_inbounds At_mul_Bt(A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_Bq(A, B, identity)
@propagate_inbounds Ac_mul_Bc(A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_Bq(A, B, conj)
function _Aq_mul_Bq(A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 1) == size(B, 2) || throw(DimensionMismatch())
C = zeros(promote_type(eltype(A), eltype(B)), size(A, 2), size(B, 1))
return unchecked_Aq_mul_Bq!(C, A, B, op)
end
@propagate_inbounds At_mul_Bt!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_Bq!(C, A, B, identity)
@propagate_inbounds Ac_mul_Bc!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix) = _Aq_mul_Bq!(C, A, B, conj)
function _Aq_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
@boundscheck size(A, 1) == size(B, 2) || throw(DimensionMismatch())
@boundscheck size(C, 1) == size(A, 2) || throw(DimensionMismatch())
@boundscheck size(C, 2) == size(B, 1) || throw(DimensionMismatch())
return unchecked_Aq_mul_Bq!(C, A, B, op)
end
function unchecked_Aq_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, op::TF) where TF
# Without some additional storage into which to reorder data prior to performing multiplication,
# memory access patterns for this operation aren't particularly happy. For now, perform the
# operation in two steps --- a relatively efficient multiplication in reverse order
# followed by a transposition. For the future, test the various possible access patterns
# to determine whether any best this approach all around, and if so replace this implementation.
return twostep_Aq_mul_Bq!(C, A, B, op)
end
twostep_Aq_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, ::typeof(identity)) =
transpose!(C, *(B, A))
twostep_Aq_mul_Bq!(C::StridedMatrix, A::SparseMatrixCSC, B::StridedMatrix, ::typeof(conj)) =
adjoint!(C, *(B, A))


### dense-sparse matrix multiplication: A[c|t]_mul_B[c|t][!]([dense,] dense, sparse)
Expand Down
55 changes: 54 additions & 1 deletion test/sparse/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2093,4 +2093,57 @@ end
@test_throws DimensionMismatch Ac_mul_B!(Cnn, Axn, Snn)
@test_throws DimensionMismatch At_mul_Bt!(Cnn, Ann, Snx)
@test_throws DimensionMismatch Ac_mul_Bc!(Cnn, Ann, Snx)
end
end

@testset "sparse-dense matrix multiplication" begin
using Base.LinAlg: *, A_mul_B!,
A_mul_Bt, A_mul_Bt!, A_mul_Bc, A_mul_Bc!,
At_mul_B, At_mul_B!, Ac_mul_B, Ac_mul_B!,
At_mul_Bt, At_mul_Bt!, Ac_mul_Bc, Ac_mul_Bc!
# out-of-place sparse-dense ops, i.e. A[t|c]_mul_B[t|c](sparse, dense)
#
# exercise kernels, which are shared with corresponding in-place ops
for (m, k, n) in ((5, 5, 5), (5, 10, 15), (15, 10, 5))
sparsemat = sprand(Complex{Float64}, m, k, 0.4)
densemat = rand(Complex{Float64}, k, n)
tdensemat = transpose(densemat)
tsparsemat = transpose(sparsemat)
@test *(sparsemat, densemat) *(Matrix(sparsemat), densemat)
@test A_mul_Bt(sparsemat, tdensemat) A_mul_Bt(Matrix(sparsemat), tdensemat)
@test A_mul_Bc(sparsemat, tdensemat) A_mul_Bc(Matrix(sparsemat), tdensemat)
@test At_mul_B(tsparsemat, densemat) At_mul_B(Matrix(tsparsemat), densemat)
@test Ac_mul_B(tsparsemat, densemat) Ac_mul_B(Matrix(tsparsemat), densemat)
@test At_mul_Bt(tsparsemat, tdensemat) At_mul_Bt(Matrix(tsparsemat), tdensemat)
@test Ac_mul_Bc(tsparsemat, tdensemat) Ac_mul_Bc(Matrix(tsparsemat), tdensemat)
end
# exercise inner-dimensions-match checks
n, x = 3, 4
Cnn, Cxn, Cnx = zeros(n, n), zeros(x, n), zeros(n, x)
Ann, Axn, Anx = zeros(n, n), zeros(x, n), spzeros(n, x)
Snn, Sxn, Snx = spzeros(n, n), spzeros(x, n), spzeros(n, x)
@test_throws DimensionMismatch (*)(Snn, Axn)
@test_throws DimensionMismatch A_mul_Bt(Snn, Anx)
@test_throws DimensionMismatch A_mul_Bc(Snn, Anx)
@test_throws DimensionMismatch At_mul_B(Sxn, Ann)
@test_throws DimensionMismatch Ac_mul_B(Sxn, Ann)
@test_throws DimensionMismatch At_mul_Bt(Snn, Anx)
@test_throws DimensionMismatch Ac_mul_Bc(Snn, Anx)

# in-place sparse-dense ops, i.e. A[t|c]_mul_B[t|c]!(dense, sparse, dense)
# the kernels were exercised through the out-of-place calls above,
# so below exercise only the entry points (shape checks)
#
# exercise matmul outer-dimensions-match checks
for op! in (A_mul_B!, A_mul_Bt!, A_mul_Bc!, At_mul_B!, Ac_mul_B!, At_mul_Bt!, Ac_mul_Bc!)
@test_throws DimensionMismatch op!(Cxn, Snn, Ann)
@test_throws DimensionMismatch op!(Cnx, Snn, Ann)
end
# exercise matul inner-dimensions-match checks
@test_throws DimensionMismatch A_mul_B!(Cnn, Snn, Axn)
@test_throws DimensionMismatch A_mul_Bt!(Cnn, Snn, Anx)
@test_throws DimensionMismatch A_mul_Bc!(Cnn, Snn, Anx)
@test_throws DimensionMismatch At_mul_B!(Cnn, Sxn, Ann)
@test_throws DimensionMismatch Ac_mul_B!(Cnn, Sxn, Ann)
@test_throws DimensionMismatch At_mul_Bt!(Cnn, Snn, Anx)
@test_throws DimensionMismatch Ac_mul_Bc!(Cnn, Snn, Anx)
end

0 comments on commit 0a5af3c

Please sign in to comment.