JuliaDiff · sethaxen · Jan 14, 2021 · May 17, 2020 · May 17, 2020 · May 17, 2020
diff --git a/src/rulesets/LinearAlgebra/symmetric.jl b/src/rulesets/LinearAlgebra/symmetric.jl
@@ -279,6 +279,172 @@ function rrule(::typeof(svdvals), A::LinearAlgebra.RealHermSymComplexHerm{<:BLAS
     return S, svdvals_pullback
 end
 
+#####
+##### matrix functions
+#####
+
+# The formula comes from the so-called Daleckiĭ-Kreĭn theorem, originally due to
+# Ju. L. Daleckiĭ and S. G. Kreĭn. Integration and differentiation of functions of Hermitian
+# operators and applications to the theory of perturbations.
+# Amer. Math. Soc. Transl., Series 2, 47:1–30, 1965.
+# Stabilization for almost-degenerate matrices due to
+# S. D. Axen, 2020. Representing Ensembles of Molecules.
+# Appendix D: Automatic differentiation rules for power series functions of diagonalizable matrices
+# https://escholarship.org/uc/item/6s62d8pw
+# These rules are more stable for degenerate matrices than applying the chain rule to the
+# rules for `eigen`.
+
+for func in (:exp, :log, :sqrt, :cos, :sin, :tan, :cosh, :sinh, :tanh, :acos, :asin, :atan, :acosh, :asinh, :atanh)
+    @eval begin  # emit one frule/rrule pair per function in the list above
+        function frule((_, ΔA), ::typeof($func), A::LinearAlgebra.RealHermSymComplexHerm)
+            ΔA isa AbstractZero && return $func(A), ΔA  # zero tangent in, same zero tangent out
+            Y, intermediates = _matfun($func, A)
+            Ȳ = _matfun_frechet($func, A, Y, ΔA, intermediates)  # Fréchet derivative applied to ΔA
+            # If ΔA was hermitian, then ∂Y has the same structure as Y
+            ∂Y = if ishermitian(ΔA) && (isa(Y, Symmetric) || isa(Y, Hermitian))
+                _symhermlike!(Ȳ, Y)
+            else
+                Ȳ
+            end
+            return Y, ∂Y
+        end
+
+        function rrule(::typeof($func), A::LinearAlgebra.RealHermSymComplexHerm)
+            Y, intermediates = _matfun($func, A)  # intermediates are captured by the pullback closure
+            $(Symbol(func, :_pullback))(ΔY::AbstractZero) = (NO_FIELDS, ΔY)
+            function $(Symbol(func, :_pullback))(ΔY)
+                # for Hermitian Y, we don't need to realify the diagonal of ΔY, since the
+                # effect is the same as applying _hermitrize! at the end
+                ∂Y = eltype(Y) <: Real ? real(ΔY) : ΔY
+                # for matrix functions, the pullback is related to the pushforward by an adjoint
+                Ā = _matfun_frechet($func, A, Y, ∂Y', intermediates)
+                # the cotangent of Hermitian A should be Hermitian
+                ∂A = typeof(A)(eltype(A) <: Real ? real(Ā) : Ā, A.uplo)
+                _hermitrize!(∂A.data)
+                return NO_FIELDS, ∂A
+            end
+            return Y, $(Symbol("$(func)_pullback"))  # same name as Symbol(func, :_pullback) above
+        end
+    end
+end
+
+function frule((_, ΔA), ::typeof(sincos), A::LinearAlgebra.RealHermSymComplexHerm)
+    ΔA isa AbstractZero && return sincos(A), ΔA  # zero tangent in, same zero tangent out
+    sinA, (λ, U, sinλ, cosλ) = _matfun(sin, A)  # cosλ is the derivative cache of sin, reused as cos.(λ)
+    cosA = _symhermtype(sinA)((U * Diagonal(cosλ)) * U')
+    tmp = ΔA * U  # We will overwrite this matrix several times to hold different values
+    ∂Λ = U' * tmp  # U' * ΔA * U
+    ∂sinΛ = _muldiffquotmat!(similar(∂Λ), sin, λ, sinλ, cosλ, ∂Λ)  # fresh buffer: ∂Λ is read again below
+    ∂cosΛ = _muldiffquotmat!(∂Λ, cos, λ, cosλ, -sinλ, ∂Λ)  # -sinλ is passed as the derivative of cos
+    ∂sinA = _symhermlike!(mul!(∂sinΛ, U, mul!(tmp, ∂sinΛ, U')), sinA)  # U * ∂sinΛ * U', wrapped like sinA
+    ∂cosA = _symhermlike!(mul!(∂cosΛ, U, mul!(tmp, ∂cosΛ, U')), cosA)  # U * ∂cosΛ * U', wrapped like cosA
+    Y = (sinA, cosA)
+    ∂Y = Composite{typeof(Y)}(∂sinA, ∂cosA)
+    return Y, ∂Y
+end
+
+function rrule(::typeof(sincos), A::LinearAlgebra.RealHermSymComplexHerm)
+    sinA, (λ, U, sinλ, cosλ) = _matfun(sin, A)  # cosλ is the derivative cache of sin, reused as cos.(λ)
+    cosA = typeof(sinA)((U * Diagonal(cosλ)) * U', sinA.uplo)
+    Y = (sinA, cosA)
+    sincos_pullback(ΔY::AbstractZero) = (NO_FIELDS, ΔY)
+    function sincos_pullback((ΔsinA, ΔcosA)::Composite)
+        ΔsinA isa AbstractZero && ΔcosA isa AbstractZero && return NO_FIELDS, ΔsinA + ΔcosA
+        if eltype(A) <: Real
+            ∂sinA, ∂cosA = real(ΔsinA), real(ΔcosA)
+        else
+            ∂sinA, ∂cosA = ΔsinA, ΔcosA
+        end
+        if ∂cosA isa AbstractZero  # only the sin output carries a cotangent
+            Ā = _matfun_frechet(sin, A, sinA, ∂sinA, (λ, U, sinλ, cosλ))
+        elseif ∂sinA isa AbstractZero  # any zero-like cotangent (not only `Zero`) must skip the dense branch
+            Ā = _matfun_frechet(cos, A, cosA, ∂cosA, (λ, U, cosλ, -sinλ))
+        else
+            tmp = ∂sinA * U  # we will overwrite this with various temporary values during this computation
+            ∂sinΛ = U' * tmp
+            ∂cosΛ = U' * mul!(tmp, ∂cosA, U)
+            ∂Λ = _muldiffquotmat!(∂sinΛ, sin, λ, sinλ, cosλ, ∂sinΛ)
+            ∂Λ = _muldiffquotmat!(∂Λ, cos, λ, cosλ, -sinλ, ∂cosΛ, true)  # β = true: accumulate onto the sin term
+            Ā = mul!(∂Λ, U, mul!(tmp, ∂Λ, U'))
+        end
+        _hermitrize!(Ā)  # the cotangent of Hermitian A should be Hermitian
+        ∂A = typeof(A)(Ā, A.uplo)
+        return NO_FIELDS, ∂A
+    end
+    return Y, sincos_pullback
+end
+
+# Compute the matrix function f(A) through the eigendecomposition of A, returning also a
+# cache of intermediates `(λ, U, fλ, df_dλ)` for computing the pushforward or pullback.
+# Note any function `f` used with this **must** have a `frule` defined on it.
+function _matfun(f, A::LinearAlgebra.RealHermSymComplexHerm)
+    λ, U = eigen(A)
+    if all(λi -> _isindomain(f, λi), λ)
+        fλ_df_dλ = map(λi -> frule((Zero(), One()), f, λi), λ)  # (f(λi), f'(λi)) pairs
+    else  # promote to complex if necessary
+        fλ_df_dλ = map(λi -> frule((Zero(), One()), f, complex(λi)), λ)
+    end
+    fλ = first.(fλ_df_dλ)
+    df_dλ = unthunk.(last.(fλ_df_dλ))  # unthunk the derivative itself, not the (value, derivative) tuple
+    fA = (U * Diagonal(fλ)) * U'
+    Y = if eltype(A) <: Real
+        Symmetric(fA)
+    elseif eltype(fλ) <: Complex  # complex f(λ) with complex A: returned without a wrapper
+        fA
+    else
+        Hermitian(fA)
+    end
+    intermediates = (λ, U, fλ, df_dλ)
+    return Y, intermediates
+end
+
+# Fréchet derivative of matrix function f, using the intermediates cached by `_matfun`.
+# Computes ∂Y = U * (P .* (U' * ΔA * U)) * U' with fewer allocations (P: difference quotients)
+function _matfun_frechet(f, A::LinearAlgebra.RealHermSymComplexHerm, Y, ΔA, (λ, U, fλ, df_dλ))
+    tmp = ΔA * U
+    ∂Λ = U' * tmp  # U' * ΔA * U
+    ∂fΛ = _muldiffquotmat!(∂Λ, f, λ, fλ, df_dλ, ∂Λ)  # P .* ∂Λ, in place when the eltype allows
+    # reuse intermediate if possible
+    if eltype(tmp) <: Real && eltype(∂fΛ) <: Complex
+        tmp2 = ∂fΛ * U'  # real buffer cannot hold a complex product, so allocate
+    else
+        tmp2 = mul!(tmp, ∂fΛ, U')
+    end
+    ∂Y = mul!(∂fΛ, U, tmp2)  # output overwrites ∂fΛ; its old value is already folded into tmp2
+    return ∂Y
+end
+
+# difference quotient, i.e. Pᵢⱼ = (f(λⱼ) - f(λᵢ)) / (λⱼ - λᵢ), with f'(λᵢ) when λᵢ=λⱼ
+function _diffquot(f, λi, λj, fλi, fλj, ∂fλi, ∂fλj)  # `f` itself is not used in the body
+    T = Base.promote_typeof(λi, λj, fλi, fλj, ∂fλi, ∂fλj)  # common result type for every branch
+    Δλ = λj - λi
+    iszero(Δλ) && return T(∂fλi)
+    # handle round-off error using Maclaurin series of (f(λᵢ + Δλ) - f(λᵢ)) / Δλ wrt Δλ
+    # and approximating f''(λᵢ) with forward difference (f'(λᵢ + Δλ) - f'(λᵢ)) / Δλ
+    # so (f(λᵢ + Δλ) - f(λᵢ)) / Δλ = (f'(λᵢ + Δλ) + f'(λᵢ)) / 2 + O(Δλ^2)
+    # total error on the order of f(λᵢ) * eps()^(2/3)
+    abs(Δλ) < cbrt(eps(real(T))) && return T((∂fλj + ∂fλi) / 2)  # nearly equal: average the derivatives
+    Δfλ = fλj - fλi
+    return T(Δfλ / Δλ)  # well separated: plain difference quotient
+end
+
+# broadcast multiply Δ by the matrix of difference quotients P, storing the result in PΔ.
+# If β is nonzero, then @. PΔ = β*PΔ + P*Δ
+# if type of PΔ is incompatible with result, new matrix is allocated
+function _muldiffquotmat!(PΔ, f, λ, fλ, ∂fλ, Δ, β = false)
+    if eltype(PΔ) <: Real && eltype(fλ) <: Complex  # real buffer cannot store a complex result
+        return β .* PΔ .+ _diffquot.(f, λ, λ', fλ, transpose(fλ), ∂fλ, transpose(∂fλ)) .* Δ
+    else
+        PΔ .= β .* PΔ .+ _diffquot.(f, λ, λ', fλ, transpose(fλ), ∂fλ, transpose(∂fλ)) .* Δ
+        return PΔ  # same array that was passed in, now overwritten
+    end
+end
+
+_isindomain(f, x) = true  # default: no real-domain restriction known for `f`
+_isindomain(::Union{typeof(acos),typeof(asin),typeof(atanh)}, x::Real) = -1 ≤ x ≤ 1  # atanh throws outside too
+_isindomain(::typeof(acosh), x::Real) = x ≥ 1
+_isindomain(::Union{typeof(log),typeof(sqrt)}, x::Real) = x ≥ 0
+
 #####
 ##### utilities
 #####
@@ -288,6 +454,18 @@ _symhermtype(::Type{<:Symmetric}) = Symmetric
 _symhermtype(::Type{<:Hermitian}) = Hermitian
 _symhermtype(A) = _symhermtype(typeof(A))
 
+function _realifydiag!(A)  # overwrite each diagonal entry of A with its real part, in place
+    @inbounds for k in axes(A, 1)
+        A[k, k] = real(A[k, k])
+    end
+    return A
+end
+
+function _symhermlike!(A, S::Union{Symmetric,Hermitian})  # wrap A in S's wrapper type with S's uplo
+    S isa Hermitian{<:Complex} && _realifydiag!(A)  # decide from the target S; A is the raw matrix
+    return typeof(S)(A, S.uplo)
+end
+
 # in-place hermitrize matrix
 function _hermitrize!(A)
     n = size(A, 1)