Skip to content

Commit

Permalink
Include number of rejected updates in the results (#116)
Browse files Browse the repository at this point in the history
* Count number of bfgs updates rejected

* Propagate count to outer function

* Return number of BFGS updates rejected

* Raise warning if updates were rejected

* Add missing import

* Update tests

* Increment minor version number

* Bump docs and test compatibilities
  • Loading branch information
sethaxen authored Jan 5, 2023
1 parent 4b79342 commit 6b18635
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Pathfinder"
uuid = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454"
authors = ["Seth Axen <[email protected]> and contributors"]
version = "0.5.6"
version = "0.6.0"

[deps]
AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
Expand Down
2 changes: 1 addition & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ DynamicHMC = "3"
ForwardDiff = "0.10"
LogDensityProblems = "1, 2"
LogDensityProblemsAD = "1, 2"
Pathfinder = "0.5"
Pathfinder = "0.6"
StatsFuns = "0.9, 1"
StatsPlots = "0.14, 0.15"
TransformVariables = "0.6, 0.7"
Expand Down
12 changes: 9 additions & 3 deletions src/inverse_hessian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,17 @@ function gilbert_init(α, s, y)
end

"""
lbfgs_inverse_hessians(θs, ∇logpθs; Hinit=gilbert_init, history_length=5, ϵ=1e-12) -> Vector{WoodburyPDMat}
lbfgs_inverse_hessians(
θs, ∇logpθs; Hinit=gilbert_init, history_length=5, ϵ=1e-12
) -> Tuple{Vector{WoodburyPDMat},Int}
From an L-BFGS trajectory and gradients, compute the inverse Hessian approximations at each point.
Given positions `θs` with gradients `∇logpθs`, construct LBFGS inverse Hessian
approximations with the provided `history_length`.
The 2nd returned value is the number of BFGS updates to the inverse Hessian matrices that
were rejected due to keeping the inverse Hessian positive definite.
"""
function lbfgs_inverse_hessians(θs, ∇logpθs; Hinit=gilbert_init, history_length=5, ϵ=1e-12)
L = length(θs) - 1
Expand All @@ -34,6 +39,7 @@ function lbfgs_inverse_hessians(θs, ∇logpθs; Hinit=gilbert_init, history_len
H = lbfgs_inverse_hessian(Diagonal(α), S, Y, history_ind, history_length_effective) # H₀ = I
Hs = [H] # trace of H

num_bfgs_updates_rejected = 0
for l in 1:L
θlp1, ∇logpθlp1 = θs[l + 1], ∇logpθs[l + 1]
s .= θlp1 .- θ
Expand All @@ -48,15 +54,15 @@ function lbfgs_inverse_hessians(θs, ∇logpθs; Hinit=gilbert_init, history_len
# initial diagonal estimate of H
α = Hinit(α, s, y)
else
@debug "Skipping inverse Hessian update from iteration $l to avoid negative curvature."
num_bfgs_updates_rejected += 1
end

θ, ∇logpθ = θlp1, ∇logpθlp1
H = lbfgs_inverse_hessian(Diagonal(α), S, Y, history_ind, history_length_effective)
push!(Hs, H)
end

return Hs
return Hs, num_bfgs_updates_rejected
end

"""
Expand Down
10 changes: 7 additions & 3 deletions src/mvnormal.jl
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
"""
fit_mvnormals(θs, ∇logpθs; history_length=5)
fit_mvnormals(θs, ∇logpθs; history_length=5) -> (dists, num_bfgs_updates_rejected)
Fit a multivariate-normal distribution to each point on the trajectory `θs`.
Given `θs` with gradients `∇logpθs`, construct LBFGS inverse Hessian approximations with
the provided `history_length`. The inverse Hessians approximate a covariance. The
covariances and corresponding means that define multivariate normal approximations per
point are returned.
The 2nd returned value is the number of BFGS updates to the inverse Hessian matrices that
were rejected due to keeping the inverse Hessian positive definite.
"""
function fit_mvnormals(θs, ∇logpθs; kwargs...)
Σs = lbfgs_inverse_hessians(θs, ∇logpθs; kwargs...)
Σs, num_bfgs_updates_rejected = lbfgs_inverse_hessians(θs, ∇logpθs; kwargs...)
trans = Transducers.MapSplat() do Σ, ∇logpθ, θ
μ = muladd(Σ, ∇logpθ, θ)
return Distributions.MvNormal(μ, Σ)
end
l = length(Σs)
return @views(zip(Σs, ∇logpθs[1:l], θs[1:l])) |> trans |> collect
dists = @views(zip(Σs, ∇logpθs[1:l], θs[1:l])) |> trans |> collect
return dists, num_bfgs_updates_rejected
end

# faster than computing `logpdf` and `rand` independently
Expand Down
13 changes: 12 additions & 1 deletion src/singlepath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Container for results of single-path Pathfinder.
`fit_distributions[fit_iteration + 1] == fit_distribution`
- `elbo_estimates::AbstractVector{<:Pathfinder.ELBOEstimate}`: ELBO estimates for all but
the first distribution in `fit_distributions`.
- `num_bfgs_updates_rejected::Int`: Number of times a BFGS update to the reconstructed
inverse Hessian was rejected to keep the inverse Hessian positive definite.
# Returns
- [`PathfinderResult`](@ref)
Expand All @@ -49,6 +51,7 @@ struct PathfinderResult{I,O,R,OP,LP,FD,D,FDT,DT,OS,OT,EE}
optim_trace::OT
fit_distributions::Vector{FD}
elbo_estimates::EE
num_bfgs_updates_rejected::Int
end

function Base.show(io::IO, ::MIME"text/plain", result::PathfinderResult)
Expand Down Expand Up @@ -197,6 +200,7 @@ function pathfinder(
fit_distributions,
fit_iteration,
elbo_estimates,
num_bfgs_updates_rejected,
) = path_result

if !success
Expand All @@ -207,6 +211,11 @@ function pathfinder(
ndraws_elbo_actual = ndraws_elbo
end

if num_bfgs_updates_rejected > 0
perc = round(num_bfgs_updates_rejected * (100//length(fit_distributions)); digits=1)
@warn "$num_bfgs_updates_rejected ($(perc)%) updates to the inverse Hessian estimate were rejected to keep it positive definite."
end

fit_distribution = fit_distributions[fit_iteration + 1]

# reuse existing draws; draw additional ones if necessary
Expand Down Expand Up @@ -238,6 +247,7 @@ function pathfinder(
optim_trace,
fit_distributions,
elbo_estimates,
num_bfgs_updates_rejected,
)
end

Expand Down Expand Up @@ -283,7 +293,7 @@ function _pathfinder(
success = L > 0

# fit mv-normal distributions to trajectory
fit_distributions = fit_mvnormals(
fit_distributions, num_bfgs_updates_rejected = fit_mvnormals(
optim_trace.points, optim_trace.gradients; history_length
)

Expand All @@ -305,6 +315,7 @@ function _pathfinder(
fit_distributions,
fit_iteration,
elbo_estimates,
num_bfgs_updates_rejected,
)
end

Expand Down
2 changes: 1 addition & 1 deletion test/integration/AdvancedHMC/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ Distributions = "0.25"
ForwardDiff = "0.10"
MCMCDiagnosticTools = "0.1.3, 0.2"
Optim = "1.4"
Pathfinder = "0.5"
Pathfinder = "0.5, 0.6"
StatsFuns = "0.9, 1"
julia = "1.6"
2 changes: 1 addition & 1 deletion test/integration/DynamicHMC/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ LogDensityProblems = "1, 2"
LogDensityProblemsAD = "1"
MCMCDiagnosticTools = "0.1.3, 0.2"
Optim = "1.4"
Pathfinder = "0.5"
Pathfinder = "0.5, 0.6"
StatsFuns = "0.9, 1"
TransformVariables = "0.6, 0.7"
TransformedLogDensities = "1"
Expand Down
2 changes: 1 addition & 1 deletion test/integration/Turing/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"

[compat]
Pathfinder = "0.5"
Pathfinder = "0.5, 0.6"
Turing = "0.21, 0.22, 0.23"
julia = "1.6"
4 changes: 3 additions & 1 deletion test/inverse_hessian.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using AbstractDifferentiation
using ForwardDiff
using LinearAlgebra
using Optim
Expand Down Expand Up @@ -60,7 +61,7 @@ end
sol, optim_trace = Pathfinder.optimize_with_trace(prob, optimizer)

# run lbfgs_inverse_hessians with the same initialization as Optim.LBFGS
Hs = Pathfinder.lbfgs_inverse_hessians(
Hs, num_bfgs_updates_rejected = Pathfinder.lbfgs_inverse_hessians(
optim_trace.points,
optim_trace.gradients;
history_length,
Expand All @@ -71,5 +72,6 @@ end
# check that next direction computed from Hessian is the same as the actual
# direction that was taken
@test all((1), dot.(ps, ss) ./ norm.(ss) ./ norm.(ps))
@test num_bfgs_updates_rejected == 0
end
end
5 changes: 3 additions & 2 deletions test/mvnormal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ include("test_utils.jl")
optimizer = Optim.LBFGS()
history_length = optimizer.m
_, optim_trace = Pathfinder.optimize_with_trace(prob, optimizer)
Σs = Pathfinder.lbfgs_inverse_hessians(
Σs, num_bfgs_updates_rejected1 = Pathfinder.lbfgs_inverse_hessians(
optim_trace.points, optim_trace.gradients; history_length
)
dists = @inferred Pathfinder.fit_mvnormals(
dists, num_bfgs_updates_rejected2 = @inferred Pathfinder.fit_mvnormals(
optim_trace.points, optim_trace.gradients; history_length
)
@test dists isa Vector{<:MvNormal{Float64,<:Pathfinder.WoodburyPDMat}}
@test num_bfgs_updates_rejected2 == num_bfgs_updates_rejected1
@test Σs getproperty.(dists, )
@test optim_trace.points .+ Σs .* optim_trace.gradients getproperty.(dists, )
end
Expand Down

2 comments on commit 6b18635

@sethaxen
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/75206

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.0 -m "<description of version>" 6b186350c01a1b2baa87672b19012419f6ecc7b3
git push origin v0.6.0

Please sign in to comment.