Skip to content

Commit

Permalink
Support all GalacticOptim-compatible optimizers (#25)
Browse files Browse the repository at this point in the history
* Add GalacticOptim as dependency

* Use GalacticOptim API

* Add comment about recomputing gradient

* Add ForwardDiff as dependency

* Refactor single-path pathfinder

* Rearrange imports

* Test optimization functions

* Update tests

* Increment version number

* Document compatible optimizers

* Copy positions just in case

* Test NLopt works

* Rename files

* Clarify where remaining kwargs are forwarded

* Increment minimum supported version to LTS

* Test minimum supported version

* Standardize use of params

* Require grad function is specified

* Add AbstractDifferentiation as dependency

* Use AD backend to create optimization function

* Improve optimization tests

* Test AD backend functionality

* Include AD

* Use more compact keyword syntax

* Provide nothing for parameters

* Rearrange code

* Support passing OptimizationFunction to multi-path pathfinder

* Docstring formatting

* Improve documentation

* Test ad_backend

* Mark version number as DEV
  • Loading branch information
sethaxen authored Feb 5, 2022
1 parent 7e6da49 commit a7ec9d3
Show file tree
Hide file tree
Showing 15 changed files with 344 additions and 93 deletions.
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6'
- '1'
os:
- ubuntu-latest
Expand Down
15 changes: 11 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
name = "Pathfinder"
uuid = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454"
authors = ["Seth Axen <[email protected]> and contributors"]
version = "0.2.3"
version = "0.3.0-DEV"

[deps]
AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GalacticOptim = "a75be94c-b780-496d-a8a9-0878b188d577"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
Expand All @@ -15,17 +18,21 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"

[compat]
AbstractDifferentiation = "0.4"
Distributions = "0.25"
ForwardDiff = "0.10"
GalacticOptim = "2"
Optim = "1.4"
PDMats = "0.11"
PSIS = "0.2"
StatsBase = "0.33"
StatsFuns = "0.9"
julia = "1"
julia = "1.6"

[extras]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["ForwardDiff", "Test"]
test = ["NLopt", "ReverseDiff", "Test"]
6 changes: 5 additions & 1 deletion src/Pathfinder.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
module Pathfinder

using AbstractDifferentiation: AD
using Distributions: Distributions
# ensure that ForwardDiff is conditionally loaded by GalacticOptim
using ForwardDiff: ForwardDiff
using LinearAlgebra
using GalacticOptim: GalacticOptim
using Optim: Optim, LineSearches
using PDMats: PDMats
using PSIS: PSIS
Expand All @@ -21,7 +25,7 @@ const DEFAULT_OPTIMIZER = Optim.LBFGS(;
)

include("woodbury.jl")
include("maximize.jl")
include("optimize.jl")
include("inverse_hessian.jl")
include("mvnormal.jl")
include("elbo.jl")
Expand Down
21 changes: 0 additions & 21 deletions src/maximize.jl

This file was deleted.

52 changes: 43 additions & 9 deletions src/multipath.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
multipathfinder(
logp,
∇logp,
[∇logp,]
θ₀s::AbstractVector{AbstractVector{<:Real}},
ndraws::Int;
kwargs...
Expand All @@ -27,41 +27,75 @@ resulting draws better approximate draws from the target distribution ``p`` inst
# Arguments
- `logp`: a callable that computes the log-density of the target distribution.
- `∇logp`: a callable that computes the gradient of `logp`.
- `θ₀s`: vector of length `nruns` of initial points of length `dim` from which each
single-path Pathfinder run will begin
- `ndraws`: number of approximate draws to return
- `∇logp`: a callable that computes the gradient of `logp`. If not provided, `logp` is
automatically differentiated using the backend specified in `ad_backend`.
- `θ₀s::AbstractVector{AbstractVector{<:Real}}`: vector of length `nruns` of initial points
of length `dim` from which each single-path Pathfinder run will begin
- `ndraws::Int`: number of approximate draws to return
# Keywords
- `ad_backend=AD.ForwardDiffBackend()`: AbstractDifferentiation.jl AD backend.
- `ndraws_per_run::Int=5`: The number of draws to take for each component before resampling.
- `importance::Bool=true`: Perform Pareto smoothed importance resampling of draws.
- `kwargs...` : Remaining keywords are forwarded to [`pathfinder`](@ref).
# Returns
- `q::Distributions.MixtureModel`: Uniformly weighted mixture of ELBO-maximizing
multivariate normal distributions
- `ϕ::AbstractMatrix{<:Real}`: approximate draws from target distribution with size
`(dim, ndraws)`
- `component_inds::Vector{Int}`: Indices ``k`` of components in ``q`` from which each column
in `ϕ` was drawn.
in `ϕ` was drawn.
"""
function multipathfinder(logp, θ₀s, ndraws; ad_backend=AD.ForwardDiffBackend(), kwargs...)
optim_fun = build_optim_function(logp; ad_backend)
return multipathfinder(optim_fun, θ₀s, ndraws; kwargs...)
end
function multipathfinder(
logp,
∇logp,
logp, ∇logp, θ₀s, ndraws; ad_backend=AD.ForwardDiffBackend(), kwargs...
)
optim_fun = build_optim_function(logp, ∇logp; ad_backend)
return multipathfinder(optim_fun, θ₀s, ndraws; kwargs...)
end

"""
multipathfinder(
f::GalacticOptim.OptimizationFunction,
θ₀s::AbstractVector{<:Real},
ndraws::Int;
kwargs...,
)
Filter samples from a mixture of multivariate normal distributions fit using `pathfinder`.
`f` is a user-created optimization function that represents the negative log density with
its gradient and must have the necessary features (e.g. a Hessian function or specified
automatic differentiation type) for the chosen optimization algorithm. For details, see
[GalacticOptim.jl: OptimizationFunction](https://galacticoptim.sciml.ai/stable/API/optimization_function/).
See [`multipathfinder`](@ref) for a description of remaining arguments.
"""
function multipathfinder(
optim_fun::GalacticOptim.OptimizationFunction,
θ₀s,
ndraws;
ndraws_per_run::Int=5,
rng::Random.AbstractRNG=Random.default_rng(),
importance::Bool=true,
kwargs...,
)
if optim_fun.grad === nothing || optim_fun.grad isa Bool
throw(ArgumentError("optimization function must define a gradient function."))
end
if ndraws > ndraws_per_run * length(θ₀s)
@warn "More draws requested than total number of draws across replicas. Draws will not be unique."
end
logp(x) = -optim_fun.f(x, nothing)

# run pathfinder independently from each starting point
# TODO: allow to be parallelized
res = map(θ₀s) do θ₀
return pathfinder(logp, ∇logp, θ₀, ndraws_per_run; rng=rng, kwargs...)
return pathfinder(optim_fun, θ₀, ndraws_per_run; rng, kwargs...)
end
qs = reduce(vcat, first.(res))
ϕs = reduce(hcat, getindex.(res, 2))
Expand Down
61 changes: 61 additions & 0 deletions src/optimize.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
function build_optim_function(f; ad_backend=AD.ForwardDiffBackend())
∇f(x) = only(AD.gradient(ad_backend, f, x))
return build_optim_function(f, ∇f; ad_backend)
end
function build_optim_function(f, ∇f; ad_backend=AD.ForwardDiffBackend())
# because we need explicit access to grad, we generate these ourselves instead of using
# GalacticOptim's auto-AD feature.
# TODO: switch to caching API if available, see
# https://github.com/JuliaDiff/AbstractDifferentiation.jl/issues/41
function grad(res, x, p...)
∇fx = ∇f(x)
@. res = -∇fx
return res
end
function hess(res, x, p...)
H = only(AD.hessian(ad_backend, f, x))
@. res = -H
return res
end
function hv(res, x, v, p...)
Hv = only(AD.lazy_hessian(ad_backend, f, x) * v)
@. res = -Hv
return res
end
return GalacticOptim.OptimizationFunction((x, p...) -> -f(x); grad, hess, hv)
end

function build_optim_problem(optim_fun, x₀; kwargs...)
return GalacticOptim.OptimizationProblem(optim_fun, x₀, nothing; kwargs...)
end

function optimize_with_trace(prob, optimizer)
u0 = prob.u0
fun = prob.f
grad! = fun.grad
function ∇f(x)
∇fx = similar(x)
grad!(∇fx, x, nothing)
rmul!(∇fx, -1)
return ∇fx
end
# caches for the trace of x, f(x), and ∇f(x)
xs = typeof(u0)[]
fxs = typeof(fun.f(u0, nothing))[]
∇fxs = typeof(similar(u0))[]
function callback(x, nfx, args...)
# NOTE: GalacticOptim doesn't have an interface for accessing the gradient trace,
# so we need to recompute it ourselves
# see https://github.com/SciML/GalacticOptim.jl/issues/149
∇fx = ∇f(x)
# terminate if optimization encounters NaNs
(isnan(nfx) || any(isnan, x) || any(isnan, ∇fx)) && return true
# some backends mutate x, so we must copy it
push!(xs, copy(x))
push!(fxs, -nfx)
push!(∇fxs, ∇fx)
return false
end
GalacticOptim.solve(prob, optimizer; cb=callback)
return xs, fxs, ∇fxs
end
91 changes: 77 additions & 14 deletions src/singlepath.jl
Original file line number Diff line number Diff line change
@@ -1,49 +1,112 @@
"""
pathfinder(logp, ∇logp, θ₀::AbstractVector{<:Real}, ndraws::Int; kwargs...)
pathfinder(logp[, ∇logp], θ₀::AbstractVector{<:Real}, ndraws::Int; kwargs...)
Find the best multivariate normal approximation encountered while optimizing `logp`.
Find the best multivariate normal approximation encountered while maximizing `logp`.
The multivariate normal approximation returned is the one that maximizes the evidence lower
bound (ELBO), or equivalently, minimizes the KL divergence between
From an optimization trajectory, Pathfinder constructs a sequence of (multivariate normal)
approximations to the distribution specified by `logp`. The approximation that maximizes the
evidence lower bound (ELBO), or equivalently, minimizes the KL divergence between the
approximation and the true distribution, is returned.
The covariance of the multivariate normal distribution is an inverse Hessian approximation
constructed using at most the previous `history_length` steps.
# Arguments
- `logp`: a callable that computes the log-density of the target distribution.
- `∇logp`: a callable that computes the gradient of `logp`.
- `∇logp`: a callable that computes the gradient of `logp`. If not provided, `logp` is
automatically differentiated using the backend specified in `ad_backend`.
- `θ₀`: initial point of length `dim` from which to begin optimization
- `ndraws`: number of approximate draws to return
# Keywords
- `ad_backend=AD.ForwardDiffBackend()`: AbstractDifferentiation.jl AD backend.
- `rng::Random.AbstractRNG`: The random number generator to be used for drawing samples
- `optimizer::Optim.AbstractOptimizer`: Optimizer to be used for constructing trajectory.
Defaults to `Optim.LBFGS(; m=$DEFAULT_HISTORY_LENGTH, linesearch=$DEFAULT_LINE_SEARCH)`.
- `optimizer`: Optimizer to be used for constructing trajectory. Can be any optimizer
compatible with GalacticOptim, so long as it supports callbacks. Defaults to
`Optim.LBFGS(; m=$DEFAULT_HISTORY_LENGTH, linesearch=LineSearches.MoreThuente())`. See
the [GalacticOptim.jl documentation](https://galacticoptim.sciml.ai/stable) for details.
- `history_length::Int=$DEFAULT_HISTORY_LENGTH`: Size of the history used to approximate the
inverse Hessian. This should only be set when `optimizer` is not an `Optim.LBFGS`.
inverse Hessian. This should only be set when `optimizer` is not an `Optim.LBFGS`.
- `ndraws_elbo::Int=5`: Number of draws used to estimate the ELBO
- `kwargs...` : Remaining keywords are forwarded to `Optim.Options`.
- `kwargs...` : Remaining keywords are forwarded to `GalacticOptim.OptimizationProblem`.
# Returns
- `q::Distributions.MvNormal`: ELBO-maximizing multivariate normal distribution
- `ϕ::AbstractMatrix{<:Real}`: draws from multivariate normal with size `(dim, ndraws)`
- `logqϕ::Vector{<:Real}`: log-density of multivariate normal at columns of `ϕ`
"""
function pathfinder(logp, θ₀, ndraws; ad_backend=AD.ForwardDiffBackend(), kwargs...)
optim_fun = build_optim_function(logp; ad_backend)
return pathfinder(optim_fun, θ₀, ndraws; kwargs...)
end
function pathfinder(logp, ∇logp, θ₀, ndraws; ad_backend=AD.ForwardDiffBackend(), kwargs...)
optim_fun = build_optim_function(logp, ∇logp; ad_backend)
return pathfinder(optim_fun, θ₀, ndraws; kwargs...)
end

"""
pathfinder(
f::GalacticOptim.OptimizationFunction,
θ₀::AbstractVector{<:Real},
ndraws::Int;
kwargs...,
)
Find the best multivariate normal approximation encountered while minimizing `f`.
`f` is a user-created optimization function that represents the negative log density with
its gradient and must have the necessary features (e.g. a Hessian function or specified
automatic differentiation type) for the chosen optimization algorithm. For details, see
[GalacticOptim.jl: OptimizationFunction](https://galacticoptim.sciml.ai/stable/API/optimization_function/).
See [`pathfinder`](@ref) for a description of remaining arguments.
"""
function pathfinder(
logp,
∇logp,
optim_fun::GalacticOptim.OptimizationFunction,
θ₀,
ndraws;
rng::Random.AbstractRNG=Random.default_rng(),
optimizer::Optim.AbstractOptimizer=DEFAULT_OPTIMIZER,
optimizer=DEFAULT_OPTIMIZER,
history_length::Int=optimizer isa Optim.LBFGS ? optimizer.m : DEFAULT_HISTORY_LENGTH,
ndraws_elbo::Int=5,
kwargs...,
)
optim_prob = build_optim_problem(optim_fun, θ₀; kwargs...)
return pathfinder(optim_prob, ndraws; rng, optimizer, history_length, ndraws_elbo)
end

"""
pathfinder(prob::GalacticOptim.OptimizationProblem, ndraws::Int; kwargs...)
Find the best multivariate normal approximation encountered while solving `prob`.
`prob` is a user-created optimization problem that represents the negative log density with
its gradient, an initial position and must have the necessary features (e.g. a Hessian
function or specified automatic differentiation type) for the chosen optimization algorithm.
For details, see
[GalacticOptim.jl: Defining OptimizationProblems](https://galacticoptim.sciml.ai/stable/API/optimization_problem/).
See [`pathfinder`](@ref) for a description of remaining arguments.
"""
function pathfinder(
optim_prob::GalacticOptim.OptimizationProblem,
ndraws;
rng::Random.AbstractRNG=Random.default_rng(),
optimizer=DEFAULT_OPTIMIZER,
history_length::Int=optimizer isa Optim.LBFGS ? optimizer.m : DEFAULT_HISTORY_LENGTH,
ndraws_elbo::Int=5,
)
if optim_prob.f.grad === nothing || optim_prob.f.grad isa Bool
throw(ArgumentError("optimization function must define a gradient function."))
end
logp(x) = -optim_prob.f.f(x, nothing)
# compute trajectory
θs, logpθs, ∇logpθs = maximize_with_trace(logp, ∇logp, θ₀, optimizer; kwargs...)
θs, logpθs, ∇logpθs = optimize_with_trace(optim_prob, optimizer)
L = length(θs) - 1
@assert L + 1 == length(logpθs) == length(∇logpθs)

# fit mv-normal distributions to trajectory
qs = fit_mvnormals(θs, ∇logpθs; history_length=history_length)
qs = fit_mvnormals(θs, ∇logpθs; history_length)

# find ELBO-maximizing distribution
lopt, elbo, ϕ, logqϕ = maximize_elbo(rng, logp, qs[2:end], ndraws_elbo)
Expand Down
8 changes: 5 additions & 3 deletions test/inverse_hessian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,20 @@ end
n = 10
history_length = 5
logp(x) = logp_banana(x)
∇logp(x) = ForwardDiff.gradient(logp, x)
nocedal_wright_scaling(α, s, y) = fill!(similar(α), dot(y, s) / sum(abs2, y))
θ₀ = 10 * randn(n)

ad_backend = AD.ForwardDiffBackend()
fun = Pathfinder.build_optim_function(logp; ad_backend)
prob = Pathfinder.build_optim_problem(fun, θ₀)
optimizer = Optim.LBFGS(;
m=history_length, linesearch=Optim.LineSearches.MoreThuente()
)
θs, logpθs, ∇logpθs = Pathfinder.maximize_with_trace(logp, ∇logp, θ₀, optimizer)
θs, logpθs, ∇logpθs = Pathfinder.optimize_with_trace(prob, optimizer)

# run lbfgs_inverse_hessians with the same initialization as Optim.LBFGS
Hs = Pathfinder.lbfgs_inverse_hessians(
θs, ∇logpθs; history_length=history_length, Hinit=nocedal_wright_scaling
θs, ∇logpθs; history_length, Hinit=nocedal_wright_scaling
)
ss = diff(θs)
ps = (Hs .* ∇logpθs)[1:(end - 1)]
Expand Down
Loading

0 comments on commit a7ec9d3

Please sign in to comment.