"""
PathfinderResult
Container for results of single-path Pathfinder.
# Fields
- `input`: User-provided input object, e.g. a LogDensityProblem, `optim_fun`, `optim_prob`,
or another object.
- `optimizer`: Optimizer used for maximizing the log-density
- `rng`: Pseudorandom number generator that was used for sampling
- `optim_prob::SciMLBase.OptimizationProblem`: Optimization problem used for
  optimization
- `logp`: Log-density function
- `fit_distribution::Distributions.MvNormal`: ELBO-maximizing multivariate normal
distribution
- `draws::AbstractMatrix{<:Real}`: draws from multivariate normal with size `(dim, ndraws)`
- `fit_distribution_transformed`: `fit_distribution` transformed to the same space as the
user-supplied target distribution. This is only different from `fit_distribution` when
integrating with other packages, and its type depends on the type of `input`.
- `draws_transformed`: `draws` transformed to be draws from `fit_distribution_transformed`.
- `fit_iteration::Int`: Iteration at which ELBO estimate was maximized
- `num_tries::Int`: Number of tries until Pathfinder succeeded
- `optim_solution::SciMLBase.OptimizationSolution`: Solution object of optimization.
- `optim_trace::Pathfinder.OptimizationTrace`: container for optimization trace of points,
log-density, and gradient. The first point is the initial point.
- `fit_distributions::AbstractVector{Distributions.MvNormal}`: Multivariate normal
distributions for each point in `optim_trace`, where
`fit_distributions[fit_iteration + 1] == fit_distribution`
- `elbo_estimates::AbstractVector{<:Pathfinder.ELBOEstimate}`: ELBO estimates for all but
the first distribution in `fit_distributions`.
- `num_bfgs_updates_rejected::Int`: Number of times a BFGS update to the reconstructed
inverse Hessian was rejected to keep the inverse Hessian positive definite.
"""
struct PathfinderResult{I,O,R,OP,LP,FD,D,FDT,DT,OS,OT,EE}
input::I
optimizer::O
rng::R
optim_prob::OP
logp::LP
fit_distribution::FD
draws::D
fit_distribution_transformed::FDT
draws_transformed::DT
fit_iteration::Int
num_tries::Int
optim_solution::OS
optim_trace::OT
fit_distributions::Vector{FD}
elbo_estimates::EE
num_bfgs_updates_rejected::Int
end
function Base.show(io::IO, ::MIME"text/plain", result::PathfinderResult)
println(io, "Single-path Pathfinder result")
println(io, " tries: $(result.num_tries)")
println(io, " draws: $(size(result.draws, 2))")
println(
io,
" fit iteration: $(result.fit_iteration) (total: $(length(result.optim_trace) - 1))",
)
println(io, " fit ELBO: $(_to_string(result.elbo_estimates[result.fit_iteration]))")
print(io, " fit distribution: ", result.fit_distribution)
return nothing
end
"""
pathfinder(ℓ; kwargs...)
pathfinder(fun::SciMLBase::OptimizationFunction; kwargs...)
pathfinder(prob::SciMLBase::OptimizationProblem; kwargs...)
Find the best multivariate normal approximation encountered while maximizing a log density.
From an optimization trajectory, Pathfinder constructs a sequence of (multivariate normal)
approximations to the distribution specified by a log density function. The approximation
that maximizes the evidence lower bound (ELBO), or equivalently, minimizes the KL divergence
between the approximation and the true distribution, is returned.
The covariance of the multivariate normal distribution is an inverse Hessian approximation
constructed using at most the previous `history_length` steps.
# Arguments
- `ℓ`: an object, representing the log-density of the target distribution and its gradient,
that implements the [LogDensityProblems](https://www.tamaspapp.eu/LogDensityProblems.jl)
interface.
- `fun::SciMLBase.OptimizationFunction`: an optimization function that represents the
negative log density with its gradient. It must have the necessary features (e.g. a
Hessian function, if applicable) for the chosen optimization algorithm. For details, see
[Optimization.jl: OptimizationFunction](https://optimization.sciml.ai/stable/API/optimization_function/).
- `prob::SciMLBase.OptimizationProblem`: an optimization problem containing a function with
the same properties as `fun`, as well as an initial point, in which case `init` and
`dim` are ignored.
# Keywords
- `dim::Int`: dimension of the target distribution, needed only if `fun` is provided and
`init` is not.
- `init::AbstractVector{<:Real}`: initial point of length `dim` from which to begin
optimization. If not provided, an initial point of type `Vector{Float64}` and length
`dim` is created and filled using `init_sampler`.
- `init_scale::Real`: scale factor ``s`` such that the default `init_sampler` samples
  entries uniformly in the range ``[-s, s]``.
- `init_sampler`: function with the signature `(rng, x) -> x` that modifies a vector of
  length `dim` in-place to generate an initial point.
- `ndraws_elbo::Int=$DEFAULT_NDRAWS_ELBO`: Number of draws used to estimate the ELBO
- `ndraws::Int=ndraws_elbo`: number of approximate draws to return
- `rng::Random.AbstractRNG`: The random number generator to be used for drawing samples
- `executor::Transducers.Executor=Transducers.SequentialEx()`: Transducers.jl executor that
determines if and how to perform ELBO computation in parallel. The default
(`SequentialEx()`) performs no parallelization. If `rng` is known to be thread-safe, and
the log-density function is known to have no internal state, then
`Transducers.PreferParallel()` may be used to parallelize log-density evaluation.
This is generally only faster for expensive log density functions.
- `history_length::Int=$DEFAULT_HISTORY_LENGTH`: Size of the history used to approximate the
inverse Hessian.
- `optimizer`: Optimizer to be used for constructing trajectory. Can be any optimizer
compatible with Optimization.jl, so long as it supports callbacks. Defaults to
`Optim.LBFGS(; m=history_length, linesearch=LineSearches.MoreThuente())`. See
the [Optimization.jl documentation](https://optimization.sciml.ai/stable) for details.
- `ntries::Int=1_000`: Number of times to try the optimization, restarting if it fails. Before
every restart, a new initial point is drawn using `init_sampler`.
- `fail_on_nonfinite::Bool=true`: If `true`, optimization fails if the log-density is
  `NaN` or `Inf` or if the gradient is ever non-finite. If `ntries > 1`, then
  optimization will be retried after reinitialization.
- `kwargs...` : Remaining keywords are forwarded to
[`Optimization.solve`](https://optimization.sciml.ai/stable/API/solve).
# Returns
- [`PathfinderResult`](@ref)
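
# Examples

A minimal sketch of running Pathfinder on a toy target. The `StdNormalTarget` type
below is illustrative, not part of this package; it implements the LogDensityProblems
interface for a standard multivariate normal:

```julia
using LogDensityProblems, Pathfinder

struct StdNormalTarget
    dim::Int
end
LogDensityProblems.dimension(t::StdNormalTarget) = t.dim
function LogDensityProblems.capabilities(::Type{StdNormalTarget})
    return LogDensityProblems.LogDensityOrder{1}()
end
LogDensityProblems.logdensity(t::StdNormalTarget, x) = -sum(abs2, x) / 2
function LogDensityProblems.logdensity_and_gradient(t::StdNormalTarget, x)
    return LogDensityProblems.logdensity(t, x), -x
end

result = pathfinder(StdNormalTarget(5); ndraws=100)
result.draws  # 5×100 matrix of approximate draws
```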
"""
function pathfinder end
function pathfinder(ℓ; input=ℓ, kwargs...)
_check_log_density_problem(ℓ)
dim = LogDensityProblems.dimension(ℓ)
optim_fun = build_optim_function(ℓ)
return pathfinder(optim_fun; input, dim, kwargs...)
end
function pathfinder(
optim_fun::SciMLBase.OptimizationFunction;
rng=Random.GLOBAL_RNG,
init=nothing,
dim::Int=-1,
init_scale=2,
init_sampler=UniformSampler(init_scale),
input=optim_fun,
kwargs...,
)
if init !== nothing
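        # a user-provided initial point must not be overwritten on retries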
_init = init
allow_mutating_init = false
elseif init === nothing && dim > 0
_init = Vector{Float64}(undef, dim)
init_sampler(rng, _init)
allow_mutating_init = true
else
throw(ArgumentError("An initial point `init` or dimension `dim` must be provided."))
end
prob = build_optim_problem(optim_fun, _init)
return pathfinder(prob; rng, input, init_sampler, allow_mutating_init, kwargs...)
end
function pathfinder(
prob::SciMLBase.OptimizationProblem;
rng::Random.AbstractRNG=Random.GLOBAL_RNG,
history_length::Int=DEFAULT_HISTORY_LENGTH,
optimizer=default_optimizer(history_length),
ndraws_elbo::Int=DEFAULT_NDRAWS_ELBO,
ndraws::Int=ndraws_elbo,
input=prob,
kwargs...,
)
if prob.f.grad === nothing || prob.f.grad isa Bool
throw(ArgumentError("optimization function must define a gradient function."))
end
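    # recover the log-density from the negative log-density objective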
logp(x) = -prob.f.f(x, nothing)
path_result = ProgressLogging.progress(; name="Optimizing") do progress_id
return _pathfinder_try_until_succeed(
rng,
prob,
logp;
history_length,
optimizer,
progress_id,
ndraws_elbo,
kwargs...,
)
end
@unpack (
itry,
success,
optim_prob,
optim_solution,
optim_trace,
fit_distributions,
fit_iteration,
elbo_estimates,
num_bfgs_updates_rejected,
) = path_result
if !success
ndraws_elbo_actual = 0
@warn "Pathfinder failed after $itry tries. Increase `ntries`, inspect the model for numerical instability, or provide a more suitable `init_sampler`."
else
elbo_estimate_opt = elbo_estimates[fit_iteration]
ndraws_elbo_actual = ndraws_elbo
end
if num_bfgs_updates_rejected > 0
perc = round(num_bfgs_updates_rejected * (100//length(fit_distributions)); digits=1)
@warn "$num_bfgs_updates_rejected ($(perc)%) updates to the inverse Hessian estimate were rejected to keep it positive definite."
end
fit_distribution = fit_distributions[fit_iteration + 1]
# reuse existing draws; draw additional ones if necessary
draws = if ndraws_elbo_actual == 0
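        # Pathfinder failed, so no ELBO draws exist to reuse; sample all draws fresh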
rand(rng, fit_distribution, ndraws)
elseif ndraws_elbo_actual < ndraws
hcat(elbo_estimate_opt.draws, rand(rng, fit_distribution, ndraws - ndraws_elbo_actual))
else
elbo_estimate_opt.draws[:, 1:ndraws]
end
# placeholders
fit_distribution_transformed = fit_distribution
draws_transformed = draws
return PathfinderResult(
input,
optimizer,
rng,
optim_prob,
logp,
fit_distribution,
draws,
fit_distribution_transformed,
draws_transformed,
fit_iteration,
itry,
optim_solution,
optim_trace,
fit_distributions,
elbo_estimates,
num_bfgs_updates_rejected,
)
end
function _pathfinder_try_until_succeed(
rng,
prob,
logp;
ntries::Int=1_000,
init_scale=2,
init_sampler=UniformSampler(init_scale),
allow_mutating_init::Bool=false,
kwargs...,
)
itry = 1
progress_name = "Optimizing (try 1)"
result = _pathfinder(rng, prob, logp; progress_name, kwargs...)
_prob = prob
while !result.success && itry < ntries
if itry == 1 && !allow_mutating_init
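            # copy the problem so the user-supplied initial point is not mutated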
_prob = deepcopy(prob)
end
itry += 1
init_sampler(rng, _prob.u0)
progress_name = "Optimizing (try $itry)"
result = _pathfinder(rng, _prob, logp; progress_name, kwargs...)
end
return (; itry, optim_prob=_prob, result...)
end
function _pathfinder(
rng,
prob,
logp;
history_length::Int=DEFAULT_HISTORY_LENGTH,
optimizer=default_optimizer(history_length),
ndraws_elbo=DEFAULT_NDRAWS_ELBO,
executor::Transducers.Executor=Transducers.SequentialEx(),
kwargs...,
)
# compute trajectory
optim_solution, optim_trace = optimize_with_trace(prob, optimizer; kwargs...)
L = length(optim_trace) - 1
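    # at least one optimization step beyond the initial point is required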
success = L > 0
# fit mv-normal distributions to trajectory
fit_distributions, num_bfgs_updates_rejected = fit_mvnormals(
optim_trace.points, optim_trace.gradients; history_length
)
# find ELBO-maximizing distribution
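    # (the fit at the initial point is skipped, so estimates are offset by 1)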
fit_iteration, elbo_estimates = @views maximize_elbo(
rng, logp, fit_distributions[(begin + 1):end], ndraws_elbo, executor
)
if isempty(elbo_estimates)
success = false
else
elbo = elbo_estimates[fit_iteration].value
success &= !isnan(elbo) & (elbo != -Inf)
end
return (;
success,
optim_solution,
optim_trace,
fit_distributions,
fit_iteration,
elbo_estimates,
num_bfgs_updates_rejected,
)
end
"""
UniformSampler(scale::Real)
Sampler that in-place modifies an array to be IID uniformly distributed on `[-scale, scale]`
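
# Examples

A small illustration (the RNG and array here are arbitrary):

```julia
using Random

s = UniformSampler(2.0)
x = Vector{Float64}(undef, 3)
s(Random.default_rng(), x)  # x now holds IID draws from Uniform(-2, 2)
```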
"""
struct UniformSampler{T<:Real}
scale::T
function UniformSampler(scale::T) where {T<:Real}
scale > 0 || throw(DomainError(scale, "scale of uniform sampler must be positive."))
return new{T}(scale)
end
end
function (s::UniformSampler)(rng::Random.AbstractRNG, point)
scale = s.scale
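    # the fused broadcast evaluates rand(rng) once per element of point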
@. point = rand(rng) * 2scale - scale
return point
end
function _check_log_density_problem(ℓ)
capabilities = LogDensityProblems.capabilities(ℓ)
if capabilities === nothing
throw(
ArgumentError(
"Provided object must implement the LogDensityProblems interface. See https://www.tamaspapp.eu/LogDensityProblems.jl.",
),
)
elseif capabilities === LogDensityProblems.LogDensityOrder{0}()
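        # order 0: only the log-density itself is available, with no gradient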
throw(
ArgumentError(
"The log density problem must at least support gradient computation. To use automatic differentiation, see https://github.com/tpapp/LogDensityProblemsAD.jl.",
),
)
end
return nothing
end