module Optimisation
using ..Turing
using NamedArrays: NamedArrays
using DynamicPPL: DynamicPPL
using LogDensityProblems: LogDensityProblems
using LogDensityProblemsAD: LogDensityProblemsAD
using Optimization: Optimization
using OptimizationOptimJL: OptimizationOptimJL
using Random: Random
using SciMLBase: SciMLBase
using ADTypes: ADTypes
using StatsBase: StatsBase
using Accessors: Accessors
using Printf: Printf
using ForwardDiff: ForwardDiff
using StatsAPI: StatsAPI
using Statistics: Statistics
export maximum_a_posteriori, maximum_likelihood
# The MAP and MLE exports are only needed for the Optim.jl interface.
export MAP, MLE
"""
ModeEstimator
An abstract type to mark whether mode estimation is to be done with maximum a posteriori
(MAP) or maximum likelihood estimation (MLE).
"""
abstract type ModeEstimator end
struct MLE <: ModeEstimator end
struct MAP <: ModeEstimator end
"""
OptimizationContext{C<:AbstractContext} <: AbstractContext
The `OptimizationContext` transforms variables to their constrained space, but
omits the log-abs-det-Jacobian term of the transformation from the density. This
context is intended to allow an optimizer to search freely over R^n while the
density is still evaluated in the constrained space.
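For illustration, a minimal sketch of wrapping a leaf context for MLE-style
evaluation (the wrapped context determines whether the prior density is included):

```julia
ctx = OptimizationContext(DynamicPPL.LikelihoodContext())
```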
"""
struct OptimizationContext{C<:DynamicPPL.AbstractContext} <: DynamicPPL.AbstractContext
context::C
function OptimizationContext{C}(context::C) where {C<:DynamicPPL.AbstractContext}
if !(
context isa Union{
DynamicPPL.DefaultContext,
DynamicPPL.LikelihoodContext,
DynamicPPL.PriorContext,
}
)
msg = """
`OptimizationContext` supports only leaf contexts of type
`DynamicPPL.DefaultContext`, `DynamicPPL.LikelihoodContext`,
and `DynamicPPL.PriorContext` (given: `$(typeof(context))`)
"""
throw(ArgumentError(msg))
end
return new{C}(context)
end
end
OptimizationContext(ctx::DynamicPPL.AbstractContext) = OptimizationContext{typeof(ctx)}(ctx)
DynamicPPL.NodeTrait(::OptimizationContext) = DynamicPPL.IsLeaf()
function DynamicPPL.tilde_assume(ctx::OptimizationContext, dist, vn, vi)
r = vi[vn, dist]
lp = if ctx.context isa Union{DynamicPPL.DefaultContext,DynamicPPL.PriorContext}
# MAP
Distributions.logpdf(dist, r)
else
# MLE
0
end
return r, lp, vi
end
_loglikelihood(dist::Distribution, x) = StatsAPI.loglikelihood(dist, x)
function _loglikelihood(dists::AbstractArray{<:Distribution}, x)
return StatsAPI.loglikelihood(arraydist(dists), x)
end
function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, vi)
# Values should be set and we're using `SampleFromPrior`, hence the `rng` argument
# shouldn't affect anything.
# TODO: Stop using `get_and_set_val!`.
r = DynamicPPL.get_and_set_val!(
Random.default_rng(), vi, vns, right, DynamicPPL.SampleFromPrior()
)
lp = if ctx.context isa Union{DynamicPPL.DefaultContext,DynamicPPL.PriorContext}
# MAP
_loglikelihood(right, r)
else
# MLE
0
end
return r, lp, vi
end
function DynamicPPL.tilde_observe(
ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...
)
return DynamicPPL.tilde_observe(ctx.context, args...)
end
function DynamicPPL.dot_tilde_observe(
ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...
)
return DynamicPPL.dot_tilde_observe(ctx.context, args...)
end
"""
OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo}
A struct that stores the negative log density function of a `DynamicPPL` model.
"""
const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = Turing.LogDensityFunction{
V,M,C
}
"""
OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext)
Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`.
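A minimal sketch of constructing and evaluating the objective, assuming a model
`m` defined elsewhere with `@model`:

```julia
ctx = OptimizationContext(DynamicPPL.DefaultContext())
f = OptimLogDensity(m, ctx)
f(f.varinfo[:])  # negative log joint at the current parameter values
```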
"""
function OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext)
init = DynamicPPL.VarInfo(model)
return Turing.LogDensityFunction(init, model, context)
end
"""
(f::OptimLogDensity)(z)
(f::OptimLogDensity)(z, _)
Evaluate the negative log joint or log likelihood at the array `z`. Which one is evaluated
depends on the context of `f`.
Any second argument is ignored. The two-argument method only exists to match the
interface required by Optimization.jl.
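For illustration, the two call forms below are equivalent; Optimization.jl passes
hyperparameters as a second argument, which this objective ignores:

```julia
f(z)
f(z, nothing)
```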
"""
function (f::OptimLogDensity)(z::AbstractVector)
varinfo = DynamicPPL.unflatten(f.varinfo, z)
return -DynamicPPL.getlogp(last(DynamicPPL.evaluate!!(f.model, varinfo, f.context)))
end
(f::OptimLogDensity)(z, _) = f(z)
# NOTE: This seems a bit weird IMO since this is the _negative_ log-likelihood.
LogDensityProblems.logdensity(f::OptimLogDensity, z::AbstractVector) = f(z)
# NOTE: The format of this function is dictated by Optim. The first argument sets whether to
# compute the function value, the second whether to compute the gradient (and stores the
# gradient). The last one is the actual argument of the objective function.
function (f::OptimLogDensity)(F, G, z)
if G !== nothing
# Calculate negative log joint and its gradient.
# TODO: Make OptimLogDensity a LogDensityProblems.ADgradient already? Allow
# specifying the AD backend?
ℓ = LogDensityProblemsAD.ADgradient(f)
neglogp, ∇neglogp = LogDensityProblems.logdensity_and_gradient(ℓ, z)
# Save the gradient to the pre-allocated array.
copyto!(G, ∇neglogp)
# If F is something, the negative log joint is requested as well.
# We have already computed it as a by-product above and hence return it directly.
if F !== nothing
return neglogp
end
end
# Only negative log joint requested but no gradient.
if F !== nothing
return LogDensityProblems.logdensity(f, z)
end
return nothing
end
"""
ModeResult{
V<:NamedArrays.NamedArray,
O<:Any,
M<:OptimLogDensity
}
A wrapper struct to store various results from a MAP or MLE estimation.
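A hedged usage sketch, assuming a fitted result from `maximum_likelihood`:

```julia
result = maximum_likelihood(model)
result.values               # point estimates as a NamedArray
StatsBase.coeftable(result) # estimates with standard errors and CIs
```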
"""
struct ModeResult{V<:NamedArrays.NamedArray,O<:Any,M<:OptimLogDensity} <:
StatsBase.StatisticalModel
"A vector with the resulting point estimates."
values::V
"The stored optimiser results."
optim_result::O
"The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run."
lp::Float64
"The evaluation function used to calculate the output."
f::M
end
function Base.show(io::IO, ::MIME"text/plain", m::ModeResult)
print(io, "ModeResult with maximized lp of ")
Printf.@printf(io, "%.2f", m.lp)
println(io)
return show(io, m.values)
end
function Base.show(io::IO, m::ModeResult)
return show(io, m.values.array)
end
# Various StatsBase methods for ModeResult
function StatsBase.coeftable(m::ModeResult; level::Real=0.95)
# Get columns for coeftable.
terms = string.(StatsBase.coefnames(m))
estimates = m.values.array[:, 1]
stderrors = StatsBase.stderror(m)
zscore = estimates ./ stderrors
p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore)
# Confidence interval (CI)
q = Statistics.quantile(Distributions.Normal(), (1 + level) / 2)
ci_low = estimates .- q .* stderrors
ci_high = estimates .+ q .* stderrors
level_ = 100 * level
level_percentage = isinteger(level_) ? Int(level_) : level_
cols = [estimates, stderrors, zscore, p, ci_low, ci_high]
colnms = [
"Coef.",
"Std. Error",
"z",
"Pr(>|z|)",
"Lower $(level_percentage)%",
"Upper $(level_percentage)%",
]
return StatsBase.CoefTable(cols, colnms, terms)
end
function StatsBase.informationmatrix(
m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...
)
# Calculate Hessian and information matrix.
# Convert the values to their constrained (untransformed) states to make sure the
# Hessian is computed with respect to the original parameters.
linked = DynamicPPL.istrans(m.f.varinfo)
if linked
m = Accessors.@set m.f.varinfo = DynamicPPL.invlink!!(m.f.varinfo, m.f.model)
end
# Calculate the Hessian, which equals the information matrix because the negative
# of the log likelihood (or log joint) was optimized.
varnames = StatsBase.coefnames(m)
info = hessian_function(m.f, m.values.array[:, 1])
# Link it back if we invlinked it.
if linked
m = Accessors.@set m.f.varinfo = DynamicPPL.link!!(m.f.varinfo, m.f.model)
end
return NamedArrays.NamedArray(info, (varnames, varnames))
end
StatsBase.coef(m::ModeResult) = m.values
StatsBase.coefnames(m::ModeResult) = names(m.values)[1]
StatsBase.params(m::ModeResult) = StatsBase.coefnames(m)
StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m))
StatsBase.loglikelihood(m::ModeResult) = m.lp
"""
ModeResult(log_density::OptimLogDensity, solution::SciMLBase.OptimizationSolution)
Create a `ModeResult` for a given `log_density` objective and a `solution` given by `solve`.
`Optimization.solve` returns its own result type. This function converts that into the
richer format of `ModeResult`. It also takes care of transforming the parameters back to
the original space in case the optimization was done in a transformed space.
"""
function ModeResult(log_density::OptimLogDensity, solution::SciMLBase.OptimizationSolution)
varinfo_new = DynamicPPL.unflatten(log_density.varinfo, solution.u)
# `getparams` performs invlinking if needed
vns_vals_iter = Turing.Inference.getparams(log_density.model, varinfo_new)
syms = map(Symbol ∘ first, vns_vals_iter)
vals = map(last, vns_vals_iter)
return ModeResult(
NamedArrays.NamedArray(vals, syms), solution, -solution.objective, log_density
)
end
"""
ModeEstimationConstraints
A struct that holds constraints for mode estimation problems.
The fields correspond to the constraints supported by Optimization.jl:
`lb` and `ub` specify the lower and upper bounds of box constraints. `cons` is a function that
takes the parameters of the model and returns a list of derived quantities, which are then
constrained by the lower and upper bounds set by `lcons` and `ucons`. We refer to these
as generic constraints. Please see the documentation of
[Optimization.jl](https://docs.sciml.ai/Optimization/stable/) for more details.
Any of the fields can be `nothing`, disabling the corresponding constraints.
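A hedged sketch of both constraint styles, assuming a two-parameter model `model`;
`cons` uses the in-place `cons(res, x, p)` form expected by Optimization.jl:

```julia
# Box constraints only.
maximum_likelihood(model; lb=[0.0, -1.0], ub=[10.0, 1.0])

# A generic constraint: x[1] + x[2] must lie in [-1, 1].
cons(res, x, p) = (res .= [x[1] + x[2]])
maximum_a_posteriori(
    model; cons=cons, lcons=[-1.0], ucons=[1.0], initial_params=[0.5, 0.0]
)
```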
"""
struct ModeEstimationConstraints{
Lb<:Union{Nothing,AbstractVector},
Ub<:Union{Nothing,AbstractVector},
Cons,
LCons<:Union{Nothing,AbstractVector},
UCons<:Union{Nothing,AbstractVector},
}
lb::Lb
ub::Ub
cons::Cons
lcons::LCons
ucons::UCons
end
has_box_constraints(c::ModeEstimationConstraints) = c.ub !== nothing || c.lb !== nothing
function has_generic_constraints(c::ModeEstimationConstraints)
return (c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing)
end
has_constraints(c) = has_box_constraints(c) || has_generic_constraints(c)
"""
generate_initial_params(model::DynamicPPL.Model, initial_params, constraints)
Generate an initial value for the optimization problem.
If `initial_params` is not `nothing`, a copy of it is returned. Otherwise initial parameter
values are generated either by sampling from the prior (if no constraints are present) or
uniformly from the box constraints. If generic constraints are set, an error is thrown.
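A minimal sketch, assuming a model `m` with two parameters:

```julia
cons = ModeEstimationConstraints([0.0, 0.0], [1.0, 1.0], nothing, nothing, nothing)
x0 = generate_initial_params(m, nothing, cons)  # uniform draws within the box
```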
"""
function generate_initial_params(model::DynamicPPL.Model, initial_params, constraints)
if initial_params === nothing && has_generic_constraints(constraints)
throw(
ArgumentError(
"You must provide an initial value when using generic constraints."
),
)
end
return if initial_params !== nothing
copy(initial_params)
elseif has_box_constraints(constraints)
[
rand(Distributions.Uniform(lower, upper)) for
(lower, upper) in zip(constraints.lb, constraints.ub)
]
else
rand(Vector, model)
end
end
function default_solver(constraints::ModeEstimationConstraints)
return if has_generic_constraints(constraints)
OptimizationOptimJL.IPNewton()
else
OptimizationOptimJL.LBFGS()
end
end
"""
OptimizationProblem(log_density::OptimLogDensity, adtype, constraints)
Create an `OptimizationProblem` for the objective function defined by `log_density`.
"""
function Optimization.OptimizationProblem(log_density::OptimLogDensity, adtype, constraints)
# Note that OptimLogDensity is a callable that evaluates the model with given
# parameters. Hence we can use it in the objective function as below.
f = Optimization.OptimizationFunction(log_density, adtype; cons=constraints.cons)
initial_params = log_density.varinfo[:]
prob = if !has_constraints(constraints)
Optimization.OptimizationProblem(f, initial_params)
else
Optimization.OptimizationProblem(
f,
initial_params;
lcons=constraints.lcons,
ucons=constraints.ucons,
lb=constraints.lb,
ub=constraints.ub,
)
end
return prob
end
"""
estimate_mode(
model::DynamicPPL.Model,
estimator::ModeEstimator,
[solver];
kwargs...
)
Find the mode of the probability distribution of a model.
Under the hood this function calls `Optimization.solve`.
# Arguments
- `model::DynamicPPL.Model`: The model for which to estimate the mode.
- `estimator::ModeEstimator`: Can be either `MLE()` for maximum likelihood estimation or
`MAP()` for maximum a posteriori estimation.
- `solver=nothing`: The optimization algorithm to use. Optional. Can be any solver
recognised by Optimization.jl. If omitted, a default solver is used: LBFGS, or IPNewton
if generic (non-box) constraints are present.
# Keyword arguments
- `initial_params::Union{AbstractVector,Nothing}=nothing`: Initial value for the
optimization. Optional, unless non-box constraints are specified. If omitted it is
generated by either sampling from the prior distribution or uniformly from the box
constraints, if any.
- `adtype::AbstractADType=AutoForwardDiff()`: The automatic differentiation type to use.
- Keyword arguments `lb`, `ub`, `cons`, `lcons`, and `ucons` define constraints for the
optimization problem. Please see [`ModeEstimationConstraints`](@ref) for more details.
- Any extra keyword arguments are passed to `Optimization.solve`.
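A hedged example; the model definition below is illustrative, not part of this module:

```julia
@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    x .~ Normal(m, sqrt(s²))
end

model = gdemo([1.5, 2.0])
mle = estimate_mode(model, MLE())
map_est = estimate_mode(model, MAP(), OptimizationOptimJL.NelderMead())
```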
"""
function estimate_mode(
model::DynamicPPL.Model,
estimator::ModeEstimator,
solver=nothing;
initial_params=nothing,
adtype=ADTypes.AutoForwardDiff(),
cons=nothing,
lcons=nothing,
ucons=nothing,
lb=nothing,
ub=nothing,
kwargs...,
)
constraints = ModeEstimationConstraints(lb, ub, cons, lcons, ucons)
initial_params = generate_initial_params(model, initial_params, constraints)
if solver === nothing
solver = default_solver(constraints)
end
# Create an OptimLogDensity object that can be used to evaluate the objective function,
# i.e. the negative log density. Set its VarInfo to the initial parameters.
log_density = let
inner_context = if estimator isa MAP
DynamicPPL.DefaultContext()
else
DynamicPPL.LikelihoodContext()
end
ctx = OptimizationContext(inner_context)
ld = OptimLogDensity(model, ctx)
Accessors.@set ld.varinfo = DynamicPPL.unflatten(ld.varinfo, initial_params)
end
# TODO(mhauru) We currently couple together the questions of whether the user specified
# bounds/constraints and whether we transform the objective function to an
# unconstrained space. These should be separate concerns, but for that we need to
# implement getting the bounds of the prior distributions.
optimise_in_unconstrained_space = !has_constraints(constraints)
if optimise_in_unconstrained_space
transformed_varinfo = DynamicPPL.link(log_density.varinfo, log_density.model)
log_density = Accessors.@set log_density.varinfo = transformed_varinfo
end
prob = Optimization.OptimizationProblem(log_density, adtype, constraints)
solution = Optimization.solve(prob, solver; kwargs...)
# TODO(mhauru) We return a ModeResult for compatibility with the older Optim.jl
# interface. Might we want to break that and develop a better return type?
return ModeResult(log_density, solution)
end
"""
maximum_a_posteriori(
model::DynamicPPL.Model,
[solver];
kwargs...
)
Find the maximum a posteriori estimate of a model.
This is a convenience function that calls `estimate_mode` with `MAP()` as the estimator.
Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more
details.
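A hedged usage sketch, assuming `model` is a `DynamicPPL.Model` instance:

```julia
maximum_a_posteriori(model)
maximum_a_posteriori(model; initial_params=[0.5, 0.0])
```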
"""
function maximum_a_posteriori(model::DynamicPPL.Model, args...; kwargs...)
return estimate_mode(model, MAP(), args...; kwargs...)
end
"""
maximum_likelihood(
model::DynamicPPL.Model,
[solver];
kwargs...
)
Find the maximum likelihood estimate of a model.
This is a convenience function that calls `estimate_mode` with `MLE()` as the estimator.
Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more
details.
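A hedged usage sketch, assuming a two-parameter `model`; the box bounds are illustrative:

```julia
maximum_likelihood(model)
maximum_likelihood(model; lb=[0.0, -10.0], ub=[10.0, 10.0])
```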
"""
function maximum_likelihood(model::DynamicPPL.Model, args...; kwargs...)
return estimate_mode(model, MLE(), args...; kwargs...)
end
end