-
-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathlevenberg.jl
455 lines (421 loc) · 17.8 KB
/
levenberg.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
"""
```julia
LevenbergMarquardt(; chunk_size = Val{0}(),
autodiff = Val{true}(),
standardtag = Val{true}(),
concrete_jac = nothing,
diff_type = Val{:forward},
linsolve = nothing, precs = DEFAULT_PRECS,
damping_initial::Real = 1.0,
damping_increase_factor::Real = 2.0,
damping_decrease_factor::Real = 3.0,
finite_diff_step_geodesic::Real = 0.1,
α_geodesic::Real = 0.75,
b_uphill::Real = 1.0,
min_damping_D::AbstractFloat = 1e-8)
```
An advanced Levenberg-Marquardt implementation with the improvements suggested in the
[paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt
algorithm for nonlinear least-squares minimization". Designed for large-scale and
numerically-difficult nonlinear systems.
### Keyword Arguments
- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
system. This allows for multiple derivative columns to be computed simultaneously,
improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's
default chunk size mechanism. For more details, see the documentation for
[ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
Note that this argument is ignored if an analytical Jacobian is passed, as that will be
used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via
SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for
finite differencing.
- `standardtag`: whether to use a standardized tag definition for the purposes of automatic
differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`,
then ForwardDiff's default function naming tag is used, which results in larger stack
traces.
- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
then the Jacobian will not be constructed and instead direct Jacobian-vector products
`J*v` are computed using forward-mode automatic differentiation or finite differencing
tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
for example for a preconditioner, `concrete_jac = true` can be passed in order to force
the construction of the Jacobian.
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
`Val{:forward}` for forward finite differences. For more details on the choices, see the
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
linear solves within the Newton method. Defaults to `nothing`, which means it uses the
LinearSolve.jl default algorithm choice. For more information on available algorithm
choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
- `precs`: the choice of preconditioners for the linear solver. Defaults to using no
preconditioners. For more information on specifying preconditioners for LinearSolve
algorithms, consult the
[LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
- `damping_initial`: the starting value for the damping factor. The damping factor is
inversely proportional to the step size. The damping factor is adjusted during each
iteration. Defaults to `1.0`. For more details, see section 2.1 of
[this paper](https://arxiv.org/abs/1201.5885).
- `damping_increase_factor`: the factor by which the damping is increased if a step is
rejected. Defaults to `2.0`. For more details, see section 2.1 of
[this paper](https://arxiv.org/abs/1201.5885).
- `damping_decrease_factor`: the factor by which the damping is decreased if a step is
accepted. Defaults to `3.0`. For more details, see section 2.1 of
[this paper](https://arxiv.org/abs/1201.5885).
- `finite_diff_step_geodesic`: the step size used for finite differencing used to calculate
the geodesic acceleration. Defaults to `0.1` which means that the step size is
approximately 10% of the first-order step. For more details, see section 3 of
[this paper](https://arxiv.org/abs/1201.5885).
- `α_geodesic`: a factor that determines if a step is accepted or rejected. To incorporate
geodesic acceleration as an addition to the Levenberg-Marquardt algorithm, it is necessary
that acceptable steps meet the condition
``\\frac{2||a||}{||v||} \\le \\alpha_{\\text{geodesic}}``, where ``a`` is the geodesic
acceleration, ``v`` is the Levenberg-Marquardt algorithm's step (velocity along a geodesic
path) and `α_geodesic` is some number of order `1`. For most problems `α_geodesic = 0.75`
is a good value but for problems where convergence is difficult `α_geodesic = 0.1` is an
effective choice. Defaults to `0.75`. For more details, see section 3, equation (15) of
[this paper](https://arxiv.org/abs/1201.5885).
- `b_uphill`: a factor that determines if a step is accepted or rejected. The standard
choice in the Levenberg-Marquardt method is to accept all steps that decrease the cost
and reject all steps that increase the cost. Although this is a natural and safe choice,
it is often not the most efficient. Therefore downhill moves are always accepted, but
uphill moves are only conditionally accepted. To decide whether an uphill move will be
accepted at each iteration ``i``, we compute
``\\beta_i = \\cos(v_{\\text{new}}, v_{\\text{old}})``, which denotes the cosine angle
between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted
step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To
specify, uphill moves are accepted if
``(1-\\beta_i)^{b_{\\text{uphill}}} C_{i+1} \\le C_i``, where ``C_i`` is the cost at
iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0`
allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves
will be accepted. Defaults to `1.0`. For more details, see section 4 of
[this paper](https://arxiv.org/abs/1201.5885).
- `min_damping_D`: the minimum value of the damping terms in the diagonal damping matrix
`DᵀD`, where `DᵀD` is given by the largest diagonal entries of `JᵀJ` yet encountered,
where `J` is the Jacobian. It is suggested by
[this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in
`DᵀD` to prevent the damping from being too small. Defaults to `1e-8`.
!!! note
Currently, the linear solver and chunk size choice only applies to in-place defined
`NonlinearProblem`s. That is expected to change in the future.
"""
struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <:
AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ}
linsolve::L
precs::P
damping_initial::T
damping_increase_factor::T
damping_decrease_factor::T
finite_diff_step_geodesic::T
α_geodesic::T
b_uphill::T
min_damping_D::T
end
function LevenbergMarquardt(; chunk_size = Val{0}(),
autodiff = Val{true}(),
standardtag = Val{true}(),
concrete_jac = nothing,
diff_type = Val{:forward},
linsolve = nothing,
precs = DEFAULT_PRECS,
damping_initial::Real = 1.0,
damping_increase_factor::Real = 2.0,
damping_decrease_factor::Real = 3.0,
finite_diff_step_geodesic::Real = 0.1,
α_geodesic::Real = 0.75,
b_uphill::Real = 1.0,
min_damping_D::AbstractFloat = 1e-8)
LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type,
typeof(linsolve), typeof(precs), _unwrap_val(standardtag),
_unwrap_val(concrete_jac),
typeof(min_damping_D)}(linsolve, precs,
damping_initial,
damping_increase_factor,
damping_decrease_factor,
finite_diff_step_geodesic,
α_geodesic,
b_uphill,
min_damping_D)
end
mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType,
INType, tolType, probType, ufType, L, jType, JC,
DᵀDType, λType, lossType,
}
f::fType
alg::algType
u::uType
fu::resType
p::pType
uf::ufType
linsolve::L
J::jType
du_tmp::duType
jac_config::JC
force_stop::Bool
maxiters::Int
internalnorm::INType
retcode::SciMLBase.ReturnCode.T
abstol::tolType
prob::probType
DᵀD::DᵀDType
JᵀJ::jType
λ::λType
λ_factor::λType
damping_increase_factor::λType
damping_decrease_factor::λType
h::λType
α_geodesic::λType
b_uphill::λType
min_damping_D::λType
v::uType
a::uType
tmp_vec::uType
v_old::uType
norm_v_old::lossType
δ::uType
loss_old::lossType
make_new_J::Bool
fu_tmp::resType
mat_tmp::jType
stats::NLStats
function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType,
p::pType, uf::ufType, linsolve::L, J::jType,
du_tmp::duType, jac_config::JC,
force_stop::Bool, maxiters::Int,
internalnorm::INType,
retcode::SciMLBase.ReturnCode.T, abstol::tolType,
prob::probType, DᵀD::DᵀDType, JᵀJ::jType,
λ::λType, λ_factor::λType,
damping_increase_factor::λType,
damping_decrease_factor::λType, h::λType,
α_geodesic::λType, b_uphill::λType,
min_damping_D::λType, v::uType,
a::uType, tmp_vec::uType, v_old::uType,
norm_v_old::lossType, δ::uType,
loss_old::lossType, make_new_J::Bool,
fu_tmp::resType,
mat_tmp::jType,
stats::NLStats) where {
iip, fType, algType,
uType, duType, resType,
pType, INType, tolType,
probType, ufType, L,
jType, JC, DᵀDType,
λType, lossType,
}
new{iip, fType, algType, uType, duType, resType,
pType, INType, tolType, probType, ufType, L,
jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
jac_config, force_stop, maxiters,
internalnorm, retcode, abstol, prob, DᵀD,
JᵀJ, λ, λ_factor,
damping_increase_factor,
damping_decrease_factor, h,
α_geodesic, b_uphill, min_damping_D,
v, a, tmp_vec, v_old,
norm_v_old, δ, loss_old, make_new_J,
fu_tmp, mat_tmp, stats)
end
end
function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{true})
uf = JacobianWrapper(f, p)
J = ArrayInterface.undefmatrix(u)
linprob = LinearProblem(J, _vec(zero(u)); u0 = _vec(zero(u)))
weight = similar(u)
recursivefill!(weight, false)
Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
nothing)..., weight)
linsolve = init(linprob, alg.linsolve, alias_A = true, alias_b = true,
Pl = Pl, Pr = Pr)
du1 = zero(u)
du2 = zero(u)
tmp = zero(u)
jac_config = build_jac_config(alg, f, uf, du1, u, tmp, du2)
uf, linsolve, J, du1, jac_config
end
function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{false})
JacobianWrapper(f, p), nothing, ArrayInterface.undefmatrix(u), nothing, nothing
end
function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarquardt,
args...;
alias_u0 = false,
maxiters = 1000,
abstol = 1e-6,
internalnorm = DEFAULT_NORM,
kwargs...) where {uType, iip}
if alias_u0
u = prob.u0
else
u = deepcopy(prob.u0)
end
f = prob.f
p = prob.p
if iip
fu = zero(u)
f(fu, u, p)
else
fu = f(u, p)
end
uf, linsolve, J, du_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip))
λ = convert(eltype(u), alg.damping_initial)
λ_factor = convert(eltype(u), alg.damping_increase_factor)
damping_increase_factor = convert(eltype(u), alg.damping_increase_factor)
damping_decrease_factor = convert(eltype(u), alg.damping_decrease_factor)
h = convert(eltype(u), alg.finite_diff_step_geodesic)
α_geodesic = convert(eltype(u), alg.α_geodesic)
b_uphill = convert(eltype(u), alg.b_uphill)
min_damping_D = convert(eltype(u), alg.min_damping_D)
if u isa Number
DᵀD = min_damping_D
else
d = similar(u)
d .= min_damping_D
DᵀD = Diagonal(d)
end
loss = internalnorm(fu)
JᵀJ = zero(J)
v = zero(u)
a = zero(u)
tmp_vec = zero(u)
v_old = zero(u)
δ = zero(u)
make_new_J = true
fu_tmp = zero(fu)
mat_tmp = zero(J)
return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
jac_config, false, maxiters, internalnorm,
ReturnCode.Default, abstol, prob, DᵀD, JᵀJ,
λ, λ_factor, damping_increase_factor,
damping_decrease_factor, h,
α_geodesic, b_uphill, min_damping_D,
v, a, tmp_vec, v_old, loss, δ, loss, make_new_J,
fu_tmp, mat_tmp, NLStats(1, 0, 0, 0, 0))
end
function perform_step!(cache::LevenbergMarquardtCache{true})
@unpack fu, f, make_new_J = cache
if iszero(fu)
cache.force_stop = true
return nothing
end
if make_new_J
jacobian!(cache.J, cache)
mul!(cache.JᵀJ, cache.J', cache.J)
cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
cache.make_new_J = false
cache.stats.njacs += 1
end
@unpack u, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache
# Usual Levenberg-Marquardt step ("velocity").
# The following lines do: cache.v = -cache.mat_tmp \ cache.fu_tmp
mul!(cache.fu_tmp, J', fu)
@. cache.mat_tmp = JᵀJ + λ * DᵀD
linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp),
linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
cache.linsolve = linres.cache
@. cache.v = -cache.du_tmp
# Geodesic acceleration (step_size = v + a / 2).
@unpack v, α_geodesic, h = cache
f(cache.fu_tmp, u .+ h .* v, p)
# The following lines do: cache.a = -J \ cache.fu_tmp
mul!(cache.du_tmp, J, v)
@. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu) / h - cache.du_tmp)
linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp),
linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
cache.linsolve = linres.cache
@. cache.a = -cache.du_tmp
cache.stats.nsolve += 2
cache.stats.nfactors += 2
# Require acceptable steps to satisfy the following condition.
norm_v = norm(v)
if (2 * norm(cache.a) / norm_v) < α_geodesic
@. cache.δ = v + cache.a / 2
@unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
f(cache.fu_tmp, u .+ δ, p)
cache.stats.nf += 1
loss = cache.internalnorm(cache.fu_tmp)
# Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
β = dot(v, v_old) / (norm_v * norm_v_old)
if (1 - β)^b_uphill * loss ≤ loss_old
# Accept step.
cache.u .+= δ
if loss < cache.abstol
cache.force_stop = true
return nothing
end
cache.fu .= cache.fu_tmp
cache.v_old .= v
cache.norm_v_old = norm_v
cache.loss_old = loss
cache.λ_factor = 1 / cache.damping_decrease_factor
cache.make_new_J = true
end
end
cache.λ *= cache.λ_factor
cache.λ_factor = cache.damping_increase_factor
return nothing
end
function perform_step!(cache::LevenbergMarquardtCache{false})
@unpack fu, f, make_new_J = cache
if iszero(fu)
cache.force_stop = true
return nothing
end
if make_new_J
cache.J = jacobian(cache, f)
cache.JᵀJ = cache.J' * cache.J
if cache.JᵀJ isa Number
cache.DᵀD = max(cache.DᵀD, cache.JᵀJ)
else
cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
end
cache.make_new_J = false
cache.stats.njacs += 1
end
@unpack u, p, λ, JᵀJ, DᵀD, J = cache
# Usual Levenberg-Marquardt step ("velocity").
cache.v = -(JᵀJ + λ * DᵀD) \ (J' * fu)
@unpack v, h, α_geodesic = cache
# Geodesic acceleration (step_size = v + a / 2).
cache.a = -J \ ((2 / h) .* ((f(u .+ h .* v, p) .- fu) ./ h .- J * v))
cache.stats.nsolve += 1
cache.stats.nfactors += 1
# Require acceptable steps to satisfy the following condition.
norm_v = norm(v)
if (2 * norm(cache.a) / norm_v) < α_geodesic
cache.δ = v .+ cache.a ./ 2
@unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
fu_new = f(u .+ δ, p)
cache.stats.nf += 1
loss = cache.internalnorm(fu_new)
# Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
β = dot(v, v_old) / (norm_v * norm_v_old)
if (1 - β)^b_uphill * loss ≤ loss_old
# Accept step.
cache.u += δ
if loss < cache.abstol
cache.force_stop = true
return nothing
end
cache.fu = fu_new
cache.v_old = v
cache.norm_v_old = norm_v
cache.loss_old = loss
cache.λ_factor = 1 / cache.damping_decrease_factor
cache.make_new_J = true
end
end
cache.λ *= cache.λ_factor
cache.λ_factor = cache.damping_increase_factor
return nothing
end
function SciMLBase.solve!(cache::LevenbergMarquardtCache)
while !cache.force_stop && cache.stats.nsteps < cache.maxiters
perform_step!(cache)
cache.stats.nsteps += 1
end
if cache.stats.nsteps == cache.maxiters
cache.retcode = ReturnCode.MaxIters
else
cache.retcode = ReturnCode.Success
end
SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu;
retcode = cache.retcode, stats = cache.stats)
end