-
-
Notifications
You must be signed in to change notification settings - Fork 42
/
trustRegion.jl
382 lines (340 loc) · 15.1 KB
/
trustRegion.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
"""
```julia
TrustRegion(; chunk_size = Val{0}(), autodiff = Val{true}(),
standardtag = Val{true}(), concrete_jac = nothing,
diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS,
max_trust_radius::Real = 0.0,
initial_trust_radius::Real = 0.0,
step_threshold::Real = 0.1,
shrink_threshold::Real = 0.25,
expand_threshold::Real = 0.75,
shrink_factor::Real = 0.25,
expand_factor::Real = 2.0,
max_shrink_times::Int = 32)
```
An advanced TrustRegion implementation with support for efficient handling of sparse
matrices via colored automatic differentiation and preconditioned linear solvers. Designed
for large-scale and numerically-difficult nonlinear systems.
### Keyword Arguments
- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
system. This allows for multiple derivative columns to be computed simultaneously,
improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's
default chunk size mechanism. For more details, see the documentation for
[ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
Note that this argument is ignored if an analytical Jacobian is passed, as that will be
used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via
SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for
finite differencing.
- `standardtag`: whether to use a standardized tag definition for the purposes of automatic
differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`,
then ForwardDiff's default function naming tag is used, which results in larger stack
traces.
- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
then the Jacobian will not be constructed and instead direct Jacobian-vector products
`J*v` are computed using forward-mode automatic differentiation or finite differencing
tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
for example for a preconditioner, `concrete_jac = true` can be passed in order to force
the construction of the Jacobian.
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
`Val{:forward}` for forward finite differences. For more details on the choices, see the
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
linear solves within the Newton method. Defaults to `nothing`, which means it uses the
LinearSolve.jl default algorithm choice. For more information on available algorithm
choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
- `precs`: the choice of preconditioners for the linear solver. Defaults to using no
preconditioners. For more information on specifying preconditioners for LinearSolve
algorithms, consult the
[LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
- `max_trust_radius`: the maximal trust region radius.
Defaults to `max(norm(fu), maximum(u) - minimum(u))`.
- `initial_trust_radius`: the initial trust region radius. Defaults to
`max_trust_radius / 11`.
- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
compared with a value `r`, which is the actual reduction in the objective function divided
by the predicted reduction. If `step_threshold > r` the model is not a good approximation,
and the step is rejected. Defaults to `0.1`. For more details, see
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
- `shrink_threshold`: the threshold for shrinking the trust region radius. In every
iteration, the threshold is compared with a value `r` which is the actual reduction in the
objective function divided by the predicted reduction. If `shrink_threshold > r` the trust
region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
- `expand_threshold`: the threshold for expanding the trust region radius. If a step is
taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also
made to see if `expand_threshold < r`. If that is true, the trust region radius is
expanded by `expand_factor`. Defaults to `0.75`.
- `shrink_factor`: the factor to shrink the trust region radius with if
`shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`.
- `expand_factor`: the factor to expand the trust region radius with if
`expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`.
- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a
row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`.
!!! note
Currently, the linear solver and chunk size choice only applies to in-place defined
`NonlinearProblem`s. That is expected to change in the future.
"""
struct TrustRegion{CS, AD, FDT, L, P, ST, CJ, MTR} <:
AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ}
linsolve::L
precs::P
max_trust_radius::MTR
initial_trust_radius::MTR
step_threshold::MTR
shrink_threshold::MTR
expand_threshold::MTR
shrink_factor::MTR
expand_factor::MTR
max_shrink_times::Int
end
function TrustRegion(; chunk_size = Val{0}(),
autodiff = Val{true}(),
standardtag = Val{true}(), concrete_jac = nothing,
diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS,
max_trust_radius::Real = 0.0,
initial_trust_radius::Real = 0.0,
step_threshold::Real = 0.1,
shrink_threshold::Real = 0.25,
expand_threshold::Real = 0.75,
shrink_factor::Real = 0.25,
expand_factor::Real = 2.0,
max_shrink_times::Int = 32)
TrustRegion{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type,
typeof(linsolve), typeof(precs), _unwrap_val(standardtag),
_unwrap_val(concrete_jac), typeof(max_trust_radius)
}(linsolve, precs, max_trust_radius,
initial_trust_radius,
step_threshold,
shrink_threshold,
expand_threshold,
shrink_factor,
expand_factor,
max_shrink_times)
end
mutable struct TrustRegionCache{iip, fType, algType, uType, resType, pType,
INType, tolType, probType, ufType, L, jType, JC, floatType,
trustType
}
f::fType
alg::algType
u::uType
fu::resType
p::pType
uf::ufType
linsolve::L
J::jType
jac_config::JC
iter::Int
force_stop::Bool
maxiters::Int
internalnorm::INType
retcode::SciMLBase.ReturnCode.T
abstol::tolType
prob::probType
trust_r::trustType
max_trust_r::trustType
loss::floatType
loss_new::floatType
H::jType
g::resType
shrink_counter::Int
step_size::uType
u_tmp::uType
fu_new::resType
make_new_J::Bool
r::floatType
function TrustRegionCache{iip}(f::fType, alg::algType, u::uType, fu::resType, p::pType,
uf::ufType, linsolve::L, J::jType,
jac_config::JC, iter::Int,
force_stop::Bool, maxiters::Int, internalnorm::INType,
retcode::SciMLBase.ReturnCode.T, abstol::tolType,
prob::probType, trust_r::trustType,
max_trust_r::trustType, loss::floatType,
loss_new::floatType, H::jType, g::resType,
shrink_counter::Int, step_size::uType, u_tmp::uType,
fu_new::resType, make_new_J::Bool,
r::floatType) where {iip, fType, algType, uType,
resType, pType, INType,
tolType, probType, ufType, L,
jType, JC, floatType, trustType}
new{iip, fType, algType, uType, resType, pType,
INType, tolType, probType, ufType, L, jType, JC, floatType, trustType
}(f, alg, u, fu, p, uf, linsolve, J,
jac_config, iter, force_stop,
maxiters, internalnorm, retcode,
abstol, prob, trust_r, max_trust_r, loss,
loss_new, H, g, shrink_counter,
step_size, u_tmp, fu_new,
make_new_J, r)
end
end
function jacobian_caches(alg::TrustRegion, f, u, p, ::Val{true})
uf = JacobianWrapper(f, p)
J = ArrayInterfaceCore.undefmatrix(u)
linprob = LinearProblem(J, _vec(zero(u)); u0 = _vec(zero(u)))
weight = similar(u)
recursivefill!(weight, false)
Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
nothing)..., weight)
linsolve = init(linprob, alg.linsolve, alias_A = true, alias_b = true,
Pl = Pl, Pr = Pr)
du1 = zero(u)
du2 = zero(u)
tmp = zero(u)
jac_config = build_jac_config(alg, f, uf, du1, u, tmp, du2)
uf, linsolve, J, du1, jac_config
end
function jacobian_caches(alg::TrustRegion, f, u, p, ::Val{false})
J = ArrayInterfaceCore.undefmatrix(u)
JacobianWrapper(f, p), nothing, J, zero(u), nothing
end
function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
args...;
alias_u0 = false,
maxiters = 1000,
abstol = 1e-6,
internalnorm = DEFAULT_NORM,
kwargs...) where {uType, iip}
if alias_u0
u = prob.u0
else
u = deepcopy(prob.u0)
end
f = prob.f
p = prob.p
if iip
fu = zero(u)
f(fu, u, p)
else
fu = f(u, p)
end
loss = get_loss(fu)
uf, linsolve, J, u_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip))
# Set default trust region radius if not specified
max_trust_radius = alg.max_trust_radius
initial_trust_radius = alg.initial_trust_radius
if max_trust_radius == 0.0
max_trust_radius = max(norm(fu), maximum(u) - minimum(u))
end
if initial_trust_radius == 0.0
initial_trust_radius = max_trust_radius / 11
end
H = ArrayInterfaceCore.undefmatrix(u)
return TrustRegionCache{iip}(f, alg, u, fu, p, uf, linsolve, J, jac_config,
1, false, maxiters, internalnorm,
ReturnCode.Default, abstol, prob, initial_trust_radius,
max_trust_radius, loss, loss, H, zero(fu), 0, zero(u),
u_tmp, zero(fu), true,
loss)
end
function perform_step!(cache::TrustRegionCache{true})
@unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
if cache.make_new_J
jacobian!(J, cache)
mul!(cache.H, J, J)
mul!(cache.g, J, fu)
end
linres = dolinsolve(alg.precs, linsolve, A = cache.H, b = _vec(cache.g),
linu = _vec(u_tmp),
p = p, reltol = cache.abstol)
cache.linsolve = linres.cache
cache.u_tmp .= -1 .* u_tmp
dogleg!(cache)
# Compute the potentially new u
cache.u_tmp .= u .+ cache.step_size
f(cache.fu_new, cache.u_tmp, p)
trust_region_step!(cache)
return nothing
end
function perform_step!(cache::TrustRegionCache{false})
@unpack make_new_J, fu, f, u, p = cache
if make_new_J
J = jacobian(cache, f)
cache.H = J * J
cache.g = J * fu
end
@unpack g, H = cache
# Compute the Newton step.
cache.u_tmp = -H \ g
dogleg!(cache)
# Compute the potentially new u
cache.u_tmp = u .+ cache.step_size
cache.fu_new = f(cache.u_tmp, p)
trust_region_step!(cache)
return nothing
end
function trust_region_step!(cache::TrustRegionCache)
@unpack fu_new, u_tmp, step_size, g, H, loss, alg, max_trust_r = cache
cache.loss_new = get_loss(fu_new)
# Compute the ratio of the actual reduction to the predicted reduction.
cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2)
@unpack r = cache
# Update the trust region radius.
if r < alg.shrink_threshold
cache.trust_r *= alg.shrink_factor
cache.shrink_counter += 1
else
cache.shrink_counter = 0
end
if r > alg.step_threshold
take_step!(cache)
cache.loss = cache.loss_new
# Update the trust region radius.
if r > alg.expand_threshold
cache.trust_r = min(alg.expand_factor * cache.trust_r, max_trust_r)
end
cache.make_new_J = true
else
# No need to make a new J, no step was taken, so we try again with a smaller trust_r
cache.make_new_J = false
end
if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
cache.force_stop = true
end
end
function dogleg!(cache::TrustRegionCache)
@unpack u_tmp, trust_r = cache
# Test if the full step is within the trust region.
if norm(u_tmp) ≤ trust_r
cache.step_size = u_tmp
return
end
# Calcualte Cauchy point, optimum along the steepest descent direction.
δsd = -cache.g
norm_δsd = norm(δsd)
if norm_δsd ≥ trust_r
cache.step_size = δsd .* trust_r / norm_δsd
return
end
# Find the intersection point on the boundary.
N_sd = u_tmp - δsd
dot_N_sd = dot(N_sd, N_sd)
dot_sd_N_sd = dot(δsd, N_sd)
dot_sd = dot(δsd, δsd)
fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2)
τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd
cache.step_size = δsd + τ * N_sd
end
function take_step!(cache::TrustRegionCache{true})
cache.u .= cache.u_tmp
cache.fu .= cache.fu_new
end
function take_step!(cache::TrustRegionCache{false})
cache.u = cache.u_tmp
cache.fu = cache.fu_new
end
function SciMLBase.solve!(cache::TrustRegionCache)
while !cache.force_stop && cache.iter < cache.maxiters &&
cache.shrink_counter < cache.alg.max_shrink_times
perform_step!(cache)
cache.iter += 1
end
if cache.iter == cache.maxiters
cache.retcode = ReturnCode.MaxIters
else
cache.retcode = ReturnCode.Success
end
SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu;
retcode = cache.retcode)
end