diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl index a91b05719..5f269fe12 100644 --- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl +++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl @@ -21,6 +21,7 @@ include("raphson.jl") include("ad.jl") include("broyden.jl") include("klement.jl") +include("trustRegion.jl") import SnoopPrecompile @@ -30,6 +31,10 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) solve(prob_no_brack, alg(), tol = T(1e-2)) end + for alg in (TrustRegion(10.0),) + solve(prob_no_brack, alg, tol = T(1e-2)) + end + #= for alg in (SimpleNewtonRaphson,) for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0]) @@ -47,6 +52,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) end end # DiffEq styled algorithms -export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson +export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, TrustRegion end # module diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl new file mode 100644 index 000000000..efc30969f --- /dev/null +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -0,0 +1,174 @@ +""" +```julia +TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), + autodiff = Val{true}(), diff_type = Val{:forward}) +``` + +A low-overhead implementation of a +[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +solver + +### Arguments +- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm + will change dynamically. However, it will never be greater than the `max_trust_radius`. + +### Keyword Arguments + +- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation + system. This allows for multiple derivative columns to be computed simultaneously, + improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's + default chunk size mechanism. For more details, see the documentation for + [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). +- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. + Note that this argument is ignored if an analytical Jacobian is passed; as that will be + used instead. Defaults to `Val{true}`, which means ForwardDiff.jl is used by default. + If `Val{false}`, then FiniteDiff.jl is used for finite differencing. +- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to + `Val{:forward}` for forward finite differences. For more details on the choices, see the + [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. +- `initial_trust_radius`: the initial trust region radius. Defaults to + `max_trust_radius / 11`. +- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is + compared with a value `r`, which is the actual reduction in the objective function divided + by the predicted reduction. If `step_threshold > r` the model is not a good approximation, + and the step is rejected. Defaults to `0.1`. For more details, see + [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +- `shrink_threshold`: the threshold for shrinking the trust region radius. In every + iteration, the threshold is compared with a value `r` which is the actual reduction in the + objective function divided by the predicted reduction. If `shrink_threshold > r` the trust + region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see + [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +- `expand_threshold`: the threshold for expanding the trust region radius. If a step is + taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also + made to see if `expand_threshold < r`. If that is true, the trust region radius is + expanded by `expand_factor`. Defaults to `0.75`. +- `shrink_factor`: the factor to shrink the trust region radius with if + `shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`. +- `expand_factor`: the factor to expand the trust region radius with if + `expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`. +- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a + row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`. +""" +struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT} + max_trust_radius::Number + initial_trust_radius::Number + step_threshold::Number + shrink_threshold::Number + expand_threshold::Number + shrink_factor::Number + expand_factor::Number + max_shrink_times::Int + function TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), + autodiff = Val{true}(), + diff_type = Val{:forward}, + initial_trust_radius::Number = max_trust_radius / 11, + step_threshold::Number = 0.1, + shrink_threshold::Number = 0.25, + expand_threshold::Number = 0.75, + shrink_factor::Number = 0.25, + expand_factor::Number = 2.0, + max_shrink_times::Int = 32) + new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff), + SciMLBase._unwrap_val(diff_type)}(max_trust_radius, initial_trust_radius, + step_threshold, + shrink_threshold, expand_threshold, + shrink_factor, + expand_factor, max_shrink_times) + end +end + +function SciMLBase.solve(prob::NonlinearProblem, + alg::TrustRegion, args...; abstol = nothing, + reltol = nothing, + maxiters = 1000, kwargs...) + f = Base.Fix2(prob.f, prob.p) + x = float(prob.u0) + T = typeof(x) + Δₘₐₓ = float(alg.max_trust_radius) + Δ = float(alg.initial_trust_radius) + η₁ = float(alg.step_threshold) + η₂ = float(alg.shrink_threshold) + η₃ = float(alg.expand_threshold) + t₁ = float(alg.shrink_factor) + t₂ = float(alg.expand_factor) + max_shrink_times = alg.max_shrink_times + + if SciMLBase.isinplace(prob) + error("TrustRegion currently only supports out-of-place nonlinear problems") + end + + atol = abstol !== nothing ? abstol : + real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5) + rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5) + + if alg_autodiff(alg) + F, ∇f = value_derivative(f, x) + elseif x isa AbstractArray + F = f(x) + ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), F) + else + F = f(x) + ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F) + end + + fₖ = 0.5 * norm(F)^2 + H = ∇f * ∇f + g = ∇f * F + shrink_counter = 0 + + for k in 1:maxiters + # Solve the trust region subproblem. + δ = dogleg_method(H, g, Δ) + xₖ₊₁ = x + δ + Fₖ₊₁ = f(xₖ₊₁) + fₖ₊₁ = 0.5 * norm(Fₖ₊₁)^2 + + # Compute the ratio of the actual to predicted reduction. + model = -(δ' * g + 0.5 * δ' * H * δ) + r = model \ (fₖ - fₖ₊₁) + + # Update the trust region radius. + if r < η₂ + Δ = t₁ * Δ + shrink_counter += 1 + if shrink_counter > max_shrink_times + return SciMLBase.build_solution(prob, alg, x, F; + retcode = ReturnCode.Success) + end + else + shrink_counter = 0 + end + if r > η₁ + if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol) + return SciMLBase.build_solution(prob, alg, xₖ₊₁, Fₖ₊₁; + retcode = ReturnCode.Success) + end + # Take the step. + x = xₖ₊₁ + F = Fₖ₊₁ + if alg_autodiff(alg) + F, ∇f = value_derivative(f, x) + elseif x isa AbstractArray + ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), + F) + else + ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), + eltype(x), + F) + end + + iszero(F) && + return SciMLBase.build_solution(prob, alg, x, F; + retcode = ReturnCode.Success) + + # Update the trust region radius. + if r > η₃ && norm(δ) ≈ Δ + Δ = min(t₂ * Δ, Δₘₐₓ) + end + fₖ = fₖ₊₁ + H = ∇f * ∇f + g = ∇f * F + end + end + return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters) +end diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl index a4d86e2f8..f3b7de9f6 100644 --- a/lib/SimpleNonlinearSolve/src/utils.jl +++ b/lib/SimpleNonlinearSolve/src/utils.jl @@ -43,3 +43,28 @@ function init_J(x) end return J end + +function dogleg_method(H, g, Δ) + # Compute the Newton step. + δN = -H \ g + # Test if the full step is within the trust region. + if norm(δN) ≤ Δ + return δN + end + + # Calcualte Cauchy point, optimum along the steepest descent direction. + δsd = -g + norm_δsd = norm(δsd) + if norm_δsd ≥ Δ + return δsd .* Δ / norm_δsd + end + + # Find the intersection point on the boundary. + δN_δsd = δN - δsd + dot_δN_δsd = dot(δN_δsd, δN_δsd) + dot_δsd_δN_δsd = dot(δsd, δN_δsd) + dot_δsd = dot(δsd, δsd) + fact = dot_δsd_δN_δsd^2 - dot_δN_δsd * (dot_δsd - Δ^2) + tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd + return δsd + tau * δN_δsd +end diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl index 648c16fb4..d2a030253 100644 --- a/lib/SimpleNonlinearSolve/test/basictests.jl +++ b/lib/SimpleNonlinearSolve/test/basictests.jl @@ -46,13 +46,24 @@ sol = benchmark_scalar(sf, csu0) @test sol.u * sol.u - 2 < 1e-9 @test (@ballocated benchmark_scalar(sf, csu0)) == 0 +# TrustRegion +function benchmark_scalar(f, u0) + probN = NonlinearProblem{false}(f, u0) + sol = (solve(probN, TrustRegion(10.0))) +end + +sol = benchmark_scalar(sf, csu0) +@test sol.retcode === ReturnCode.Success +@test sol.u * sol.u - 2 < 1e-9 + # AD Tests using ForwardDiff # Immutable f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, csu0, p) sol = solve(probN, alg, tol = 1e-9) @@ -67,7 +78,8 @@ end # Scalar f, u0 = (u, p) -> u * u - p, 1.0 -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, oftype(p, u0), p) sol = solve(probN, alg) @@ -108,7 +120,8 @@ for alg in [Bisection(), Falsi()] @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p) end -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + TrustRegion(10.0)] global g, p g = function (p) probN = NonlinearProblem{false}(f, 0.5, p) @@ -128,6 +141,11 @@ probN = NonlinearProblem(f, u0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0); immutable = false).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0) + @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0) @test solve(probN, Broyden(); immutable = false).u[end] ≈ sqrt(2.0) @@ -144,6 +162,10 @@ for u0 in [1.0, [1, 1.0]] @test solve(probN, SimpleNewtonRaphson()).u ≈ sol @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol + @test solve(probN, TrustRegion(10.0)).u ≈ sol + @test solve(probN, TrustRegion(10.0)).u ≈ sol + @test solve(probN, TrustRegion(10.0; autodiff = false)).u ≈ sol + @test solve(probN, Broyden()).u ≈ sol @test solve(probN, Klement()).u ≈ sol @@ -185,3 +207,54 @@ sol = solve(probB, Bisection(; exact_left = true, exact_right = true); immutable @test f(nextfloat(sol.left), nothing) >= 0.0 @test f(sol.right, nothing) >= 0.0 @test f(prevfloat(sol.right), nothing) <= 0.0 + +# Test that `TrustRegion` passes a test that `SimpleNewtonRaphson` fails on. +u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0] +global g, f +f = (u, p) -> 0.010000000000000002 .+ + 10.000000000000002 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ + (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .- + 0.0011552453009332421u +.-p +g = function (p) + probN = NonlinearProblem{false}(f, u0, p) + sol = solve(probN, TrustRegion(100.0)) + return sol.u +end +p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +u = g(p) +f(u, p) +@test all(f(u, p) .< 1e-10) + +# Test kwars in `TrustRegion` +max_trust_radius = [10.0, 100.0, 1000.0] +initial_trust_radius = [10.0, 1.0, 0.1] +step_threshold = [0.0, 0.01, 0.25] +shrink_threshold = [0.25, 0.3, 0.5] +expand_threshold = [0.5, 0.8, 0.9] +shrink_factor = [0.1, 0.3, 0.5] +expand_factor = [1.5, 2.0, 3.0] +max_shrink_times = [10, 20, 30] + +list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold, + shrink_threshold, expand_threshold, shrink_factor, + expand_factor, max_shrink_times) +for options in list_of_options + local probN, sol, alg + alg = TrustRegion(options[1]; + initial_trust_radius = options[2], + step_threshold = options[3], + shrink_threshold = options[4], + expand_threshold = options[5], + shrink_factor = options[6], + expand_factor = options[7], + max_shrink_times = options[8]) + + probN = NonlinearProblem(f, u0, p) + sol = solve(probN, alg) + @test all(f(u, p) .< 1e-10) +end