From 175d656f5133d02a1e4fc43ce65d5363e80023a8 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Tue, 10 Jan 2023 20:33:50 +0100 Subject: [PATCH 1/6] Implementation of a trust-region solver --- .../src/SimpleNonlinearSolve.jl | 8 +- lib/SimpleNonlinearSolve/src/trustRegion.jl | 125 ++++++++++++++++++ lib/SimpleNonlinearSolve/src/utils.jl | 25 ++++ lib/SimpleNonlinearSolve/test/basictests.jl | 28 +++- 4 files changed, 182 insertions(+), 4 deletions(-) create mode 100644 lib/SimpleNonlinearSolve/src/trustRegion.jl diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl index a91b05719..4937ae119 100644 --- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl +++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl @@ -21,6 +21,7 @@ include("raphson.jl") include("ad.jl") include("broyden.jl") include("klement.jl") +include("trustRegion.jl") import SnoopPrecompile @@ -30,6 +31,10 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) solve(prob_no_brack, alg(), tol = T(1e-2)) end + for alg in (TrustRegion(1.0),) + solve(prob_no_brack, alg, tol = T(1e-2)) + end + #= for alg in (SimpleNewtonRaphson,) for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0]) @@ -47,6 +52,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) end end # DiffEq styled algorithms -export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson +export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, + TrustRegion end # module diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl new file mode 100644 index 000000000..1d017a80d --- /dev/null +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -0,0 +1,125 @@ +""" +```julia +TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), + autodiff = Val{true}(), diff_type = Val{:forward}) +``` + +A low-overhead implementation of a +[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +solver + + +### Keyword Arguments +- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm + will change dynamically. However, it will never be greater than the `max_trust_radius`. + +### Keyword Arguments + +- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation + system. This allows for multiple derivative columns to be computed simultaneously, + improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's + default chunk size mechanism. For more details, see the documentation for + [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). +- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. + Note that this argument is ignored if an analytical Jacobian is passed; as that will be + used instead. Defaults to `Val{true}`, which means ForwardDiff.jl is used by default. + If `Val{false}`, then FiniteDiff.jl is used for finite differencing. +- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to + `Val{:forward}` for forward finite differences. For more details on the choices, see the + [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. +""" +struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT} + max_trust_radius::Number + function TrustRegion(max_turst_radius::Number; chunk_size = Val{0}(), + autodiff = Val{true}(), + diff_type = Val{:forward}) + new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff), + SciMLBase._unwrap_val(diff_type)}(max_trust_radius) + end +end + +function SciMLBase.solve(prob::NonlinearProblem, + alg::TrustRegion, args...; abstol = nothing, + reltol = nothing, + maxiters = 1000, kwargs...) + f = Base.Fix2(prob.f, prob.p) + x = float(prob.u0) + T = typeof(x) + Δₘₐₓ = float(alg.max_trust_radius) # The maximum trust region radius. + Δ = Δₘₐₓ / 5 # Initial trust region radius. + η₁ = 0.1 # Threshold for taking a step. + η₂ = 0.25 # Threshold for shrinking the trust region. + η₃ = 0.75 # Threshold for expanding the trust region. + t₁ = 0.25 # Factor to shrink the trust region with. + t₂ = 2.0 # Factor to expand the trust region with. + + if SciMLBase.isinplace(prob) + error("TrustRegion currently only supports out-of-place nonlinear problems") + end + + atol = abstol !== nothing ? abstol : + real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5) + rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5) + + if alg_autodiff(alg) + F, ∇f = value_derivative(f, x) + elseif x isa AbstractArray + F = f(x) + ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), F) + else + F = f(x) + ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F) + end + + fₖ = 0.5 * norm(F)^2 + H = ∇f * ∇f + g = ∇f * F + + for k in 1:maxiters + # Solve the trust region subproblem. + δ = dogleg_method(H, g, Δ) + xₖ₊₁ = x + δ + Fₖ₊₁ = f(xₖ₊₁) + fₖ₊₁ = 0.5 * norm(Fₖ₊₁)^2 + + # Compute the ratio of the actual to predicted reduction. + model = -(δ' * g + 0.5 * δ' * H * δ) + r = (fₖ - fₖ₊₁) / model + + # Update the trust region radius. + if r < η₂ + Δ *= t₁ + if r > η₁ + if isapprox(x̂, x, atol = atol, rtol = rtol) + return SciMLBase.build_solution(prob, alg, x, F; + retcode = ReturnCode.Success) + end + + x = xₖ₊₁ + F = Fₖ₊₁ + if alg_autodiff(alg) + F, ∇f = value_derivative(f, x) + elseif x isa AbstractArray + ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), + F) + else + ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), + eltype(x), + F) + end + + iszero(F) && + return SciMLBase.build_solution(prob, alg, x, F; + retcode = ReturnCode.Success) + # Update the trust region radius. + if r > η₃ && norm(δ) ≈ Δ + Δ = min(t₂ * Δ, Δₘₐₓ) + end + fₖ = f̂ + H = ∇f * ∇f + g = ∇f * F + end + end + + return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters) +end diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl index a4d86e2f8..f3b7de9f6 100644 --- a/lib/SimpleNonlinearSolve/src/utils.jl +++ b/lib/SimpleNonlinearSolve/src/utils.jl @@ -43,3 +43,28 @@ function init_J(x) end return J end + +function dogleg_method(H, g, Δ) + # Compute the Newton step. + δN = -H \ g + # Test if the full step is within the trust region. + if norm(δN) ≤ Δ + return δN + end + + # Calcualte Cauchy point, optimum along the steepest descent direction. + δsd = -g + norm_δsd = norm(δsd) + if norm_δsd ≥ Δ + return δsd .* Δ / norm_δsd + end + + # Find the intersection point on the boundary. + δN_δsd = δN - δsd + dot_δN_δsd = dot(δN_δsd, δN_δsd) + dot_δsd_δN_δsd = dot(δsd, δN_δsd) + dot_δsd = dot(δsd, δsd) + fact = dot_δsd_δN_δsd^2 - dot_δN_δsd * (dot_δsd - Δ^2) + tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd + return δsd + tau * δN_δsd +end diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl index 648c16fb4..9122519f8 100644 --- a/lib/SimpleNonlinearSolve/test/basictests.jl +++ b/lib/SimpleNonlinearSolve/test/basictests.jl @@ -46,13 +46,24 @@ sol = benchmark_scalar(sf, csu0) @test sol.u * sol.u - 2 < 1e-9 @test (@ballocated benchmark_scalar(sf, csu0)) == 0 +# SimpleNewtonRaphsonTrustRegion +function benchmark_scalar(f, u0) + probN = NonlinearProblem{false}(f, u0) + sol = (solve(probN, SimpleNewtonRaphsonTrustRegion(1.0))) +end + +sol = benchmark_scalar(sf, csu0) +@test sol.retcode === ReturnCode.Success +@test sol.u * sol.u - 2 < 1e-9 + # AD Tests using ForwardDiff # Immutable f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + SimpleNewtonRaphsonTrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, csu0, p) sol = solve(probN, alg, tol = 1e-9) @@ -67,7 +78,8 @@ end # Scalar f, u0 = (u, p) -> u * u - p, 1.0 -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + SimpleNewtonRaphsonTrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, oftype(p, u0), p) sol = solve(probN, alg) @@ -108,7 +120,8 @@ for alg in [Bisection(), Falsi()] @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p) end -for alg in [SimpleNewtonRaphson(), Broyden(), Klement()] +for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), + SimpleNewtonRaphsonTrustRegion(1.0)] global g, p g = function (p) probN = NonlinearProblem{false}(f, 0.5, p) @@ -128,6 +141,11 @@ probN = NonlinearProblem(f, u0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0); immutable = false).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u[end] ≈ sqrt(2.0) + @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0) @test solve(probN, Broyden(); immutable = false).u[end] ≈ sqrt(2.0) @@ -144,6 +162,10 @@ for u0 in [1.0, [1, 1.0]] @test solve(probN, SimpleNewtonRaphson()).u ≈ sol @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol + @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u ≈ sol + @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u ≈ sol + @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u ≈ sol + @test solve(probN, Broyden()).u ≈ sol @test solve(probN, Klement()).u ≈ sol From bc4ab2d33200eb0c192502ed83aacee088c611d5 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Tue, 10 Jan 2023 21:46:51 +0100 Subject: [PATCH 2/6] bug fix --- .../src/SimpleNonlinearSolve.jl | 2 +- lib/SimpleNonlinearSolve/src/trustRegion.jl | 34 +++++++++++-------- lib/SimpleNonlinearSolve/test/basictests.jl | 24 ++++++------- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl index 4937ae119..b4c094493 100644 --- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl +++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl @@ -31,7 +31,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) solve(prob_no_brack, alg(), tol = T(1e-2)) end - for alg in (TrustRegion(1.0),) + for alg in (TrustRegion(10.0),) solve(prob_no_brack, alg, tol = T(1e-2)) end diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl index 1d017a80d..cfdf9887d 100644 --- a/lib/SimpleNonlinearSolve/src/trustRegion.jl +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -30,9 +30,9 @@ solver """ struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT} max_trust_radius::Number - function TrustRegion(max_turst_radius::Number; chunk_size = Val{0}(), - autodiff = Val{true}(), - diff_type = Val{:forward}) + function TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), + autodiff = Val{true}(), + diff_type = Val{:forward}) new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff), SciMLBase._unwrap_val(diff_type)}(max_trust_radius) end @@ -46,8 +46,8 @@ function SciMLBase.solve(prob::NonlinearProblem, x = float(prob.u0) T = typeof(x) Δₘₐₓ = float(alg.max_trust_radius) # The maximum trust region radius. - Δ = Δₘₐₓ / 5 # Initial trust region radius. - η₁ = 0.1 # Threshold for taking a step. + Δ = Δₘₐₓ / 11 # Initial trust region radius. + η₁ = 0.0 # Threshold for taking a step. η₂ = 0.25 # Threshold for shrinking the trust region. η₃ = 0.75 # Threshold for expanding the trust region. t₁ = 0.25 # Factor to shrink the trust region with. @@ -88,38 +88,44 @@ function SciMLBase.solve(prob::NonlinearProblem, # Update the trust region radius. if r < η₂ - Δ *= t₁ - if r > η₁ - if isapprox(x̂, x, atol = atol, rtol = rtol) + Δ = t₁ * Δ + + if Δ < 1e-10 return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.Success) end - + end + if r > η₁ + if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol) + return SciMLBase.build_solution(prob, alg, xₖ₊₁, Fₖ₊₁; + retcode = ReturnCode.Success) + end + # Take the step. x = xₖ₊₁ F = Fₖ₊₁ if alg_autodiff(alg) F, ∇f = value_derivative(f, x) elseif x isa AbstractArray ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), - F) + F) else ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), - eltype(x), - F) + eltype(x), + F) end iszero(F) && return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.Success) + # Update the trust region radius. if r > η₃ && norm(δ) ≈ Δ Δ = min(t₂ * Δ, Δₘₐₓ) end - fₖ = f̂ + fₖ = fₖ₊₁ H = ∇f * ∇f g = ∇f * F end end - return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters) end diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl index 9122519f8..f30586c22 100644 --- a/lib/SimpleNonlinearSolve/test/basictests.jl +++ b/lib/SimpleNonlinearSolve/test/basictests.jl @@ -46,10 +46,10 @@ sol = benchmark_scalar(sf, csu0) @test sol.u * sol.u - 2 < 1e-9 @test (@ballocated benchmark_scalar(sf, csu0)) == 0 -# SimpleNewtonRaphsonTrustRegion +# TrustRegion function benchmark_scalar(f, u0) probN = NonlinearProblem{false}(f, u0) - sol = (solve(probN, SimpleNewtonRaphsonTrustRegion(1.0))) + sol = (solve(probN, TrustRegion(10.0))) end sol = benchmark_scalar(sf, csu0) @@ -63,7 +63,7 @@ using ForwardDiff f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleNewtonRaphsonTrustRegion(10.0)] + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, csu0, p) sol = solve(probN, alg, tol = 1e-9) @@ -79,7 +79,7 @@ end # Scalar f, u0 = (u, p) -> u * u - p, 1.0 for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleNewtonRaphsonTrustRegion(10.0)] + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, oftype(p, u0), p) sol = solve(probN, alg) @@ -121,7 +121,7 @@ for alg in [Bisection(), Falsi()] end for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleNewtonRaphsonTrustRegion(1.0)] + TrustRegion(10.0)] global g, p g = function (p) probN = NonlinearProblem{false}(f, 0.5, p) @@ -141,10 +141,10 @@ probN = NonlinearProblem(f, u0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0); immutable = false).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0); immutable = false).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0) @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0) @test solve(probN, Broyden(); immutable = false).u[end] ≈ sqrt(2.0) @@ -162,9 +162,9 @@ for u0 in [1.0, [1, 1.0]] @test solve(probN, SimpleNewtonRaphson()).u ≈ sol @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol - @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u ≈ sol - @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0)).u ≈ sol - @test solve(probN, SimpleNewtonRaphsonTrustRegion(1.0; autodiff = false)).u ≈ sol + @test solve(probN, TrustRegion(10.0)).u ≈ sol + @test solve(probN, TrustRegion(10.0)).u ≈ sol + @test solve(probN, TrustRegion(10.0; autodiff = false)).u ≈ sol @test solve(probN, Broyden()).u ≈ sol From 28aebe68542271d374db284cf7ac8f2ef64facff Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Tue, 10 Jan 2023 21:49:09 +0100 Subject: [PATCH 3/6] update of parameter --- lib/SimpleNonlinearSolve/src/trustRegion.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl index cfdf9887d..781085cc1 100644 --- a/lib/SimpleNonlinearSolve/src/trustRegion.jl +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -47,7 +47,7 @@ function SciMLBase.solve(prob::NonlinearProblem, T = typeof(x) Δₘₐₓ = float(alg.max_trust_radius) # The maximum trust region radius. Δ = Δₘₐₓ / 11 # Initial trust region radius. - η₁ = 0.0 # Threshold for taking a step. + η₁ = 0.1 # Threshold for taking a step. η₂ = 0.25 # Threshold for shrinking the trust region. η₃ = 0.75 # Threshold for expanding the trust region. t₁ = 0.25 # Factor to shrink the trust region with. From 883aeacd3ff5276c4b662b315180f8cbea8a6f48 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Tue, 10 Jan 2023 21:51:08 +0100 Subject: [PATCH 4/6] format --- lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl index b4c094493..5f269fe12 100644 --- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl +++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl @@ -52,7 +52,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) end end # DiffEq styled algorithms -export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, - TrustRegion +export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, TrustRegion end # module From c555f51833cde8c3d7113b88096068d9045118a9 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Tue, 10 Jan 2023 21:58:40 +0100 Subject: [PATCH 5/6] Implemented max allowed shrink of trust-region --- lib/SimpleNonlinearSolve/src/trustRegion.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl index 781085cc1..dd91353e6 100644 --- a/lib/SimpleNonlinearSolve/src/trustRegion.jl +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -74,6 +74,7 @@ function SciMLBase.solve(prob::NonlinearProblem, fₖ = 0.5 * norm(F)^2 H = ∇f * ∇f g = ∇f * F + counter = 0 for k in 1:maxiters # Solve the trust region subproblem. @@ -89,11 +90,13 @@ function SciMLBase.solve(prob::NonlinearProblem, # Update the trust region radius. if r < η₂ Δ = t₁ * Δ - - if Δ < 1e-10 + counter += 1 + if counter > 32 return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.Success) end + else + counter = 0 end if r > η₁ if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol) From 36a3f3cc54819fab66975a23f6e33155337a8a9a Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Wed, 11 Jan 2023 09:11:38 +0100 Subject: [PATCH 6/6] Made kwargs of parameters --- lib/SimpleNonlinearSolve/src/trustRegion.jl | 72 ++++++++++++++++----- lib/SimpleNonlinearSolve/test/basictests.jl | 57 +++++++++++++++- 2 files changed, 110 insertions(+), 19 deletions(-) diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl index dd91353e6..efc30969f 100644 --- a/lib/SimpleNonlinearSolve/src/trustRegion.jl +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -8,8 +8,7 @@ A low-overhead implementation of a [trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) solver - -### Keyword Arguments +### Arguments - `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm will change dynamically. However, it will never be greater than the `max_trust_radius`. @@ -27,14 +26,54 @@ solver - `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to `Val{:forward}` for forward finite differences. For more details on the choices, see the [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. +- `initial_trust_radius`: the initial trust region radius. Defaults to + `max_trust_radius / 11`. +- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is + compared with a value `r`, which is the actual reduction in the objective function divided + by the predicted reduction. If `step_threshold > r` the model is not a good approximation, + and the step is rejected. Defaults to `0.1`. For more details, see + [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +- `shrink_threshold`: the threshold for shrinking the trust region radius. In every + iteration, the threshold is compared with a value `r` which is the actual reduction in the + objective function divided by the predicted reduction. If `shrink_threshold > r` the trust + region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see + [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) +- `expand_threshold`: the threshold for expanding the trust region radius. If a step is + taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also + made to see if `expand_threshold < r`. If that is true, the trust region radius is + expanded by `expand_factor`. Defaults to `0.75`. +- `shrink_factor`: the factor to shrink the trust region radius with if + `shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`. +- `expand_factor`: the factor to expand the trust region radius with if + `expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`. +- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a + row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`. """ struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT} max_trust_radius::Number + initial_trust_radius::Number + step_threshold::Number + shrink_threshold::Number + expand_threshold::Number + shrink_factor::Number + expand_factor::Number + max_shrink_times::Int function TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), autodiff = Val{true}(), - diff_type = Val{:forward}) + diff_type = Val{:forward}, + initial_trust_radius::Number = max_trust_radius / 11, + step_threshold::Number = 0.1, + shrink_threshold::Number = 0.25, + expand_threshold::Number = 0.75, + shrink_factor::Number = 0.25, + expand_factor::Number = 2.0, + max_shrink_times::Int = 32) new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff), - SciMLBase._unwrap_val(diff_type)}(max_trust_radius) + SciMLBase._unwrap_val(diff_type)}(max_trust_radius, initial_trust_radius, + step_threshold, + shrink_threshold, expand_threshold, + shrink_factor, + expand_factor, max_shrink_times) end end @@ -45,13 +84,14 @@ function SciMLBase.solve(prob::NonlinearProblem, f = Base.Fix2(prob.f, prob.p) x = float(prob.u0) T = typeof(x) - Δₘₐₓ = float(alg.max_trust_radius) # The maximum trust region radius. - Δ = Δₘₐₓ / 11 # Initial trust region radius. - η₁ = 0.1 # Threshold for taking a step. - η₂ = 0.25 # Threshold for shrinking the trust region. - η₃ = 0.75 # Threshold for expanding the trust region. - t₁ = 0.25 # Factor to shrink the trust region with. - t₂ = 2.0 # Factor to expand the trust region with. + Δₘₐₓ = float(alg.max_trust_radius) + Δ = float(alg.initial_trust_radius) + η₁ = float(alg.step_threshold) + η₂ = float(alg.shrink_threshold) + η₃ = float(alg.expand_threshold) + t₁ = float(alg.shrink_factor) + t₂ = float(alg.expand_factor) + max_shrink_times = alg.max_shrink_times if SciMLBase.isinplace(prob) error("TrustRegion currently only supports out-of-place nonlinear problems") @@ -74,7 +114,7 @@ function SciMLBase.solve(prob::NonlinearProblem, fₖ = 0.5 * norm(F)^2 H = ∇f * ∇f g = ∇f * F - counter = 0 + shrink_counter = 0 for k in 1:maxiters # Solve the trust region subproblem. @@ -85,18 +125,18 @@ function SciMLBase.solve(prob::NonlinearProblem, # Compute the ratio of the actual to predicted reduction. model = -(δ' * g + 0.5 * δ' * H * δ) - r = (fₖ - fₖ₊₁) / model + r = model \ (fₖ - fₖ₊₁) # Update the trust region radius. if r < η₂ Δ = t₁ * Δ - counter += 1 - if counter > 32 + shrink_counter += 1 + if shrink_counter > max_shrink_times return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.Success) end else - counter = 0 + shrink_counter = 0 end if r > η₁ if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol) diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl index f30586c22..d2a030253 100644 --- a/lib/SimpleNonlinearSolve/test/basictests.jl +++ b/lib/SimpleNonlinearSolve/test/basictests.jl @@ -63,7 +63,7 @@ using ForwardDiff f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - TrustRegion(10.0)] + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, csu0, p) sol = solve(probN, alg, tol = 1e-9) @@ -79,7 +79,7 @@ end # Scalar f, u0 = (u, p) -> u * u - p, 1.0 for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - TrustRegion(10.0)] + TrustRegion(10.0)] g = function (p) probN = NonlinearProblem{false}(f, oftype(p, u0), p) sol = solve(probN, alg) @@ -121,7 +121,7 @@ for alg in [Bisection(), Falsi()] end for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - TrustRegion(10.0)] + TrustRegion(10.0)] global g, p g = function (p) probN = NonlinearProblem{false}(f, 0.5, p) @@ -207,3 +207,54 @@ sol = solve(probB, Bisection(; exact_left = true, exact_right = true); immutable @test f(nextfloat(sol.left), nothing) >= 0.0 @test f(sol.right, nothing) >= 0.0 @test f(prevfloat(sol.right), nothing) <= 0.0 + +# Test that `TrustRegion` passes a test that `SimpleNewtonRaphson` fails on. +u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0] +global g, f +f = (u, p) -> 0.010000000000000002 .+ + 10.000000000000002 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ + (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .- + 0.0011552453009332421u +.-p +g = function (p) + probN = NonlinearProblem{false}(f, u0, p) + sol = solve(probN, TrustRegion(100.0)) + return sol.u +end +p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +u = g(p) +f(u, p) +@test all(f(u, p) .< 1e-10) + +# Test kwars in `TrustRegion` +max_trust_radius = [10.0, 100.0, 1000.0] +initial_trust_radius = [10.0, 1.0, 0.1] +step_threshold = [0.0, 0.01, 0.25] +shrink_threshold = [0.25, 0.3, 0.5] +expand_threshold = [0.5, 0.8, 0.9] +shrink_factor = [0.1, 0.3, 0.5] +expand_factor = [1.5, 2.0, 3.0] +max_shrink_times = [10, 20, 30] + +list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold, + shrink_threshold, expand_threshold, shrink_factor, + expand_factor, max_shrink_times) +for options in list_of_options + local probN, sol, alg + alg = TrustRegion(options[1]; + initial_trust_radius = options[2], + step_threshold = options[3], + shrink_threshold = options[4], + expand_threshold = options[5], + shrink_factor = options[6], + expand_factor = options[7], + max_shrink_times = options[8]) + + probN = NonlinearProblem(f, u0, p) + sol = solve(probN, alg) + @test all(f(u, p) .< 1e-10) +end