diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl index c077d833d..622595e1e 100644 --- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl +++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl @@ -19,23 +19,19 @@ include("utils.jl") include("bisection.jl") include("falsi.jl") include("raphson.jl") -include("ad.jl") include("broyden.jl") include("klement.jl") include("trustRegion.jl") +include("ad.jl") import SnoopPrecompile SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) - for alg in (SimpleNewtonRaphson, Broyden, Klement) + for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion) solve(prob_no_brack, alg(), abstol = T(1e-2)) end - for alg in (SimpleTrustRegion(10.0),) - solve(prob_no_brack, alg, abstol = T(1e-2)) - end - #= for alg in (SimpleNewtonRaphson,) for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0]) diff --git a/lib/SimpleNonlinearSolve/src/ad.jl b/lib/SimpleNonlinearSolve/src/ad.jl index 3b8d3fee0..58a6181ea 100644 --- a/lib/SimpleNonlinearSolve/src/ad.jl +++ b/lib/SimpleNonlinearSolve/src/ad.jl @@ -30,7 +30,8 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, - <:Dual{T, V, P}}, alg::SimpleNewtonRaphson, + <:Dual{T, V, P}}, + alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; @@ -39,7 +40,8 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, <:AbstractArray{<:Dual{T, V, P}}}, - alg::SimpleNewtonRaphson, args...; kwargs...) where {iip, T, V, P} + alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...; + kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; retcode = sol.retcode) diff --git a/lib/SimpleNonlinearSolve/src/trustRegion.jl b/lib/SimpleNonlinearSolve/src/trustRegion.jl index 08d5d5129..329eb8808 100644 --- a/lib/SimpleNonlinearSolve/src/trustRegion.jl +++ b/lib/SimpleNonlinearSolve/src/trustRegion.jl @@ -1,17 +1,22 @@ """ ```julia -SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), - autodiff = Val{true}(), diff_type = Val{:forward}) +SimpleTrustRegion(; chunk_size = Val{0}(), + autodiff = Val{true}(), + diff_type = Val{:forward}, + max_trust_radius::Real = 0.0, + initial_trust_radius::Real = 0.0, + step_threshold::Real = 0.1, + shrink_threshold::Real = 0.25, + expand_threshold::Real = 0.75, + shrink_factor::Real = 0.25, + expand_factor::Real = 2.0, + max_shrink_times::Int = 32 ``` A low-overhead implementation of a [trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods) solver -### Arguments -- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm - will change dynamically. However, it will never be greater than the `max_trust_radius`. - ### Keyword Arguments - `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation @@ -26,6 +31,8 @@ solver - `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to `Val{:forward}` for forward finite differences. For more details on the choices, see the [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. +- `max_trust_radius`: the maximum radius of the trust region. Defaults to + `max(norm(f(u0)), maximum(u0) - minimum(u0))`. - `initial_trust_radius`: the initial trust region radius. Defaults to `max_trust_radius / 11`. - `step_threshold`: the threshold for taking a step. In every iteration, the threshold is @@ -58,25 +65,28 @@ struct SimpleTrustRegion{T, CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT} shrink_factor::T expand_factor::T max_shrink_times::Int - function SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(), + function SimpleTrustRegion(; chunk_size = Val{0}(), autodiff = Val{true}(), diff_type = Val{:forward}, - initial_trust_radius::Number = max_trust_radius / 11, - step_threshold::Number = 0.1, - shrink_threshold::Number = 0.25, - expand_threshold::Number = 0.75, - shrink_factor::Number = 0.25, - expand_factor::Number = 2.0, + max_trust_radius::Real = 0.0, + initial_trust_radius::Real = 0.0, + step_threshold::Real = 0.1, + shrink_threshold::Real = 0.25, + expand_threshold::Real = 0.75, + shrink_factor::Real = 0.25, + expand_factor::Real = 2.0, max_shrink_times::Int = 32) - new{typeof(initial_trust_radius), SciMLBase._unwrap_val(chunk_size), - SciMLBase._unwrap_val(autodiff), SciMLBase._unwrap_val(diff_type)}(max_trust_radius, - initial_trust_radius, - step_threshold, - shrink_threshold, - expand_threshold, - shrink_factor, - expand_factor, - max_shrink_times) + new{typeof(initial_trust_radius), + SciMLBase._unwrap_val(chunk_size), + SciMLBase._unwrap_val(autodiff), + SciMLBase._unwrap_val(diff_type)}(max_trust_radius, + initial_trust_radius, + step_threshold, + shrink_threshold, + expand_threshold, + shrink_factor, + expand_factor, + max_shrink_times) end end @@ -114,6 +124,14 @@ function SciMLBase.__solve(prob::NonlinearProblem, ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F) end + # Set default trust region radius if not specified by user. + if Δₘₐₓ == 0.0 + Δₘₐₓ = max(norm(F), maximum(x) - minimum(x)) + end + if Δ == 0.0 + Δ = Δₘₐₓ / 11 + end + fₖ = 0.5 * norm(F)^2 H = ∇f * ∇f g = ∇f * F diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl index 39a4f3888..98687bc07 100644 --- a/lib/SimpleNonlinearSolve/test/basictests.jl +++ b/lib/SimpleNonlinearSolve/test/basictests.jl @@ -55,7 +55,7 @@ end # SimpleTrustRegion function benchmark_scalar(f, u0) probN = NonlinearProblem{false}(f, u0) - sol = (solve(probN, SimpleTrustRegion(10.0))) + sol = (solve(probN, SimpleTrustRegion())) end sol = benchmark_scalar(sf, csu0) @@ -68,8 +68,7 @@ using ForwardDiff # Immutable f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] -for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleTrustRegion(10.0)] +for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion()) g = function (p) probN = NonlinearProblem{false}(f, csu0, p) sol = solve(probN, alg, abstol = 1e-9) @@ -84,8 +83,7 @@ end # Scalar f, u0 = (u, p) -> u * u - p, 1.0 -for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleTrustRegion(10.0)] +for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion()) g = function (p) probN = NonlinearProblem{false}(f, oftype(p, u0), p) sol = solve(probN, alg) @@ -126,8 +124,7 @@ for alg in [Bisection(), Falsi()] @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p) end -for alg in [SimpleNewtonRaphson(), Broyden(), Klement(), - SimpleTrustRegion(10.0)] +for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion()) global g, p g = function (p) probN = NonlinearProblem{false}(f, 0.5, p) @@ -144,8 +141,8 @@ probN = NonlinearProblem(f, u0) @test solve(probN, SimpleNewtonRaphson()).u[end] ≈ sqrt(2.0) @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleTrustRegion(10.0)).u[end] ≈ sqrt(2.0) -@test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleTrustRegion()).u[end] ≈ sqrt(2.0) +@test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] ≈ sqrt(2.0) @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0) @test solve(probN, Klement()).u[end] ≈ sqrt(2.0) @@ -159,9 +156,9 @@ for u0 in [1.0, [1, 1.0]] @test solve(probN, SimpleNewtonRaphson()).u ≈ sol @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol - @test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol - @test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol - @test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u ≈ sol + @test solve(probN, SimpleTrustRegion()).u ≈ sol + @test solve(probN, SimpleTrustRegion()).u ≈ sol + @test solve(probN, SimpleTrustRegion(; autodiff = false)).u ≈ sol @test solve(probN, Broyden()).u ≈ sol @@ -215,17 +212,16 @@ f = (u, p) -> 0.010000000000000002 .+ (0.21640425613334457 .+ 216.40425613334457 ./ (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .- - 0.0011552453009332421u -.-p + 0.0011552453009332421u .- p g = function (p) probN = NonlinearProblem{false}(f, u0, p) - sol = solve(probN, SimpleTrustRegion(100.0)) + sol = solve(probN, SimpleTrustRegion()) return sol.u end p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] u = g(p) f(u, p) -@test all(f(u, p) .< 1e-10) +@test all(abs.(f(u, p)) .< 1e-10) # Test kwars in `SimpleTrustRegion` max_trust_radius = [10.0, 100.0, 1000.0] @@ -242,7 +238,7 @@ list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold, expand_factor, max_shrink_times) for options in list_of_options local probN, sol, alg - alg = SimpleTrustRegion(options[1]; + alg = SimpleTrustRegion(max_trust_radius = options[1], initial_trust_radius = options[2], step_threshold = options[3], shrink_threshold = options[4], @@ -253,5 +249,5 @@ for options in list_of_options probN = NonlinearProblem(f, u0, p) sol = solve(probN, alg) - @test all(f(u, p) .< 1e-10) + @test all(abs.(f(u, p)) .< 1e-10) end