From 2387fb6e3d7a3228d00d65c836062c80e5deb857 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Fri, 20 Jan 2023 17:34:35 +0100 Subject: [PATCH 01/10] Started to implement Levenberg-Marquardt. --- src/NonlinearSolve.jl | 3 +- src/ad.jl | 5 +- src/levenberg.jl | 364 ++++++++++++++++++++++++++++++++++++++++++ test/basictests.jl | 170 ++++++++++++++++++++ 4 files changed, 539 insertions(+), 3 deletions(-) create mode 100644 src/levenberg.jl diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 305d9e5aa..c121573d3 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -32,6 +32,7 @@ include("utils.jl") include("jacobian.jl") include("raphson.jl") include("trustRegion.jl") +include("levenberg.jl") include("ad.jl") import SnoopPrecompile @@ -48,6 +49,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) end end end -export NewtonRaphson, TrustRegion +export NewtonRaphson, TrustRegion, LevenbergMarquardt end # module diff --git a/src/ad.jl b/src/ad.jl index b88f23751..e29c59f15 100644 --- a/src/ad.jl +++ b/src/ad.jl @@ -26,7 +26,7 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, <:Dual{T, V, P}}, - alg::Union{NewtonRaphson, TrustRegion}, + alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt}, args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; @@ -35,7 +35,8 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, <:AbstractArray{<:Dual{T, V, P}}}, - alg::Union{NewtonRaphson, TrustRegion}, args...; + alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt}, + args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; diff --git a/src/levenberg.jl b/src/levenberg.jl new file mode 100644 index 000000000..3a23e843d --- /dev/null +++ b/src/levenberg.jl @@ -0,0 +1,364 @@ +""" +```julia +LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(), + standardtag = Val{true}(), concrete_jac = nothing, + diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS) +``` + +An advanced LevenbergMarquardt implementation with support for efficient handling of sparse +matrices via colored automatic differentiation and preconditioned linear solvers. Designed +for large-scale and numerically-difficult nonlinear systems. + +### Keyword Arguments + +- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation + system. This allows for multiple derivative columns to be computed simultaneously, + improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's + default chunk size mechanism. For more details, see the documentation for + [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). +- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. + Note that this argument is ignored if an analytical Jacobian is passed, as that will be + used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via + SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for + finite differencing. +- `standardtag`: whether to use a standardized tag definition for the purposes of automatic + differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`, + then ForwardDiff's default function naming tag is used, which results in larger stack + traces. +- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, + then the Jacobian will not be constructed and instead direct Jacobian-vector products + `J*v` are computed using forward-mode automatic differentiation or finite differencing + tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, + for example for a preconditioner, `concrete_jac = true` can be passed in order to force + the construction of the Jacobian. +- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to + `Val{:forward}` for forward finite differences. For more details on the choices, see the + [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. +- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the + linear solves within the Newton method. Defaults to `nothing`, which means it uses the + LinearSolve.jl default algorithm choice. For more information on available algorithm + choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). +- `precs`: the choice of preconditioners for the linear solver. Defaults to using no + preconditioners. For more information on specifying preconditioners for LinearSolve + algorithms, consult the + [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + +!!! note + + Currently, the linear solver and chunk size choice only applies to in-place defined + `NonlinearProblem`s. That is expected to change in the future. +""" +struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <: + AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} + linsolve::L + precs::P + damping_initial::T + damping_increase_factor::T + damping_decrease_factor::T + finite_diff_step_geodesic::T + α_geodesic::T + b_uphill::T # TODO test what happens if the input to this in an Interger, but Float to the others + min_damping_D::T +end + +function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(), + standardtag = Val{true}(), concrete_jac = nothing, + diff_type = Val{:forward}, linsolve = nothing, + precs = DEFAULT_PRECS, + damping_initial::Real = 1.0, + damping_increase_factor::Real = 2.0, + damping_decrease_factor::Real = 3.0, + finite_diff_step_geodesic::Real = 0.1, + α_geodesic::Real = 0.75, + b_uphill::Real = 1.0, + min_damping_D::Real = 1e-6) + LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, + typeof(linsolve), typeof(precs), _unwrap_val(standardtag), + _unwrap_val(concrete_jac), typeof(damping_initial)}(linsolve, precs, + damping_initial, + damping_increase_factor, + damping_decrease_factor, + finite_diff_step_geodesic, + α_geodesic, + b_uphill, + min_damping_D) +end + +mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType, + INType, tolType, + probType, ufType, L, jType, JC, DᵀDType, + floatType + } + f::fType + alg::algType + u::uType + fu::resType + p::pType + uf::ufType + linsolve::L + J::jType + du1::duType + jac_config::JC + iter::Int + force_stop::Bool + maxiters::Int + internalnorm::INType + retcode::SciMLBase.ReturnCode.T + abstol::tolType + prob::probType + DᵀD::DᵀDType + JᵀJ::jType + λ::floatType + λ_factor::floatType # TODO test if this handles interger inputs. (test to input damp_init::float and damp_inc::int) + v::uType + a::uType + tmp_vec::uType + v_old::uType + norm_v_old::floatType + δ::uType + loss_old::floatType + + function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType, + p::pType, + uf::ufType, linsolve::L, J::jType, du1::duType, + jac_config::JC, iter::Int, + force_stop::Bool, maxiters::Int, + internalnorm::INType, + retcode::SciMLBase.ReturnCode.T, abstol::tolType, + prob::probType, DᵀD::DᵀDType, JᵀJ::jType, + λ::floatType, λ_factor::floatType, v::uType, + a::uType, tmp_vec::uType, + v_old::uType, norm_v_old::floatType, δ::uType, + loss_old::floatType) where { + iip, fType, algType, + uType, + duType, resType, + pType, + INType, + tolType, + probType, ufType, L, + jType, JC, + DᵀDType, + floatType + } + new{iip, fType, algType, uType, duType, resType, + pType, INType, tolType, probType, ufType, L, + jType, JC, DᵀDType, floatType}(f, + alg, + u, + fu, + p, + uf, + linsolve, + J, + du1, + jac_config, + iter, + force_stop, + maxiters, + internalnorm, + retcode, + abstol, + prob, + DᵀD, + JᵀJ, + λ, + λ_factor, + v, + a, + tmp_vec, + v_old, + norm_v_old, + δ, + loss_old) + end +end + +function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{true}) + uf = JacobianWrapper(f, p) + J = ArrayInterfaceCore.undefmatrix(u) + + linprob = LinearProblem(J, _vec(zero(u)); u0 = _vec(zero(u))) + weight = similar(u) + recursivefill!(weight, false) + + Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing, + nothing)..., weight) + linsolve = init(linprob, alg.linsolve, alias_A = true, alias_b = true, + Pl = Pl, Pr = Pr) + + du1 = zero(u) + du2 = zero(u) + tmp = zero(u) + jac_config = build_jac_config(alg, f, uf, du1, u, tmp, du2) + + uf, linsolve, J, du1, jac_config +end + +function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{false}) + JacobianWrapper(f, p), nothing, ArrayInterfaceCore.undefmatrix(u), nothing, nothing +end + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarquardt, + args...; + alias_u0 = false, + maxiters = 1000, + abstol = 1e-6, + internalnorm = DEFAULT_NORM, + kwargs...) where {uType, iip} + if alias_u0 + u = prob.u0 + else + u = deepcopy(prob.u0) + end + f = prob.f + p = prob.p + if iip + fu = zero(u) + f(fu, u, p) + else + fu = f(u, p) + end + uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) + + # "A good compromise is to specify a minimum value of the damping terms in DᵀD". + if u isa Number + DᵀD = alg.min_damping_D # TODO check if type is correct. + else + d = d = similar(u) + d .= alg.min_damping_D + DᵀD = Diagonal(d) + end + loss = internalnorm(fu) + JᵀJ = zero(J) + v = zero(u) + a = zero(u) + tmp_vec = zero(u) + v_old = zero(u) + δ = zero(u) + + return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config, + 1, false, maxiters, internalnorm, + ReturnCode.Default, abstol, prob, DᵀD, JᵀJ, + alg.damping_initial, alg.damping_increase_factor, + v, a, tmp_vec, v_old, loss, δ, loss) +end + +function perform_step!(cache::LevenbergMarquardtCache{true}) + # TODO implement for in-place... + # @unpack fu, f = cache + # if iszero(fu) + # cache.force_stop = true + # return nothing + # end + + # cache.J = jacobian(cache, f) + # @unpack u, f, p, λ, J = cache + # mul!(cache.JᵀJ, J', J) + # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + + # # Usual Levenberg-Marquardt step ("velocity"). + # cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu) + + # @unpack v, alg = cache + # h = alg.finite_diff_step_geodesic + # # Geodesic acceleration (step_size = v + a / 2). + # cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- mul!(cache.Jv, J, v))) + + # # Require acceptable steps to satisfy the following condition. + # norm_v = norm(v) + # if (2 * norm(cache.a) / norm_v) < alg.α_geodesic + # cache.δ = v + cache.a / 2 + # @unpack δ, loss_old, norm_v_old, v_old = cache + # fu_new = f(u .+ δ, p) + # loss = cache.internalnorm(fu_new) + + # # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). + # β = dot(v, v_old) / (norm_v * norm_v_old) + # if (1 - β)^alg.b_uphill * loss ≤ loss_old + # # Accept step. + # cache.u += δ + # if loss < cache.abstol + # cache.force_stop = true + # return nothing + # end + # cache.fu = fu_new + # cache.v_old = v + # cache.norm_v_old = norm_v + # cache.loss_old = loss + # cache.λ_factor = 1 / alg.damping_decrease_factor + # end + # end + # cache.λ *= cache.λ_factor + # cache.λ_factor = alg.damping_increase_factor + return nothing +end + +function perform_step!(cache::LevenbergMarquardtCache{false}) + @unpack fu, f = cache + if iszero(fu) + cache.force_stop = true + return nothing + end + + J = jacobian(cache, f) + @unpack u, f, p, λ = cache + cache.JᵀJ = J' * J + if cache.DᵀD isa Number + cache.DᵀD = max(cache.DᵀD, cache.JᵀJ) + else + cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + end + + # Usual Levenberg-Marquardt step ("velocity"). + cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu) + + @unpack v, alg = cache + h = alg.finite_diff_step_geodesic + # Geodesic acceleration (step_size = v + a / 2). + cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- J * v)) + + # Require acceptable steps to satisfy the following condition. + norm_v = norm(v) + if (2 * norm(cache.a) / norm_v) < alg.α_geodesic + cache.δ = v + cache.a / 2 + @unpack δ, loss_old, norm_v_old, v_old = cache + fu_new = f(u .+ δ, p) + loss = cache.internalnorm(fu_new) + + # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). + β = dot(v, v_old) / (norm_v * norm_v_old) + if (1 - β)^alg.b_uphill * loss ≤ loss_old + # Accept step. + cache.u += δ + if loss < cache.abstol + cache.force_stop = true + return nothing + end + cache.fu = fu_new + cache.v_old = v + cache.norm_v_old = norm_v + cache.loss_old = loss + cache.λ_factor = 1 / alg.damping_decrease_factor + end + end + cache.λ *= cache.λ_factor + cache.λ_factor = alg.damping_increase_factor + return nothing +end + +function SciMLBase.solve!(cache::LevenbergMarquardtCache) + while !cache.force_stop && cache.iter < cache.maxiters + perform_step!(cache) + cache.iter += 1 + end + + if cache.iter == cache.maxiters + cache.retcode = ReturnCode.MaxIters + else + cache.retcode = ReturnCode.Success + end + + SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu; + retcode = cache.retcode) +end diff --git a/test/basictests.jl b/test/basictests.jl index ed2907fdb..6cd59aaa9 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -299,3 +299,173 @@ for options in list_of_options sol = solve(probN, alg, abstol = 1e-10) @test all(abs.(f(u, p)) .< 1e-10) end + +# --- LevenbergMarquardt tests --- + +function benchmark_immutable(f, u0) + probN = NonlinearProblem{false}(f, u0) + solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) + sol = solve!(solver) +end + +function benchmark_mutable(f, u0) + probN = NonlinearProblem{false}(f, u0) + solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) + sol = solve!(solver) +end + +function benchmark_scalar(f, u0) + probN = NonlinearProblem{false}(f, u0) + sol = (solve(probN, LevenbergMarquardt())) +end + +function ff(u, p) + u .* u .- 2 +end + +function sf(u, p) + u * u - 2 +end +u0 = [1.0, 1.0] + +sol = benchmark_immutable(ff, cu0) +@test sol.retcode === ReturnCode.Success +@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) +sol = benchmark_mutable(ff, u0) +@test sol.retcode === ReturnCode.Success +@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) +sol = benchmark_scalar(sf, csu0) +@test sol.retcode === ReturnCode.Success +@test abs(sol.u * sol.u - 2) < 1e-9 + +# function benchmark_inplace(f, u0) +# probN = NonlinearProblem{true}(f, u0) +# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) +# sol = solve!(solver) +# end + +# function ffiip(du, u, p) +# du .= u .* u .- 2 +# end +# u0 = [1.0, 1.0] + +# sol = benchmark_inplace(ffiip, u0) +# @test sol.retcode === ReturnCode.Success +# @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + +# u0 = [1.0, 1.0] +# probN = NonlinearProblem{true}(ffiip, u0) +# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) +# @test (@ballocated solve!(solver)) < 120 + +# AD Tests +using ForwardDiff + +# Immutable +f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0] + +g = function (p) + probN = NonlinearProblem{false}(f, csu0, p) + sol = solve(probN, LevenbergMarquardt(), abstol = 1e-9) + return sol.u[end] +end + +for p in 1.1:0.1:100.0 + @test g(p) ≈ sqrt(p) + @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p)) +end + +# Scalar +f, u0 = (u, p) -> u * u - p, 1.0 + +g = function (p) + probN = NonlinearProblem{false}(f, oftype(p, u0), p) + sol = solve(probN, LevenbergMarquardt(), abstol = 1e-10) + return sol.u +end + +@test ForwardDiff.derivative(g, 3.0) ≈ 1 / (2 * sqrt(3.0)) + +for p in 1.1:0.1:100.0 + @test g(p) ≈ sqrt(p) + @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p)) +end + +f = (u, p) -> p[1] * u * u - p[2] +t = (p) -> [sqrt(p[2] / p[1])] +p = [0.9, 50.0] +gnewton = function (p) + probN = NonlinearProblem{false}(f, 0.5, p) + sol = solve(probN, LevenbergMarquardt()) + return [sol.u] +end +@test gnewton(p) ≈ [sqrt(p[2] / p[1])] +@test ForwardDiff.jacobian(gnewton, p) ≈ ForwardDiff.jacobian(t, p) + +# Error Checks +f, u0 = (u, p) -> u .* u .- 2.0, @SVector[1.0, 1.0] +probN = NonlinearProblem(f, u0) + +@test solve(probN, LevenbergMarquardt()).u[end] ≈ sqrt(2.0) +@test solve(probN, LevenbergMarquardt(; autodiff = false)).u[end] ≈ sqrt(2.0) + +for u0 in [1.0, [1, 1.0]] + local f, probN, sol + f = (u, p) -> u .* u .- 2.0 + probN = NonlinearProblem(f, u0) + sol = sqrt(2) * u0 + + @test solve(probN, LevenbergMarquardt()).u ≈ sol + @test solve(probN, LevenbergMarquardt()).u ≈ sol + @test solve(probN, LevenbergMarquardt(; autodiff = false)).u ≈ sol +end + +# Test that `LevenbergMarquardt` passes a test that `NewtonRaphson` fails on. +u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0] +global g, f +f = (u, p) -> 0.010000000000000002 .+ + 10.000000000000002 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ (1 .+ + (0.21640425613334457 .+ + 216.40425613334457 ./ + (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .- + 0.0011552453009332421u .- p +g = function (p) + probN = NonlinearProblem{false}(f, u0, p) + sol = solve(probN, LevenbergMarquardt(), abstol = 1e-10) + return sol.u +end +p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +u = g(p) +f(u, p) +@test all(abs.(f(u, p)) .< 1e-10) + +# # Test kwars in `LevenbergMarquardt` +# max_trust_radius = [10.0, 100.0, 1000.0] +# initial_trust_radius = [10.0, 1.0, 0.1] +# step_threshold = [0.0, 0.01, 0.25] +# shrink_threshold = [0.25, 0.3, 0.5] +# expand_threshold = [0.5, 0.8, 0.9] +# shrink_factor = [0.1, 0.3, 0.5] +# expand_factor = [1.5, 2.0, 3.0] +# max_shrink_times = [10, 20, 30] + +# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold, +# shrink_threshold, expand_threshold, shrink_factor, +# expand_factor, max_shrink_times) +# for options in list_of_options +# local probN, sol, alg +# alg = LevenbergMarquardt(max_trust_radius = options[1], +# initial_trust_radius = options[2], +# step_threshold = options[3], +# shrink_threshold = options[4], +# expand_threshold = options[5], +# shrink_factor = options[6], +# expand_factor = options[7], +# max_shrink_times = options[8]) + +# probN = NonlinearProblem{false}(f, u0, p) +# sol = solve(probN, alg, abstol = 1e-10) +# @test all(abs.(f(u, p)) .< 1e-10) +# end From ee5931dc3a6632cec3cefe290e602a8f50f501eb Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 10:32:54 +0100 Subject: [PATCH 02/10] Implemented Levenberg for iip=true --- src/levenberg.jl | 287 +++++++++++++++++++++++++-------------------- test/basictests.jl | 91 +++++++------- 2 files changed, 203 insertions(+), 175 deletions(-) diff --git a/src/levenberg.jl b/src/levenberg.jl index 3a23e843d..642efca55 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -1,13 +1,27 @@ """ ```julia -LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS) +LevenbergMarquardt(; chunk_size = Val{0}(), + autodiff = Val{true}(), + standardtag = Val{true}(), + concrete_jac = nothing, + diff_type = Val{:forward}, + linsolve = nothing, precs = DEFAULT_PRECS, + damping_initial::Real = 1.0, + damping_increase_factor::Real = 2.0, + damping_decrease_factor::Real = 3.0, + finite_diff_step_geodesic::Real = 0.1, + α_geodesic::Real = 0.75, + b_uphill::Real = 1.0, + min_damping_D::AbstractFloat = 1e-8) ``` -An advanced LevenbergMarquardt implementation with support for efficient handling of sparse -matrices via colored automatic differentiation and preconditioned linear solvers. Designed -for large-scale and numerically-difficult nonlinear systems. +An advanced Levenberg-Marquardt implementation with the improvements suggested in the +[paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt +algorithm for nonlinear least-squares minimization". This implementation is designed with +support for efficient handling of sparse matrices via colored automatic differentiation and +preconditioned linear solvers. Designed for large-scale and numerically-difficult nonlinear +systems. + ### Keyword Arguments @@ -42,6 +56,32 @@ for large-scale and numerically-difficult nonlinear systems. preconditioners. For more information on specifying preconditioners for LinearSolve algorithms, consult the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). +- `damping_initial`: the initial damping factor. The damping is proportional to the inverse + of the step size and is changed dynamically in each iteration. Defaults to `1.0`. For more + details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885). +- `damping_increase_factor`: the factor by which the damping is increased if a step isn't + accepted i.e. how much smaller the next step size should be if a step is rejected. + Defaults to `2.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). +- `damping_decrease_factor`: the factor by which the damping is decreased if a step is + accepted i.e. how much larger the next step size should be if a step is accepted. + Defaults to `3.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). +- `finite_diff_step_geodesic`: the finite differencing step size used for the geodesic + acceleration method. Defaults to `0.1` which means thats the step is about 10% of the + first order step. For more details, see section 3 of + [this paper](https://arxiv.org/abs/1201.5885). +- `α_geodesic`: a factor that determines if a step is accepted or rejected. + In order to utilize the geodesic acceleration as an addition to the Levenberg-Marquardt + algorithm, it is necessary to make one small addition. To require acceptable steps to + satisfy the condition ... continue to document this... + +# TODO documentation and cleanup of code & che + + α_geodesic::Real = 0.75, + b_uphill::Real = 1.0, + min_damping_D::AbstractFloat = 1e-8 + !!! note @@ -57,13 +97,16 @@ struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <: damping_decrease_factor::T finite_diff_step_geodesic::T α_geodesic::T - b_uphill::T # TODO test what happens if the input to this in an Interger, but Float to the others + b_uphill::T min_damping_D::T end -function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, +function LevenbergMarquardt(; chunk_size = Val{0}(), + autodiff = Val{true}(), + standardtag = Val{true}(), + concrete_jac = nothing, + diff_type = Val{:forward}, + linsolve = nothing, precs = DEFAULT_PRECS, damping_initial::Real = 1.0, damping_increase_factor::Real = 2.0, @@ -71,17 +114,17 @@ function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(), finite_diff_step_geodesic::Real = 0.1, α_geodesic::Real = 0.75, b_uphill::Real = 1.0, - min_damping_D::Real = 1e-6) + min_damping_D::AbstractFloat = 1e-8) LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, typeof(linsolve), typeof(precs), _unwrap_val(standardtag), - _unwrap_val(concrete_jac), typeof(damping_initial)}(linsolve, precs, - damping_initial, - damping_increase_factor, - damping_decrease_factor, - finite_diff_step_geodesic, - α_geodesic, - b_uphill, - min_damping_D) + _unwrap_val(concrete_jac), typeof(min_damping_D)}(linsolve, precs, + damping_initial, + damping_increase_factor, + damping_decrease_factor, + finite_diff_step_geodesic, + α_geodesic, + b_uphill, + min_damping_D) end mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType, @@ -97,7 +140,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy uf::ufType linsolve::L J::jType - du1::duType + du_tmp::duType jac_config::JC iter::Int force_stop::Bool @@ -109,7 +152,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy DᵀD::DᵀDType JᵀJ::jType λ::floatType - λ_factor::floatType # TODO test if this handles interger inputs. (test to input damp_init::float and damp_inc::int) + λ_factor::floatType v::uType a::uType tmp_vec::uType @@ -117,60 +160,36 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy norm_v_old::floatType δ::uType loss_old::floatType + make_new_J::Bool + fu_tmp::resType function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType, - p::pType, - uf::ufType, linsolve::L, J::jType, du1::duType, - jac_config::JC, iter::Int, + p::pType, uf::ufType, linsolve::L, J::jType, + du_tmp::duType, jac_config::JC, iter::Int, force_stop::Bool, maxiters::Int, internalnorm::INType, retcode::SciMLBase.ReturnCode.T, abstol::tolType, prob::probType, DᵀD::DᵀDType, JᵀJ::jType, λ::floatType, λ_factor::floatType, v::uType, - a::uType, tmp_vec::uType, - v_old::uType, norm_v_old::floatType, δ::uType, - loss_old::floatType) where { - iip, fType, algType, - uType, - duType, resType, - pType, - INType, - tolType, - probType, ufType, L, - jType, JC, - DᵀDType, - floatType - } + a::uType, tmp_vec::uType, v_old::uType, + norm_v_old::floatType, δ::uType, + loss_old::floatType, + make_new_J::Bool, + fu_tmp::resType) where { + iip, fType, algType, + uType, duType, resType, + pType, INType, tolType, + probType, ufType, L, + jType, JC, DᵀDType, + floatType + } new{iip, fType, algType, uType, duType, resType, pType, INType, tolType, probType, ufType, L, - jType, JC, DᵀDType, floatType}(f, - alg, - u, - fu, - p, - uf, - linsolve, - J, - du1, - jac_config, - iter, - force_stop, - maxiters, - internalnorm, - retcode, - abstol, - prob, - DᵀD, - JᵀJ, - λ, - λ_factor, - v, - a, - tmp_vec, - v_old, - norm_v_old, - δ, - loss_old) + jType, JC, DᵀDType, floatType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, + jac_config, iter, force_stop, maxiters, + internalnorm, retcode, abstol, prob, DᵀD, + JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old, + norm_v_old, δ, loss_old, make_new_J, fu_tmp) end end @@ -219,16 +238,16 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq else fu = f(u, p) end - uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) + uf, linsolve, J, du_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) - # "A good compromise is to specify a minimum value of the damping terms in DᵀD". if u isa Number - DᵀD = alg.min_damping_D # TODO check if type is correct. + DᵀD = alg.min_damping_D else - d = d = similar(u) + d = similar(u) d .= alg.min_damping_D DᵀD = Diagonal(d) end + loss = internalnorm(fu) JᵀJ = zero(J) v = zero(u) @@ -236,82 +255,91 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq tmp_vec = zero(u) v_old = zero(u) δ = zero(u) + make_new_J = true + fu_tmp = zero(fu) - return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config, + return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, + jac_config, 1, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, DᵀD, JᵀJ, alg.damping_initial, alg.damping_increase_factor, - v, a, tmp_vec, v_old, loss, δ, loss) + v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, + fu_tmp) end - function perform_step!(cache::LevenbergMarquardtCache{true}) - # TODO implement for in-place... - # @unpack fu, f = cache - # if iszero(fu) - # cache.force_stop = true - # return nothing - # end - - # cache.J = jacobian(cache, f) - # @unpack u, f, p, λ, J = cache - # mul!(cache.JᵀJ, J', J) - # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) - - # # Usual Levenberg-Marquardt step ("velocity"). - # cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu) - - # @unpack v, alg = cache - # h = alg.finite_diff_step_geodesic - # # Geodesic acceleration (step_size = v + a / 2). - # cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- mul!(cache.Jv, J, v))) - - # # Require acceptable steps to satisfy the following condition. - # norm_v = norm(v) - # if (2 * norm(cache.a) / norm_v) < alg.α_geodesic - # cache.δ = v + cache.a / 2 - # @unpack δ, loss_old, norm_v_old, v_old = cache - # fu_new = f(u .+ δ, p) - # loss = cache.internalnorm(fu_new) - - # # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). - # β = dot(v, v_old) / (norm_v * norm_v_old) - # if (1 - β)^alg.b_uphill * loss ≤ loss_old - # # Accept step. - # cache.u += δ - # if loss < cache.abstol - # cache.force_stop = true - # return nothing - # end - # cache.fu = fu_new - # cache.v_old = v - # cache.norm_v_old = norm_v - # cache.loss_old = loss - # cache.λ_factor = 1 / alg.damping_decrease_factor - # end - # end - # cache.λ *= cache.λ_factor - # cache.λ_factor = alg.damping_increase_factor + @unpack fu, f, make_new_J = cache + if iszero(fu) + cache.force_stop = true + return nothing + end + if make_new_J + jacobian!(cache.J, cache) + mul!(cache.JᵀJ, cache.J', cache.J) + cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + cache.make_new_J = false + end + @unpack u, p, λ, JᵀJ, DᵀD, J = cache + + # Usual Levenberg-Marquardt step ("velocity"). + cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu) + + @unpack v, alg = cache + h = alg.finite_diff_step_geodesic + # Geodesic acceleration (step_size = v + a / 2). + f(cache.fu_tmp, u .+ h .* v, p) + cache.a = -J \ ((2 / h) .* + ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))) + + # Require acceptable steps to satisfy the following condition. + norm_v = norm(v) + if (2 * norm(cache.a) / norm_v) < alg.α_geodesic + @. cache.δ = v + cache.a / 2 + @unpack δ, loss_old, norm_v_old, v_old = cache + f(cache.fu_tmp, u .+ δ, p) + loss = cache.internalnorm(cache.fu_tmp) + + # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). + β = dot(v, v_old) / (norm_v * norm_v_old) + if (1 - β)^alg.b_uphill * loss ≤ loss_old + # Accept step. + cache.u .+= δ + if loss < cache.abstol + cache.force_stop = true + return nothing + end + cache.fu .= cache.fu_tmp + cache.v_old .= v + cache.norm_v_old = norm_v + cache.loss_old = loss + cache.λ_factor = 1 / alg.damping_decrease_factor + cache.make_new_J = true + end + end + cache.λ *= cache.λ_factor + cache.λ_factor = alg.damping_increase_factor return nothing end function perform_step!(cache::LevenbergMarquardtCache{false}) - @unpack fu, f = cache + @unpack fu, f, make_new_J = cache if iszero(fu) cache.force_stop = true return nothing end - - J = jacobian(cache, f) - @unpack u, f, p, λ = cache - cache.JᵀJ = J' * J - if cache.DᵀD isa Number - cache.DᵀD = max(cache.DᵀD, cache.JᵀJ) - else - cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + if make_new_J + cache.J = jacobian(cache, f) + cache.JᵀJ = cache.J' * cache.J + if cache.JᵀJ isa Number + cache.DᵀD = max(cache.DᵀD, cache.JᵀJ) + else + cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + end + cache.make_new_J = false end + @unpack u, p, λ, JᵀJ, DᵀD, J = cache # Usual Levenberg-Marquardt step ("velocity"). - cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu) + cache.v = -(JᵀJ + λ * DᵀD) \ (J' * fu) @unpack v, alg = cache h = alg.finite_diff_step_geodesic @@ -340,6 +368,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) cache.norm_v_old = norm_v cache.loss_old = loss cache.λ_factor = 1 / alg.damping_decrease_factor + cache.make_new_J = true end end cache.λ *= cache.λ_factor diff --git a/test/basictests.jl b/test/basictests.jl index 6cd59aaa9..0e3b6fa95 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -19,7 +19,7 @@ end function benchmark_scalar(f, u0) probN = NonlinearProblem{false}(f, u0) - sol = (solve(probN, NewtonRaphson())) + sol = (solve(probN, NewtonRaphson(), abstol = 1e-9)) end function ff(u, p) @@ -146,7 +146,7 @@ end function benchmark_scalar(f, u0) probN = NonlinearProblem{false}(f, u0) - sol = (solve(probN, TrustRegion())) + sol = (solve(probN, TrustRegion(), abstol = 1e-9)) end function ff(u, p) @@ -316,7 +316,7 @@ end function benchmark_scalar(f, u0) probN = NonlinearProblem{false}(f, u0) - sol = (solve(probN, LevenbergMarquardt())) + sol = (solve(probN, LevenbergMarquardt(), abstol = 1e-9)) end function ff(u, p) @@ -338,25 +338,25 @@ sol = benchmark_scalar(sf, csu0) @test sol.retcode === ReturnCode.Success @test abs(sol.u * sol.u - 2) < 1e-9 -# function benchmark_inplace(f, u0) -# probN = NonlinearProblem{true}(f, u0) -# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) -# sol = solve!(solver) -# end +function benchmark_inplace(f, u0) + probN = NonlinearProblem{true}(f, u0) + solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) + sol = solve!(solver) +end -# function ffiip(du, u, p) -# du .= u .* u .- 2 -# end -# u0 = [1.0, 1.0] +function ffiip(du, u, p) + du .= u .* u .- 2 +end +u0 = [1.0, 1.0] -# sol = benchmark_inplace(ffiip, u0) -# @test sol.retcode === ReturnCode.Success -# @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) +sol = benchmark_inplace(ffiip, u0) +@test sol.retcode === ReturnCode.Success +@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) -# u0 = [1.0, 1.0] -# probN = NonlinearProblem{true}(ffiip, u0) -# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) -# @test (@ballocated solve!(solver)) < 120 +u0 = [1.0, 1.0] +probN = NonlinearProblem{true}(ffiip, u0) +solver = init(probN, LevenbergMarquardt(), abstol = 1e-9) +@test (@ballocated solve!(solver)) < 120 # AD Tests using ForwardDiff @@ -442,30 +442,29 @@ f(u, p) @test all(abs.(f(u, p)) .< 1e-10) # # Test kwars in `LevenbergMarquardt` -# max_trust_radius = [10.0, 100.0, 1000.0] -# initial_trust_radius = [10.0, 1.0, 0.1] -# step_threshold = [0.0, 0.01, 0.25] -# shrink_threshold = [0.25, 0.3, 0.5] -# expand_threshold = [0.5, 0.8, 0.9] -# shrink_factor = [0.1, 0.3, 0.5] -# expand_factor = [1.5, 2.0, 3.0] -# max_shrink_times = [10, 20, 30] - -# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold, -# shrink_threshold, expand_threshold, shrink_factor, -# expand_factor, max_shrink_times) -# for options in list_of_options -# local probN, sol, alg -# alg = LevenbergMarquardt(max_trust_radius = options[1], -# initial_trust_radius = options[2], -# step_threshold = options[3], -# shrink_threshold = options[4], -# expand_threshold = options[5], -# shrink_factor = options[6], -# expand_factor = options[7], -# max_shrink_times = options[8]) - -# probN = NonlinearProblem{false}(f, u0, p) -# sol = solve(probN, alg, abstol = 1e-10) -# @test all(abs.(f(u, p)) .< 1e-10) -# end +damping_initial = [0.5, 2.0, 5.0] +damping_increase_factor = [1.5, 3.0, 10.0] +damping_decrease_factor = [2, 5, 10] +finite_diff_step_geodesic = [0.02, 0.2, 0.3] +α_geodesic = [0.6, 0.8, 0.9] +b_uphill = [0, 1, 2] +min_damping_D = [1e-12, 1e-9, 1e-4] + +list_of_options = zip(damping_initial, damping_increase_factor, damping_decrease_factor, + finite_diff_step_geodesic, α_geodesic, b_uphill, + min_damping_D) +for options in list_of_options + local probN, sol, alg + alg = LevenbergMarquardt(damping_initial = options[1], + damping_increase_factor = options[2], + damping_decrease_factor = options[3], + finite_diff_step_geodesic = options[4], + α_geodesic = options[5], + b_uphill = options[6], + min_damping_D = options[7]) + + probN = NonlinearProblem{false}(f, u0, p) + sol = solve(probN, alg, abstol = 1e-10) + @test all(abs.(f(u, p)) .< 1e-10) + +end From a166576d340c3b5c550f4c7ff61e68050ce09d6d Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 13:25:02 +0100 Subject: [PATCH 03/10] Added documentation --- src/NonlinearSolve.jl | 2 +- src/levenberg.jl | 125 +++++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 56 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 063eae319..79e6788c6 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(), TrustRegion()) + (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) else (NewtonRaphson(),) end diff --git a/src/levenberg.jl b/src/levenberg.jl index 642efca55..3b779b338 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -17,10 +17,8 @@ LevenbergMarquardt(; chunk_size = Val{0}(), An advanced Levenberg-Marquardt implementation with the improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt -algorithm for nonlinear least-squares minimization". This implementation is designed with -support for efficient handling of sparse matrices via colored automatic differentiation and -preconditioned linear solvers. Designed for large-scale and numerically-difficult nonlinear -systems. +algorithm for nonlinear least-squares minimization". Designed for large-scale and +numerically-difficult nonlinear systems. ### Keyword Arguments @@ -56,31 +54,49 @@ systems. preconditioners. For more information on specifying preconditioners for LinearSolve algorithms, consult the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). -- `damping_initial`: the initial damping factor. The damping is proportional to the inverse - of the step size and is changed dynamically in each iteration. Defaults to `1.0`. For more - details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885). -- `damping_increase_factor`: the factor by which the damping is increased if a step isn't - accepted i.e. how much smaller the next step size should be if a step is rejected. - Defaults to `2.0`. For more details, see section 2.1 of +- `damping_initial`: the starting value for the damping factor. The damping factor is + inversely proportional to the step size. The damping factor is adjusted during each + iteration. Defaults to `1.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). +- `damping_increase_factor`: the factor by which the damping is increased if a step is + rejected. Defaults to `2.0`. For more details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885). - `damping_decrease_factor`: the factor by which the damping is decreased if a step is - accepted i.e. how much larger the next step size should be if a step is accepted. - Defaults to `3.0`. For more details, see section 2.1 of + accepted. Defaults to `3.0`. For more details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885). -- `finite_diff_step_geodesic`: the finite differencing step size used for the geodesic - acceleration method. Defaults to `0.1` which means thats the step is about 10% of the - first order step. For more details, see section 3 of +- `finite_diff_step_geodesic`: the step size used for finite differencing used to calculate + the geodesic acceleration. Defaults to `0.1` which means that the step size is + approximately 10% of the first-order step. For more details, see section 3 of [this paper](https://arxiv.org/abs/1201.5885). -- `α_geodesic`: a factor that determines if a step is accepted or rejected. - In order to utilize the geodesic acceleration as an addition to the Levenberg-Marquardt - algorithm, it is necessary to make one small addition. To require acceptable steps to - satisfy the condition ... continue to document this... - -# TODO documentation and cleanup of code & che - - α_geodesic::Real = 0.75, - b_uphill::Real = 1.0, - min_damping_D::AbstractFloat = 1e-8 +- `α_geodesic`: a factor that determines if a step is accepted or rejected. To incorporate + geodesic acceleration as an addition to the Levenberg-Marquardt algorithm, it is necessary + that acceptable steps meet the condition + ``\\frac{2||a||}{||v||} \\le \\alpha_{\\text{geodesic}}``, where ``a`` is the geodesic + acceleration, ``v`` is the Levenberg-Marquardt algorithm's step (velocity along a geodesic + path) and `α_geodesic` is some number of order `1`. For most problems `α_geodesic = 0.75` + is a good value but for problems where convergence is difficult `α_geodesic = 0.1` is an + effective choice. Defaults to `0.75`. For more details, see section 3, equation (15) of + [this paper](https://arxiv.org/abs/1201.5885). +- `b_uphill`: a factor that determines if a step is accepted or rejected. The standard + choice in the Levenberg-Marquardt method is to accept all steps that decrease the cost + and reject all steps that increase the cost. Although this is a natural and safe choice, + it is often not the most efficient. Therefore downhill moves are always accepted, but + uphill moves are only conditionally accepted. To decide whether an uphill move will be + accepted at each iteration ``i``, we compute + ``\\beta_i = \\cos(v_{\\text{new}}, v_{\\text{old}})``, which denotes the cosine angle + between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted + step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To + specify, uphill moves are accepted if + ``(1-\\beta_i)^(b_{\\text{uphill}})C_{i+1} \\le C_i``, where ``C_i`` is the cost at + iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0` + allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves + will be accepted. Defaults to `1.0`. For more details, see section 4 of + [this paper](https://arxiv.org/abs/1201.5885). +- `min_damping_D`: the minimum value of the damping terms in the diagonal damping matrix + `DᵀD`, where `DᵀD` is given by the largest diagonal entries of `JᵀJ` yet encountered, + where `J` is the Jacobian. It is suggested by + [this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in + `DᵀD` to prevent the damping from being too small. Defaults to `1e-8`. !!! note @@ -117,20 +133,20 @@ function LevenbergMarquardt(; chunk_size = Val{0}(), min_damping_D::AbstractFloat = 1e-8) LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, typeof(linsolve), typeof(precs), _unwrap_val(standardtag), - _unwrap_val(concrete_jac), typeof(min_damping_D)}(linsolve, precs, - damping_initial, - damping_increase_factor, - damping_decrease_factor, - finite_diff_step_geodesic, - α_geodesic, - b_uphill, - min_damping_D) + _unwrap_val(concrete_jac), + typeof(min_damping_D)}(linsolve, precs, + damping_initial, + damping_increase_factor, + damping_decrease_factor, + finite_diff_step_geodesic, + α_geodesic, + b_uphill, + min_damping_D) end mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType, - INType, tolType, - probType, ufType, L, jType, JC, DᵀDType, - floatType + INType, tolType, probType, ufType, L, jType, JC, + DᵀDType, λType, lossType } f::fType alg::algType @@ -151,15 +167,15 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy prob::probType DᵀD::DᵀDType JᵀJ::jType - λ::floatType - λ_factor::floatType + λ::λType + λ_factor::λType v::uType a::uType tmp_vec::uType v_old::uType - norm_v_old::floatType + norm_v_old::lossType δ::uType - loss_old::floatType + loss_old::lossType make_new_J::Bool fu_tmp::resType @@ -170,26 +186,26 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy internalnorm::INType, retcode::SciMLBase.ReturnCode.T, abstol::tolType, prob::probType, DᵀD::DᵀDType, JᵀJ::jType, - λ::floatType, λ_factor::floatType, v::uType, + λ::λType, λ_factor::λType, v::uType, a::uType, tmp_vec::uType, v_old::uType, - norm_v_old::floatType, δ::uType, - loss_old::floatType, - make_new_J::Bool, + norm_v_old::lossType, δ::uType, + loss_old::lossType, make_new_J::Bool, fu_tmp::resType) where { iip, fType, algType, uType, duType, resType, pType, INType, tolType, probType, ufType, L, jType, JC, DᵀDType, - floatType + λType, lossType } new{iip, fType, algType, uType, duType, resType, pType, INType, tolType, probType, ufType, L, - jType, JC, DᵀDType, floatType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, - jac_config, iter, force_stop, maxiters, - internalnorm, retcode, abstol, prob, DᵀD, - JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old, - norm_v_old, δ, loss_old, make_new_J, fu_tmp) + jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, + jac_config, iter, force_stop, maxiters, + internalnorm, retcode, abstol, prob, DᵀD, + JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old, + norm_v_old, δ, loss_old, make_new_J, + fu_tmp) end end @@ -283,12 +299,11 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) # Usual Levenberg-Marquardt step ("velocity"). cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu) + # Geodesic acceleration (step_size = v + a / 2). @unpack v, alg = cache h = alg.finite_diff_step_geodesic - # Geodesic acceleration (step_size = v + a / 2). f(cache.fu_tmp, u .+ h .* v, p) - cache.a = -J \ ((2 / h) .* - ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))) + cache.a = -J \ ((2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))) # Require acceptable steps to satisfy the following condition. norm_v = norm(v) @@ -344,12 +359,12 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) @unpack v, alg = cache h = alg.finite_diff_step_geodesic # Geodesic acceleration (step_size = v + a / 2). - cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- J * v)) + cache.a = -J \ ((2 / h) .* ((f(u .+ h .* v, p) .- fu) ./ h .- J * v)) # Require acceptable steps to satisfy the following condition. norm_v = norm(v) if (2 * norm(cache.a) / norm_v) < alg.α_geodesic - cache.δ = v + cache.a / 2 + cache.δ = v .+ cache.a ./ 2 @unpack δ, loss_old, norm_v_old, v_old = cache fu_new = f(u .+ δ, p) loss = cache.internalnorm(fu_new) From a6a5bbb85a18ccdfd450668dea8894e65a91af8c Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 13:27:18 +0100 Subject: [PATCH 04/10] format --- test/basictests.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/basictests.jl b/test/basictests.jl index 0e3b6fa95..abf8eeb43 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -456,15 +456,14 @@ list_of_options = zip(damping_initial, damping_increase_factor, damping_decrease for options in list_of_options local probN, sol, alg alg = LevenbergMarquardt(damping_initial = options[1], - damping_increase_factor = options[2], - damping_decrease_factor = options[3], - finite_diff_step_geodesic = options[4], - α_geodesic = options[5], - b_uphill = options[6], - min_damping_D = options[7]) + damping_increase_factor = options[2], + damping_decrease_factor = options[3], + finite_diff_step_geodesic = options[4], + α_geodesic = options[5], + b_uphill = options[6], + min_damping_D = options[7]) probN = NonlinearProblem{false}(f, u0, p) sol = solve(probN, alg, abstol = 1e-10) @test all(abs.(f(u, p)) .< 1e-10) - end From 18948e8955c3c790ff7772852d2a57aa984e03dd Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 13:49:41 +0100 Subject: [PATCH 05/10] remove from precompilation --- src/NonlinearSolve.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 79e6788c6..b645df86d 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) + (NewtonRaphson(),) else (NewtonRaphson(),) end From 1ecad1574ac17b66315dd878339af71826638c29 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 15:29:36 +0100 Subject: [PATCH 06/10] added linsolve, and precompile --- src/NonlinearSolve.jl | 2 +- src/ad.jl | 4 ++-- src/levenberg.jl | 43 +++++++++++++++++++++++++++++-------------- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index b645df86d..fa2ee2009 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(),) + (NewtonRaphson(), LevenbergMarquardt()) else (NewtonRaphson(),) end diff --git a/src/ad.jl b/src/ad.jl index e29c59f15..eb3013e47 100644 --- a/src/ad.jl +++ b/src/ad.jl @@ -26,7 +26,7 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, <:Dual{T, V, P}}, - alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt}, + alg::AbstractNewtonAlgorithm, args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; @@ -35,7 +35,7 @@ end function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, iip, <:AbstractArray{<:Dual{T, V, P}}}, - alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt}, + alg::AbstractNewtonAlgorithm, args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) diff --git a/src/levenberg.jl b/src/levenberg.jl index 3b779b338..8e83683cc 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -87,7 +87,7 @@ numerically-difficult nonlinear systems. between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To specify, uphill moves are accepted if - ``(1-\\beta_i)^(b_{\\text{uphill}})C_{i+1} \\le C_i``, where ``C_i`` is the cost at + ``(1-\\beta_i)^{b_{\\text{uphill}}} C_{i+1} \\le C_i``, where ``C_i`` is the cost at iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0` allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves will be accepted. Defaults to `1.0`. For more details, see section 4 of @@ -178,6 +178,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy loss_old::lossType make_new_J::Bool fu_tmp::resType + mat_tmp::jType function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType, p::pType, uf::ufType, linsolve::L, J::jType, @@ -190,14 +191,15 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy a::uType, tmp_vec::uType, v_old::uType, norm_v_old::lossType, δ::uType, loss_old::lossType, make_new_J::Bool, - fu_tmp::resType) where { - iip, fType, algType, - uType, duType, resType, - pType, INType, tolType, - probType, ufType, L, - jType, JC, DᵀDType, - λType, lossType - } + fu_tmp::resType, + mat_tmp::jType) where { + iip, fType, algType, + uType, duType, resType, + pType, INType, tolType, + probType, ufType, L, + jType, JC, DᵀDType, + λType, lossType + } new{iip, fType, algType, uType, duType, resType, pType, INType, tolType, probType, ufType, L, jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, @@ -205,7 +207,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy internalnorm, retcode, abstol, prob, DᵀD, JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old, norm_v_old, δ, loss_old, make_new_J, - fu_tmp) + fu_tmp, mat_tmp) end end @@ -273,6 +275,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq δ = zero(u) make_new_J = true fu_tmp = zero(fu) + mat_tmp = zero(J) return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, jac_config, @@ -280,7 +283,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq ReturnCode.Default, abstol, prob, DᵀD, JᵀJ, alg.damping_initial, alg.damping_increase_factor, v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, - fu_tmp) + fu_tmp, mat_tmp) end function perform_step!(cache::LevenbergMarquardtCache{true}) @unpack fu, f, make_new_J = cache @@ -294,16 +297,28 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) cache.make_new_J = false end - @unpack u, p, λ, JᵀJ, DᵀD, J = cache + @unpack u, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache # Usual Levenberg-Marquardt step ("velocity"). - cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu) + # The following lines do: cache.v = -cache.mat_tmp \ cache.fu_tmp + mul!(cache.fu_tmp, J', fu) + @. cache.mat_tmp = JᵀJ + λ * DᵀD + linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp), + linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) + cache.linsolve = linres.cache + cache.v .= -cache.du_tmp # Geodesic acceleration (step_size = v + a / 2). @unpack v, alg = cache h = alg.finite_diff_step_geodesic f(cache.fu_tmp, u .+ h .* v, p) - cache.a = -J \ ((2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))) + + # The following lines do: cache.a = -J \ cache.fu_tmp + cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)) + linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp), + linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) + cache.linsolve = linres.cache + cache.a .= -cache.du_tmp # Require acceptable steps to satisfy the following condition. norm_v = norm(v) From 2cdb3a5951dcd45feeeb6b7a51323beccb2af841 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 15:43:26 +0100 Subject: [PATCH 07/10] Clarity fix --- src/levenberg.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/levenberg.jl b/src/levenberg.jl index 8e83683cc..aeb68172d 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -314,7 +314,8 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) f(cache.fu_tmp, u .+ h .* v, p) # The following lines do: cache.a = -J \ cache.fu_tmp - cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)) + mul!(cache.du_tmp, J, v) + cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- cache.du_tmp) linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp), linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) cache.linsolve = linres.cache From d23d758bfb9fc05df886b415fae683fcf8ad9e57 Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 16:36:55 +0100 Subject: [PATCH 08/10] updating to @. --- src/levenberg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/levenberg.jl b/src/levenberg.jl index aeb68172d..ddd6bd43e 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -306,7 +306,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp), linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) cache.linsolve = linres.cache - cache.v .= -cache.du_tmp + @. cache.v = -cache.du_tmp # Geodesic acceleration (step_size = v + a / 2). @unpack v, alg = cache @@ -319,7 +319,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp), linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) cache.linsolve = linres.cache - cache.a .= -cache.du_tmp + @. cache.a = -cache.du_tmp # Require acceptable steps to satisfy the following condition. norm_v = norm(v) From 6f92187655a830c1692fc214b2fe38495c7ddaec Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 17:12:05 +0100 Subject: [PATCH 09/10] Testing: removing precomp. of Levenberg --- src/NonlinearSolve.jl | 2 +- src/levenberg.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index fa2ee2009..b645df86d 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(), LevenbergMarquardt()) + (NewtonRaphson(),) else (NewtonRaphson(),) end diff --git a/src/levenberg.jl b/src/levenberg.jl index ddd6bd43e..6c263fefb 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -315,7 +315,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) # The following lines do: cache.a = -J \ cache.fu_tmp mul!(cache.du_tmp, J, v) - cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- cache.du_tmp) + @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu) / h - cache.du_tmp) linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp), linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) cache.linsolve = linres.cache From 518747a79c0a00dfea1dd3c7dbb88558f3b5ca5f Mon Sep 17 00:00:00 2001 From: Simon Carlson Date: Sun, 22 Jan 2023 18:57:24 +0100 Subject: [PATCH 10/10] Fixing load time for TrustRegion --- src/NonlinearSolve.jl | 2 +- src/trustRegion.jl | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index b645df86d..79e6788c6 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(),) + (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) else (NewtonRaphson(),) end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 1eeddc1d7..8294da579 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -232,19 +232,27 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, max_trust_radius = alg.max_trust_radius initial_trust_radius = alg.initial_trust_radius if max_trust_radius == 0.0 - max_trust_radius = max(norm(fu), maximum(u) - minimum(u)) + max_trust_radius = convert(typeof(max_trust_radius), + max(norm(fu), maximum(u) - minimum(u))) end if initial_trust_radius == 0.0 initial_trust_radius = max_trust_radius / 11 end + + loss_new = loss H = ArrayInterfaceCore.undefmatrix(u) + g = zero(fu) + shrink_counter = 0 + step_size = zero(u) + fu_new = zero(fu) + make_new_J = true + r = loss return TrustRegionCache{iip}(f, alg, u, fu, p, uf, linsolve, J, jac_config, 1, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, initial_trust_radius, - max_trust_radius, loss, loss, H, zero(fu), 0, zero(u), - u_tmp, zero(fu), true, - loss) + max_trust_radius, loss, loss_new, H, g, shrink_counter, + step_size, u_tmp, fu_new, make_new_J, r) end function perform_step!(cache::TrustRegionCache{true}) @@ -293,7 +301,7 @@ function perform_step!(cache::TrustRegionCache{false}) end function trust_region_step!(cache::TrustRegionCache) - @unpack fu_new, u_tmp, step_size, g, H, loss, alg, max_trust_r = cache + @unpack fu_new, step_size, g, H, loss, alg, max_trust_r = cache cache.loss_new = get_loss(fu_new) # Compute the ratio of the actual reduction to the predicted reduction.