From 2387fb6e3d7a3228d00d65c836062c80e5deb857 Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Fri, 20 Jan 2023 17:34:35 +0100
Subject: [PATCH 01/10] Started to implement Levenberg-Marquardt.

---
 src/NonlinearSolve.jl |   3 +-
 src/ad.jl             |   5 +-
 src/levenberg.jl      | 364 ++++++++++++++++++++++++++++++++++++++++++
 test/basictests.jl    | 170 ++++++++++++++++++++
 4 files changed, 539 insertions(+), 3 deletions(-)
 create mode 100644 src/levenberg.jl

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 305d9e5aa..c121573d3 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -32,6 +32,7 @@ include("utils.jl")
 include("jacobian.jl")
 include("raphson.jl")
 include("trustRegion.jl")
+include("levenberg.jl")
 include("ad.jl")
 
 import SnoopPrecompile
@@ -48,6 +49,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     end
 end end
 
-export NewtonRaphson, TrustRegion
+export NewtonRaphson, TrustRegion, LevenbergMarquardt
 
 end # module
diff --git a/src/ad.jl b/src/ad.jl
index b88f23751..e29c59f15 100644
--- a/src/ad.jl
+++ b/src/ad.jl
@@ -26,7 +26,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:Dual{T, V, P}},
-                         alg::Union{NewtonRaphson, TrustRegion},
+                         alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt},
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -35,7 +35,8 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::Union{NewtonRaphson, TrustRegion}, args...;
+                         alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt},
+                         args...;
                          kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
diff --git a/src/levenberg.jl b/src/levenberg.jl
new file mode 100644
index 000000000..3a23e843d
--- /dev/null
+++ b/src/levenberg.jl
@@ -0,0 +1,364 @@
+"""
+```julia
+LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(),
+                standardtag = Val{true}(), concrete_jac = nothing,
+                diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS)
+```
+
+An advanced LevenbergMarquardt implementation with support for efficient handling of sparse
+matrices via colored automatic differentiation and preconditioned linear solvers. Designed
+for large-scale and numerically-difficult nonlinear systems.
+
+### Keyword Arguments
+
+- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
+  system. This allows for multiple derivative columns to be computed simultaneously,
+  improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's
+  default chunk size mechanism. For more details, see the documentation for
+  [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
+- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
+  Note that this argument is ignored if an analytical Jacobian is passed, as that will be
+  used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via
+  SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for
+  finite differencing.
+- `standardtag`: whether to use a standardized tag definition for the purposes of automatic
+  differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`,
+  then ForwardDiff's default function naming tag is used, which results in larger stack
+  traces.
+- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
+  then the Jacobian will not be constructed and instead direct Jacobian-vector products
+  `J*v` are computed using forward-mode automatic differentiation or finite differencing
+  tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
+  for example for a preconditioner, `concrete_jac = true` can be passed in order to force
+  the construction of the Jacobian.
+- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
+  `Val{:forward}` for forward finite differences. For more details on the choices, see the
+  [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
+- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+  linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+  LinearSolve.jl default algorithm choice. For more information on available algorithm
+  choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+- `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+  preconditioners. For more information on specifying preconditioners for LinearSolve
+  algorithms, consult the
+  [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+
+!!! note
+
+    Currently, the linear solver and chunk size choice only applies to in-place defined
+    `NonlinearProblem`s. That is expected to change in the future.
+"""
+struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <:
+       AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ}
+    linsolve::L
+    precs::P
+    damping_initial::T
+    damping_increase_factor::T
+    damping_decrease_factor::T
+    finite_diff_step_geodesic::T
+    α_geodesic::T
+    b_uphill::T  # TODO test what happens if the input to this in an Interger, but Float to the others
+    min_damping_D::T
+end
+
+function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(),
+                            standardtag = Val{true}(), concrete_jac = nothing,
+                            diff_type = Val{:forward}, linsolve = nothing,
+                            precs = DEFAULT_PRECS,
+                            damping_initial::Real = 1.0,
+                            damping_increase_factor::Real = 2.0,
+                            damping_decrease_factor::Real = 3.0,
+                            finite_diff_step_geodesic::Real = 0.1,
+                            α_geodesic::Real = 0.75,
+                            b_uphill::Real = 1.0,
+                            min_damping_D::Real = 1e-6)
+    LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type,
+                       typeof(linsolve), typeof(precs), _unwrap_val(standardtag),
+                       _unwrap_val(concrete_jac), typeof(damping_initial)}(linsolve, precs,
+                                                                           damping_initial,
+                                                                           damping_increase_factor,
+                                                                           damping_decrease_factor,
+                                                                           finite_diff_step_geodesic,
+                                                                           α_geodesic,
+                                                                           b_uphill,
+                                                                           min_damping_D)
+end
+
+mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType,
+                                       INType, tolType,
+                                       probType, ufType, L, jType, JC, DᵀDType,
+                                       floatType
+                                       }
+    f::fType
+    alg::algType
+    u::uType
+    fu::resType
+    p::pType
+    uf::ufType
+    linsolve::L
+    J::jType
+    du1::duType
+    jac_config::JC
+    iter::Int
+    force_stop::Bool
+    maxiters::Int
+    internalnorm::INType
+    retcode::SciMLBase.ReturnCode.T
+    abstol::tolType
+    prob::probType
+    DᵀD::DᵀDType
+    JᵀJ::jType
+    λ::floatType
+    λ_factor::floatType         # TODO test if this handles interger inputs. (test to input damp_init::float and damp_inc::int)
+    v::uType
+    a::uType
+    tmp_vec::uType
+    v_old::uType
+    norm_v_old::floatType
+    δ::uType
+    loss_old::floatType
+
+    function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType,
+                                          p::pType,
+                                          uf::ufType, linsolve::L, J::jType, du1::duType,
+                                          jac_config::JC, iter::Int,
+                                          force_stop::Bool, maxiters::Int,
+                                          internalnorm::INType,
+                                          retcode::SciMLBase.ReturnCode.T, abstol::tolType,
+                                          prob::probType, DᵀD::DᵀDType, JᵀJ::jType,
+                                          λ::floatType, λ_factor::floatType, v::uType,
+                                          a::uType, tmp_vec::uType,
+                                          v_old::uType, norm_v_old::floatType, δ::uType,
+                                          loss_old::floatType) where {
+                                                                      iip, fType, algType,
+                                                                      uType,
+                                                                      duType, resType,
+                                                                      pType,
+                                                                      INType,
+                                                                      tolType,
+                                                                      probType, ufType, L,
+                                                                      jType, JC,
+                                                                      DᵀDType,
+                                                                      floatType
+                                                                      }
+        new{iip, fType, algType, uType, duType, resType,
+            pType, INType, tolType, probType, ufType, L,
+            jType, JC, DᵀDType, floatType}(f,
+                                                                   alg,
+                                                                   u,
+                                                                   fu,
+                                                                   p,
+                                                                   uf,
+                                                                   linsolve,
+                                                                   J,
+                                                                   du1,
+                                                                   jac_config,
+                                                                   iter,
+                                                                   force_stop,
+                                                                   maxiters,
+                                                                   internalnorm,
+                                                                   retcode,
+                                                                   abstol,
+                                                                   prob,
+                                                                   DᵀD,
+                                                                   JᵀJ,
+                                                                   λ,
+                                                                   λ_factor,
+                                                                   v,
+                                                                   a,
+                                                                   tmp_vec,
+                                                                   v_old,
+                                                                   norm_v_old,
+                                                                   δ,
+                                                                   loss_old)
+    end
+end
+
+function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{true})
+    uf = JacobianWrapper(f, p)
+    J = ArrayInterfaceCore.undefmatrix(u)
+
+    linprob = LinearProblem(J, _vec(zero(u)); u0 = _vec(zero(u)))
+    weight = similar(u)
+    recursivefill!(weight, false)
+
+    Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
+                                 nothing)..., weight)
+    linsolve = init(linprob, alg.linsolve, alias_A = true, alias_b = true,
+                    Pl = Pl, Pr = Pr)
+
+    du1 = zero(u)
+    du2 = zero(u)
+    tmp = zero(u)
+    jac_config = build_jac_config(alg, f, uf, du1, u, tmp, du2)
+
+    uf, linsolve, J, du1, jac_config
+end
+
+function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{false})
+    JacobianWrapper(f, p), nothing, ArrayInterfaceCore.undefmatrix(u), nothing, nothing
+end
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarquardt,
+                          args...;
+                          alias_u0 = false,
+                          maxiters = 1000,
+                          abstol = 1e-6,
+                          internalnorm = DEFAULT_NORM,
+                          kwargs...) where {uType, iip}
+    if alias_u0
+        u = prob.u0
+    else
+        u = deepcopy(prob.u0)
+    end
+    f = prob.f
+    p = prob.p
+    if iip
+        fu = zero(u)
+        f(fu, u, p)
+    else
+        fu = f(u, p)
+    end
+    uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip))
+
+    # "A good compromise is to specify a minimum value of the damping terms in DᵀD".
+    if u isa Number
+        DᵀD = alg.min_damping_D # TODO check if type is correct.
+    else
+        d = d = similar(u)
+        d .= alg.min_damping_D
+        DᵀD = Diagonal(d)
+    end
+    loss = internalnorm(fu)
+    JᵀJ = zero(J)
+    v = zero(u)
+    a = zero(u)
+    tmp_vec = zero(u)
+    v_old = zero(u)
+    δ = zero(u)
+
+    return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config,
+                                        1, false, maxiters, internalnorm,
+                                        ReturnCode.Default, abstol, prob, DᵀD, JᵀJ,
+                                        alg.damping_initial, alg.damping_increase_factor,
+                                        v, a, tmp_vec, v_old, loss, δ, loss)
+end
+
+function perform_step!(cache::LevenbergMarquardtCache{true})
+    # TODO implement for in-place...
+    # @unpack fu, f = cache
+    # if iszero(fu)
+    #     cache.force_stop = true
+    #     return nothing
+    # end
+
+    # cache.J = jacobian(cache, f)
+    # @unpack u, f, p, λ, J = cache
+    # mul!(cache.JᵀJ, J', J)
+    # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
+
+    # # Usual Levenberg-Marquardt step ("velocity").
+    # cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu)
+
+    # @unpack v, alg = cache
+    # h = alg.finite_diff_step_geodesic
+    # # Geodesic acceleration (step_size = v + a / 2).
+    # cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- mul!(cache.Jv, J, v)))
+
+    # # Require acceptable steps to satisfy the following condition.
+    # norm_v = norm(v)
+    # if (2 * norm(cache.a) / norm_v) < alg.α_geodesic
+    #     cache.δ = v + cache.a / 2
+    #     @unpack δ, loss_old, norm_v_old, v_old = cache
+    #     fu_new = f(u .+ δ, p)
+    #     loss = cache.internalnorm(fu_new)
+
+    #     # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
+    #     β = dot(v, v_old) / (norm_v * norm_v_old)
+    #     if (1 - β)^alg.b_uphill * loss ≤ loss_old
+    #         # Accept step.
+    #         cache.u += δ
+    #         if loss < cache.abstol
+    #             cache.force_stop = true
+    #             return nothing
+    #         end
+    #         cache.fu = fu_new
+    #         cache.v_old = v
+    #         cache.norm_v_old = norm_v
+    #         cache.loss_old = loss
+    #         cache.λ_factor = 1 / alg.damping_decrease_factor
+    #     end
+    # end
+    # cache.λ *= cache.λ_factor
+    # cache.λ_factor = alg.damping_increase_factor
+    return nothing
+end
+
+function perform_step!(cache::LevenbergMarquardtCache{false})
+    @unpack fu, f = cache
+    if iszero(fu)
+        cache.force_stop = true
+        return nothing
+    end
+
+    J = jacobian(cache, f)
+    @unpack u, f, p, λ = cache
+    cache.JᵀJ = J' * J
+    if cache.DᵀD isa Number
+        cache.DᵀD = max(cache.DᵀD, cache.JᵀJ)
+    else
+        cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
+    end
+
+    # Usual Levenberg-Marquardt step ("velocity").
+    cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu)
+
+    @unpack v, alg = cache
+    h = alg.finite_diff_step_geodesic
+    # Geodesic acceleration (step_size = v + a / 2).
+    cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- J * v))
+
+    # Require acceptable steps to satisfy the following condition.
+    norm_v = norm(v)
+    if (2 * norm(cache.a) / norm_v) < alg.α_geodesic
+        cache.δ = v + cache.a / 2
+        @unpack δ, loss_old, norm_v_old, v_old = cache
+        fu_new = f(u .+ δ, p)
+        loss = cache.internalnorm(fu_new)
+
+        # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
+        β = dot(v, v_old) / (norm_v * norm_v_old)
+        if (1 - β)^alg.b_uphill * loss ≤ loss_old
+            # Accept step.
+            cache.u += δ
+            if loss < cache.abstol
+                cache.force_stop = true
+                return nothing
+            end
+            cache.fu = fu_new
+            cache.v_old = v
+            cache.norm_v_old = norm_v
+            cache.loss_old = loss
+            cache.λ_factor = 1 / alg.damping_decrease_factor
+        end
+    end
+    cache.λ *= cache.λ_factor
+    cache.λ_factor = alg.damping_increase_factor
+    return nothing
+end
+
+function SciMLBase.solve!(cache::LevenbergMarquardtCache)
+    while !cache.force_stop && cache.iter < cache.maxiters
+        perform_step!(cache)
+        cache.iter += 1
+    end
+
+    if cache.iter == cache.maxiters
+        cache.retcode = ReturnCode.MaxIters
+    else
+        cache.retcode = ReturnCode.Success
+    end
+
+    SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu;
+                             retcode = cache.retcode)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index ed2907fdb..6cd59aaa9 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -299,3 +299,173 @@ for options in list_of_options
     sol = solve(probN, alg, abstol = 1e-10)
     @test all(abs.(f(u, p)) .< 1e-10)
 end
+
+# --- LevenbergMarquardt tests ---
+
+function benchmark_immutable(f, u0)
+    probN = NonlinearProblem{false}(f, u0)
+    solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+    sol = solve!(solver)
+end
+
+function benchmark_mutable(f, u0)
+    probN = NonlinearProblem{false}(f, u0)
+    solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+    sol = solve!(solver)
+end
+
+function benchmark_scalar(f, u0)
+    probN = NonlinearProblem{false}(f, u0)
+    sol = (solve(probN, LevenbergMarquardt()))
+end
+
+function ff(u, p)
+    u .* u .- 2
+end
+
+function sf(u, p)
+    u * u - 2
+end
+u0 = [1.0, 1.0]
+
+sol = benchmark_immutable(ff, cu0)
+@test sol.retcode === ReturnCode.Success
+@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+sol = benchmark_mutable(ff, u0)
+@test sol.retcode === ReturnCode.Success
+@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+sol = benchmark_scalar(sf, csu0)
+@test sol.retcode === ReturnCode.Success
+@test abs(sol.u * sol.u - 2) < 1e-9
+
+# function benchmark_inplace(f, u0)
+#     probN = NonlinearProblem{true}(f, u0)
+#     solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+#     sol = solve!(solver)
+# end
+
+# function ffiip(du, u, p)
+#     du .= u .* u .- 2
+# end
+# u0 = [1.0, 1.0]
+
+# sol = benchmark_inplace(ffiip, u0)
+# @test sol.retcode === ReturnCode.Success
+# @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+# u0 = [1.0, 1.0]
+# probN = NonlinearProblem{true}(ffiip, u0)
+# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+# @test (@ballocated solve!(solver)) < 120
+
+# AD Tests
+using ForwardDiff
+
+# Immutable
+f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
+
+g = function (p)
+    probN = NonlinearProblem{false}(f, csu0, p)
+    sol = solve(probN, LevenbergMarquardt(), abstol = 1e-9)
+    return sol.u[end]
+end
+
+for p in 1.1:0.1:100.0
+    @test g(p) ≈ sqrt(p)
+    @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+end
+
+# Scalar
+f, u0 = (u, p) -> u * u - p, 1.0
+
+g = function (p)
+    probN = NonlinearProblem{false}(f, oftype(p, u0), p)
+    sol = solve(probN, LevenbergMarquardt(), abstol = 1e-10)
+    return sol.u
+end
+
+@test ForwardDiff.derivative(g, 3.0) ≈ 1 / (2 * sqrt(3.0))
+
+for p in 1.1:0.1:100.0
+    @test g(p) ≈ sqrt(p)
+    @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+end
+
+f = (u, p) -> p[1] * u * u - p[2]
+t = (p) -> [sqrt(p[2] / p[1])]
+p = [0.9, 50.0]
+gnewton = function (p)
+    probN = NonlinearProblem{false}(f, 0.5, p)
+    sol = solve(probN, LevenbergMarquardt())
+    return [sol.u]
+end
+@test gnewton(p) ≈ [sqrt(p[2] / p[1])]
+@test ForwardDiff.jacobian(gnewton, p) ≈ ForwardDiff.jacobian(t, p)
+
+# Error Checks
+f, u0 = (u, p) -> u .* u .- 2.0, @SVector[1.0, 1.0]
+probN = NonlinearProblem(f, u0)
+
+@test solve(probN, LevenbergMarquardt()).u[end] ≈ sqrt(2.0)
+@test solve(probN, LevenbergMarquardt(; autodiff = false)).u[end] ≈ sqrt(2.0)
+
+for u0 in [1.0, [1, 1.0]]
+    local f, probN, sol
+    f = (u, p) -> u .* u .- 2.0
+    probN = NonlinearProblem(f, u0)
+    sol = sqrt(2) * u0
+
+    @test solve(probN, LevenbergMarquardt()).u ≈ sol
+    @test solve(probN, LevenbergMarquardt()).u ≈ sol
+    @test solve(probN, LevenbergMarquardt(; autodiff = false)).u ≈ sol
+end
+
+# Test that `LevenbergMarquardt` passes a test that `NewtonRaphson` fails on.
+u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+global g, f
+f = (u, p) -> 0.010000000000000002 .+
+              10.000000000000002 ./ (1 .+
+               (0.21640425613334457 .+
+                216.40425613334457 ./ (1 .+
+                 (0.21640425613334457 .+
+                  216.40425613334457 ./
+                  (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
+              0.0011552453009332421u .- p
+g = function (p)
+    probN = NonlinearProblem{false}(f, u0, p)
+    sol = solve(probN, LevenbergMarquardt(), abstol = 1e-10)
+    return sol.u
+end
+p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+u = g(p)
+f(u, p)
+@test all(abs.(f(u, p)) .< 1e-10)
+
+# # Test kwars in `LevenbergMarquardt`
+# max_trust_radius = [10.0, 100.0, 1000.0]
+# initial_trust_radius = [10.0, 1.0, 0.1]
+# step_threshold = [0.0, 0.01, 0.25]
+# shrink_threshold = [0.25, 0.3, 0.5]
+# expand_threshold = [0.5, 0.8, 0.9]
+# shrink_factor = [0.1, 0.3, 0.5]
+# expand_factor = [1.5, 2.0, 3.0]
+# max_shrink_times = [10, 20, 30]
+
+# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
+#                       shrink_threshold, expand_threshold, shrink_factor,
+#                       expand_factor, max_shrink_times)
+# for options in list_of_options
+#     local probN, sol, alg
+#     alg = LevenbergMarquardt(max_trust_radius = options[1],
+#                       initial_trust_radius = options[2],
+#                       step_threshold = options[3],
+#                       shrink_threshold = options[4],
+#                       expand_threshold = options[5],
+#                       shrink_factor = options[6],
+#                       expand_factor = options[7],
+#                       max_shrink_times = options[8])
+
+#     probN = NonlinearProblem{false}(f, u0, p)
+#     sol = solve(probN, alg, abstol = 1e-10)
+#     @test all(abs.(f(u, p)) .< 1e-10)
+# end

From ee5931dc3a6632cec3cefe290e602a8f50f501eb Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 10:32:54 +0100
Subject: [PATCH 02/10] Implemented Levenberg for iip=true

---
 src/levenberg.jl   | 287 +++++++++++++++++++++++++--------------------
 test/basictests.jl |  91 +++++++-------
 2 files changed, 203 insertions(+), 175 deletions(-)

diff --git a/src/levenberg.jl b/src/levenberg.jl
index 3a23e843d..642efca55 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -1,13 +1,27 @@
 """
 ```julia
-LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(),
-                standardtag = Val{true}(), concrete_jac = nothing,
-                diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS)
+LevenbergMarquardt(; chunk_size = Val{0}(),
+                    autodiff = Val{true}(),
+                    standardtag = Val{true}(),
+                    concrete_jac = nothing,
+                    diff_type = Val{:forward},
+                    linsolve = nothing, precs = DEFAULT_PRECS,
+                    damping_initial::Real = 1.0,
+                    damping_increase_factor::Real = 2.0,
+                    damping_decrease_factor::Real = 3.0,
+                    finite_diff_step_geodesic::Real = 0.1,
+                    α_geodesic::Real = 0.75,
+                    b_uphill::Real = 1.0,
+                    min_damping_D::AbstractFloat = 1e-8)
 ```
 
-An advanced LevenbergMarquardt implementation with support for efficient handling of sparse
-matrices via colored automatic differentiation and preconditioned linear solvers. Designed
-for large-scale and numerically-difficult nonlinear systems.
+An advanced Levenberg-Marquardt implementation with the improvements suggested in the
+[paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt
+algorithm for nonlinear least-squares minimization". This implementation is designed with
+support for efficient handling of sparse matrices via colored automatic differentiation and
+preconditioned linear solvers. Designed for large-scale and numerically-difficult nonlinear
+systems.
+
 
 ### Keyword Arguments
 
@@ -42,6 +56,32 @@ for large-scale and numerically-difficult nonlinear systems.
   preconditioners. For more information on specifying preconditioners for LinearSolve
   algorithms, consult the
   [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+- `damping_initial`: the initial damping factor. The damping is proportional to the inverse
+  of the step size and is changed dynamically in each iteration. Defaults to `1.0`. For more
+  details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885).
+- `damping_increase_factor`: the factor by which the damping is increased if a step isn't
+  accepted i.e. how much smaller the next step size should be if a step is rejected.
+  Defaults to `2.0`. For more details, see section 2.1 of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `damping_decrease_factor`: the factor by which the damping is decreased if a step is
+  accepted i.e. how much larger the next step size should be if a step is accepted.
+  Defaults to `3.0`. For more details, see section 2.1 of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `finite_diff_step_geodesic`: the finite differencing step size used for the geodesic
+  acceleration method. Defaults to `0.1` which means thats the step is about 10% of the
+  first order step. For more details, see section 3 of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `α_geodesic`: a factor that determines if a step is accepted or rejected.
+  In order to utilize the geodesic acceleration as an addition to the Levenberg-Marquardt
+  algorithm, it is necessary to make one small addition. To require acceptable steps to
+  satisfy the condition ... continue to document this...
+
+# TODO documentation and cleanup of code & che
+
+  α_geodesic::Real = 0.75,
+  b_uphill::Real = 1.0,
+  min_damping_D::AbstractFloat = 1e-8
+
 
 !!! note
 
@@ -57,13 +97,16 @@ struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <:
     damping_decrease_factor::T
     finite_diff_step_geodesic::T
     α_geodesic::T
-    b_uphill::T  # TODO test what happens if the input to this in an Interger, but Float to the others
+    b_uphill::T
     min_damping_D::T
 end
 
-function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(),
-                            standardtag = Val{true}(), concrete_jac = nothing,
-                            diff_type = Val{:forward}, linsolve = nothing,
+function LevenbergMarquardt(; chunk_size = Val{0}(),
+                            autodiff = Val{true}(),
+                            standardtag = Val{true}(),
+                            concrete_jac = nothing,
+                            diff_type = Val{:forward},
+                            linsolve = nothing,
                             precs = DEFAULT_PRECS,
                             damping_initial::Real = 1.0,
                             damping_increase_factor::Real = 2.0,
@@ -71,17 +114,17 @@ function LevenbergMarquardt(; chunk_size = Val{0}(), autodiff = Val{true}(),
                             finite_diff_step_geodesic::Real = 0.1,
                             α_geodesic::Real = 0.75,
                             b_uphill::Real = 1.0,
-                            min_damping_D::Real = 1e-6)
+                            min_damping_D::AbstractFloat = 1e-8)
     LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type,
                        typeof(linsolve), typeof(precs), _unwrap_val(standardtag),
-                       _unwrap_val(concrete_jac), typeof(damping_initial)}(linsolve, precs,
-                                                                           damping_initial,
-                                                                           damping_increase_factor,
-                                                                           damping_decrease_factor,
-                                                                           finite_diff_step_geodesic,
-                                                                           α_geodesic,
-                                                                           b_uphill,
-                                                                           min_damping_D)
+                       _unwrap_val(concrete_jac), typeof(min_damping_D)}(linsolve, precs,
+                                                                         damping_initial,
+                                                                         damping_increase_factor,
+                                                                         damping_decrease_factor,
+                                                                         finite_diff_step_geodesic,
+                                                                         α_geodesic,
+                                                                         b_uphill,
+                                                                         min_damping_D)
 end
 
 mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType,
@@ -97,7 +140,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
     uf::ufType
     linsolve::L
     J::jType
-    du1::duType
+    du_tmp::duType
     jac_config::JC
     iter::Int
     force_stop::Bool
@@ -109,7 +152,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
     DᵀD::DᵀDType
     JᵀJ::jType
     λ::floatType
-    λ_factor::floatType         # TODO test if this handles interger inputs. (test to input damp_init::float and damp_inc::int)
+    λ_factor::floatType
     v::uType
     a::uType
     tmp_vec::uType
@@ -117,60 +160,36 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
     norm_v_old::floatType
     δ::uType
     loss_old::floatType
+    make_new_J::Bool
+    fu_tmp::resType
 
     function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType,
-                                          p::pType,
-                                          uf::ufType, linsolve::L, J::jType, du1::duType,
-                                          jac_config::JC, iter::Int,
+                                          p::pType, uf::ufType, linsolve::L, J::jType,
+                                          du_tmp::duType, jac_config::JC, iter::Int,
                                           force_stop::Bool, maxiters::Int,
                                           internalnorm::INType,
                                           retcode::SciMLBase.ReturnCode.T, abstol::tolType,
                                           prob::probType, DᵀD::DᵀDType, JᵀJ::jType,
                                           λ::floatType, λ_factor::floatType, v::uType,
-                                          a::uType, tmp_vec::uType,
-                                          v_old::uType, norm_v_old::floatType, δ::uType,
-                                          loss_old::floatType) where {
-                                                                      iip, fType, algType,
-                                                                      uType,
-                                                                      duType, resType,
-                                                                      pType,
-                                                                      INType,
-                                                                      tolType,
-                                                                      probType, ufType, L,
-                                                                      jType, JC,
-                                                                      DᵀDType,
-                                                                      floatType
-                                                                      }
+                                          a::uType, tmp_vec::uType, v_old::uType,
+                                          norm_v_old::floatType, δ::uType,
+                                          loss_old::floatType,
+                                          make_new_J::Bool,
+                                          fu_tmp::resType) where {
+                                                                  iip, fType, algType,
+                                                                  uType, duType, resType,
+                                                                  pType, INType, tolType,
+                                                                  probType, ufType, L,
+                                                                  jType, JC, DᵀDType,
+                                                                  floatType
+                                                                  }
         new{iip, fType, algType, uType, duType, resType,
             pType, INType, tolType, probType, ufType, L,
-            jType, JC, DᵀDType, floatType}(f,
-                                                                   alg,
-                                                                   u,
-                                                                   fu,
-                                                                   p,
-                                                                   uf,
-                                                                   linsolve,
-                                                                   J,
-                                                                   du1,
-                                                                   jac_config,
-                                                                   iter,
-                                                                   force_stop,
-                                                                   maxiters,
-                                                                   internalnorm,
-                                                                   retcode,
-                                                                   abstol,
-                                                                   prob,
-                                                                   DᵀD,
-                                                                   JᵀJ,
-                                                                   λ,
-                                                                   λ_factor,
-                                                                   v,
-                                                                   a,
-                                                                   tmp_vec,
-                                                                   v_old,
-                                                                   norm_v_old,
-                                                                   δ,
-                                                                   loss_old)
+            jType, JC, DᵀDType, floatType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
+                                           jac_config, iter, force_stop, maxiters,
+                                           internalnorm, retcode, abstol, prob, DᵀD,
+                                           JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old,
+                                           norm_v_old, δ, loss_old, make_new_J, fu_tmp)
     end
 end
 
@@ -219,16 +238,16 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq
     else
         fu = f(u, p)
     end
-    uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip))
+    uf, linsolve, J, du_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip))
 
-    # "A good compromise is to specify a minimum value of the damping terms in DᵀD".
     if u isa Number
-        DᵀD = alg.min_damping_D # TODO check if type is correct.
+        DᵀD = alg.min_damping_D
     else
-        d = d = similar(u)
+        d = similar(u)
         d .= alg.min_damping_D
         DᵀD = Diagonal(d)
     end
+
     loss = internalnorm(fu)
     JᵀJ = zero(J)
     v = zero(u)
@@ -236,82 +255,91 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq
     tmp_vec = zero(u)
     v_old = zero(u)
     δ = zero(u)
+    make_new_J = true
+    fu_tmp = zero(fu)
 
-    return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config,
+    return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
+                                        jac_config,
                                         1, false, maxiters, internalnorm,
                                         ReturnCode.Default, abstol, prob, DᵀD, JᵀJ,
                                         alg.damping_initial, alg.damping_increase_factor,
-                                        v, a, tmp_vec, v_old, loss, δ, loss)
+                                        v, a, tmp_vec, v_old, loss, δ, loss, make_new_J,
+                                        fu_tmp)
 end
-
 function perform_step!(cache::LevenbergMarquardtCache{true})
-    # TODO implement for in-place...
-    # @unpack fu, f = cache
-    # if iszero(fu)
-    #     cache.force_stop = true
-    #     return nothing
-    # end
-
-    # cache.J = jacobian(cache, f)
-    # @unpack u, f, p, λ, J = cache
-    # mul!(cache.JᵀJ, J', J)
-    # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
-
-    # # Usual Levenberg-Marquardt step ("velocity").
-    # cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu)
-
-    # @unpack v, alg = cache
-    # h = alg.finite_diff_step_geodesic
-    # # Geodesic acceleration (step_size = v + a / 2).
-    # cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- mul!(cache.Jv, J, v)))
-
-    # # Require acceptable steps to satisfy the following condition.
-    # norm_v = norm(v)
-    # if (2 * norm(cache.a) / norm_v) < alg.α_geodesic
-    #     cache.δ = v + cache.a / 2
-    #     @unpack δ, loss_old, norm_v_old, v_old = cache
-    #     fu_new = f(u .+ δ, p)
-    #     loss = cache.internalnorm(fu_new)
-
-    #     # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
-    #     β = dot(v, v_old) / (norm_v * norm_v_old)
-    #     if (1 - β)^alg.b_uphill * loss ≤ loss_old
-    #         # Accept step.
-    #         cache.u += δ
-    #         if loss < cache.abstol
-    #             cache.force_stop = true
-    #             return nothing
-    #         end
-    #         cache.fu = fu_new
-    #         cache.v_old = v
-    #         cache.norm_v_old = norm_v
-    #         cache.loss_old = loss
-    #         cache.λ_factor = 1 / alg.damping_decrease_factor
-    #     end
-    # end
-    # cache.λ *= cache.λ_factor
-    # cache.λ_factor = alg.damping_increase_factor
+    @unpack fu, f, make_new_J = cache
+    if iszero(fu)
+        cache.force_stop = true
+        return nothing
+    end
+    if make_new_J
+        jacobian!(cache.J, cache)
+        mul!(cache.JᵀJ, cache.J', cache.J)
+        cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
+        cache.make_new_J = false
+    end
+    @unpack u, p, λ, JᵀJ, DᵀD, J = cache
+
+    # Usual Levenberg-Marquardt step ("velocity").
+    cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu)
+
+    @unpack v, alg = cache
+    h = alg.finite_diff_step_geodesic
+    # Geodesic acceleration (step_size = v + a / 2).
+    f(cache.fu_tmp, u .+ h .* v, p)
+    cache.a = -J \ ((2 / h) .*
+                    ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)))
+
+    # Require acceptable steps to satisfy the following condition.
+    norm_v = norm(v)
+    if (2 * norm(cache.a) / norm_v) < alg.α_geodesic
+        @. cache.δ = v + cache.a / 2
+        @unpack δ, loss_old, norm_v_old, v_old = cache
+        f(cache.fu_tmp, u .+ δ, p)
+        loss = cache.internalnorm(cache.fu_tmp)
+
+        # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1).
+        β = dot(v, v_old) / (norm_v * norm_v_old)
+        if (1 - β)^alg.b_uphill * loss ≤ loss_old
+            # Accept step.
+            cache.u .+= δ
+            if loss < cache.abstol
+                cache.force_stop = true
+                return nothing
+            end
+            cache.fu .= cache.fu_tmp
+            cache.v_old .= v
+            cache.norm_v_old = norm_v
+            cache.loss_old = loss
+            cache.λ_factor = 1 / alg.damping_decrease_factor
+            cache.make_new_J = true
+        end
+    end
+    cache.λ *= cache.λ_factor
+    cache.λ_factor = alg.damping_increase_factor
     return nothing
 end
 
 function perform_step!(cache::LevenbergMarquardtCache{false})
-    @unpack fu, f = cache
+    @unpack fu, f, make_new_J = cache
     if iszero(fu)
         cache.force_stop = true
         return nothing
     end
-
-    J = jacobian(cache, f)
-    @unpack u, f, p, λ = cache
-    cache.JᵀJ = J' * J
-    if cache.DᵀD isa Number
-        cache.DᵀD = max(cache.DᵀD, cache.JᵀJ)
-    else
-        cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
+    if make_new_J
+        cache.J = jacobian(cache, f)
+        cache.JᵀJ = cache.J' * cache.J
+        if cache.JᵀJ isa Number
+            cache.DᵀD = max(cache.DᵀD, cache.JᵀJ)
+        else
+            cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
+        end
+        cache.make_new_J = false
     end
+    @unpack u, p, λ, JᵀJ, DᵀD, J = cache
 
     # Usual Levenberg-Marquardt step ("velocity").
-    cache.v = -(cache.JᵀJ + λ * cache.DᵀD) \ (J' * fu)
+    cache.v = -(JᵀJ + λ * DᵀD) \ (J' * fu)
 
     @unpack v, alg = cache
     h = alg.finite_diff_step_geodesic
@@ -340,6 +368,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
             cache.norm_v_old = norm_v
             cache.loss_old = loss
             cache.λ_factor = 1 / alg.damping_decrease_factor
+            cache.make_new_J = true
         end
     end
     cache.λ *= cache.λ_factor
diff --git a/test/basictests.jl b/test/basictests.jl
index 6cd59aaa9..0e3b6fa95 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -19,7 +19,7 @@ end
 
 function benchmark_scalar(f, u0)
     probN = NonlinearProblem{false}(f, u0)
-    sol = (solve(probN, NewtonRaphson()))
+    sol = (solve(probN, NewtonRaphson(), abstol = 1e-9))
 end
 
 function ff(u, p)
@@ -146,7 +146,7 @@ end
 
 function benchmark_scalar(f, u0)
     probN = NonlinearProblem{false}(f, u0)
-    sol = (solve(probN, TrustRegion()))
+    sol = (solve(probN, TrustRegion(), abstol = 1e-9))
 end
 
 function ff(u, p)
@@ -316,7 +316,7 @@ end
 
 function benchmark_scalar(f, u0)
     probN = NonlinearProblem{false}(f, u0)
-    sol = (solve(probN, LevenbergMarquardt()))
+    sol = (solve(probN, LevenbergMarquardt(), abstol = 1e-9))
 end
 
 function ff(u, p)
@@ -338,25 +338,25 @@ sol = benchmark_scalar(sf, csu0)
 @test sol.retcode === ReturnCode.Success
 @test abs(sol.u * sol.u - 2) < 1e-9
 
-# function benchmark_inplace(f, u0)
-#     probN = NonlinearProblem{true}(f, u0)
-#     solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
-#     sol = solve!(solver)
-# end
+function benchmark_inplace(f, u0)
+    probN = NonlinearProblem{true}(f, u0)
+    solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+    sol = solve!(solver)
+end
 
-# function ffiip(du, u, p)
-#     du .= u .* u .- 2
-# end
-# u0 = [1.0, 1.0]
+function ffiip(du, u, p)
+    du .= u .* u .- 2
+end
+u0 = [1.0, 1.0]
 
-# sol = benchmark_inplace(ffiip, u0)
-# @test sol.retcode === ReturnCode.Success
-# @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+sol = benchmark_inplace(ffiip, u0)
+@test sol.retcode === ReturnCode.Success
+@test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
 
-# u0 = [1.0, 1.0]
-# probN = NonlinearProblem{true}(ffiip, u0)
-# solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
-# @test (@ballocated solve!(solver)) < 120
+u0 = [1.0, 1.0]
+probN = NonlinearProblem{true}(ffiip, u0)
+solver = init(probN, LevenbergMarquardt(), abstol = 1e-9)
+@test (@ballocated solve!(solver)) < 120
 
 # AD Tests
 using ForwardDiff
@@ -442,30 +442,29 @@ f(u, p)
 @test all(abs.(f(u, p)) .< 1e-10)
 
 # # Test kwars in `LevenbergMarquardt`
-# max_trust_radius = [10.0, 100.0, 1000.0]
-# initial_trust_radius = [10.0, 1.0, 0.1]
-# step_threshold = [0.0, 0.01, 0.25]
-# shrink_threshold = [0.25, 0.3, 0.5]
-# expand_threshold = [0.5, 0.8, 0.9]
-# shrink_factor = [0.1, 0.3, 0.5]
-# expand_factor = [1.5, 2.0, 3.0]
-# max_shrink_times = [10, 20, 30]
-
-# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
-#                       shrink_threshold, expand_threshold, shrink_factor,
-#                       expand_factor, max_shrink_times)
-# for options in list_of_options
-#     local probN, sol, alg
-#     alg = LevenbergMarquardt(max_trust_radius = options[1],
-#                       initial_trust_radius = options[2],
-#                       step_threshold = options[3],
-#                       shrink_threshold = options[4],
-#                       expand_threshold = options[5],
-#                       shrink_factor = options[6],
-#                       expand_factor = options[7],
-#                       max_shrink_times = options[8])
-
-#     probN = NonlinearProblem{false}(f, u0, p)
-#     sol = solve(probN, alg, abstol = 1e-10)
-#     @test all(abs.(f(u, p)) .< 1e-10)
-# end
+damping_initial = [0.5, 2.0, 5.0]
+damping_increase_factor = [1.5, 3.0, 10.0]
+damping_decrease_factor = [2, 5, 10]
+finite_diff_step_geodesic = [0.02, 0.2, 0.3]
+α_geodesic = [0.6, 0.8, 0.9]
+b_uphill = [0, 1, 2]
+min_damping_D = [1e-12, 1e-9, 1e-4]
+
+list_of_options = zip(damping_initial, damping_increase_factor, damping_decrease_factor,
+                      finite_diff_step_geodesic, α_geodesic, b_uphill,
+                      min_damping_D)
+for options in list_of_options
+    local probN, sol, alg
+    alg = LevenbergMarquardt(damping_initial = options[1],
+                      damping_increase_factor = options[2],
+                      damping_decrease_factor = options[3],
+                      finite_diff_step_geodesic = options[4],
+                      α_geodesic = options[5],
+                      b_uphill = options[6],
+                      min_damping_D = options[7])
+
+    probN = NonlinearProblem{false}(f, u0, p)
+    sol = solve(probN, alg, abstol = 1e-10)
+    @test all(abs.(f(u, p)) .< 1e-10)
+
+end

From a166576d340c3b5c550f4c7ff61e68050ce09d6d Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 13:25:02 +0100
Subject: [PATCH 03/10] Added documentation

---
 src/NonlinearSolve.jl |   2 +-
 src/levenberg.jl      | 125 +++++++++++++++++++++++-------------------
 2 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 063eae319..79e6788c6 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
     precompile_algs = if VERSION >= v"1.7"
-        (NewtonRaphson(), TrustRegion())
+        (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
     else
         (NewtonRaphson(),)
     end
diff --git a/src/levenberg.jl b/src/levenberg.jl
index 642efca55..3b779b338 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -17,10 +17,8 @@ LevenbergMarquardt(; chunk_size = Val{0}(),
 
 An advanced Levenberg-Marquardt implementation with the improvements suggested in the
 [paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt
-algorithm for nonlinear least-squares minimization". This implementation is designed with
-support for efficient handling of sparse matrices via colored automatic differentiation and
-preconditioned linear solvers. Designed for large-scale and numerically-difficult nonlinear
-systems.
+algorithm for nonlinear least-squares minimization". Designed for large-scale and
+numerically-difficult nonlinear systems.
 
 
 ### Keyword Arguments
@@ -56,31 +54,49 @@ systems.
   preconditioners. For more information on specifying preconditioners for LinearSolve
   algorithms, consult the
   [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
-- `damping_initial`: the initial damping factor. The damping is proportional to the inverse
-  of the step size and is changed dynamically in each iteration. Defaults to `1.0`. For more
-  details, see section 2.1 of [this paper](https://arxiv.org/abs/1201.5885).
-- `damping_increase_factor`: the factor by which the damping is increased if a step isn't
-  accepted i.e. how much smaller the next step size should be if a step is rejected.
-  Defaults to `2.0`. For more details, see section 2.1 of
+- `damping_initial`: the starting value for the damping factor. The damping factor is
+  inversely proportional to the step size. The damping factor is adjusted during each
+  iteration. Defaults to `1.0`. For more details, see section 2.1 of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `damping_increase_factor`: the factor by which the damping is increased if a step is
+  rejected. Defaults to `2.0`. For more details, see section 2.1 of
   [this paper](https://arxiv.org/abs/1201.5885).
 - `damping_decrease_factor`: the factor by which the damping is decreased if a step is
-  accepted i.e. how much larger the next step size should be if a step is accepted.
-  Defaults to `3.0`. For more details, see section 2.1 of
+  accepted. Defaults to `3.0`. For more details, see section 2.1 of
   [this paper](https://arxiv.org/abs/1201.5885).
-- `finite_diff_step_geodesic`: the finite differencing step size used for the geodesic
-  acceleration method. Defaults to `0.1` which means thats the step is about 10% of the
-  first order step. For more details, see section 3 of
+- `finite_diff_step_geodesic`: the step size used for finite differencing used to calculate
+  the geodesic acceleration. Defaults to `0.1` which means that the step size is
+  approximately 10% of the first-order step. For more details, see section 3 of
   [this paper](https://arxiv.org/abs/1201.5885).
-- `α_geodesic`: a factor that determines if a step is accepted or rejected.
-  In order to utilize the geodesic acceleration as an addition to the Levenberg-Marquardt
-  algorithm, it is necessary to make one small addition. To require acceptable steps to
-  satisfy the condition ... continue to document this...
-
-# TODO documentation and cleanup of code & che
-
-  α_geodesic::Real = 0.75,
-  b_uphill::Real = 1.0,
-  min_damping_D::AbstractFloat = 1e-8
+- `α_geodesic`: a factor that determines if a step is accepted or rejected. To incorporate
+  geodesic acceleration as an addition to the Levenberg-Marquardt algorithm, it is necessary
+  that acceptable steps meet the condition
+  ``\\frac{2||a||}{||v||} \\le \\alpha_{\\text{geodesic}}``, where ``a`` is the geodesic
+  acceleration, ``v`` is the Levenberg-Marquardt algorithm's step (velocity along a geodesic
+  path) and `α_geodesic` is some number of order `1`. For most problems `α_geodesic = 0.75`
+  is a good value but for problems where convergence is difficult `α_geodesic = 0.1` is an
+  effective choice. Defaults to `0.75`. For more details, see section 3, equation (15) of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `b_uphill`: a factor that determines if a step is accepted or rejected. The standard
+  choice in the Levenberg-Marquardt method is to accept all steps that decrease the cost
+  and reject all steps that increase the cost. Although this is a natural and safe choice,
+  it is often not the most efficient. Therefore downhill moves are always accepted, but
+  uphill moves are only conditionally accepted. To decide whether an uphill move will be
+  accepted at each iteration ``i``, we compute
+  ``\\beta_i = \\cos(v_{\\text{new}}, v_{\\text{old}})``, which denotes the cosine angle
+  between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted
+  step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To
+  specify, uphill moves are accepted if
+  ``(1-\\beta_i)^(b_{\\text{uphill}})C_{i+1} \\le C_i``, where ``C_i`` is the cost at
+  iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0`
+  allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves
+  will be accepted. Defaults to `1.0`. For more details, see section 4 of
+  [this paper](https://arxiv.org/abs/1201.5885).
+- `min_damping_D`: the minimum value of the damping terms in the diagonal damping matrix
+  `DᵀD`, where `DᵀD` is given by the largest diagonal entries of `JᵀJ` yet encountered,
+  where `J` is the Jacobian. It is suggested by
+  [this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in
+  `DᵀD` to prevent the damping from being too small. Defaults to `1e-8`.
 
 
 !!! note
@@ -117,20 +133,20 @@ function LevenbergMarquardt(; chunk_size = Val{0}(),
                             min_damping_D::AbstractFloat = 1e-8)
     LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type,
                        typeof(linsolve), typeof(precs), _unwrap_val(standardtag),
-                       _unwrap_val(concrete_jac), typeof(min_damping_D)}(linsolve, precs,
-                                                                         damping_initial,
-                                                                         damping_increase_factor,
-                                                                         damping_decrease_factor,
-                                                                         finite_diff_step_geodesic,
-                                                                         α_geodesic,
-                                                                         b_uphill,
-                                                                         min_damping_D)
+                       _unwrap_val(concrete_jac),
+                       typeof(min_damping_D)}(linsolve, precs,
+                                              damping_initial,
+                                              damping_increase_factor,
+                                              damping_decrease_factor,
+                                              finite_diff_step_geodesic,
+                                              α_geodesic,
+                                              b_uphill,
+                                              min_damping_D)
 end
 
 mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType,
-                                       INType, tolType,
-                                       probType, ufType, L, jType, JC, DᵀDType,
-                                       floatType
+                                       INType, tolType, probType, ufType, L, jType, JC,
+                                       DᵀDType, λType, lossType
                                        }
     f::fType
     alg::algType
@@ -151,15 +167,15 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
     prob::probType
     DᵀD::DᵀDType
     JᵀJ::jType
-    λ::floatType
-    λ_factor::floatType
+    λ::λType
+    λ_factor::λType
     v::uType
     a::uType
     tmp_vec::uType
     v_old::uType
-    norm_v_old::floatType
+    norm_v_old::lossType
     δ::uType
-    loss_old::floatType
+    loss_old::lossType
     make_new_J::Bool
     fu_tmp::resType
 
@@ -170,26 +186,26 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
                                           internalnorm::INType,
                                           retcode::SciMLBase.ReturnCode.T, abstol::tolType,
                                           prob::probType, DᵀD::DᵀDType, JᵀJ::jType,
-                                          λ::floatType, λ_factor::floatType, v::uType,
+                                          λ::λType, λ_factor::λType, v::uType,
                                           a::uType, tmp_vec::uType, v_old::uType,
-                                          norm_v_old::floatType, δ::uType,
-                                          loss_old::floatType,
-                                          make_new_J::Bool,
+                                          norm_v_old::lossType, δ::uType,
+                                          loss_old::lossType, make_new_J::Bool,
                                           fu_tmp::resType) where {
                                                                   iip, fType, algType,
                                                                   uType, duType, resType,
                                                                   pType, INType, tolType,
                                                                   probType, ufType, L,
                                                                   jType, JC, DᵀDType,
-                                                                  floatType
+                                                                  λType, lossType
                                                                   }
         new{iip, fType, algType, uType, duType, resType,
             pType, INType, tolType, probType, ufType, L,
-            jType, JC, DᵀDType, floatType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
-                                           jac_config, iter, force_stop, maxiters,
-                                           internalnorm, retcode, abstol, prob, DᵀD,
-                                           JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old,
-                                           norm_v_old, δ, loss_old, make_new_J, fu_tmp)
+            jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
+                                                 jac_config, iter, force_stop, maxiters,
+                                                 internalnorm, retcode, abstol, prob, DᵀD,
+                                                 JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old,
+                                                 norm_v_old, δ, loss_old, make_new_J,
+                                                 fu_tmp)
     end
 end
 
@@ -283,12 +299,11 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     # Usual Levenberg-Marquardt step ("velocity").
     cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu)
 
+    # Geodesic acceleration (step_size = v + a / 2).
     @unpack v, alg = cache
     h = alg.finite_diff_step_geodesic
-    # Geodesic acceleration (step_size = v + a / 2).
     f(cache.fu_tmp, u .+ h .* v, p)
-    cache.a = -J \ ((2 / h) .*
-                    ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)))
+    cache.a = -J \ ((2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)))
 
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)
@@ -344,12 +359,12 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
     @unpack v, alg = cache
     h = alg.finite_diff_step_geodesic
     # Geodesic acceleration (step_size = v + a / 2).
-    cache.a = -J \ ((2 / h) .* ((f(u .+ h * v, p) .- fu) ./ h .- J * v))
+    cache.a = -J \ ((2 / h) .* ((f(u .+ h .* v, p) .- fu) ./ h .- J * v))
 
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)
     if (2 * norm(cache.a) / norm_v) < alg.α_geodesic
-        cache.δ = v + cache.a / 2
+        cache.δ = v .+ cache.a ./ 2
         @unpack δ, loss_old, norm_v_old, v_old = cache
         fu_new = f(u .+ δ, p)
         loss = cache.internalnorm(fu_new)

From a6a5bbb85a18ccdfd450668dea8894e65a91af8c Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 13:27:18 +0100
Subject: [PATCH 04/10] format

---
 test/basictests.jl | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/test/basictests.jl b/test/basictests.jl
index 0e3b6fa95..abf8eeb43 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -456,15 +456,14 @@ list_of_options = zip(damping_initial, damping_increase_factor, damping_decrease
 for options in list_of_options
     local probN, sol, alg
     alg = LevenbergMarquardt(damping_initial = options[1],
-                      damping_increase_factor = options[2],
-                      damping_decrease_factor = options[3],
-                      finite_diff_step_geodesic = options[4],
-                      α_geodesic = options[5],
-                      b_uphill = options[6],
-                      min_damping_D = options[7])
+                             damping_increase_factor = options[2],
+                             damping_decrease_factor = options[3],
+                             finite_diff_step_geodesic = options[4],
+                             α_geodesic = options[5],
+                             b_uphill = options[6],
+                             min_damping_D = options[7])
 
     probN = NonlinearProblem{false}(f, u0, p)
     sol = solve(probN, alg, abstol = 1e-10)
     @test all(abs.(f(u, p)) .< 1e-10)
-
 end

From 18948e8955c3c790ff7772852d2a57aa984e03dd Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 13:49:41 +0100
Subject: [PATCH 05/10] remove from precompilation

---
 src/NonlinearSolve.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 79e6788c6..b645df86d 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
     precompile_algs = if VERSION >= v"1.7"
-        (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+        (NewtonRaphson(),)
     else
         (NewtonRaphson(),)
     end

From 1ecad1574ac17b66315dd878339af71826638c29 Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 15:29:36 +0100
Subject: [PATCH 06/10] added linsolve, and precompile

---
 src/NonlinearSolve.jl |  2 +-
 src/ad.jl             |  4 ++--
 src/levenberg.jl      | 43 +++++++++++++++++++++++++++++--------------
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index b645df86d..fa2ee2009 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
     precompile_algs = if VERSION >= v"1.7"
-        (NewtonRaphson(),)
+        (NewtonRaphson(), LevenbergMarquardt())
     else
         (NewtonRaphson(),)
     end
diff --git a/src/ad.jl b/src/ad.jl
index e29c59f15..eb3013e47 100644
--- a/src/ad.jl
+++ b/src/ad.jl
@@ -26,7 +26,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:Dual{T, V, P}},
-                         alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt},
+                         alg::AbstractNewtonAlgorithm,
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -35,7 +35,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::Union{NewtonRaphson, TrustRegion, LevenbergMarquardt},
+                         alg::AbstractNewtonAlgorithm,
                          args...;
                          kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
diff --git a/src/levenberg.jl b/src/levenberg.jl
index 3b779b338..8e83683cc 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -87,7 +87,7 @@ numerically-difficult nonlinear systems.
   between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted
   step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To
   specify, uphill moves are accepted if
-  ``(1-\\beta_i)^(b_{\\text{uphill}})C_{i+1} \\le C_i``, where ``C_i`` is the cost at
+  ``(1-\\beta_i)^{b_{\\text{uphill}}} C_{i+1} \\le C_i``, where ``C_i`` is the cost at
   iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0`
   allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves
   will be accepted. Defaults to `1.0`. For more details, see section 4 of
@@ -178,6 +178,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
     loss_old::lossType
     make_new_J::Bool
     fu_tmp::resType
+    mat_tmp::jType
 
     function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType,
                                           p::pType, uf::ufType, linsolve::L, J::jType,
@@ -190,14 +191,15 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
                                           a::uType, tmp_vec::uType, v_old::uType,
                                           norm_v_old::lossType, δ::uType,
                                           loss_old::lossType, make_new_J::Bool,
-                                          fu_tmp::resType) where {
-                                                                  iip, fType, algType,
-                                                                  uType, duType, resType,
-                                                                  pType, INType, tolType,
-                                                                  probType, ufType, L,
-                                                                  jType, JC, DᵀDType,
-                                                                  λType, lossType
-                                                                  }
+                                          fu_tmp::resType,
+                                          mat_tmp::jType) where {
+                                                                 iip, fType, algType,
+                                                                 uType, duType, resType,
+                                                                 pType, INType, tolType,
+                                                                 probType, ufType, L,
+                                                                 jType, JC, DᵀDType,
+                                                                 λType, lossType
+                                                                 }
         new{iip, fType, algType, uType, duType, resType,
             pType, INType, tolType, probType, ufType, L,
             jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
@@ -205,7 +207,7 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy
                                                  internalnorm, retcode, abstol, prob, DᵀD,
                                                  JᵀJ, λ, λ_factor, v, a, tmp_vec, v_old,
                                                  norm_v_old, δ, loss_old, make_new_J,
-                                                 fu_tmp)
+                                                 fu_tmp, mat_tmp)
     end
 end
 
@@ -273,6 +275,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq
     δ = zero(u)
     make_new_J = true
     fu_tmp = zero(fu)
+    mat_tmp = zero(J)
 
     return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp,
                                         jac_config,
@@ -280,7 +283,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq
                                         ReturnCode.Default, abstol, prob, DᵀD, JᵀJ,
                                         alg.damping_initial, alg.damping_increase_factor,
                                         v, a, tmp_vec, v_old, loss, δ, loss, make_new_J,
-                                        fu_tmp)
+                                        fu_tmp, mat_tmp)
 end
 function perform_step!(cache::LevenbergMarquardtCache{true})
     @unpack fu, f, make_new_J = cache
@@ -294,16 +297,28 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
         cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ))
         cache.make_new_J = false
     end
-    @unpack u, p, λ, JᵀJ, DᵀD, J = cache
+    @unpack u, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache
 
     # Usual Levenberg-Marquardt step ("velocity").
-    cache.v = -(JᵀJ .+ λ .* DᵀD) \ mul!(cache.du_tmp, J', fu)
+    # The following lines do: cache.v = -cache.mat_tmp \ cache.fu_tmp
+    mul!(cache.fu_tmp, J', fu)
+    @. cache.mat_tmp = JᵀJ + λ * DᵀD
+    linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp),
+                        linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    cache.v .= -cache.du_tmp
 
     # Geodesic acceleration (step_size = v + a / 2).
     @unpack v, alg = cache
     h = alg.finite_diff_step_geodesic
     f(cache.fu_tmp, u .+ h .* v, p)
-    cache.a = -J \ ((2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v)))
+
+    # The following lines do: cache.a = -J \ cache.fu_tmp
+    cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))
+    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp),
+                        linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    cache.a .= -cache.du_tmp
 
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)

From 2cdb3a5951dcd45feeeb6b7a51323beccb2af841 Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 15:43:26 +0100
Subject: [PATCH 07/10] Clarity fix

---
 src/levenberg.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/levenberg.jl b/src/levenberg.jl
index 8e83683cc..aeb68172d 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -314,7 +314,8 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     f(cache.fu_tmp, u .+ h .* v, p)
 
     # The following lines do: cache.a = -J \ cache.fu_tmp
-    cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- mul!(cache.du_tmp, J, v))
+    mul!(cache.du_tmp, J, v)
+    cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- cache.du_tmp)
     linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp),
                         linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache

From d23d758bfb9fc05df886b415fae683fcf8ad9e57 Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 16:36:55 +0100
Subject: [PATCH 08/10] updating to @.

---
 src/levenberg.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/levenberg.jl b/src/levenberg.jl
index aeb68172d..ddd6bd43e 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -306,7 +306,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp),
                         linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache
-    cache.v .= -cache.du_tmp
+    @. cache.v = -cache.du_tmp
 
     # Geodesic acceleration (step_size = v + a / 2).
     @unpack v, alg = cache
@@ -319,7 +319,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp),
                         linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache
-    cache.a .= -cache.du_tmp
+    @. cache.a = -cache.du_tmp
 
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)

From 6f92187655a830c1692fc214b2fe38495c7ddaec Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 17:12:05 +0100
Subject: [PATCH 09/10] Testing: removing precomp. of Levenberg

---
 src/NonlinearSolve.jl | 2 +-
 src/levenberg.jl      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index fa2ee2009..b645df86d 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
     precompile_algs = if VERSION >= v"1.7"
-        (NewtonRaphson(), LevenbergMarquardt())
+        (NewtonRaphson(),)
     else
         (NewtonRaphson(),)
     end
diff --git a/src/levenberg.jl b/src/levenberg.jl
index ddd6bd43e..6c263fefb 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -315,7 +315,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
 
     # The following lines do: cache.a = -J \ cache.fu_tmp
     mul!(cache.du_tmp, J, v)
-    cache.fu_tmp .= (2 / h) .* ((cache.fu_tmp .- fu) ./ h .- cache.du_tmp)
+    @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu) / h - cache.du_tmp)
     linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp),
                         linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache

From 518747a79c0a00dfea1dd3c7dbb88558f3b5ca5f Mon Sep 17 00:00:00 2001
From: Simon Carlson <ccsimon123@gmail.com>
Date: Sun, 22 Jan 2023 18:57:24 +0100
Subject: [PATCH 10/10] Fixing load time for TrustRegion

---
 src/NonlinearSolve.jl |  2 +-
 src/trustRegion.jl    | 18 +++++++++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index b645df86d..79e6788c6 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -41,7 +41,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
     precompile_algs = if VERSION >= v"1.7"
-        (NewtonRaphson(),)
+        (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
     else
         (NewtonRaphson(),)
     end
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 1eeddc1d7..8294da579 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -232,19 +232,27 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     max_trust_radius = alg.max_trust_radius
     initial_trust_radius = alg.initial_trust_radius
     if max_trust_radius == 0.0
-        max_trust_radius = max(norm(fu), maximum(u) - minimum(u))
+        max_trust_radius = convert(typeof(max_trust_radius),
+                                   max(norm(fu), maximum(u) - minimum(u)))
     end
     if initial_trust_radius == 0.0
         initial_trust_radius = max_trust_radius / 11
     end
+
+    loss_new = loss
     H = ArrayInterfaceCore.undefmatrix(u)
+    g = zero(fu)
+    shrink_counter = 0
+    step_size = zero(u)
+    fu_new = zero(fu)
+    make_new_J = true
+    r = loss
 
     return TrustRegionCache{iip}(f, alg, u, fu, p, uf, linsolve, J, jac_config,
                                  1, false, maxiters, internalnorm,
                                  ReturnCode.Default, abstol, prob, initial_trust_radius,
-                                 max_trust_radius, loss, loss, H, zero(fu), 0, zero(u),
-                                 u_tmp, zero(fu), true,
-                                 loss)
+                                 max_trust_radius, loss, loss_new, H, g, shrink_counter,
+                                 step_size, u_tmp, fu_new, make_new_J, r)
 end
 
 function perform_step!(cache::TrustRegionCache{true})
@@ -293,7 +301,7 @@ function perform_step!(cache::TrustRegionCache{false})
 end
 
 function trust_region_step!(cache::TrustRegionCache)
-    @unpack fu_new, u_tmp, step_size, g, H, loss, alg, max_trust_r = cache
+    @unpack fu_new, step_size, g, H, loss, alg, max_trust_r = cache
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.