From 66a0a7ddb2312d608fdcd49e5ea696458c637135 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Mon, 16 Oct 2023 19:21:59 -0400
Subject: [PATCH 1/2] Add polyalgorithms and a default method

This adds two polyalgorithms, one focused on ultra-robustness and another focused on mixing robustness and performance, and sets the default algorithm to the balanced one. There's still a lot of improvements that can be done to this, but I think this is good enough to at least have a lot of utility, and the defaults can probably specialize on StaticArrays and stuff like that to further be optimized, but that's fine.
---
 src/NonlinearSolve.jl |   2 +
 src/default.jl        | 323 ++++++++++++++++++++++++++++++++++++++++++
 test/polyalgs.jl      |   8 ++
 test/runtests.jl      |   2 +-
 4 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 src/default.jl
 create mode 100644 test/polyalgs.jl

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 62bde28f3..0f7984a67 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -68,6 +68,7 @@ include("levenberg.jl")
 include("gaussnewton.jl")
 include("jacobian.jl")
 include("ad.jl")
+include("default.jl")
 
 import PrecompileTools
 
@@ -95,6 +96,7 @@ export RadiusUpdateSchemes
 
 export NewtonRaphson, TrustRegion, LevenbergMarquardt, GaussNewton
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
+export RobustMultiNewton, FastShortcutNonlinearPolyalg
 
 export LineSearch
 
diff --git a/src/default.jl b/src/default.jl
new file mode 100644
index 000000000..c7825f64d
--- /dev/null
+++ b/src/default.jl
@@ -0,0 +1,323 @@
+"""
+RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing,
+                    precs = DEFAULT_PRECS, adkwargs...)
+
+A polyalgorithm focused on robustness. It uses a mixture of Newton methods with different
+globalizing techniques (trust region updates, line searches, etc.) in order to find a
+method that is able to adequately solve the minimization problem.
+
+Basically, if this algorithm fails, then "most" good ways of solving your problem fail and
+you may need to think about reformulating the model (either there is an issue with the model,
+or more precision / more stable linear solver choice is required).
+
+### Keyword Arguments
+
+- `autodiff`: determines the backend used for the Jacobian. Note that this argument is
+    ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
+    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
+    then the Jacobian will not be constructed and instead direct Jacobian-vector products
+    `J*v` are computed using forward-mode automatic differentiation or finite differencing
+    tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
+    for example for a preconditioner, `concrete_jac = true` can be passed in order to force
+    the construction of the Jacobian.
+- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+- `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+"""
+@concrete struct RobustMultiNewton{CJ} <: AbstractNewtonAlgorithm{CJ, Nothing}
+    adkwargs
+    linsolve
+    precs
+end
+
+function RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, 
+        precs = DEFAULT_PRECS, adkwargs...)
+
+    return RobustMultiNewton{_unwrap_val(concrete_jac)}(adkwargs, linsolve, precs)
+end
+
+@concrete mutable struct RobustMultiNewtonCache{iip} <: AbstractNonlinearSolveCache{iip}
+    caches
+    alg
+    current::Int
+end
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::RobustMultiNewton, args...; 
+    kwargs...) where {uType, iip}
+
+    adkwargs = alg.adkwargs
+    linsolve = alg.linsolve
+    precs = alg.precs
+
+    RobustMultiNewtonCache{iip}((
+        SciMLBase.__init(prob, TrustRegion(;linsolve, precs, adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, TrustRegion(;linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, NewtonRaphson(;linsolve, precs, linesearch=BackTracking(), adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, TrustRegion(;linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Fan, adkwargs...), args...; kwargs...),
+        ), alg, 1
+    )
+end
+
+"""
+FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing,
+                    precs = DEFAULT_PRECS, adkwargs...)
+
+A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods
+for more performance and then tries more robust techniques if the faster ones fail.
+
+### Keyword Arguments
+
+- `autodiff`: determines the backend used for the Jacobian. Note that this argument is
+    ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
+    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
+    then the Jacobian will not be constructed and instead direct Jacobian-vector products
+    `J*v` are computed using forward-mode automatic differentiation or finite differencing
+    tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
+    for example for a preconditioner, `concrete_jac = true` can be passed in order to force
+    the construction of the Jacobian.
+- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+- `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+"""
+@concrete struct FastShortcutNonlinearPolyalg{CJ} <: AbstractNewtonAlgorithm{CJ, Nothing}
+    adkwargs
+    linsolve
+    precs
+end
+
+function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing, 
+    precs = DEFAULT_PRECS, adkwargs...)
+
+    return FastShortcutNonlinearPolyalg{_unwrap_val(concrete_jac)}(adkwargs, linsolve, precs)
+end
+
+@concrete mutable struct FastShortcutNonlinearPolyalgCache{iip} <: AbstractNonlinearSolveCache{iip}
+    caches
+    alg
+    current::Int
+end
+
+function FastShortcutNonlinearPolyalgCache(; concrete_jac = nothing, linsolve = nothing, 
+        precs = DEFAULT_PRECS, adkwargs...)
+
+    return FastShortcutNonlinearPolyalgCache{_unwrap_val(concrete_jac)}(adkwargs, linsolve, precs)
+end
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::FastShortcutNonlinearPolyalg, args...; 
+    kwargs...) where {uType, iip}
+
+    adkwargs = alg.adkwargs
+    linsolve = alg.linsolve
+    precs = alg.precs
+
+    FastShortcutNonlinearPolyalgCache{iip}((
+        #SciMLBase.__init(prob, Klement(), args...; kwargs...),
+        #SciMLBase.__init(prob, Broyden(), args...; kwargs...),
+        SciMLBase.__init(prob, NewtonRaphson(;linsolve, precs, adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, NewtonRaphson(;linsolve, precs, linesearch=BackTracking(), adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, TrustRegion(;linsolve, precs, adkwargs...), args...; kwargs...),
+        SciMLBase.__init(prob, TrustRegion(;linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...), args...; kwargs...),
+        ), alg, 1
+    )
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem{uType, iip}, alg::FastShortcutNonlinearPolyalg, args...; 
+    kwargs...) where {uType, iip}
+
+    adkwargs = alg.adkwargs
+    linsolve = alg.linsolve
+    precs = alg.precs
+
+    sol1 = SciMLBase.__solve(prob, Klement(), args...; kwargs...)
+    if SciMLBase.successful_retcode(sol1)
+        return SciMLBase.build_solution(prob, alg, sol1.u, sol1.resid;
+                                        sol1.retcode, sol1.stats)
+    end
+
+    sol2 = SciMLBase.__solve(prob, Broyden(), args...; kwargs...)
+    if SciMLBase.successful_retcode(sol2)
+        return SciMLBase.build_solution(prob, alg, sol2.u, sol2.resid;
+                                        sol2.retcode, sol2.stats)
+    end
+
+    sol3 = SciMLBase.__solve(prob, NewtonRaphson(;linsolve, precs, adkwargs...), args...; kwargs...)
+    if SciMLBase.successful_retcode(sol3)
+        return SciMLBase.build_solution(prob, alg, sol3.u, sol3.resid;
+                                        sol3.retcode, sol3.stats)
+    end
+
+    sol4 = SciMLBase.__solve(prob,  TrustRegion(;linsolve, precs, adkwargs...), args...; kwargs...)
+    if SciMLBase.successful_retcode(sol4)
+        return SciMLBase.build_solution(prob, alg, sol4.u, sol4.resid;
+                                        sol4.retcode, sol4.stats)
+    end
+
+    resids = (sol1.resid, sol2.resid, sol3.resid, sol4.resid)
+    minfu, idx = findmin(DEFAULT_NORM, resids)
+
+    if idx == 1
+        SciMLBase.build_solution(prob, alg, sol1.u, sol1.resid;
+                                        sol1.retcode, sol1.stats)
+    elseif idx == 2
+        SciMLBase.build_solution(prob, alg, sol2.u, sol2.resid;
+                                        sol2.retcode, sol2.stats)
+    elseif idx == 3
+        SciMLBase.build_solution(prob, alg, sol3.u, sol3.resid;
+                                sol3.retcode, sol3.stats)
+    elseif idx == 4
+        SciMLBase.build_solution(prob, alg, sol4.u, sol4.resid;
+                                sol4.retcode, sol4.stats)
+    else
+        error("Unreachable reached, 박정석")
+    end
+
+end
+
+## General shared polyalg functions
+
+function perform_step!(cache::Union{RobustMultiNewtonCache, FastShortcutNonlinearPolyalgCache})
+    current = cache.current
+
+    while true
+        if current == 1
+            perform_step!(cache.caches[1])
+        elseif current == 2
+            perform_step!(cache.caches[2])
+        elseif current == 3
+            perform_step!(cache.caches[3])
+        elseif current == 4
+            perform_step!(cache.caches[4])
+        else
+            error("Current choices shouldn't get here!")
+        end
+    end
+
+    return nothing
+end
+
+function SciMLBase.solve!(cache::Union{RobustMultiNewtonCache, FastShortcutNonlinearPolyalgCache})
+    current = cache.current
+
+    while current < 5 && all(not_terminated, cache.caches)
+        if current == 1
+            perform_step!(cache.caches[1])
+            !not_terminated(cache.caches[1]) && (cache.current += 1)
+        elseif current == 2
+            perform_step!(cache.caches[2])
+            !not_terminated(cache.caches[2]) && (cache.current += 1)
+        elseif current == 3
+            perform_step!(cache.caches[3])
+            !not_terminated(cache.caches[3]) && (cache.current += 1)
+        elseif current == 4
+            perform_step!(cache.caches[4])
+            !not_terminated(cache.caches[4]) && (cache.current += 1)
+        else
+            error("Current choices shouldn't get here!")
+        end
+
+        
+
+        #cache.stats.nsteps += 1
+    end
+
+    if current < 5
+        stats = if current == 1
+            cache.caches[1].stats
+        elseif current == 2
+            cache.caches[2].stats
+        elseif current == 3
+            cache.caches[3].stats
+        elseif current == 4
+            cache.caches[4].stats
+        end
+
+        u = if current == 1
+            cache.caches[1].u
+        elseif current == 2
+            cache.caches[2].u
+        elseif current == 3
+            cache.caches[3].u
+        elseif current == 4
+            cache.caches[4].u
+        end
+
+        fu = if current == 1
+            get_fu(cache.caches[1])
+        elseif current == 2
+            get_fu(cache.caches[2])
+        elseif current == 3
+            get_fu(cache.caches[3])
+        elseif current == 4
+            get_fu(cache.caches[4])
+        end
+
+        retcode = if stats.nsteps == cache.caches[1].maxiters
+            ReturnCode.MaxIters
+        else
+            ReturnCode.Success
+        end
+
+        return SciMLBase.build_solution(cache.caches[1].prob, cache.alg, u, fu;
+            retcode, stats)
+    else
+        retcode = ReturnCode.MaxIters
+
+        fus = (get_fu(cache.caches[1]), get_fu(cache.caches[2]), get_fu(cache.caches[3]), get_fu(cache.caches[4]))
+        minfu, idx = findmin(cache.caches[1].internalnorm, fus)
+
+        stats = if idx == 1
+            cache.caches[1].stats
+        elseif idx == 2
+            cache.caches[2].stats
+        elseif idx == 3
+            cache.caches[3].stats
+        elseif idx == 4
+            cache.caches[4].stats
+        end
+
+        u = if idx == 1
+            cache.caches[1].u
+        elseif idx == 2
+            cache.caches[2].u
+        elseif idx == 3
+            cache.caches[3].u
+        elseif idx == 4
+            cache.caches[4].u
+        end
+        
+        return SciMLBase.build_solution(cache.caches[1].prob, cache.alg, u, fu;
+                                        retcode, stats)
+    end
+end
+
+function SciMLBase.reinit!(cache::Union{RobustMultiNewtonCache, FastShortcutNonlinearPolyalgCache}, args...; kwargs...)
+    for c in cache.caches
+        SciMLBase.reinit!(c, args...; kwargs...)
+    end
+end
+
+## Defaults
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...; 
+    kwargs...) where {uType, iip}
+
+    SciMLBase.__init(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...; 
+    kwargs...) where {uType, iip}
+
+    SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+end
\ No newline at end of file
diff --git a/test/polyalgs.jl b/test/polyalgs.jl
new file mode 100644
index 000000000..110f6228a
--- /dev/null
+++ b/test/polyalgs.jl
@@ -0,0 +1,8 @@
+using NonlinearSolve
+
+f(u, p) = u .* u .- 2
+u0 = [1.0, 1.0]
+probN = NonlinearProblem(f, u0)
+@time solver = solve(probN, abstol = 1e-9)
+@time solver = solve(probN, RobustMultiNewton(), abstol = 1e-9)
+@time solver = solve(probN, FastShortcutNonlinearPolyalg(), abstol = 1e-9)
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 9732ab185..759c6e49e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,7 @@ end
     if GROUP == "All" || GROUP == "Core"
         @time @safetestset "Basic Tests + Some AD" include("basictests.jl")
         @time @safetestset "Sparsity Tests" include("sparse.jl")
-
+        @time @safetestset "Polyalgs" include("polyalgs.jl")
         @time @safetestset "Nonlinear Least Squares" include("nonlinear_least_squares.jl")
     end
 

From c266c310f64546ef121abb2406886fe2b2519af2 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Mon, 16 Oct 2023 21:12:57 -0400
Subject: [PATCH 2/2] update recommended algorithms

---
 docs/src/solvers/NonlinearSystemSolvers.md | 46 +++++++++++++++-------
 src/default.jl                             |  5 +++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md
index 9ad112192..08eb3f37d 100644
--- a/docs/src/solvers/NonlinearSystemSolvers.md
+++ b/docs/src/solvers/NonlinearSystemSolvers.md
@@ -7,22 +7,31 @@ Solves for ``f(u)=0`` in the problem defined by `prob` using the algorithm
 
 ## Recommended Methods
 
-`NewtonRaphson` is a good choice for most problems. For large
-systems, it can make use of sparsity patterns for sparse automatic differentiation
-and sparse linear solving of very large systems. That said, as a classic Newton
-method, its stability region can be smaller than other methods. Meanwhile,
-`SimpleNewtonRaphson` is an implementation which is specialized for
-small equations. It is non-allocating on static arrays and thus really well-optimized
+The default method `FastShortcutNonlinearPolyalg` is a good choice for most
+problems. It is a polyalgorithm that attempts to use a fast algorithm
+(Klement, Broyden) and if that fails it falls back to a more robust
+algorithm (`NewtonRaphson`) before falling back the most robust varient of
+`TrustRegion`. For basic problems this will be very fast, for harder problems
+it will make sure to work.
+
+If one is looking for more robustness then `RobustMultiNewton` is a good choice.
+It attempts a set of the most robust methods in succession and only fails if
+all of the methods fail to converge. Additionally, `DynamicSS` can be a good choice 
+for high stability.
+
+As a balance, `NewtonRaphson` is a good choice for most problems that aren't too
+difficult yet need high performance, and  `TrustRegion` is a bit less performant
+but more stable. If the problem is well-conditioned, `Klement` or `Broyden`
+may be faster, but highly dependent on the eigenvalues of the Jacobian being
+sufficiently small.
+
+`NewtonRaphson` and `TrustRegion` are designed for for large systems. 
+They can make use of sparsity patterns for sparse automatic differentiation
+and sparse linear solving of very large systems. Meanwhile,
+`SimpleNewtonRaphson` and `SimpleTrustRegion` are implementations which is specialized for
+small equations. They are non-allocating on static arrays and thus really well-optimized
 for small systems, thus usually outperforming the other methods when such types are
-used for `u0`. `DynamicSS` can be a good choice for high stability.
-
-For a system which is very non-stiff (i.e., the condition number of the Jacobian
-is small, or the eigenvalues of the Jacobian are within a few orders of magnitude),
-then `NLSolveJL`'s `:anderson` can be a good choice.
-
-!!! note
-    
-    `TrustRegion` and `SimpleTrustRegion` are still in development.
+used for `u0`. 
 
 ## Full List of Methods
 
@@ -46,6 +55,13 @@ features, but have a bit of overhead on very small problems.
     improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to
     the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for
     large-scale and numerically-difficult nonlinear systems.
+  - `RobustMultiNewton()`: A polyalgorithm that mixes highly robust methods (line searches and
+    trust regions) in order to be as robust as possible for difficult problems. If this method
+    fails to converge, then one can be pretty certain that most (all?) other choices would 
+    likely fail.
+  - `FastShortcutNonlinearPolyalg`: The default method. A polyalgorithm that mixes fast methods
+    with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing
+    robustnes on the hard problems.
 
 ### SimpleNonlinearSolve.jl
 
diff --git a/src/default.jl b/src/default.jl
index c7825f64d..90ac0d9ec 100644
--- a/src/default.jl
+++ b/src/default.jl
@@ -36,6 +36,11 @@ or more precision / more stable linear solver choice is required).
     precs
 end
 
+# When somethin's strange, and numerical
+# who you gonna call?
+# Robusters!
+const Robusters = RobustMultiNewton
+
 function RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, 
         precs = DEFAULT_PRECS, adkwargs...)