From 9c42b4fe063705269f0bc62a771d4a944ba217a8 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 13:24:26 -0400
Subject: [PATCH 1/9] Formatting

---
 docs/pages.jl                           |  3 +--
 docs/src/tutorials/code_optimization.md | 18 +++++++++---------
 docs/src/tutorials/large_systems.md     | 10 +++++-----
 docs/src/tutorials/modelingtoolkit.md   |  2 +-
 docs/src/tutorials/small_compile.md     | 20 ++++++++++----------
 5 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/docs/pages.jl b/docs/pages.jl
index 8564261bd..93ad7b391 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -2,8 +2,7 @@
 
 pages = ["index.md",
     "Getting Started with Nonlinear Rootfinding in Julia" => "tutorials/getting_started.md",
-    "Tutorials" => Any[
-        "Code Optimization for Small Nonlinear Systems" => "tutorials/code_optimization.md",
+    "Tutorials" => Any["Code Optimization for Small Nonlinear Systems" => "tutorials/code_optimization.md",
         "Handling Large Ill-Conditioned and Sparse Systems" => "tutorials/large_systems.md",
         "Symbolic System Definition and Acceleration via ModelingToolkit" => "tutorials/modelingtoolkit.md",
         "tutorials/small_compile.md",
diff --git a/docs/src/tutorials/code_optimization.md b/docs/src/tutorials/code_optimization.md
index daefcb855..cd4089d5d 100644
--- a/docs/src/tutorials/code_optimization.md
+++ b/docs/src/tutorials/code_optimization.md
@@ -34,7 +34,7 @@ Take for example a prototypical small nonlinear solver code in its out-of-place
 ```@example small_opt
 using NonlinearSolve
 
-function f(u, p) 
+function f(u, p)
     u .* u .- p
 end
 u0 = [1.0, 1.0]
@@ -54,7 +54,7 @@ using BenchmarkTools
 Note that this way of writing the function is a shorthand for:
 
 ```@example small_opt
-function f(u, p) 
+function f(u, p)
     [u[1] * u[1] - p, u[2] * u[2] - p]
 end
 ```
@@ -69,25 +69,25 @@ In order to avoid this issue, we can use a non-allocating "in-place" approach. W
 this looks like:
 
 ```@example small_opt
-function f(du, u, p) 
+function f(du, u, p)
     du[1] = u[1] * u[1] - p
     du[2] = u[2] * u[2] - p
     nothing
 end
 
 prob = NonlinearProblem(f, u0, p)
-@btime  sol = solve(prob, NewtonRaphson())
+@btime sol = solve(prob, NewtonRaphson())
 ```
 
 Notice how much faster this already runs! We can make this code even simpler by using
 the `.=` in-place broadcasting.
 
 ```@example small_opt
-function f(du, u, p) 
+function f(du, u, p)
     du .= u .* u .- p
 end
 
-@btime  sol = solve(prob, NewtonRaphson())
+@btime sol = solve(prob, NewtonRaphson())
 ```
 
 ## Further Optimizations for Small Nonlinear Solves with Static Arrays and SimpleNonlinearSolve
@@ -140,7 +140,7 @@ want to use the out-of-place allocating form, but this time we want to output
 a static array. Doing it with broadcasting looks like:
 
 ```@example small_opt
-function f_SA(u, p) 
+function f_SA(u, p)
     u .* u .- p
 end
 u0 = SA[1.0, 1.0]
@@ -153,7 +153,7 @@ Note that only change here is that `u0` is made into a StaticArray! If we needed
 for a more complex nonlinear case, then we'd simply do the following:
 
 ```@example small_opt
-function f_SA(u, p) 
+function f_SA(u, p)
     SA[u[1] * u[1] - p, u[2] * u[2] - p]
 end
 
@@ -170,4 +170,4 @@ which are designed for these small-scale static examples. Let's now use `SimpleN
 @btime solve(prob, SimpleNewtonRaphson())
 ```
 
-And there we go, around 100ns from our starting point of almost 6μs!
\ No newline at end of file
+And there we go, around 100ns from our starting point of almost 6μs!
diff --git a/docs/src/tutorials/large_systems.md b/docs/src/tutorials/large_systems.md
index c1936932e..8e53d193c 100644
--- a/docs/src/tutorials/large_systems.md
+++ b/docs/src/tutorials/large_systems.md
@@ -1,8 +1,8 @@
 # [Efficiently Solving Large Sparse Ill-Conditioned Nonlinear Systems in Julia](@id large_systems)
 
-This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems 
-requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` 
-linear solve and the ``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of 
+This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems
+requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)``
+linear solve and the ``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of
 NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential equation (BRUSS) using NonlinearSolve.jl.
 
 ## Definition of the Brusselator Equation
@@ -182,8 +182,8 @@ nothing # hide
 
 Notice that this acceleration does not require the definition of a sparsity
 pattern, and can thus be an easier way to scale for large problems. For more
-information on linear solver choices, see the 
-[linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear). 
+information on linear solver choices, see the
+[linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear).
 `linsolve` choices are any valid [LinearSolve.jl](https://linearsolve.sciml.ai/dev/) solver.
 
 !!! note
diff --git a/docs/src/tutorials/modelingtoolkit.md b/docs/src/tutorials/modelingtoolkit.md
index 7d26cb30c..3ee7d8e75 100644
--- a/docs/src/tutorials/modelingtoolkit.md
+++ b/docs/src/tutorials/modelingtoolkit.md
@@ -120,4 +120,4 @@ sol[u5]
 
 If you're interested in building models in a component or block based form, such as seen in systems like Simulink or Modelica,
 take a deeper look at [ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which goes into
-detail on such features.
\ No newline at end of file
+detail on such features.
diff --git a/docs/src/tutorials/small_compile.md b/docs/src/tutorials/small_compile.md
index d7d2c2311..4f4c56b14 100644
--- a/docs/src/tutorials/small_compile.md
+++ b/docs/src/tutorials/small_compile.md
@@ -19,18 +19,18 @@ sol = solve(prob, SimpleNewtonRaphson())
 
 However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to note:
 
-1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have
-   the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features
-   which are required to scale for very large systems of equations.
-2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus
-   these methods are missing some flexibility and give worse hints for debugging.
-3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support,
-   but it's designed for simple smaller systems and so some optimizations are missing.
+ 1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have
+    the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features
+    which are required to scale for very large systems of equations.
+ 2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus
+    these methods are missing some flexibility and give worse hints for debugging.
+ 3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support,
+    but it's designed for simple smaller systems and so some optimizations are missing.
 
 However, the major upsides of SimpleNonlinearSolve.jl are:
 
-1. The methods are optimized and non-allocating on StaticArrays
-2. The methods are minimal in compilation
+ 1. The methods are optimized and non-allocating on StaticArrays
+ 2. The methods are minimal in compilation
 
 As such, you can use the code as shown above to have very low startup with good methods, but for more scaling and debuggability
 we recommend the full NonlinearSolve.jl. But that said,
@@ -51,4 +51,4 @@ is not only sufficient but optimal.
 
 Julia has tools for building small binaries via static compilation with [StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl).
 However, these tools are currently limited to type-stable non-allocating functions. That said, SimpleNonlinearSolve.jl's solvers are
-precisely the subset of NonlinearSolve.jl which are compatible with static compilation.
\ No newline at end of file
+precisely the subset of NonlinearSolve.jl which are compatible with static compilation.

From 6b8305463a39d935adc286d2e17f495971457cd6 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 15:51:05 -0400
Subject: [PATCH 2/9] Implement IIP Version of Broyden

---
 src/NonlinearSolve.jl | 10 +++--
 src/broyden.jl        | 85 +++++++++++++++++++++++++++++++++++++++++++
 src/default.jl        |  2 +-
 src/klement.jl        |  0
 src/linesearch.jl     |  3 +-
 5 files changed, 95 insertions(+), 5 deletions(-)
 create mode 100644 src/broyden.jl
 create mode 100644 src/klement.jl

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 08e5a7e79..6a25ebde8 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -69,18 +69,21 @@ include("levenberg.jl")
 include("gaussnewton.jl")
 include("dfsane.jl")
 include("pseudotransient.jl")
+include("broyden.jl")
+include("klement.jl")
 include("jacobian.jl")
 include("ad.jl")
 include("default.jl")
 
 import PrecompileTools
 
-@static if VERSION >= v"1.10"
+@static if VERSION ≥ v"1.10-"
     PrecompileTools.@compile_workload begin
         for T in (Float32, Float64)
             prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
-            precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+            precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(),
+                nothing)
 
             for alg in precompile_algs
                 solve(prob, alg, abstol = T(1e-2))
@@ -97,7 +100,8 @@ end
 
 export RadiusUpdateSchemes
 
-export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient
+export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient,
+    GeneralBroyden, GeneralKlement
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
 export RobustMultiNewton, FastShortcutNonlinearPolyalg
 
diff --git a/src/broyden.jl b/src/broyden.jl
new file mode 100644
index 000000000..6daf9ef1e
--- /dev/null
+++ b/src/broyden.jl
@@ -0,0 +1,85 @@
+# Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
+"""
+    GeneralBroyden(max_resets)
+    GeneralBroyden(; max_resets = 3)
+
+An implementation of `Broyden` with support for caching!
+
+## Arguments
+
+  - `max_resets`: the maximum number of resets to perform. Defaults to `3`.
+"""
+struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
+    max_resets::Int
+end
+
+GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
+
+@concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
+    f
+    alg
+    u
+    du
+    fu
+    fu2
+    dfu
+    p
+    J⁻¹
+    J⁻¹₂
+    J⁻¹df
+    force_stop::Bool
+    resets::Int
+    max_rests::Int
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+end
+
+get_fu(cache::GeneralBroydenCache) = cache.fu
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyden, args...;
+    alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    fu = evaluate_f(prob, u)
+    J⁻¹ = convert(parameterless_type(_mutable(u)),
+        Matrix{eltype(u)}(I, length(fu), length(u)))
+    return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu),
+        similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets,
+        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0))
+end
+
+function perform_step!(cache::GeneralBroydenCache{true})
+    @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache
+    T = eltype(u)
+
+    mul!(du, J⁻¹, -fu)
+    u .+= du
+    f(fu2, u, p)
+
+    cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+
+    cache.force_stop && return nothing
+
+    # Update the inverse jacobian
+    dfu .= fu2 .- fu
+    if cache.resets < cache.max_rests &&
+       (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu))
+        fill!(J⁻¹, 0)
+        J⁻¹[diagind(J⁻¹)] .= T(1)
+        cache.resets += 1
+    else
+        mul!(J⁻¹df, J⁻¹, dfu)
+        mul!(J⁻¹₂, du', J⁻¹)
+        du .= (du .- J⁻¹df) ./ (dot(du, J⁻¹df) .+ T(1e-5))
+        mul!(J⁻¹, reshape(du, :, 1), J⁻¹₂, 1, 1)
+    end
+    fu .= fu2
+
+    return nothing
+end
diff --git a/src/default.jl b/src/default.jl
index 913f994fe..76289cc1c 100644
--- a/src/default.jl
+++ b/src/default.jl
@@ -162,7 +162,7 @@ end
             # FIXME: Broyden and Klement are type unstable
             #        (upstream SimpleNonlinearSolve.jl issue)
             !iip ? :(Klement()) : nothing, # Klement not yet implemented for IIP
-            !iip ? :(Broyden()) : nothing, # Broyden not yet implemented for IIP
+            :(GeneralBroyden()),
             :(NewtonRaphson(; linsolve, precs, adkwargs...)),
             :(NewtonRaphson(; linsolve, precs, linesearch = BackTracking(), adkwargs...)),
             :(TrustRegion(; linsolve, precs, adkwargs...)),
diff --git a/src/klement.jl b/src/klement.jl
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/linesearch.jl b/src/linesearch.jl
index 30861e14b..1ef36206a 100644
--- a/src/linesearch.jl
+++ b/src/linesearch.jl
@@ -92,7 +92,8 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
     g₀ = _mutable_zero(u)
 
     autodiff = if iip && (ls.autodiff isa AutoZygote || ls.autodiff isa AutoSparseZygote)
-        @warn "Attempting to use Zygote.jl for linesearch on an in-place problem. Falling back to finite differencing."
+        @warn "Attempting to use Zygote.jl for linesearch on an in-place problem. Falling \
+               back to finite differencing."
         AutoFiniteDiff()
     else
         ls.autodiff

From c6992a58674ce620fc1fab8c4c147ca996ffa382 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 18:20:46 -0400
Subject: [PATCH 3/9] Broyden with LineSearch

---
 src/NonlinearSolve.jl    |   4 +-
 src/broyden.jl           |  91 +++++++++++++++++++++----
 src/klement.jl           |   1 +
 src/linesearch.jl        | 143 ++++++++++++++++++++++++++++++++++++---
 src/raphson.jl           |   5 +-
 src/utils.jl             |  14 +++-
 test/23_test_problems.jl |  41 ++++++++---
 test/basictests.jl       |  90 ++++++++++++++++++++++++
 8 files changed, 352 insertions(+), 37 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 6a25ebde8..5e21cdf44 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -26,6 +26,8 @@ import UnPack: @unpack
 const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences,
     ADTypes.AbstractSparseForwardMode, ADTypes.AbstractSparseReverseMode}
 
+abstract type AbstractNonlinearSolveLineSearchAlgorithm end
+
 abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end
 abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end
 
@@ -105,6 +107,6 @@ export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, Pseu
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
 export RobustMultiNewton, FastShortcutNonlinearPolyalg
 
-export LineSearch
+export LineSearch, LiFukushimaLineSearch
 
 end # module
diff --git a/src/broyden.jl b/src/broyden.jl
index 6daf9ef1e..121a2ca71 100644
--- a/src/broyden.jl
+++ b/src/broyden.jl
@@ -1,19 +1,28 @@
 # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
 """
-    GeneralBroyden(max_resets)
-    GeneralBroyden(; max_resets = 3)
+    GeneralBroyden(max_resets, linesearch)
+    GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
 
-An implementation of `Broyden` with support for caching!
+An implementation of `Broyden` with reseting and line search.
 
 ## Arguments
 
   - `max_resets`: the maximum number of resets to perform. Defaults to `3`.
+  - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
+    which means that no line search is performed. Algorithms from `LineSearches.jl` can be
+    used here directly, and they will be converted to the correct `LineSearch`. It is
+    recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch
+    specifically designed for Broyden's method.
 """
-struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
+@concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
     max_resets::Int
+    linesearch
 end
 
-GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
+function GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
+    linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
+    return GeneralBroyden(max_resets, linesearch)
+end
 
 @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
     f
@@ -29,13 +38,14 @@ GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
     J⁻¹df
     force_stop::Bool
     resets::Int
-    max_rests::Int
+    max_resets::Int
     maxiters::Int
     internalnorm
     retcode::ReturnCode.T
     abstol
     prob
     stats::NLStats
+    lscache
 end
 
 get_fu(cache::GeneralBroydenCache) = cache.fu
@@ -46,11 +56,11 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu = evaluate_f(prob, u)
-    J⁻¹ = convert(parameterless_type(_mutable(u)),
-        Matrix{eltype(u)}(I, length(fu), length(u)))
-    return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu),
-        similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets,
-        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0))
+    J⁻¹ = __init_identity_jacobian(u, fu)
+    return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu),
+        zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets,
+        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
 
 function perform_step!(cache::GeneralBroydenCache{true})
@@ -58,7 +68,8 @@ function perform_step!(cache::GeneralBroydenCache{true})
     T = eltype(u)
 
     mul!(du, J⁻¹, -fu)
-    u .+= du
+    α = perform_linesearch!(cache.lscache, u, du)
+    axpy!(α, du, u)
     f(fu2, u, p)
 
     cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true)
@@ -68,7 +79,7 @@ function perform_step!(cache::GeneralBroydenCache{true})
 
     # Update the inverse jacobian
     dfu .= fu2 .- fu
-    if cache.resets < cache.max_rests &&
+    if cache.resets < cache.max_resets &&
        (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu))
         fill!(J⁻¹, 0)
         J⁻¹[diagind(J⁻¹)] .= T(1)
@@ -83,3 +94,57 @@ function perform_step!(cache::GeneralBroydenCache{true})
 
     return nothing
 end
+
+function perform_step!(cache::GeneralBroydenCache{false})
+    @unpack f, p = cache
+    T = eltype(cache.u)
+
+    cache.du = cache.J⁻¹ * -cache.fu
+    α = perform_linesearch!(cache.lscache, cache.u, cache.du)
+    cache.u = cache.u .+ α * cache.du
+    cache.fu2 = f(cache.u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+
+    cache.force_stop && return nothing
+
+    # Update the inverse jacobian
+    cache.dfu = cache.fu2 .- cache.fu
+    if cache.resets < cache.max_resets &&
+       (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu))
+        J⁻¹ = similar(cache.J⁻¹)
+        fill!(J⁻¹, 0)
+        J⁻¹[diagind(J⁻¹)] .= T(1)
+        cache.J⁻¹ = J⁻¹
+        cache.resets += 1
+    else
+        cache.J⁻¹df = cache.J⁻¹ * cache.dfu
+        cache.J⁻¹₂ = cache.du' * cache.J⁻¹
+        cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5))
+        cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂
+    end
+    cache.fu = cache.fu2
+
+    return nothing
+end
+
+function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu = cache.f(cache.u, p)
+    end
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/src/klement.jl b/src/klement.jl
index e69de29bb..8b1378917 100644
--- a/src/klement.jl
+++ b/src/klement.jl
@@ -0,0 +1 @@
+
diff --git a/src/linesearch.jl b/src/linesearch.jl
index 1ef36206a..1f63d695c 100644
--- a/src/linesearch.jl
+++ b/src/linesearch.jl
@@ -26,7 +26,15 @@ function LineSearch(; method = Static(), autodiff = AutoFiniteDiff(), alpha = tr
     return LineSearch(method, autodiff, alpha)
 end
 
-@concrete mutable struct LineSearchCache
+@inline function init_linesearch_cache(ls::LineSearch, args...)
+    return init_linesearch_cache(ls.method, ls, args...)
+end
+
+# LineSearches.jl doesn't have a supertype so default to that
+init_linesearch_cache(_, ls, f, u, p, fu, iip) = LineSearchesJLCache(ls, f, u, p, fu, iip)
+
+# Wrapper over LineSearches.jl algorithms
+@concrete mutable struct LineSearchesJLCache
     f
     ϕ
     dϕ
@@ -35,11 +43,11 @@ end
     ls
 end
 
-function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
+function LineSearchesJLCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
     eval_f(u, du, α) = eval_f(u - α * du)
     eval_f(u) = f(u, p)
 
-    ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
+    ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
         convert(typeof(u), ls.α), ls)
 
     g(u, fu) = last(value_derivative(Base.Fix2(f, p), u)) * fu
@@ -73,11 +81,11 @@ function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
         return ϕdϕ_internal
     end
 
-    return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
+    return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
 end
 
-function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
-    fu = iip ? fu1 : nothing
+function LineSearchesJLCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
+    fu = iip ? deepcopy(fu1) : nothing
     u_ = _mutable_zero(u)
 
     function eval_f(u, du, α)
@@ -86,7 +94,7 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
     end
     eval_f(u) = evaluate_f(f, u, p, IIP; fu)
 
-    ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
+    ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
         convert(eltype(u), ls.α), ls)
 
     g₀ = _mutable_zero(u)
@@ -138,10 +146,10 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
         return ϕdϕ_internal
     end
 
-    return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
+    return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
 end
 
-function perform_linesearch!(cache::LineSearchCache, u, du)
+function perform_linesearch!(cache::LineSearchesJLCache, u, du)
     cache.ls.method isa Static && return cache.α
 
     ϕ = cache.ϕ(u, du)
@@ -155,3 +163,120 @@ function perform_linesearch!(cache::LineSearchCache, u, du)
 
     return first(cache.ls.method(ϕ, cache.dϕ(u, du), cache.ϕdϕ(u, du), cache.α, ϕ₀, dϕ₀))
 end
+
+"""
+    LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.5, sigma_1 = 0.001,
+        eta = 0.1, nan_max_iter = 5, maxiters = 50)
+
+A derivative-free line search and global convergence of Broyden-like method for nonlinear
+equations by Dong-Hui Li & Masao Fukushima. For more details see
+https://doi.org/10.1080/10556780008805782
+"""
+struct LiFukushimaLineSearch{T} <: AbstractNonlinearSolveLineSearchAlgorithm
+    λ₀::T
+    β::T
+    σ₁::T
+    σ₂::T
+    η::T
+    ρ::T
+    nan_max_iter::Int
+    maxiters::Int
+end
+
+function LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001,
+    sigma_2 = 0.001, eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50)
+    T = promote_type(typeof(lambda_0), typeof(beta), typeof(sigma_1), typeof(eta),
+        typeof(rho), typeof(sigma_2))
+    return LiFukushimaLineSearch{T}(lambda_0, beta, sigma_1, sigma_2, eta, rho,
+        nan_max_iter, maxiters)
+end
+
+@concrete mutable struct LiFukushimaLineSearchCache{iip}
+    f
+    p
+    u_cache
+    fu_cache
+    alg
+    α
+end
+
+function init_linesearch_cache(alg::LiFukushimaLineSearch, ls::LineSearch, f, _u, p, _fu,
+    ::Val{iip}) where {iip}
+    fu = iip ? deepcopy(_fu) : nothing
+    u = iip ? deepcopy(_u) : nothing
+    return LiFukushimaLineSearchCache{iip}(f, p, u, fu, alg, ls.α)
+end
+
+function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) where {iip}
+    (; β, σ₁, σ₂, η, λ₀, ρ, nan_max_iter, maxiters) = cache.alg
+    λ₂ = λ₀
+    λ₁ = λ₂
+
+    if iip
+        cache.f(cache.fu_cache, u, cache.p)
+        fx_norm = norm(cache.fu_cache, 2)
+    else
+        fx_norm = norm(cache.f(u, cache.p), 2)
+    end
+
+    # Non-Blocking exit if the norm is NaN or Inf
+    !isfinite(fx_norm) && return cache.α
+
+    # Early Terminate based on Eq. 2.7
+    if iip
+        cache.u_cache .= u .+ du
+        cache.f(cache.fu_cache, cache.u_cache, cache.p)
+        fxλ_norm = norm(cache.fu_cache, 2)
+    else
+        fxλ_norm = norm(cache.f(u .+ du, cache.p), 2)
+    end
+
+    fxλ_norm ≤ ρ * fx_norm - σ₂ * norm(du, 2)^2 && return cache.α
+
+    if iip
+        cache.u_cache .= u .+ λ₂ .* du
+        cache.f(cache.fu_cache, cache.u_cache, cache.p)
+        fxλp_norm = norm(cache.fu_cache, 2)
+    else
+        fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+    end
+
+    if !isfinite(fxλp_norm)
+        # Backtrack a finite number of steps
+        nan_converged = false
+        for _ in 1:nan_max_iter
+            λ₁, λ₂ = λ₂, β * λ₂
+
+            if iip
+                cache.u_cache .= u .+ λ₂ .* du
+                cache.f(cache.fu_cache, cache.u_cache, cache.p)
+                fxλp_norm = norm(cache.fu_cache, 2)
+            else
+                fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+            end
+
+            nan_converged = isfinite(fxλp_norm)
+            nan_converged && break
+        end
+
+        # Non-Blocking exit if the norm is still NaN or Inf
+        !nan_converged && return cache.α
+    end
+
+    for _ in 1:maxiters
+        if iip
+            cache.u_cache .= u .+ λ₂ .* du
+            cache.f(cache.fu_cache, cache.u_cache, cache.p)
+            fxλp_norm = norm(cache.fu_cache, 2)
+        else
+            fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+        end
+
+        converged = fxλp_norm ≤ (1 + η) * fx_norm - σ₁ * λ₂^2 * norm(du, 2)^2
+
+        converged && break
+        λ₁, λ₂ = λ₂, β * λ₂
+    end
+
+    return λ₂
+end
diff --git a/src/raphson.jl b/src/raphson.jl
index 642c7ee36..a3d9bbc6d 100644
--- a/src/raphson.jl
+++ b/src/raphson.jl
@@ -82,7 +82,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso
 
     return NewtonRaphsonCache{iip}(f, alg, u, fu1, fu2, du, p, uf, linsolve, J,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
-        NLStats(1, 0, 0, 0, 0), LineSearchCache(alg.linesearch, f, u, p, fu1, Val(iip)))
+        NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)))
 end
 
 function perform_step!(cache::NewtonRaphsonCache{true})
@@ -96,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true})
 
     # Line Search
     α = perform_linesearch!(cache.lscache, u, du)
-    @. u = u - α * du
+    axpy!(α, du, u)
     f(cache.fu1, u, p)
 
     cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true)
diff --git a/src/utils.jl b/src/utils.jl
index fb8c59850..d33dc292d 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -35,8 +35,8 @@ function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing,
     if chunk_size !== missing || standardtag !== missing || diff_type !== missing ||
        autodiff !== missing
         Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \
-            `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed in\
-             v3. Update your code to directly specify autodiff=<ADTypes>",
+            `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed \
+             in v3. Update your code to directly specify autodiff=<ADTypes>",
             :default_adargs_to_adtype)
     end
     chunk_size === missing && (chunk_size = Val{0}())
@@ -211,3 +211,13 @@ function __get_concrete_algorithm(alg, prob)
     end
     return set_ad(alg, ad)
 end
+
+__init_identity_jacobian(u::Number, _) = u
+function __init_identity_jacobian(u, fu)
+    return convert(parameterless_type(_mutable(u)),
+        Matrix{eltype(u)}(I, length(fu), length(u)))
+end
+function __init_identity_jacobian(u::StaticArray, fu)
+    return convert(MArray{Tuple{length(fu), length(u)}},
+        Matrix{eltype(u)}(I, length(fu), length(u)))
+end
\ No newline at end of file
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 5e0df8352..8c739c085 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -8,19 +8,28 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4)
         x = dict["start"]
         res = similar(x)
         nlprob = NonlinearProblem(problem, x)
-        @testset "$(dict["title"])" begin
+        @testset "$idx: $(dict["title"])" begin
             for alg in alg_ops
-                sol = solve(nlprob, alg, abstol = 1e-18, reltol = 1e-18)
-                problem(res, sol.u, nothing)
-                broken = idx in broken_tests[alg] ? true : false
-                @test norm(res)≤ϵ broken=broken
+                try
+                    sol = solve(nlprob, alg, abstol = 1e-18, reltol = 1e-18)
+                    problem(res, sol.u, nothing)
+                    broken = idx in broken_tests[alg] ? true : false
+                    @test norm(res)≤ϵ broken=broken
+                catch
+                    broken = idx in broken_tests[alg] ? true : false
+                    if broken
+                        @test false broken=true
+                    else
+                        @test 1 == 2
+                    end
+                end
             end
         end
     end
 end
 
 # NewtonRaphson
-@testset "NewtonRaphson test problem library" begin
+@testset "NewtonRaphson 23 Test Problems" begin
     alg_ops = (NewtonRaphson(),)
 
     # dictionary with indices of test problems where method does not converge to small residual
@@ -30,7 +39,7 @@ end
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
 
-@testset "TrustRegion test problem library" begin
+@testset "TrustRegion 23 Test Problems" begin
     alg_ops = (TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Simple),
         TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Fan),
         TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Hei),
@@ -50,7 +59,7 @@ end
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
 
-@testset "TrustRegion test problem library" begin
+@testset "LevenbergMarquardt 23 Test Problems" begin
     alg_ops = (LevenbergMarquardt(; linsolve = NormalCholeskyFactorization()),
         LevenbergMarquardt(; α_geodesic = 0.1, linsolve = NormalCholeskyFactorization()))
 
@@ -62,8 +71,7 @@ end
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
 
-# DFSane
-@testset "DFSane test problem library" begin
+@testset "DFSane 23 Test Problems" begin
     alg_ops = (DFSane(),)
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
@@ -71,3 +79,16 @@ end
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
+
+@testset "GeneralBroyden 23 Test Problems" begin
+    alg_ops = (GeneralBroyden(),
+        GeneralBroyden(; linesearch = LiFukushimaLineSearch(; beta = 0.1)),
+        GeneralBroyden(; linesearch = BackTracking()))
+
+    broken_tests = Dict(alg => Int[] for alg in alg_ops)
+    broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21]
+    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22]
+    broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21]
+
+    test_on_library(problems, dicts, alg_ops, broken_tests)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index f20417785..22ba9ab25 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -664,3 +664,93 @@ end
         @test all(abs.(newton_fails(sol.u, p)) .< 1e-10)
     end
 end
+
+# --- GeneralBroyden tests ---
+
+@testset "GeneralBroyden" begin
+    function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{false}(f, u0, p)
+        return solve(prob, GeneralBroyden(; linesearch), abstol = 1e-9)
+    end
+
+    function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{true}(f, u0, p)
+        return solve(prob, GeneralBroyden(; linesearch), abstol = 1e-9)
+    end
+
+    @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(),
+            StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente(),
+            LiFukushimaLineSearch()),
+        ad in (AutoFiniteDiff(), AutoZygote())
+
+        linesearch = LineSearch(; method = lsmethod, autodiff = ad)
+        u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0)
+
+        @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s
+            sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0),
+                GeneralBroyden(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) < 200
+        end
+
+        @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],)
+            ad isa AutoZygote && continue
+            sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0),
+                GeneralBroyden(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) ≤ 64
+        end
+    end
+
+    @testset "[OOP] [Immutable AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p)
+                res_true = sqrt(p)
+                all(res.u .≈ res_true)
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f,
+                @SVector[1.0, 1.0], p).u[end], p) ≈ 1 / (2 * sqrt(p))
+        end
+    end
+
+    @testset "[OOP] [Scalar AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, 1.0, p)
+                res_true = sqrt(p)
+                res.u ≈ res_true
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u,
+                p) ≈ 1 / (2 * sqrt(p))
+        end
+    end
+
+    t = (p) -> [sqrt(p[2] / p[1])]
+    p = [0.9, 50.0]
+    @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1])
+    @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u],
+        p) ≈ ForwardDiff.jacobian(t, p)
+
+    # Iterator interface
+    function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip}
+        probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin])
+        cache = init(probN, GeneralBroyden(); maxiters = 100, abstol = 1e-10)
+        sols = zeros(length(p_range))
+        for (i, p) in enumerate(p_range)
+            reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p)
+            sol = solve!(cache)
+            sols[i] = iip ? sol.u[1] : sol.u
+        end
+        return sols
+    end
+    p = range(0.01, 2, length = 200)
+    @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p)
+    @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p)
+end

From 4c0f62872800fd7d4fd1c0f6167835f786d86ace Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 18:27:35 -0400
Subject: [PATCH 4/9] Add in documentation

---
 docs/pages.jl                              |  3 ++-
 docs/src/solvers/LineSearch.md             | 14 ++++++++++++++
 docs/src/solvers/NonlinearSystemSolvers.md |  2 ++
 3 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/solvers/LineSearch.md

diff --git a/docs/pages.jl b/docs/pages.jl
index 93ad7b391..f1b239bc9 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -17,7 +17,8 @@ pages = ["index.md",
     "Solver Summaries and Recommendations" => Any["solvers/NonlinearSystemSolvers.md",
         "solvers/BracketingSolvers.md",
         "solvers/SteadyStateSolvers.md",
-        "solvers/NonlinearLeastSquaresSolvers.md"],
+        "solvers/NonlinearLeastSquaresSolvers.md",
+        "solvers/LineSearch.md"],
     "Detailed Solver APIs" => Any["api/nonlinearsolve.md",
         "api/simplenonlinearsolve.md",
         "api/minpack.md",
diff --git a/docs/src/solvers/LineSearch.md b/docs/src/solvers/LineSearch.md
new file mode 100644
index 000000000..5d09301e2
--- /dev/null
+++ b/docs/src/solvers/LineSearch.md
@@ -0,0 +1,14 @@
+# [Line Search](@id linesearch)
+
+A convenience wrapper over `LineSearches.jl` and some native Line Search methods, powered
+internally with fast automatic differentiation.
+
+```@docs
+LineSearch
+```
+
+## Native Line Search Methods
+
+```@docs
+LiFukushimaLineSearch
+```
diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md
index de2d89a4e..a39ab597e 100644
--- a/docs/src/solvers/NonlinearSystemSolvers.md
+++ b/docs/src/solvers/NonlinearSystemSolvers.md
@@ -65,6 +65,8 @@ features, but have a bit of overhead on very small problems.
   - `FastShortcutNonlinearPolyalg()`: The default method. A polyalgorithm that mixes fast methods
     with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing
     robustnes on the hard problems.
+  - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and
+    Automatic Jacobian Resetting. This is a fast method but unstable for most problems!
 
 ### SimpleNonlinearSolve.jl
 

From 1e4cfde6d7aac4439f77893ce11668df2b57cc69 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 18:41:50 -0400
Subject: [PATCH 5/9] Broyden supports matrices

---
 src/broyden.jl          | 18 +++++++++---------
 src/levenberg.jl        |  8 +++++---
 src/pseudotransient.jl  | 15 +++------------
 src/raphson.jl          |  2 +-
 src/trustRegion.jl      |  3 ++-
 src/utils.jl            |  6 +++---
 test/matrix_resizing.jl | 12 +++++++-----
 7 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/src/broyden.jl b/src/broyden.jl
index 121a2ca71..1f8bc73bc 100644
--- a/src/broyden.jl
+++ b/src/broyden.jl
@@ -58,7 +58,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
     fu = evaluate_f(prob, u)
     J⁻¹ = __init_identity_jacobian(u, fu)
     return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu),
-        zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets,
+        zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets,
         maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
         init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
@@ -67,7 +67,7 @@ function perform_step!(cache::GeneralBroydenCache{true})
     @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache
     T = eltype(u)
 
-    mul!(du, J⁻¹, -fu)
+    mul!(_vec(du), J⁻¹, -_vec(fu))
     α = perform_linesearch!(cache.lscache, u, du)
     axpy!(α, du, u)
     f(fu2, u, p)
@@ -85,10 +85,10 @@ function perform_step!(cache::GeneralBroydenCache{true})
         J⁻¹[diagind(J⁻¹)] .= T(1)
         cache.resets += 1
     else
-        mul!(J⁻¹df, J⁻¹, dfu)
-        mul!(J⁻¹₂, du', J⁻¹)
+        mul!(_vec(J⁻¹df), J⁻¹, _vec(dfu))
+        mul!(J⁻¹₂, _vec(du)', J⁻¹)
         du .= (du .- J⁻¹df) ./ (dot(du, J⁻¹df) .+ T(1e-5))
-        mul!(J⁻¹, reshape(du, :, 1), J⁻¹₂, 1, 1)
+        mul!(J⁻¹, _vec(du), J⁻¹₂, 1, 1)
     end
     fu .= fu2
 
@@ -99,7 +99,7 @@ function perform_step!(cache::GeneralBroydenCache{false})
     @unpack f, p = cache
     T = eltype(cache.u)
 
-    cache.du = cache.J⁻¹ * -cache.fu
+    cache.du = _restructure(cache.du, cache.J⁻¹ * -_vec(cache.fu))
     α = perform_linesearch!(cache.lscache, cache.u, cache.du)
     cache.u = cache.u .+ α * cache.du
     cache.fu2 = f(cache.u, p)
@@ -119,10 +119,10 @@ function perform_step!(cache::GeneralBroydenCache{false})
         cache.J⁻¹ = J⁻¹
         cache.resets += 1
     else
-        cache.J⁻¹df = cache.J⁻¹ * cache.dfu
-        cache.J⁻¹₂ = cache.du' * cache.J⁻¹
+        cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu))
+        cache.J⁻¹₂ = _vec(cache.du)' * cache.J⁻¹
         cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5))
-        cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂
+        cache.J⁻¹ = cache.J⁻¹ .+ _vec(cache.du) * cache.J⁻¹₂
     end
     cache.fu = cache.fu2
 
diff --git a/src/levenberg.jl b/src/levenberg.jl
index 016a12f31..fbfcc6a9a 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -301,7 +301,9 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
     else
         linres = dolinsolve(alg.precs, linsolve;
             b = _mutable(_vec(J' *  #((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))),
-                         _vec(((2 / h) .* ((_vec(f(u .+ h .* _restructure(u,v), p)) .- _vec(fu1)) ./ h .- J * _vec(v)))))),
+                              _vec(((2 / h) .*
+                                    ((_vec(f(u .+ h .* _restructure(u, v), p)) .-
+                                      _vec(fu1)) ./ h .- J * _vec(v)))))),
             linu = _vec(cache.a), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
@@ -311,7 +313,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)
     if 2 * norm(cache.a) ≤ α_geodesic * norm_v
-        cache.δ = _restructure(cache.δ,_vec(v) .+ _vec(cache.a) ./ 2)
+        cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2)
         @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
         fu_new = f(u .+ δ, p)
         cache.stats.nf += 1
@@ -327,7 +329,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
                 return nothing
             end
             cache.fu1 = fu_new
-            cache.v_old = _restructure(cache.v_old,v)
+            cache.v_old = _restructure(cache.v_old, v)
             cache.norm_v_old = norm_v
             cache.loss_old = loss
             cache.λ_factor = 1 / cache.damping_decrease_factor
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 8e70f2350..a041c258c 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -29,10 +29,6 @@ SIAM Journal on Scientific Computing,25, 553-569.](https://doi.org/10.1137/S1064
     [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
   - `alpha_initial` : the initial pseudo time step. it defaults to 1e-3. If it is small, 
     you are going to need more iterations to converge but it can be more stable.
-
-
-
-
 """
 @concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
     ad::AD
@@ -92,19 +88,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::PseudoTransi
     else
         fu1 = _mutable(f(u, p))
     end
-    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg,
-        f,
-        u,
-        p,
-        Val(iip);
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
     alpha = convert(eltype(u), alg.alpha_initial)
     res_norm = internalnorm(fu1)
 
     return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf,
-        linsolve, J,
-        jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
-        NLStats(1, 0, 0, 0, 0))
+        linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol,
+        prob, NLStats(1, 0, 0, 0, 0))
 end
 
 function perform_step!(cache::PseudoTransientCache{true})
diff --git a/src/raphson.jl b/src/raphson.jl
index a3d9bbc6d..db9e9e322 100644
--- a/src/raphson.jl
+++ b/src/raphson.jl
@@ -97,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true})
 
     # Line Search
     α = perform_linesearch!(cache.lscache, u, du)
-    axpy!(α, du, u)
+    axpy!(-α, du, u)
     f(cache.fu1, u, p)
 
     cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true)
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index f97b63637..8094dc09c 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -419,7 +419,8 @@ function trust_region_step!(cache::TrustRegionCache)
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2)
+    cache.r = -(loss - cache.loss_new) /
+              (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2)
     @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
diff --git a/src/utils.jl b/src/utils.jl
index d33dc292d..7c6413937 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -74,8 +74,8 @@ end
 @inline _vec(v::Number) = v
 @inline _vec(v::AbstractVector) = v
 
-@inline _restructure(y,x) = restructure(y,x)
-@inline _restructure(y::Number,x::Number) = x
+@inline _restructure(y, x) = restructure(y, x)
+@inline _restructure(y::Number, x::Number) = x
 
 DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing
 
@@ -220,4 +220,4 @@ end
 function __init_identity_jacobian(u::StaticArray, fu)
     return convert(MArray{Tuple{length(fu), length(u)}},
         Matrix{eltype(u)}(I, length(fu), length(u)))
-end
\ No newline at end of file
+end
diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl
index f8ad423e5..1aa60fa05 100644
--- a/test/matrix_resizing.jl
+++ b/test/matrix_resizing.jl
@@ -1,21 +1,23 @@
 using NonlinearSolve, Test
 
 ff(u, p) = u .* u .- p
-u0 = rand(2,2)
+u0 = rand(2, 2)
 p = 2.0
 vecprob = NonlinearProblem(ff, vec(u0), p)
 prob = NonlinearProblem(ff, u0, p)
 
-for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg())
+for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end
 
 fiip(du, u, p) = (du .= u .* u .- p)
-u0 = rand(2,2)
+u0 = rand(2, 2)
 p = 2.0
 vecprob = NonlinearProblem(fiip, vec(u0), p)
 prob = NonlinearProblem(fiip, u0, p)
 
-for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg())
+for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
-end
\ No newline at end of file
+end

From 00852f02f04549357f3b0976338866fa0c3aa157 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Fri, 20 Oct 2023 20:30:30 -0400
Subject: [PATCH 6/9] Fast General Klement Implementation

---
 docs/src/solvers/NonlinearSystemSolvers.md |   2 +
 src/NonlinearSolve.jl                      |  13 +-
 src/broyden.jl                             |   5 +-
 src/default.jl                             |   4 +-
 src/klement.jl                             | 190 +++++++++++++++++++++
 test/23_test_problems.jl                   |  11 ++
 test/basictests.jl                         |  89 ++++++++++
 7 files changed, 302 insertions(+), 12 deletions(-)

diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md
index a39ab597e..89cde3513 100644
--- a/docs/src/solvers/NonlinearSystemSolvers.md
+++ b/docs/src/solvers/NonlinearSystemSolvers.md
@@ -67,6 +67,8 @@ features, but have a bit of overhead on very small problems.
     robustnes on the hard problems.
   - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and
     Automatic Jacobian Resetting. This is a fast method but unstable for most problems!
+  - `GeneralKlement()`: Generalization of Klement's Quasi-Newton Method with Line Search and
+    Automatic Jacobian Resetting. This is a fast method but unstable for most problems!
 
 ### SimpleNonlinearSolve.jl
 
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 5e21cdf44..438cea870 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -52,10 +52,13 @@ function SciMLBase.solve!(cache::AbstractNonlinearSolveCache)
         cache.stats.nsteps += 1
     end
 
-    if cache.stats.nsteps == cache.maxiters
-        cache.retcode = ReturnCode.MaxIters
-    else
-        cache.retcode = ReturnCode.Success
+    # The solver might have set a different `retcode`
+    if cache.retcode == ReturnCode.Default
+        if cache.stats.nsteps == cache.maxiters
+            cache.retcode = ReturnCode.MaxIters
+        else
+            cache.retcode = ReturnCode.Success
+        end
     end
 
     return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, get_fu(cache);
@@ -85,7 +88,7 @@ import PrecompileTools
             prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
             precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(),
-                nothing)
+                PseudoTransient(), GeneralBroyden(), GeneralKlement(), nothing)
 
             for alg in precompile_algs
                 solve(prob, alg, abstol = T(1e-2))
diff --git a/src/broyden.jl b/src/broyden.jl
index 1f8bc73bc..6a767a8af 100644
--- a/src/broyden.jl
+++ b/src/broyden.jl
@@ -113,10 +113,7 @@ function perform_step!(cache::GeneralBroydenCache{false})
     cache.dfu = cache.fu2 .- cache.fu
     if cache.resets < cache.max_resets &&
        (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu))
-        J⁻¹ = similar(cache.J⁻¹)
-        fill!(J⁻¹, 0)
-        J⁻¹[diagind(J⁻¹)] .= T(1)
-        cache.J⁻¹ = J⁻¹
+        cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu)
         cache.resets += 1
     else
         cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu))
diff --git a/src/default.jl b/src/default.jl
index 76289cc1c..07effeecb 100644
--- a/src/default.jl
+++ b/src/default.jl
@@ -159,10 +159,8 @@ end
         ]
     else
         [
-            # FIXME: Broyden and Klement are type unstable
-            #        (upstream SimpleNonlinearSolve.jl issue)
-            !iip ? :(Klement()) : nothing, # Klement not yet implemented for IIP
             :(GeneralBroyden()),
+            :(GeneralKlement()),
             :(NewtonRaphson(; linsolve, precs, adkwargs...)),
             :(NewtonRaphson(; linsolve, precs, linesearch = BackTracking(), adkwargs...)),
             :(TrustRegion(; linsolve, precs, adkwargs...)),
diff --git a/src/klement.jl b/src/klement.jl
index 8b1378917..42cac5bbe 100644
--- a/src/klement.jl
+++ b/src/klement.jl
@@ -1 +1,191 @@
+@concrete struct GeneralKlement <: AbstractNewtonAlgorithm{false, Nothing}
+    max_resets::Int
+    linsolve
+    precs
+    linesearch
+    singular_tolerance
+end
 
+function GeneralKlement(; max_resets::Int = 5, linsolve = nothing,
+    linesearch = LineSearch(), precs = DEFAULT_PRECS, singular_tolerance = nothing)
+    linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
+    return GeneralKlement(max_resets, linsolve, precs, linesearch, singular_tolerance)
+end
+
+@concrete mutable struct GeneralKlementCache{iip} <: AbstractNonlinearSolveCache{iip}
+    f
+    alg
+    u
+    fu
+    fu2
+    du
+    p
+    linsolve
+    J
+    J_cache
+    J_cache2
+    Jᵀ²du
+    Jdu
+    resets
+    singular_tolerance
+    force_stop
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+    lscache
+end
+
+get_fu(cache::GeneralKlementCache) = cache.fu
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralKlement, args...;
+    alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    fu = evaluate_f(prob, u)
+    J = __init_identity_jacobian(u, fu)
+
+    if u isa Number
+        linsolve = nothing
+    else
+        weight = similar(u)
+        recursivefill!(weight, true)
+        Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
+                nothing)..., weight)
+        linprob = LinearProblem(J, _vec(fu); u0 = _vec(fu))
+        linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr,
+            linsolve_kwargs...)
+    end
+
+    singular_tolerance = alg.singular_tolerance === nothing ? inv(sqrt(eps(eltype(u)))) :
+                         eltype(u)(alg.singular_tolerance)
+
+    return GeneralKlementCache{iip}(f, alg, u, fu, zero(fu), _mutable_zero(u), p, linsolve,
+        J, zero(J), zero(J), zero(fu), zero(fu), 0, singular_tolerance, false,
+        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
+end
+
+function perform_step!(cache::GeneralKlementCache{true})
+    @unpack u, fu, f, p, alg, J, linsolve, du = cache
+    T = eltype(J)
+
+    # FIXME: How can we do this faster?
+    if cond(J) > cache.singular_tolerance
+        if cache.resets == alg.max_resets
+            cache.force_stop = true
+            cache.retcode = ReturnCode.Unstable
+            return nothing
+        end
+        fill!(J, zero(T))
+        J[diagind(J)] .= T(1)
+        cache.resets += 1
+    end
+
+    # u = u - J \ fu
+    linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), linu = _vec(du),
+        p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+
+    # Line Search
+    α = perform_linesearch!(cache.lscache, u, du)
+    axpy!(α, du, u)
+    f(cache.fu2, u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+
+    cache.force_stop && return nothing
+
+    # Update the Jacobian
+    cache.J_cache .= cache.J' .^ 2
+    cache.Jdu .= _vec(du) .^ 2
+    mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu)
+    mul!(cache.Jdu, J, _vec(du))
+    cache.fu .= cache.fu2 .- cache.fu
+    cache.fu .= (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    mul!(cache.J_cache, _vec(cache.fu), _vec(du)')
+    cache.J_cache .*= J
+    mul!(cache.J_cache2, cache.J_cache, J)
+    J .+= cache.J_cache2
+
+    cache.fu .= cache.fu2
+
+    return nothing
+end
+
+function perform_step!(cache::GeneralKlementCache{false})
+    @unpack fu, f, p, alg, J, linsolve = cache
+    T = eltype(J)
+
+    # FIXME: How can we do this faster?
+    if cond(J) > cache.singular_tolerance
+        if cache.resets == alg.max_resets
+            cache.force_stop = true
+            cache.retcode = ReturnCode.Unstable
+            return nothing
+        end
+        cache.J = __init_identity_jacobian(u, fu)
+        cache.resets += 1
+    end
+
+    # u = u - J \ fu
+    if linsolve === nothing
+        cache.du = -fu / cache.J
+    else
+        linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu),
+            linu = _vec(cache.du), p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+    end
+
+    # Line Search
+    α = perform_linesearch!(cache.lscache, cache.u, cache.du)
+    cache.u = @. cache.u + α * cache.du  # `u` might not support mutation
+    cache.fu2 = f(cache.u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+
+    cache.force_stop && return nothing
+
+    # Update the Jacobian
+    cache.J_cache = cache.J' .^ 2
+    cache.Jdu = _vec(cache.du) .^ 2
+    cache.Jᵀ²du = cache.J_cache * cache.Jdu
+    cache.Jdu = J * _vec(cache.du)
+    cache.fu = cache.fu2 .- cache.fu
+    cache.fu = (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J
+    cache.J = J .+ cache.J_cache
+
+    cache.fu = cache.fu2
+
+    return nothing
+end
+
+function SciMLBase.reinit!(cache::GeneralKlementCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu = cache.f(cache.u, p)
+    end
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 8c739c085..39ae155a3 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -92,3 +92,14 @@ end
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
+
+@testset "GeneralKlement 23 Test Problems" begin
+    alg_ops = (GeneralKlement(),
+        GeneralKlement(; linesearch = BackTracking()))
+
+    broken_tests = Dict(alg => Int[] for alg in alg_ops)
+    broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 22]
+
+    test_on_library(problems, dicts, alg_ops, broken_tests)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index 22ba9ab25..a39d25656 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -754,3 +754,92 @@ end
     @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p)
     @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p)
 end
+
+# --- GeneralKlement tests ---
+
+@testset "GeneralKlement" begin
+    function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{false}(f, u0, p)
+        return solve(prob, GeneralKlement(; linesearch), abstol = 1e-9)
+    end
+
+    function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{true}(f, u0, p)
+        return solve(prob, GeneralKlement(; linesearch), abstol = 1e-9)
+    end
+
+    @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(),
+            StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente()),
+        ad in (AutoFiniteDiff(), AutoZygote())
+
+        linesearch = LineSearch(; method = lsmethod, autodiff = ad)
+        u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0)
+
+        @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s
+            sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0),
+                GeneralKlement(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) < 200
+        end
+
+        @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],)
+            ad isa AutoZygote && continue
+            sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0),
+                GeneralKlement(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) ≤ 64
+        end
+    end
+
+    @testset "[OOP] [Immutable AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p)
+                res_true = sqrt(p)
+                all(res.u .≈ res_true)
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f,
+                @SVector[1.0, 1.0], p).u[end], p) ≈ 1 / (2 * sqrt(p))
+        end
+    end
+
+    @testset "[OOP] [Scalar AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, 1.0, p)
+                res_true = sqrt(p)
+                res.u ≈ res_true
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u,
+                p) ≈ 1 / (2 * sqrt(p))
+        end
+    end
+
+    t = (p) -> [sqrt(p[2] / p[1])]
+    p = [0.9, 50.0]
+    @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1])
+    @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u],
+        p) ≈ ForwardDiff.jacobian(t, p)
+
+    # Iterator interface
+    function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip}
+        probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin])
+        cache = init(probN, GeneralKlement(); maxiters = 100, abstol = 1e-10)
+        sols = zeros(length(p_range))
+        for (i, p) in enumerate(p_range)
+            reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p)
+            sol = solve!(cache)
+            sols[i] = iip ? sol.u[1] : sol.u
+        end
+        return sols
+    end
+    p = range(0.01, 2, length = 200)
+    @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p)
+    @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p)
+end

From ee5e5b83849e5aaf79323fa4144879c8549a8e63 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sat, 21 Oct 2023 22:17:07 -0400
Subject: [PATCH 7/9] Reuse LU Factorization to check for singular matrix

---
 docs/src/api/nonlinearsolve.md |  3 ++
 src/NonlinearSolve.jl          |  2 +-
 src/dfsane.jl                  |  1 -
 src/klement.jl                 | 66 +++++++++++++++++++++++-----------
 src/trustRegion.jl             |  1 -
 src/utils.jl                   | 25 +++++++++++++
 test/23_test_problems.jl       | 10 +++---
 test/matrix_resizing.jl        |  4 +--
 8 files changed, 83 insertions(+), 29 deletions(-)

diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md
index e4554f3e6..5da204f07 100644
--- a/docs/src/api/nonlinearsolve.md
+++ b/docs/src/api/nonlinearsolve.md
@@ -8,6 +8,9 @@ These are the native solvers of NonlinearSolve.jl.
 NewtonRaphson
 TrustRegion
 PseudoTransient
+DFSane
+GeneralBroyden
+GeneralKlement
 ```
 
 ## Polyalgorithms
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 438cea870..8d3a8ca2b 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -9,7 +9,7 @@ import ArrayInterface: restructure
 import ForwardDiff
 
 import ADTypes: AbstractFiniteDifferencesMode
-import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable
+import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable, issingular
 import ConcreteStructs: @concrete
 import EnumX: @enumx
 import ForwardDiff: Dual
diff --git a/src/dfsane.jl b/src/dfsane.jl
index 13de5ff6a..f5bf69eca 100644
--- a/src/dfsane.jl
+++ b/src/dfsane.jl
@@ -43,7 +43,6 @@ See also the implementation in [SimpleNonlinearSolve.jl](https://github.com/SciM
 - `max_inner_iterations`: the maximum number of iterations allowed for the inner loop of the
   algorithm. Defaults to `1000`.
 """
-
 struct DFSane{T, F} <: AbstractNonlinearSolveAlgorithm
     σ_min::T
     σ_max::T
diff --git a/src/klement.jl b/src/klement.jl
index 42cac5bbe..e4d398445 100644
--- a/src/klement.jl
+++ b/src/klement.jl
@@ -1,15 +1,36 @@
+"""
+    GeneralKlement(; max_resets = 5, linsolve = nothing,
+                     linesearch = LineSearch(), precs = DEFAULT_PRECS)
+
+An implementation of `Klement` with line search, preconditioning and customizable linear
+solves.
+
+## Keyword Arguments
+
+  - `max_resets`: the maximum number of resets to perform. Defaults to `5`.
+  - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
+    which means that no line search is performed. Algorithms from `LineSearches.jl` can be
+    used here directly, and they will be converted to the correct `LineSearch`.
+"""
 @concrete struct GeneralKlement <: AbstractNewtonAlgorithm{false, Nothing}
     max_resets::Int
     linsolve
     precs
     linesearch
-    singular_tolerance
 end
 
 function GeneralKlement(; max_resets::Int = 5, linsolve = nothing,
-    linesearch = LineSearch(), precs = DEFAULT_PRECS, singular_tolerance = nothing)
+    linesearch = LineSearch(), precs = DEFAULT_PRECS)
     linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
-    return GeneralKlement(max_resets, linsolve, precs, linesearch, singular_tolerance)
+    return GeneralKlement(max_resets, linsolve, precs, linesearch)
 end
 
 @concrete mutable struct GeneralKlementCache{iip} <: AbstractNonlinearSolveCache{iip}
@@ -27,7 +48,6 @@ end
     Jᵀ²du
     Jdu
     resets
-    singular_tolerance
     force_stop
     maxiters::Int
     internalnorm
@@ -51,20 +71,20 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralKlemen
     if u isa Number
         linsolve = nothing
     else
+        # For General Julia Arrays default to LU Factorization
+        linsolve_alg = alg.linsolve === nothing && u isa Array ? LUFactorization() :
+                       nothing
         weight = similar(u)
         recursivefill!(weight, true)
         Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
                 nothing)..., weight)
         linprob = LinearProblem(J, _vec(fu); u0 = _vec(fu))
-        linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr,
+        linsolve = init(linprob, linsolve_alg; alias_A = true, alias_b = true, Pl, Pr,
             linsolve_kwargs...)
     end
 
-    singular_tolerance = alg.singular_tolerance === nothing ? inv(sqrt(eps(eltype(u)))) :
-                         eltype(u)(alg.singular_tolerance)
-
     return GeneralKlementCache{iip}(f, alg, u, fu, zero(fu), _mutable_zero(u), p, linsolve,
-        J, zero(J), zero(J), zero(fu), zero(fu), 0, singular_tolerance, false,
+        J, zero(J), zero(J), _vec(zero(fu)), _vec(zero(fu)), 0, false,
         maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
         init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
@@ -73,21 +93,23 @@ function perform_step!(cache::GeneralKlementCache{true})
     @unpack u, fu, f, p, alg, J, linsolve, du = cache
     T = eltype(J)
 
-    # FIXME: How can we do this faster?
-    if cond(J) > cache.singular_tolerance
+    singular, fact_done = _try_factorize_and_check_singular!(linsolve, J)
+
+    if singular
         if cache.resets == alg.max_resets
             cache.force_stop = true
             cache.retcode = ReturnCode.Unstable
             return nothing
         end
+        fact_done = false
         fill!(J, zero(T))
         J[diagind(J)] .= T(1)
         cache.resets += 1
     end
 
     # u = u - J \ fu
-    linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), linu = _vec(du),
-        p, reltol = cache.abstol)
+    linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
+        b = -_vec(fu), linu = _vec(du), p, reltol = cache.abstol)
     cache.linsolve = linres.cache
 
     # Line Search
@@ -108,7 +130,8 @@ function perform_step!(cache::GeneralKlementCache{true})
     mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu)
     mul!(cache.Jdu, J, _vec(du))
     cache.fu .= cache.fu2 .- cache.fu
-    cache.fu .= (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    cache.fu .= _restructure(cache.fu,
+        (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T)))
     mul!(cache.J_cache, _vec(cache.fu), _vec(du)')
     cache.J_cache .*= J
     mul!(cache.J_cache2, cache.J_cache, J)
@@ -123,14 +146,16 @@ function perform_step!(cache::GeneralKlementCache{false})
     @unpack fu, f, p, alg, J, linsolve = cache
     T = eltype(J)
 
-    # FIXME: How can we do this faster?
-    if cond(J) > cache.singular_tolerance
+    singular, fact_done = _try_factorize_and_check_singular!(linsolve, J)
+
+    if singular
         if cache.resets == alg.max_resets
             cache.force_stop = true
             cache.retcode = ReturnCode.Unstable
             return nothing
         end
-        cache.J = __init_identity_jacobian(u, fu)
+        fact_done = false
+        cache.J = __init_identity_jacobian(cache.u, fu)
         cache.resets += 1
     end
 
@@ -138,8 +163,8 @@ function perform_step!(cache::GeneralKlementCache{false})
     if linsolve === nothing
         cache.du = -fu / cache.J
     else
-        linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu),
-            linu = _vec(cache.du), p, reltol = cache.abstol)
+        linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
+            b = -_vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
 
@@ -161,7 +186,8 @@ function perform_step!(cache::GeneralKlementCache{false})
     cache.Jᵀ²du = cache.J_cache * cache.Jdu
     cache.Jdu = J * _vec(cache.du)
     cache.fu = cache.fu2 .- cache.fu
-    cache.fu = (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    cache.fu = _restructure(cache.fu,
+        (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T)))
     cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J
     cache.J = J .+ cache.J_cache
 
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 8094dc09c..8e7118cc6 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -81,7 +81,6 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
 end
 
 """
-```julia
     TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
         radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple,
         max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
diff --git a/src/utils.jl b/src/utils.jl
index 7c6413937..3369e5c01 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -221,3 +221,28 @@ function __init_identity_jacobian(u::StaticArray, fu)
     return convert(MArray{Tuple{length(fu), length(u)}},
         Matrix{eltype(u)}(I, length(fu), length(u)))
 end
+
+# Check Singular Matrix
+_issingular(x::Number) = iszero(x)
+@generated function _issingular(x::T) where {T}
+    hasmethod(issingular, Tuple{T}) && return :(issingular(x))
+    return :(__issingular(x))
+end
+__issingular(x::AbstractMatrix{T}) where {T} = cond(x) > inv(sqrt(eps(T)))
+__issingular(x) = false ## If SciMLOperator and such
+
+# If factorization is LU then perform that and update the linsolve cache
+# else check if the matrix is singular
+function _try_factorize_and_check_singular!(linsolve, X)
+    if linsolve.cacheval isa LU
+        # LU Factorization was used
+        linsolve.A = X
+        linsolve.cacheval = LinearSolve.do_factorization(linsolve.alg, X, linsolve.b,
+            linsolve.u)
+        linsolve.isfresh = false
+
+        return !issuccess(linsolve.cacheval), true
+    end
+    return _issingular(X), false
+end
+_try_factorize_and_check_singular!(::Nothing, x) = _issingular(x), false
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 39ae155a3..25bb8a68a 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -87,7 +87,7 @@ end
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
     broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21]
-    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 16, 21, 22]
     broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
@@ -95,11 +95,13 @@ end
 
 @testset "GeneralKlement 23 Test Problems" begin
     alg_ops = (GeneralKlement(),
-        GeneralKlement(; linesearch = BackTracking()))
+        GeneralKlement(; linesearch = BackTracking()),
+        GeneralKlement(; linesearch = HagerZhang()))
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 13, 22]
-    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 22]
+    broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
+    broken_tests[alg_ops[3]] = [1, 2, 5, 6, 11, 12, 13, 22]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl
index 1aa60fa05..9c16fe0fe 100644
--- a/test/matrix_resizing.jl
+++ b/test/matrix_resizing.jl
@@ -7,7 +7,7 @@ vecprob = NonlinearProblem(ff, vec(u0), p)
 prob = NonlinearProblem(ff, u0, p)
 
 for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
-    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end
 
@@ -18,6 +18,6 @@ vecprob = NonlinearProblem(fiip, vec(u0), p)
 prob = NonlinearProblem(fiip, u0, p)
 
 for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
-    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end

From 611477c83c41f4d5cd3ea4b075203126be2792fb Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Mon, 23 Oct 2023 01:10:03 -0400
Subject: [PATCH 8/9] Add Limited Memory Broyden

---
 .github/workflows/CI.yml |   3 +-
 src/NonlinearSolve.jl    |   3 +-
 src/broyden.jl           |  37 ++++--
 src/lbroyden.jl          | 260 +++++++++++++++++++++++++++++++++++++++
 src/utils.jl             |  12 ++
 test/23_test_problems.jl |  13 +-
 test/basictests.jl       |  90 ++++++++++++++
 test/matrix_resizing.jl  |   3 +-
 8 files changed, 403 insertions(+), 18 deletions(-)
 create mode 100644 src/lbroyden.jl

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index d3232532a..0d3ee1419 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -16,7 +16,8 @@ jobs:
     strategy:
       matrix:
         group:
-          - All
+          - Core
+          - 23TestProblems
         version:
           - '1'
     steps:
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 8d3a8ca2b..2b26b3721 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -76,6 +76,7 @@ include("dfsane.jl")
 include("pseudotransient.jl")
 include("broyden.jl")
 include("klement.jl")
+include("lbroyden.jl")
 include("jacobian.jl")
 include("ad.jl")
 include("default.jl")
@@ -106,7 +107,7 @@ end
 export RadiusUpdateSchemes
 
 export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient,
-    GeneralBroyden, GeneralKlement
+    GeneralBroyden, GeneralKlement, LimitedMemoryBroyden
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
 export RobustMultiNewton, FastShortcutNonlinearPolyalg
 
diff --git a/src/broyden.jl b/src/broyden.jl
index 6a767a8af..3232a2d9f 100644
--- a/src/broyden.jl
+++ b/src/broyden.jl
@@ -1,13 +1,14 @@
 # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
 """
-    GeneralBroyden(max_resets, linesearch)
-    GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
+    GeneralBroyden(; max_resets = 3, linesearch = LineSearch(), reset_tolerance = nothing)
 
 An implementation of `Broyden` with reseting and line search.
 
 ## Arguments
 
   - `max_resets`: the maximum number of resets to perform. Defaults to `3`.
+  - `reset_tolerance`: the tolerance for the reset check. Defaults to
+    `sqrt(eps(eltype(u)))`.
   - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
     which means that no line search is performed. Algorithms from `LineSearches.jl` can be
     used here directly, and they will be converted to the correct `LineSearch`. It is
@@ -16,12 +17,14 @@ An implementation of `Broyden` with reseting and line search.
 """
 @concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
     max_resets::Int
+    reset_tolerance
     linesearch
 end
 
-function GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
+function GeneralBroyden(; max_resets = 3, linesearch = LineSearch(),
+    reset_tolerance = nothing)
     linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
-    return GeneralBroyden(max_resets, linesearch)
+    return GeneralBroyden(max_resets, reset_tolerance, linesearch)
 end
 
 @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
@@ -43,6 +46,8 @@ end
     internalnorm
     retcode::ReturnCode.T
     abstol
+    reset_tolerance
+    reset_check
     prob
     stats::NLStats
     lscache
@@ -57,9 +62,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
     u = alias_u0 ? u0 : deepcopy(u0)
     fu = evaluate_f(prob, u)
     J⁻¹ = __init_identity_jacobian(u, fu)
+    reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) :
+                      alg.reset_tolerance
+    reset_check = x -> abs(x) ≤ reset_tolerance
     return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu),
         zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets,
-        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
+        maxiters, internalnorm, ReturnCode.Default, abstol, reset_tolerance,
+        reset_check, prob, NLStats(1, 0, 0, 0, 0),
         init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
 
@@ -79,8 +88,13 @@ function perform_step!(cache::GeneralBroydenCache{true})
 
     # Update the inverse jacobian
     dfu .= fu2 .- fu
-    if cache.resets < cache.max_resets &&
-       (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu))
+
+    if all(cache.reset_check, du) || all(cache.reset_check, dfu)
+        if cache.resets ≥ cache.max_resets
+            cache.retcode = ReturnCode.Unstable
+            cache.force_stop = true
+            return nothing
+        end
         fill!(J⁻¹, 0)
         J⁻¹[diagind(J⁻¹)] .= T(1)
         cache.resets += 1
@@ -111,8 +125,12 @@ function perform_step!(cache::GeneralBroydenCache{false})
 
     # Update the inverse jacobian
     cache.dfu = cache.fu2 .- cache.fu
-    if cache.resets < cache.max_resets &&
-       (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu))
+    if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu)
+        if cache.resets ≥ cache.max_resets
+            cache.retcode = ReturnCode.Unstable
+            cache.force_stop = true
+            return nothing
+        end
         cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu)
         cache.resets += 1
     else
@@ -141,6 +159,7 @@ function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = ca
     cache.maxiters = maxiters
     cache.stats.nf = 1
     cache.stats.nsteps = 1
+    cache.resets = 0
     cache.force_stop = false
     cache.retcode = ReturnCode.Default
     return cache
diff --git a/src/lbroyden.jl b/src/lbroyden.jl
new file mode 100644
index 000000000..458db8104
--- /dev/null
+++ b/src/lbroyden.jl
@@ -0,0 +1,260 @@
+"""
+    LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(),
+        threshold::Int = 10, reset_tolerance = nothing)
+
+An implementation of `LimitedMemoryBroyden` with reseting and line search.
+
+## Arguments
+
+  - `max_resets`: the maximum number of resets to perform. Defaults to `3`.
+  - `reset_tolerance`: the tolerance for the reset check. Defaults to
+    `sqrt(eps(eltype(u)))`.
+  - `threshold`: the number of vectors to store in the low rank approximation. Defaults
+    to `10`.
+  - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
+    which means that no line search is performed. Algorithms from `LineSearches.jl` can be
+    used here directly, and they will be converted to the correct `LineSearch`. It is
+    recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch
+    specifically designed for Broyden's method.
+"""
+@concrete struct LimitedMemoryBroyden <: AbstractNewtonAlgorithm{false, Nothing}
+    max_resets::Int
+    threshold::Int
+    linesearch
+    reset_tolerance
+end
+
+function LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(),
+    threshold::Int = 10, reset_tolerance = nothing)
+    linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
+    return LimitedMemoryBroyden(max_resets, threshold, linesearch, reset_tolerance)
+end
+
+@concrete mutable struct LimitedMemoryBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
+    f
+    alg
+    u
+    du
+    fu
+    fu2
+    dfu
+    p
+    U
+    Vᵀ
+    Ux
+    xᵀVᵀ
+    u_cache
+    vᵀ_cache
+    force_stop::Bool
+    resets::Int
+    iterations_since_reset::Int
+    max_resets::Int
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    reset_tolerance
+    reset_check
+    prob
+    stats::NLStats
+    lscache
+end
+
+get_fu(cache::LimitedMemoryBroydenCache) = cache.fu
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemoryBroyden,
+    args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    if u isa Number
+        # If u is a number then we simply use Broyden
+        return SciMLBase.__init(prob,
+            GeneralBroyden(; alg.max_resets, alg.reset_tolerance,
+                alg.linesearch), args...; alias_u0, maxiters, abstol, internalnorm, kwargs...)
+    end
+    fu = evaluate_f(prob, u)
+    threshold = min(alg.threshold, maxiters)
+    U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold)
+    du = -fu
+    reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) :
+                      alg.reset_tolerance
+    reset_check = x -> abs(x) ≤ reset_tolerance
+    return LimitedMemoryBroydenCache{iip}(f, alg, u, du, fu, zero(fu),
+        zero(fu), p, U, Vᵀ, similar(u, threshold), similar(u, 1, threshold),
+        zero(u), zero(u), false, 0, 0, alg.max_resets, maxiters, internalnorm,
+        ReturnCode.Default, abstol, reset_tolerance, reset_check, prob,
+        NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
+end
+
+function perform_step!(cache::LimitedMemoryBroydenCache{true})
+    @unpack f, p, du, u = cache
+    T = eltype(u)
+
+    α = perform_linesearch!(cache.lscache, u, du)
+    axpy!(α, du, u)
+    f(cache.fu2, u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+
+    cache.force_stop && return nothing
+
+    # Update the Inverse Jacobian Approximation
+    cache.dfu .= cache.fu2 .- cache.fu
+
+    # Only try to reset if we have enough iterations since last reset
+    if cache.iterations_since_reset > size(cache.U, 1) &&
+       (all(cache.reset_check, du) || all(cache.reset_check, cache.dfu))
+        if cache.resets ≥ cache.max_resets
+            cache.retcode = ReturnCode.Unstable
+            cache.force_stop = true
+            return nothing
+        end
+        cache.iterations_since_reset = 0
+        cache.resets += 1
+        cache.du .= -cache.fu
+    else
+        idx = min(cache.iterations_since_reset, size(cache.U, 1))
+        U_part = selectdim(cache.U, 1, 1:idx)
+        Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
+
+        __lbroyden_matvec!(_vec(cache.vᵀ_cache), cache.Ux, U_part, Vᵀ_part, _vec(cache.du))
+        __lbroyden_rmatvec!(_vec(cache.u_cache), cache.xᵀVᵀ, U_part, Vᵀ_part,
+            _vec(cache.dfu))
+        cache.u_cache .= (du .- cache.u_cache) ./
+                         (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5))
+
+        idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1))
+        selectdim(cache.U, 1, idx) .= _vec(cache.u_cache)
+        selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache)
+
+        idx = min(cache.iterations_since_reset + 1, size(cache.U, 1))
+        U_part = selectdim(cache.U, 1, 1:idx)
+        Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
+        __lbroyden_matvec!(_vec(cache.du), cache.Ux, U_part, Vᵀ_part, _vec(cache.fu2))
+        cache.du .*= -1
+        cache.iterations_since_reset += 1
+    end
+
+    cache.fu .= cache.fu2
+
+    return nothing
+end
+
+function perform_step!(cache::LimitedMemoryBroydenCache{false})
+    @unpack f, p = cache
+    T = eltype(cache.u)
+
+    α = perform_linesearch!(cache.lscache, cache.u, cache.du)
+    cache.u = cache.u .+ α * cache.du
+    cache.fu2 = f(cache.u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+
+    cache.force_stop && return nothing
+
+    # Update the Inverse Jacobian Approximation
+    cache.dfu .= cache.fu2 .- cache.fu
+
+    # Only try to reset if we have enough iterations since last reset
+    if cache.iterations_since_reset > size(cache.U, 1) &&
+       (all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu))
+        if cache.resets ≥ cache.max_resets
+            cache.retcode = ReturnCode.Unstable
+            cache.force_stop = true
+            return nothing
+        end
+        cache.iterations_since_reset = 0
+        cache.resets += 1
+        cache.du = -cache.fu
+    else
+        idx = min(cache.iterations_since_reset, size(cache.U, 1))
+        U_part = selectdim(cache.U, 1, 1:idx)
+        Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
+
+        cache.vᵀ_cache = _restructure(cache.vᵀ_cache,
+            __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.du)))
+        cache.u_cache = _restructure(cache.u_cache,
+            __lbroyden_rmatvec(U_part, Vᵀ_part, _vec(cache.dfu)))
+        cache.u_cache = (cache.du .- cache.u_cache) ./
+                        (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5))
+
+        idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1))
+        selectdim(cache.U, 1, idx) .= _vec(cache.u_cache)
+        selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache)
+
+        idx = min(cache.iterations_since_reset + 1, size(cache.U, 1))
+        U_part = selectdim(cache.U, 1, 1:idx)
+        Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
+        cache.du = _restructure(cache.du,
+            -__lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2)))
+        cache.iterations_since_reset += 1
+    end
+
+    cache.fu = cache.fu2
+
+    return nothing
+end
+
+function SciMLBase.reinit!(cache::LimitedMemoryBroydenCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu = cache.f(cache.u, p)
+    end
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.resets = 0
+    cache.iterations_since_reset = 0
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
+
+@views function __lbroyden_matvec!(y::AbstractVector, Ux::AbstractVector,
+    U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector)
+    # Computes Vᵀ × U × x
+    η = size(U, 1)
+    if η == 0
+        y .= x
+        return nothing
+    end
+    mul!(Ux[1:η], U, x)
+    mul!(y, Vᵀ[:, 1:η], Ux[1:η])
+    return nothing
+end
+
+@views function __lbroyden_matvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector)
+    # Computes Vᵀ × U × x
+    size(U, 1) == 0 && return x
+    return Vᵀ * (U * x)
+end
+
+@views function __lbroyden_rmatvec!(y::AbstractVector, xᵀVᵀ::AbstractMatrix,
+    U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector)
+    # Computes xᵀ × Vᵀ × U
+    η = size(U, 1)
+    if η == 0
+        y .= x
+        return nothing
+    end
+    mul!(xᵀVᵀ[:, 1:η], x', Vᵀ)
+    mul!(y', xᵀVᵀ[:, 1:η], U)
+    return nothing
+end
+
+@views function __lbroyden_rmatvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector)
+    # Computes xᵀ × Vᵀ × U
+    size(U, 1) == 0 && return x
+    return (x' * Vᵀ) * U
+end
diff --git a/src/utils.jl b/src/utils.jl
index 3369e5c01..c99b05bf4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -222,6 +222,18 @@ function __init_identity_jacobian(u::StaticArray, fu)
         Matrix{eltype(u)}(I, length(fu), length(u)))
 end
 
+function __init_low_rank_jacobian(u::StaticArray, fu, threshold::Int)
+    Vᵀ = convert(MArray{Tuple{length(u), threshold}},
+        zeros(eltype(u), length(u), threshold))
+    U = convert(MArray{Tuple{threshold, length(u)}}, zeros(eltype(u), threshold, length(u)))
+    return U, Vᵀ
+end
+function __init_low_rank_jacobian(u, fu, threshold::Int)
+    Vᵀ = convert(parameterless_type(_mutable(u)), zeros(eltype(u), length(u), threshold))
+    U = convert(parameterless_type(_mutable(u)), zeros(eltype(u), threshold, length(u)))
+    return U, Vᵀ
+end
+
 # Check Singular Matrix
 _issingular(x::Number) = iszero(x)
 @generated function _issingular(x::T) where {T}
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 25bb8a68a..2bdbecffb 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -28,7 +28,6 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4)
     end
 end
 
-# NewtonRaphson
 @testset "NewtonRaphson 23 Test Problems" begin
     alg_ops = (NewtonRaphson(),)
 
@@ -86,9 +85,9 @@ end
         GeneralBroyden(; linesearch = BackTracking()))
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21]
-    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 16, 21, 22]
-    broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21]
+    broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 11, 12, 13, 14, 21]
+    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22]
+    broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 11, 12, 13, 14, 21]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
@@ -100,8 +99,10 @@ end
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
     broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
-    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
-    broken_tests[alg_ops[3]] = [1, 2, 5, 6, 11, 12, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 13, 22]
+    broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 22]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
+
+# NOTE: Not adding LimitedMemoryBroyden here since it fails on most of the preoblems
diff --git a/test/basictests.jl b/test/basictests.jl
index a39d25656..681d506de 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -843,3 +843,93 @@ end
     @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p)
     @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p)
 end
+
+# --- LimitedMemoryBroyden tests ---
+
+@testset "LimitedMemoryBroyden" begin
+    function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{false}(f, u0, p)
+        return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9)
+    end
+
+    function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch())
+        prob = NonlinearProblem{true}(f, u0, p)
+        return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9)
+    end
+
+    @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(),
+            StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente(),
+            LiFukushimaLineSearch()),
+        ad in (AutoFiniteDiff(), AutoZygote())
+
+        linesearch = LineSearch(; method = lsmethod, autodiff = ad)
+        u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0)
+
+        @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s
+            sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0),
+                LimitedMemoryBroyden(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) < 200
+        end
+
+        @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],)
+            ad isa AutoZygote && continue
+            sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch)
+            @test SciMLBase.successful_retcode(sol)
+            @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
+
+            cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0),
+                LimitedMemoryBroyden(; linesearch), abstol = 1e-9)
+            @test (@ballocated solve!($cache)) ≤ 64
+        end
+    end
+
+    @testset "[OOP] [Immutable AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p)
+                res_true = sqrt(p)
+                all(((x, y),) -> isapprox(x, y; atol = 1e-3), zip(res.u, res_true))
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f,
+                @SVector[1.0, 1.0], p).u[end], p)≈1 / (2 * sqrt(p)) atol=1e-3
+        end
+    end
+
+    @testset "[OOP] [Scalar AD]" begin
+        for p in 1.0:0.1:100.0
+            @test begin
+                res = benchmark_nlsolve_oop(quadratic_f, 1.0, p)
+                res_true = sqrt(p)
+                res.u ≈ res_true
+            end
+            @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u,
+                p) ≈ 1 / (2 * sqrt(p))
+        end
+    end
+
+    t = (p) -> [sqrt(p[2] / p[1])]
+    p = [0.9, 50.0]
+    @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1])
+    @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u],
+        p) ≈ ForwardDiff.jacobian(t, p)
+
+    # Iterator interface
+    function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip}
+        probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin])
+        cache = init(probN, LimitedMemoryBroyden(); maxiters = 100, abstol = 1e-10)
+        sols = zeros(length(p_range))
+        for (i, p) in enumerate(p_range)
+            reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p)
+            sol = solve!(cache)
+            sols[i] = iip ? sol.u[1] : sol.u
+        end
+        return sols
+    end
+    p = range(0.01, 2, length = 200)
+    @test nlprob_iterator_interface(quadratic_f, p, Val(false))≈sqrt.(p) atol=1e-2
+    @test nlprob_iterator_interface(quadratic_f!, p, Val(true))≈sqrt.(p) atol=1e-2
+end
diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl
index 9c16fe0fe..1d9462fa1 100644
--- a/test/matrix_resizing.jl
+++ b/test/matrix_resizing.jl
@@ -7,7 +7,8 @@ vecprob = NonlinearProblem(ff, vec(u0), p)
 prob = NonlinearProblem(ff, u0, p)
 
 for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
-    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement())
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(),
+    LimitedMemoryBroyden())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end
 

From e1940d4926628366ccdffe37a9d78b4516f47e3a Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Wed, 25 Oct 2023 16:35:06 +0200
Subject: [PATCH 9/9] Update docs/src/solvers/NonlinearSystemSolvers.md

---
 docs/src/solvers/NonlinearSystemSolvers.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md
index 89cde3513..7664bce10 100644
--- a/docs/src/solvers/NonlinearSystemSolvers.md
+++ b/docs/src/solvers/NonlinearSystemSolvers.md
@@ -66,9 +66,11 @@ features, but have a bit of overhead on very small problems.
     with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing
     robustnes on the hard problems.
   - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and
-    Automatic Jacobian Resetting. This is a fast method but unstable for most problems!
+    Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of
+    the Jacobian matrix is sufficiently large.
   - `GeneralKlement()`: Generalization of Klement's Quasi-Newton Method with Line Search and
-    Automatic Jacobian Resetting. This is a fast method but unstable for most problems!
+    Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of
+    the Jacobian matrix is sufficiently large.
 
 ### SimpleNonlinearSolve.jl