From 686c1cb372c71f12ef4edaba4c9a67d4f133ea67 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Wed, 10 Jan 2024 09:46:10 -0500
Subject: [PATCH 01/20] Add Line Search to Broyden

---
 lib/SimpleNonlinearSolve/Project.toml         | 10 ++-
 .../src/SimpleNonlinearSolve.jl               |  6 +-
 lib/SimpleNonlinearSolve/src/linesearch.jl    | 77 +++++++++++++++++++
 .../src/nlsolve/broyden.jl                    | 30 +++++++-
 .../src/nlsolve/lbroyden.jl                   | 29 ++++---
 5 files changed, 131 insertions(+), 21 deletions(-)
 create mode 100644 lib/SimpleNonlinearSolve/src/linesearch.jl

diff --git a/lib/SimpleNonlinearSolve/Project.toml b/lib/SimpleNonlinearSolve/Project.toml
index d518e1686..6373e9647 100644
--- a/lib/SimpleNonlinearSolve/Project.toml
+++ b/lib/SimpleNonlinearSolve/Project.toml
@@ -1,13 +1,14 @@
 name = "SimpleNonlinearSolve"
 uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7"
 authors = ["SciML"]
-version = "1.2.1"
+version = "1.3.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -17,17 +18,18 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 
-[extensions]
-SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
-
 [weakdeps]
 PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
 
+[extensions]
+SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
+
 [compat]
 ADTypes = "0.2.6"
 ArrayInterface = "7"
 ConcreteStructs = "0.2"
 DiffEqBase = "6.126"
+FastClosures = "0.3"
 FiniteDiff = "2"
 ForwardDiff = "0.10.3"
 LinearAlgebra = "1.9"
diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index 356c0b51e..c8f515cc2 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -3,14 +3,13 @@ module SimpleNonlinearSolve
 import PrecompileTools: @compile_workload, @setup_workload, @recompile_invalidations
 
 @recompile_invalidations begin
-    using ADTypes,
-        ArrayInterface, ConcreteStructs, DiffEqBase, Reexport, LinearAlgebra, SciMLBase
+    using ADTypes, ArrayInterface, ConcreteStructs, DiffEqBase, FastClosures, FiniteDiff,
+        ForwardDiff, Reexport, LinearAlgebra, SciMLBase
 
     import DiffEqBase: AbstractNonlinearTerminationMode,
         AbstractSafeNonlinearTerminationMode, AbstractSafeBestNonlinearTerminationMode,
         NonlinearSafeTerminationReturnCode, get_termination_mode,
         NONLINEARSOLVE_DEFAULT_NORM, _get_tolerance
-    using FiniteDiff, ForwardDiff
     import ForwardDiff: Dual
     import MaybeInplace: @bb, setindex_trait, CanSetindex, CannotSetindex
     import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace
@@ -26,6 +25,7 @@ abstract type AbstractNewtonAlgorithm <: AbstractSimpleNonlinearSolveAlgorithm e
 @inline __is_extension_loaded(::Val) = false
 
 include("utils.jl")
+include("linesearch.jl")
 
 ## Nonlinear Solvers
 include("nlsolve/raphson.jl")
diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
new file mode 100644
index 000000000..fdab57bfd
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -0,0 +1,77 @@
+# This is a copy of the version in NonlinearSolve.jl. Temporarily kept here till we move
+# line searches into a dedicated package. Renamed to `__` to avoid conflicts.
+@kwdef @concrete struct __LiFukushimaLineSearch
+    lambda_0 = 1
+    beta = 1 // 2
+    sigma_1 = 1 // 1000
+    sigma_2 = 1 // 1000
+    eta = 1 // 10
+    rho = 9 // 10
+    nan_maxiters::Int = 5
+    maxiters::Int = 100
+end
+
+@concrete mutable struct __LiFukushimaLineSearchCache
+    ϕ
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    α
+    nan_maxiters::Int
+    maxiters::Int
+end
+
+function (alg::__LiFukushimaLineSearch)(prob, fu, u)
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
+    T = promote_type(eltype(fu), eltype(u))
+
+    ϕ = @closure (u, δu, α) -> begin
+        @bb @. u_cache = u + α * δu
+        return norm(__eval_f(prob, fu_cache, u_cache), 2)
+    end
+
+    return __LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
+        T(alg.sigma_2), T(alg.eta), T(alg.rho), T(true), alg.nan_maxiters, alg.maxiters)
+end
+
+function (cache::__LiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+    ϕ = @closure α -> cache.ϕ(u, δu, α)
+
+    fx_norm = ϕ(T(0))
+
+    # Non-Blocking exit if the norm is NaN or Inf
+    !isfinite(fx_norm) && return cache.α
+
+    # Early Terminate based on Eq. 2.7
+    du_norm = norm(δu, 2)
+    fxλ_norm = ϕ(cache.α)
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+    fxλp_norm = ϕ(λ₂)
+
+    if !isfinite(fxλp_norm)
+        nan_converged = false
+        for _ in 1:(cache.nan_maxiters)
+            λ₁, λ₂ = λ₂, cache.β * λ₂
+            fxλp_norm = ϕ(λ₂)
+            nan_converged = isfinite(fxλp_norm)
+            nan_converged && break
+        end
+        nan_converged || return cache.α
+    end
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = ϕ(λ₂)
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return cache.α
+end
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 5f2dccdc7..82312a01d 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -1,10 +1,26 @@
 """
-    SimpleBroyden()
+    SimpleBroyden(; linesearch = Val(false))
 
 A low-overhead implementation of Broyden. This method is non-allocating on scalar
 and static array problems.
+
+If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
+no line search is used. For advanced customization of the line search, use the
+`Broyden` algorithm in `NonlinearSolve.jl`.
+
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleBroyden <: AbstractSimpleNonlinearSolveAlgorithm end
+struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm end
+
+function SimpleBroyden(; linesearch = Val(false))
+    SimpleBroyden{SciMLBase._unwrap_val(linesearch)}()
+end
+
+__get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
@@ -26,9 +42,16 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
     abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
         termination_condition)
 
+    ls_cache = __get_linesearch(alg) === Val(true) ?
+               __LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for _ in 1:maxiters
         @bb δx = J⁻¹ × vec(fprev)
-        @bb @. x = xo - δx
+        @bb δx .*= -1
+
+        α = ls_cache === nothing ? true : ls_cache(x, δx)
+
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fprev
 
@@ -37,7 +60,6 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         tc_sol !== nothing && return tc_sol
 
         @bb J⁻¹δf = J⁻¹ × vec(δf)
-        @bb δx .*= -1
         d = dot(δx, J⁻¹δf)
         @bb xᵀJ⁻¹ = transpose(J⁻¹) × vec(δx)
 
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index 3a0c11ded..d949fcc8b 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -1,6 +1,6 @@
 """
-    SimpleLimitedMemoryBroyden(; threshold::Int = 27)
-    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27))
+    SimpleLimitedMemoryBroyden(; threshold::Int = 27, linesearch = Val(true))
+    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(true))
 
 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
 Broyden's method. This Alogrithm unfortunately cannot non-allocating for StaticArrays
@@ -8,17 +8,26 @@ without compromising on the "simple" aspect.
 
 If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.
 
-!!! warning
+If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
+no line search is used. For advanced customization of the line search, use the
+`LimitedMemoryBroyden` algorithm in `NonlinearSolve.jl`.
 
-    This method is not very stable and can diverge even for very simple problems. This has
-    mostly been tested for neural networks in DeepEquilibriumNetworks.jl.
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleLimitedMemoryBroyden{threshold} <: AbstractSimpleNonlinearSolveAlgorithm end
+struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
+       AbstractSimpleNonlinearSolveAlgorithm end
 
 __get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold)
+__use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)
 
-function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27))
-    return SimpleLimitedMemoryBroyden{SciMLBase._unwrap_val(threshold)}()
+function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(true))
+    return SimpleLimitedMemoryBroyden{SciMLBase._unwrap_val(threshold),
+        SciMLBase._unwrap_val(linesearch)}()
 end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
@@ -45,8 +54,8 @@ end
 
     # For scalar problems / if the threshold is larger than problem size just use Broyden
     if x isa Number || length(x) ≤ η
-        return SciMLBase.__solve(prob, SimpleBroyden(), args...;
-            abstol, reltol, maxiters, termination_condition, kwargs...)
+        return SciMLBase.__solve(prob, SimpleBroyden(; linesearch = __use_linesearch(alg)),
+            args...; abstol, reltol, maxiters, termination_condition, kwargs...)
     end
 
     fx = _get_fx(prob, x)

From e822edd1287f89424bdd59a63741a3a38e5ba9d6 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Wed, 10 Jan 2024 10:24:01 -0500
Subject: [PATCH 02/20] Add Line Search to LBroyden

---
 lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl  | 1 -
 lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl | 6 +++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 82312a01d..cc505339f 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -50,7 +50,6 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         @bb δx .*= -1
 
         α = ls_cache === nothing ? true : ls_cache(x, δx)
-
         @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fprev
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index d949fcc8b..ccd77d38e 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -75,8 +75,12 @@ end
     Tcache = __lbroyden_threshold_cache(x, x isa StaticArray ? threshold : Val(η))
     @bb mat_cache = copy(x)
 
+    ls_cache = __get_linesearch(alg) === Val(true) ?
+               __LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for i in 1:maxiters
-        @bb @. x = xo + δx
+        α = ls_cache === nothing ? true : ls_cache(x, δx)
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fo
 

From ee1b66d9f7b45eda729f20e8d64005eaae5dd3ca Mon Sep 17 00:00:00 2001
From: Vaibhav Kumar Dixit <vaibhavyashdixit@gmail.com>
Date: Wed, 10 Jan 2024 14:28:32 -0500
Subject: [PATCH 03/20] Update src/nlsolve/lbroyden.jl

---
 lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index ccd77d38e..fa3b9d3cd 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -75,7 +75,7 @@ end
     Tcache = __lbroyden_threshold_cache(x, x isa StaticArray ? threshold : Val(η))
     @bb mat_cache = copy(x)
 
-    ls_cache = __get_linesearch(alg) === Val(true) ?
+    ls_cache = __use_linesearch(alg) === Val(true) ?
                __LiFukushimaLineSearch()(prob, fx, x) : nothing
 
     for i in 1:maxiters

From 7648d58cec5b39ffc9424b49098148135a4955c6 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 12:29:57 -0500
Subject: [PATCH 04/20] temp diable termination condition

---
 lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index cc505339f..fa0b17359 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -39,8 +39,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
     @bb δJ⁻¹n = copy(x)
     @bb δJ⁻¹ = copy(J⁻¹)
 
-    abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
-        termination_condition)
+    # abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
+    #     termination_condition)
 
     ls_cache = __get_linesearch(alg) === Val(true) ?
                __LiFukushimaLineSearch()(prob, fx, x) : nothing
@@ -55,8 +55,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         @bb @. δf = fx - fprev
 
         # Termination Checks
-        tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
-        tc_sol !== nothing && return tc_sol
+        # tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
+        # tc_sol !== nothing && return tc_sol
 
         @bb J⁻¹δf = J⁻¹ × vec(δf)
         d = dot(δx, J⁻¹δf)

From 20296e803f8df726172c139f260b05ebc468189a Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 13:48:05 -0500
Subject: [PATCH 05/20] isfinite not comiling

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index fdab57bfd..8a93410bd 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -45,7 +45,7 @@ function (cache::__LiFukushimaLineSearchCache)(u, δu)
     fx_norm = ϕ(T(0))
 
     # Non-Blocking exit if the norm is NaN or Inf
-    !isfinite(fx_norm) && return cache.α
+    (fx_norm == Inf || fx_norm == NaN) && return cache.α
 
     # Early Terminate based on Eq. 2.7
     du_norm = norm(δu, 2)

From c222e6412ff8a186f459d6695a64036a43887bc9 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 14:10:06 -0500
Subject: [PATCH 06/20] use DiffEqBase definitions of norm etc

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 8a93410bd..4d2ec4aa5 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -2,11 +2,11 @@
 # line searches into a dedicated package. Renamed to `__` to avoid conflicts.
 @kwdef @concrete struct __LiFukushimaLineSearch
     lambda_0 = 1
-    beta = 1 // 2
-    sigma_1 = 1 // 1000
-    sigma_2 = 1 // 1000
-    eta = 1 // 10
-    rho = 9 // 10
+    beta = 1.0 / 2.0
+    sigma_1 = 1.0 / 1000.0
+    sigma_2 = 1.0 / 1000.0
+    eta = 1.0 / 10.0
+    rho = 9.0 / 10.0
     nan_maxiters::Int = 5
     maxiters::Int = 100
 end
@@ -31,7 +31,7 @@ function (alg::__LiFukushimaLineSearch)(prob, fu, u)
 
     ϕ = @closure (u, δu, α) -> begin
         @bb @. u_cache = u + α * δu
-        return norm(__eval_f(prob, fu_cache, u_cache), 2)
+        return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
     end
 
     return __LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
@@ -42,25 +42,25 @@ function (cache::__LiFukushimaLineSearchCache)(u, δu)
     T = promote_type(eltype(u), eltype(δu))
     ϕ = @closure α -> cache.ϕ(u, δu, α)
 
-    fx_norm = ϕ(T(0))
+    fx_norm::T = ϕ(T(0))
 
     # Non-Blocking exit if the norm is NaN or Inf
-    (fx_norm == Inf || fx_norm == NaN) && return cache.α
+    DiffEqBase.NAN_CHECK(fx_norm) && return cache.α
 
     # Early Terminate based on Eq. 2.7
-    du_norm = norm(δu, 2)
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
     fxλ_norm = ϕ(cache.α)
     fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
 
     λ₂, λ₁ = cache.λ₀, cache.λ₀
     fxλp_norm = ϕ(λ₂)
 
-    if !isfinite(fxλp_norm)
+    if DiffEqBase.NAN_CHECK(fxλp_norm)
         nan_converged = false
         for _ in 1:(cache.nan_maxiters)
             λ₁, λ₂ = λ₂, cache.β * λ₂
             fxλp_norm = ϕ(λ₂)
-            nan_converged = isfinite(fxλp_norm)
+            nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)
             nan_converged && break
         end
         nan_converged || return cache.α

From 705ac0612dbc5da1982882900c22ee3a5914d0f0 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 15:24:05 -0500
Subject: [PATCH 07/20] T(phi)

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 4d2ec4aa5..b93bd21de 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -42,7 +42,7 @@ function (cache::__LiFukushimaLineSearchCache)(u, δu)
     T = promote_type(eltype(u), eltype(δu))
     ϕ = @closure α -> cache.ϕ(u, δu, α)
 
-    fx_norm::T = ϕ(T(0))
+    fx_norm = ϕ(T(0))::T
 
     # Non-Blocking exit if the norm is NaN or Inf
     DiffEqBase.NAN_CHECK(fx_norm) && return cache.α

From 5a92d0f807c97c3734297cffe887830b42adf3d4 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 16:31:05 -0500
Subject: [PATCH 08/20] Typer assertions and simplify function eval all around

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index b93bd21de..82ece8381 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -25,13 +25,11 @@ end
 end
 
 function (alg::__LiFukushimaLineSearch)(prob, fu, u)
-    @bb u_cache = similar(u)
-    @bb fu_cache = similar(fu)
     T = promote_type(eltype(fu), eltype(u))
 
     ϕ = @closure (u, δu, α) -> begin
-        @bb @. u_cache = u + α * δu
-        return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
+        u_cache = @. u + α * δu
+        return NONLINEARSOLVE_DEFAULT_NORM(prob.f(u_cache, prob.p))
     end
 
     return __LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
@@ -45,22 +43,22 @@ function (cache::__LiFukushimaLineSearchCache)(u, δu)
     fx_norm = ϕ(T(0))::T
 
     # Non-Blocking exit if the norm is NaN or Inf
-    DiffEqBase.NAN_CHECK(fx_norm) && return cache.α
+    DiffEqBase.NAN_CHECK(fx_norm)::Bool && return cache.α
 
     # Early Terminate based on Eq. 2.7
-    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
-    fxλ_norm = ϕ(cache.α)
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)::T
+    fxλ_norm = ϕ(cache.α)::T
     fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
 
     λ₂, λ₁ = cache.λ₀, cache.λ₀
-    fxλp_norm = ϕ(λ₂)
+    fxλp_norm = ϕ(λ₂)::T
 
-    if DiffEqBase.NAN_CHECK(fxλp_norm)
+    if DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
         nan_converged = false
         for _ in 1:(cache.nan_maxiters)
             λ₁, λ₂ = λ₂, cache.β * λ₂
             fxλp_norm = ϕ(λ₂)
-            nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)
+            nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
             nan_converged && break
         end
         nan_converged || return cache.α

From f14726deb7624c1765425ef374b093b98ba134e0 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 12 Jan 2024 16:53:29 -0500
Subject: [PATCH 09/20] Bring back bb

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 82ece8381..1b21974b9 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -25,11 +25,13 @@ end
 end
 
 function (alg::__LiFukushimaLineSearch)(prob, fu, u)
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
     T = promote_type(eltype(fu), eltype(u))
 
     ϕ = @closure (u, δu, α) -> begin
         u_cache = @. u + α * δu
-        return NONLINEARSOLVE_DEFAULT_NORM(prob.f(u_cache, prob.p))
+        return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
     end
 
     return __LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),

From 8a5526a796622cb5526f361da6b135a8d70b005f Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 00:40:25 -0500
Subject: [PATCH 10/20] Broyden line search works inside kernels

---
 .../src/SimpleNonlinearSolve.jl               |   2 +-
 lib/SimpleNonlinearSolve/src/linesearch.jl    | 109 +++++++++++++-----
 .../src/nlsolve/broyden.jl                    |  14 +--
 .../src/nlsolve/dfsane.jl                     |   2 +-
 .../src/nlsolve/lbroyden.jl                   |  17 ++-
 5 files changed, 96 insertions(+), 48 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index c8f515cc2..c6dd6291f 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -12,7 +12,7 @@ import PrecompileTools: @compile_workload, @setup_workload, @recompile_invalidat
         NONLINEARSOLVE_DEFAULT_NORM, _get_tolerance
     import ForwardDiff: Dual
     import MaybeInplace: @bb, setindex_trait, CanSetindex, CannotSetindex
-    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace
+    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace, _unwrap_val
     import StaticArraysCore: StaticArray, SVector, SMatrix, SArray, MArray, MMatrix, Size
 end
 
diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 1b21974b9..e5d2bb93c 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -1,17 +1,17 @@
 # This is a copy of the version in NonlinearSolve.jl. Temporarily kept here till we move
-# line searches into a dedicated package. Renamed to `__` to avoid conflicts.
-@kwdef @concrete struct __LiFukushimaLineSearch
+# line searches into a dedicated package.
+@kwdef @concrete struct LiFukushimaLineSearch
     lambda_0 = 1
-    beta = 1.0 / 2.0
-    sigma_1 = 1.0 / 1000.0
-    sigma_2 = 1.0 / 1000.0
-    eta = 1.0 / 10.0
-    rho = 9.0 / 10.0
-    nan_maxiters::Int = 5
+    beta = 0.5
+    sigma_1 = 0.001
+    sigma_2 = 0.001
+    eta = 0.1
+    rho = 0.1
+    nan_maxiters = missing
     maxiters::Int = 100
 end
 
-@concrete mutable struct __LiFukushimaLineSearchCache
+@concrete mutable struct LiFukushimaLineSearchCache{T <: Union{Nothing, Int}}
     ϕ
     λ₀
     β
@@ -20,11 +20,31 @@ end
     η
     ρ
     α
-    nan_maxiters::Int
+    nan_maxiters::T
     maxiters::Int
 end
 
-function (alg::__LiFukushimaLineSearch)(prob, fu, u)
+@concrete struct StaticLiFukushimaLineSearchCache
+    f
+    p
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    maxiters::Int
+end
+
+(alg::LiFukushimaLineSearch)(prob, fu, u) = __generic_init(alg, prob, fu, u)
+function (alg::LiFukushimaLineSearch)(prob, fu::SArray, u::SArray)
+    (alg.nan_maxiters === missing || alg.nan_maxiters === nothing) &&
+        return __static_init(alg, prob, fu, u)
+    @warn "`LiFukushimaLineSearch` with NaN checking is not non-allocating" maxlog=1
+    return __generic_init(alg, prob, fu, u)
+end
+
+function __generic_init(alg::LiFukushimaLineSearch, prob, fu, u)
     @bb u_cache = similar(u)
     @bb fu_cache = similar(fu)
     T = promote_type(eltype(fu), eltype(u))
@@ -34,36 +54,45 @@ function (alg::__LiFukushimaLineSearch)(prob, fu, u)
         return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
     end
 
-    return __LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
-        T(alg.sigma_2), T(alg.eta), T(alg.rho), T(true), alg.nan_maxiters, alg.maxiters)
+    nan_maxiters = ifelse(alg.nan_maxiters === missing, 5, alg.nan_maxiters)
+
+    return LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
+        T(alg.sigma_2), T(alg.eta), T(alg.rho), T(true), nan_maxiters, alg.maxiters)
 end
 
-function (cache::__LiFukushimaLineSearchCache)(u, δu)
+function __static_init(alg::LiFukushimaLineSearch, prob, fu, u)
+    T = promote_type(eltype(fu), eltype(u))
+    return StaticLiFukushimaLineSearchCache(prob.f, prob.p, T(alg.lambda_0), T(alg.beta),
+        T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), T(alg.rho), alg.maxiters)
+end
+
+function (cache::LiFukushimaLineSearchCache)(u, δu)
     T = promote_type(eltype(u), eltype(δu))
     ϕ = @closure α -> cache.ϕ(u, δu, α)
-
-    fx_norm = ϕ(T(0))::T
+    fx_norm = ϕ(T(0))
 
     # Non-Blocking exit if the norm is NaN or Inf
-    DiffEqBase.NAN_CHECK(fx_norm)::Bool && return cache.α
+    DiffEqBase.NAN_CHECK(fx_norm) && return cache.α
 
     # Early Terminate based on Eq. 2.7
-    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)::T
-    fxλ_norm = ϕ(cache.α)::T
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = ϕ(cache.α)
     fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
 
     λ₂, λ₁ = cache.λ₀, cache.λ₀
-    fxλp_norm = ϕ(λ₂)::T
-
-    if DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
-        nan_converged = false
-        for _ in 1:(cache.nan_maxiters)
-            λ₁, λ₂ = λ₂, cache.β * λ₂
-            fxλp_norm = ϕ(λ₂)
-            nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
-            nan_converged && break
+    fxλp_norm = ϕ(λ₂)
+
+    if cache.nan_maxiters !== nothing
+        if DiffEqBase.NAN_CHECK(fxλp_norm)
+            nan_converged = false
+            for _ in 1:(cache.nan_maxiters)
+                λ₁, λ₂ = λ₂, cache.β * λ₂
+                fxλp_norm = ϕ(λ₂)
+                nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
+                nan_converged && break
+            end
+            nan_converged || return cache.α
         end
-        nan_converged || return cache.α
     end
 
     for i in 1:(cache.maxiters)
@@ -75,3 +104,25 @@ function (cache::__LiFukushimaLineSearchCache)(u, δu)
 
     return cache.α
 end
+
+function (cache::StaticLiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+
+    # Early Terminate based on Eq. 2.7
+    fx_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u, cache.p))
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ cache.λ₀ .* δu, cache.p))
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return T(true)
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+    fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return T(true)
+end
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index fa0b17359..8eebd9518 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -16,9 +16,7 @@ of Broyden-like method for nonlinear equations." Optimization methods and softwa
 """
 struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm end
 
-function SimpleBroyden(; linesearch = Val(false))
-    SimpleBroyden{SciMLBase._unwrap_val(linesearch)}()
-end
+SimpleBroyden(; linesearch = Val(false)) = SimpleBroyden{_unwrap_val(linesearch)}()
 
 __get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)
 
@@ -39,11 +37,11 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
     @bb δJ⁻¹n = copy(x)
     @bb δJ⁻¹ = copy(J⁻¹)
 
-    # abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
-    #     termination_condition)
+    abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
+        termination_condition)
 
     ls_cache = __get_linesearch(alg) === Val(true) ?
-               __LiFukushimaLineSearch()(prob, fx, x) : nothing
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
 
     for _ in 1:maxiters
         @bb δx = J⁻¹ × vec(fprev)
@@ -55,8 +53,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         @bb @. δf = fx - fprev
 
         # Termination Checks
-        # tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
-        # tc_sol !== nothing && return tc_sol
+        tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
+        tc_sol !== nothing && return tc_sol
 
         @bb J⁻¹δf = J⁻¹ × vec(δf)
         d = dot(δx, J⁻¹δf)
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
index c6111b38c..9232c5445 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
@@ -56,7 +56,7 @@ end
 function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
         M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
         nexp::Int = 2, η_strategy::F = (f_1, k, x, F) -> f_1 ./ k^2) where {F}
-    return SimpleDFSane{SciMLBase._unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
+    return SimpleDFSane{_unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
         η_strategy)
 end
 
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index fa3b9d3cd..5f144a3b6 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -1,6 +1,6 @@
 """
-    SimpleLimitedMemoryBroyden(; threshold::Int = 27, linesearch = Val(true))
-    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(true))
+    SimpleLimitedMemoryBroyden(; threshold::Int = 27, linesearch = Val(false))
+    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(false))
 
 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
 Broyden's method. This Alogrithm unfortunately cannot non-allocating for StaticArrays
@@ -25,9 +25,8 @@ __get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val
 __use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)
 
 function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
-        linesearch = Val(true))
-    return SimpleLimitedMemoryBroyden{SciMLBase._unwrap_val(threshold),
-        SciMLBase._unwrap_val(linesearch)}()
+        linesearch = Val(false))
+    return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}()
 end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
@@ -50,7 +49,7 @@ end
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     threshold = __get_threshold(alg)
-    η = min(SciMLBase._unwrap_val(threshold), maxiters)
+    η = min(_unwrap_val(threshold), maxiters)
 
     # For scalar problems / if the threshold is larger than problem size just use Broyden
     if x isa Number || length(x) ≤ η
@@ -134,7 +133,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 
     xo, fo, δx = res.x, res.fx, res.δx
 
-    for i in 1:(maxiters - SciMLBase._unwrap_val(threshold))
+    for i in 1:(maxiters - _unwrap_val(threshold))
         x = xo .+ δx
         fx = prob.f(x, prob.p)
         δf = fx - fo
@@ -148,8 +147,8 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
         d = dot(vᵀ, δf)
         δx = @. (δx - mvec) / d
 
-        U = Base.setindex(U, vec(δx), mod1(i, SciMLBase._unwrap_val(threshold)))
-        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, SciMLBase._unwrap_val(threshold)))
+        U = Base.setindex(U, vec(δx), mod1(i, _unwrap_val(threshold)))
+        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, _unwrap_val(threshold)))
 
         δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
 

From cf2551b57d1f8d9b0c70ef60ea8b41a08a3ac4e4 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 00:42:07 -0500
Subject: [PATCH 11/20] Add linesearch to the cuda tests

---
 lib/SimpleNonlinearSolve/test/basictests.jl | 4 ++--
 lib/SimpleNonlinearSolve/test/cuda.jl       | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index 7da944e7b..b0b356daf 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -105,7 +105,7 @@ end
 # --- SimpleBroyden / SimpleKlement / SimpleLimitedMemoryBroyden tests ---
 
 @testset "$(_nameof(alg))" for alg in [SimpleBroyden(), SimpleKlement(), SimpleDFSane(),
-    SimpleLimitedMemoryBroyden()]
+    SimpleLimitedMemoryBroyden(), SimpleBroyden(; linesearch = Val(true))]
     function benchmark_nlsolve_oop(f, u0, p = 2.0)
         prob = NonlinearProblem{false}(f, u0, p)
         return solve(prob, alg, abstol = 1e-9)
@@ -164,7 +164,7 @@ end
 ## SimpleDFSane needs to allocate a history vector
 @testset "Allocation Checks: $(_nameof(alg))" for alg in (SimpleNewtonRaphson(),
     SimpleHalley(), SimpleBroyden(), SimpleKlement(), SimpleLimitedMemoryBroyden(),
-    SimpleTrustRegion(), SimpleDFSane())
+    SimpleTrustRegion(), SimpleDFSane(), SimpleBroyden(; linesearch = Val(true)))
     @check_allocs nlsolve(prob, alg) = SciMLBase.solve(prob, alg; abstol = 1e-9)
 
     nlprob_scalar = NonlinearProblem{false}(quadratic_f, 1.0, 2.0)
diff --git a/lib/SimpleNonlinearSolve/test/cuda.jl b/lib/SimpleNonlinearSolve/test/cuda.jl
index 9b984bc70..19e67a081 100644
--- a/lib/SimpleNonlinearSolve/test/cuda.jl
+++ b/lib/SimpleNonlinearSolve/test/cuda.jl
@@ -7,7 +7,8 @@ f!(du, u, p) = du .= u .* u .- 2
 
 @testset "Solving on GPUs" begin
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)))
         @info "Testing $alg on CUDA"
 
         # Static Arrays
@@ -43,7 +44,8 @@ end
     prob = NonlinearProblem{false}(f, @SVector[1.0f0, 1.0f0])
 
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)))
         @test begin
             try
                 @cuda kernel_function(prob, alg)

From 1d5eeb817528ca103514a722fd7d26231ba2506f Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 00:55:36 -0500
Subject: [PATCH 12/20] Add linesearch to LBroyden

---
 .../src/nlsolve/broyden.jl                    |  2 +-
 .../src/nlsolve/lbroyden.jl                   | 20 +++++++++++--------
 lib/SimpleNonlinearSolve/test/basictests.jl   |  6 ++++--
 lib/SimpleNonlinearSolve/test/cuda.jl         |  6 ++++--
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 8eebd9518..327f2e2fb 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -6,7 +6,7 @@ and static array problems.
 
 If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
 no line search is used. For advanced customization of the line search, use the
-`Broyden` algorithm in `NonlinearSolve.jl`.
+[`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
 
 ### References
 
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index 5f144a3b6..5f5d481c2 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -3,14 +3,13 @@
     SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(false))
 
 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
-Broyden's method. This Alogrithm unfortunately cannot non-allocating for StaticArrays
-without compromising on the "simple" aspect.
+Broyden's method.
 
 If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.
 
 If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
 no line search is used. For advanced customization of the line search, use the
-`LimitedMemoryBroyden` algorithm in `NonlinearSolve.jl`.
+[`LimitedMemoryBroyden`](@ref) algorithm in `NonlinearSolve.jl`.
 
 ### References
 
@@ -75,7 +74,7 @@ end
     @bb mat_cache = copy(x)
 
     ls_cache = __use_linesearch(alg) === Val(true) ?
-               __LiFukushimaLineSearch()(prob, fx, x) : nothing
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
 
     for i in 1:maxiters
         α = ls_cache === nothing ? true : ls_cache(x, δx)
@@ -125,8 +124,11 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 
     xo, δx, fo, δf = x, -fx, fx, fx
 
+    ls_cache = __use_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
-        threshold)
+        threshold, ls_cache)
 
     converged &&
         return build_solution(prob, alg, res.x, res.fx; retcode = ReturnCode.Success)
@@ -134,7 +136,8 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
     xo, fo, δx = res.x, res.fx, res.δx
 
     for i in 1:(maxiters - _unwrap_val(threshold))
-        x = xo .+ δx
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
+        x = xo .+ α .* δx
         fx = prob.f(x, prob.p)
         δf = fx - fo
 
@@ -160,13 +163,14 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 end
 
 @generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
-        Vᵀ, ::Val{threshold}) where {threshold}
+        Vᵀ, ::Val{threshold}, ls_cache) where {threshold}
     calls = []
     for i in 1:threshold
         static_idx, static_idx_p1 = Val(i - 1), Val(i)
         push!(calls,
             quote
-                x = xo .+ δx
+                α = ls_cache === nothing ? true : ls_cache(xo, δx)
+                x = xo .+ α .* δx
                 fx = prob.f(x, prob.p)
                 δf = fx - fo
 
diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index b0b356daf..2c4cd6be5 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -105,7 +105,8 @@ end
 # --- SimpleBroyden / SimpleKlement / SimpleLimitedMemoryBroyden tests ---
 
 @testset "$(_nameof(alg))" for alg in [SimpleBroyden(), SimpleKlement(), SimpleDFSane(),
-    SimpleLimitedMemoryBroyden(), SimpleBroyden(; linesearch = Val(true))]
+    SimpleLimitedMemoryBroyden(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true))]
     function benchmark_nlsolve_oop(f, u0, p = 2.0)
         prob = NonlinearProblem{false}(f, u0, p)
         return solve(prob, alg, abstol = 1e-9)
@@ -164,7 +165,8 @@ end
 ## SimpleDFSane needs to allocate a history vector
 @testset "Allocation Checks: $(_nameof(alg))" for alg in (SimpleNewtonRaphson(),
     SimpleHalley(), SimpleBroyden(), SimpleKlement(), SimpleLimitedMemoryBroyden(),
-    SimpleTrustRegion(), SimpleDFSane(), SimpleBroyden(; linesearch = Val(true)))
+    SimpleTrustRegion(), SimpleDFSane(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
     @check_allocs nlsolve(prob, alg) = SciMLBase.solve(prob, alg; abstol = 1e-9)
 
     nlprob_scalar = NonlinearProblem{false}(quadratic_f, 1.0, 2.0)
diff --git a/lib/SimpleNonlinearSolve/test/cuda.jl b/lib/SimpleNonlinearSolve/test/cuda.jl
index 19e67a081..95ee60fb6 100644
--- a/lib/SimpleNonlinearSolve/test/cuda.jl
+++ b/lib/SimpleNonlinearSolve/test/cuda.jl
@@ -8,7 +8,8 @@ f!(du, u, p) = du .= u .* u .- 2
 @testset "Solving on GPUs" begin
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
         SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
-        SimpleBroyden(; linesearch = Val(true)))
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
         @info "Testing $alg on CUDA"
 
         # Static Arrays
@@ -45,7 +46,8 @@ end
 
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
         SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
-        SimpleBroyden(; linesearch = Val(true)))
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
         @test begin
             try
                 @cuda kernel_function(prob, alg)

From 223ca68a30f292910b27adde52a303c57aee73ae Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 01:04:00 -0500
Subject: [PATCH 13/20] Remove the old flaky benchmarktools allocations tests

---
 lib/SimpleNonlinearSolve/test/Project.toml  |  1 -
 lib/SimpleNonlinearSolve/test/basictests.jl | 21 +--------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/test/Project.toml b/lib/SimpleNonlinearSolve/test/Project.toml
index 4442a15a6..cbf18f66c 100644
--- a/lib/SimpleNonlinearSolve/test/Project.toml
+++ b/lib/SimpleNonlinearSolve/test/Project.toml
@@ -1,6 +1,5 @@
 [deps]
 AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index 2c4cd6be5..726930072 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -1,4 +1,4 @@
-using AllocCheck, BenchmarkTools, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
+using AllocCheck, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
     LinearAlgebra, Test, ForwardDiff, DiffEqBase
 import PolyesterForwardDiff
 
@@ -57,13 +57,6 @@ const TERMINATION_CONDITIONS = [
         end
     end
 
-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0; autodiff = AutoForwardDiff())) < 200
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])
 
@@ -89,11 +82,6 @@ end
         end
     end
 
-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])
 
@@ -129,13 +117,6 @@ end
         @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
     end
 
-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0)) < 200
-        allocs = alg isa SimpleDFSane ? 144 : 0
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0)) == allocs
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])
 

From 545e3358f8967b4aeef3e38f24251129d5b9f77f Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 01:34:10 -0500
Subject: [PATCH 14/20] DFSane non-allocating for scalars now

---
 lib/SimpleNonlinearSolve/Project.toml         |  3 +++
 .../SimpleNonlinearSolveStaticArraysExt.jl    |  7 ++++++
 lib/SimpleNonlinearSolve/src/linesearch.jl    |  3 ++-
 .../src/nlsolve/dfsane.jl                     | 23 ++++++++++---------
 lib/SimpleNonlinearSolve/test/basictests.jl   |  3 +--
 5 files changed, 25 insertions(+), 14 deletions(-)
 create mode 100644 lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl

diff --git a/lib/SimpleNonlinearSolve/Project.toml b/lib/SimpleNonlinearSolve/Project.toml
index 6373e9647..493766ead 100644
--- a/lib/SimpleNonlinearSolve/Project.toml
+++ b/lib/SimpleNonlinearSolve/Project.toml
@@ -20,9 +20,11 @@ StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 
 [weakdeps]
 PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [extensions]
 SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
+SimpleNonlinearSolveStaticArraysExt = "StaticArrays"
 
 [compat]
 ADTypes = "0.2.6"
@@ -38,4 +40,5 @@ PrecompileTools = "1"
 Reexport = "1"
 SciMLBase = "2.7"
 StaticArraysCore = "1.4"
+StaticArrays = "1"
 julia = "1.9"
diff --git a/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
new file mode 100644
index 000000000..90318a82a
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
@@ -0,0 +1,7 @@
+module SimpleNonlinearSolveStaticArraysExt
+
+using SimpleNonlinearSolve
+
+@inline SimpleNonlinearSolve.__is_extension_loaded(::Val{:StaticArrays}) = true
+
+end
diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index e5d2bb93c..13f0c28af 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -37,7 +37,8 @@ end
 end
 
 (alg::LiFukushimaLineSearch)(prob, fu, u) = __generic_init(alg, prob, fu, u)
-function (alg::LiFukushimaLineSearch)(prob, fu::SArray, u::SArray)
+function (alg::LiFukushimaLineSearch)(prob, fu::Union{Number, SArray},
+        u::Union{Number, SArray})
     (alg.nan_maxiters === missing || alg.nan_maxiters === nothing) &&
         return __static_init(alg, prob, fu, u)
     @warn "`LiFukushimaLineSearch` with NaN checking is not non-allocating" maxlog=1
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
index 9232c5445..6931e6101 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
@@ -9,21 +9,21 @@ see the paper [1].
 
 ### Keyword Arguments
 
-  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e-10`.
-  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e10`.
+  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e-10`.
+  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e10`.
   - `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
     size in the algorithm.. Defaults to `1.0`.
   - `M`: The monotonicity of the algorithm is determined by a this positive integer.
     A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
-    of the function `f`. However, higher values allow for more flexibility in this reduction.
-    Despite this, the algorithm still ensures global convergence through the use of a
-    non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+    of the function `f`. However, higher values allow for more flexibility in this
+    reduction. Despite this, the algorithm still ensures global convergence through the use
+    of a non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
     condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
     for a higher value of `M`. The default setting is 10.
-  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
-    will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of
+    `γ` will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
   - `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
     parameter is the minimum value of that factor. Defaults to `0.1`.
   - `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
@@ -31,7 +31,7 @@ see the paper [1].
   - `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
     `nexp ∈ {1,2}`. Defaults to `2`.
   - `η_strategy`:  function to determine the parameter `η_k`, which enables growth
-    of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+    of ``||F||^2``. Called as `η_k = η_strategy(f_1, k, x, F)` with `f_1` initialized as
     ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
     `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
     ``||F||^2 / k^2``.
@@ -83,7 +83,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane{M}, args...
     α_1 = one(T)
     f_1 = fx_norm
 
-    history_f_k = if x isa SArray
+    history_f_k = if x isa SArray ||
+                     (x isa Number && __is_extension_loaded(Val(:StaticArrays)))
         ones(SVector{M, T}) * fx_norm
     else
         fill(fx_norm, M)
diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index 726930072..5938197c7 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -158,8 +158,7 @@ end
         @test true
     catch e
         @error e
-        # History Vector Allocates
-        @test false broken=(alg isa SimpleDFSane)
+        @test false
     end
 
     # ForwardDiff allocates for hessian since we don't propagate the chunksize

From 2ff6536cbd928a0757348ccc8b427b256808ecdb Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 02:53:14 -0500
Subject: [PATCH 15/20] Resolve tolerances differently for kernel launches

---
 .../src/SimpleNonlinearSolve.jl                    | 11 ++++++++++-
 lib/SimpleNonlinearSolve/src/ad.jl                 | 14 ++++++++++++++
 .../src/bracketing/bisection.jl                    |  2 +-
 lib/SimpleNonlinearSolve/src/bracketing/brent.jl   |  2 +-
 lib/SimpleNonlinearSolve/src/bracketing/falsi.jl   |  2 +-
 lib/SimpleNonlinearSolve/src/bracketing/itp.jl     |  2 +-
 lib/SimpleNonlinearSolve/src/bracketing/ridder.jl  |  2 +-
 lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl   |  8 +++++++-
 lib/SimpleNonlinearSolve/src/utils.jl              |  7 +++++++
 lib/SimpleNonlinearSolve/test/cuda.jl              |  2 +-
 10 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index c6dd6291f..8a4f2fe27 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -9,7 +9,7 @@ import PrecompileTools: @compile_workload, @setup_workload, @recompile_invalidat
     import DiffEqBase: AbstractNonlinearTerminationMode,
         AbstractSafeNonlinearTerminationMode, AbstractSafeBestNonlinearTerminationMode,
         NonlinearSafeTerminationReturnCode, get_termination_mode,
-        NONLINEARSOLVE_DEFAULT_NORM, _get_tolerance
+        NONLINEARSOLVE_DEFAULT_NORM
     import ForwardDiff: Dual
     import MaybeInplace: @bb, setindex_trait, CanSetindex, CannotSetindex
     import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace, _unwrap_val
@@ -65,6 +65,15 @@ function SciMLBase.solve(prob::NonlinearProblem, alg::AbstractSimpleNonlinearSol
     return SciMLBase.__solve(prob, alg, args...; kwargs...)
 end
 
+function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
+        alg::AbstractSimpleNonlinearSolveAlgorithm, args...; abstol = nothing,
+        reltol = nothing, kwargs...)
+    _abstol = __get_tolerance(prob.u0, abstol, eltype(prob.u0))
+    _reltol = __get_tolerance(prob.u0, reltol, eltype(prob.u0))
+    return SciMLBase.__solve(prob, alg, args...; abstol = _abstol, reltol = _reltol,
+        kwargs...)
+end
+
 @setup_workload begin
     for T in (Float32, Float64)
         prob_no_brack_scalar = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
diff --git a/lib/SimpleNonlinearSolve/src/ad.jl b/lib/SimpleNonlinearSolve/src/ad.jl
index 574904bcc..11aa95476 100644
--- a/lib/SimpleNonlinearSolve/src/ad.jl
+++ b/lib/SimpleNonlinearSolve/src/ad.jl
@@ -8,6 +8,20 @@ function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, <:AbstractArray}
 end
 
 # Handle Ambiguities
+for algType in (SimpleNewtonRaphson, SimpleDFSane, SimpleTrustRegion, SimpleBroyden,
+    SimpleLimitedMemoryBroyden, SimpleKlement, SimpleHalley)
+    @eval begin
+        function SciMLBase.solve(prob::NonlinearProblem{uType, iip,
+                    <:Union{<:Dual{T, V, P}, <:AbstractArray{<:Dual{T, V, P}}}},
+                alg::$(algType), args...; kwargs...) where {uType, T, V, P, iip}
+            sol, partials = __nlsolve_ad(prob, alg, args...; kwargs...)
+            dual_soln = __nlsolve_dual_soln(sol.u, partials, prob.p)
+            return SciMLBase.build_solution(prob, alg, dual_soln, sol.resid; sol.retcode,
+                sol.stats, sol.original)
+        end
+    end
+end
+
 for algType in (Bisection, Brent, Alefeld, Falsi, ITP, Ridder)
     @eval begin
         function SciMLBase.solve(prob::IntervalNonlinearProblem{uType, iip,
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
index 66418b3e0..38f9cb9eb 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
@@ -26,7 +26,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Bisection, args...
     left, right = prob.tspan
     fl, fr = f(left), f(right)
 
-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))
 
     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
index 75497f379..f37c45e4a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
@@ -13,7 +13,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Brent, args...;
     fl, fr = f(left), f(right)
     ϵ = eps(convert(typeof(fl), 1))
 
-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))
 
     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
index 9db7d6cf1..86086f81a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Falsi, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)
 
-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))
 
     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
index fd46de6c3..cce5eafea 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
@@ -58,7 +58,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::ITP, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)
 
-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))
 
     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
index 20e0db489..cd18060d5 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Ridder, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)
 
-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))
 
     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index 5f5d481c2..c04fa20ee 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -28,6 +28,12 @@ function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
     return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}()
 end
 
+function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
+        alg::SimpleLimitedMemoryBroyden, args...; kwargs...)
+    # Don't resolve the `abstol` and `reltol` here
+    return SciMLBase.__solve(prob, alg, args...; kwargs...)
+end
+
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
         args...; termination_condition = nothing, kwargs...)
     if prob.u0 isa SArray
@@ -120,7 +126,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 
     U, Vᵀ = __init_low_rank_jacobian(vec(x), vec(fx), threshold)
 
-    abstol = DiffEqBase._get_tolerance(abstol, eltype(x))
+    abstol = __get_tolerance(x, abstol, eltype(x))
 
     xo, δx, fo, δf = x, -fx, fx, fx
 
diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl
index c6b9f2004..de23e9c58 100644
--- a/lib/SimpleNonlinearSolve/src/utils.jl
+++ b/lib/SimpleNonlinearSolve/src/utils.jl
@@ -370,3 +370,10 @@ end
 
 @inline __reshape(x::Number, args...) = x
 @inline __reshape(x::AbstractArray, args...) = reshape(x, args...)
+
+# Override cases which might be used in a kernel launch
+__get_tolerance(x, η, ::Type{T}) where {T} = DiffEqBase._get_tolerance(η, T)
+function __get_tolerance(x::Union{SArray, Number}, ::Nothing, ::Type{T}) where {T}
+    η = real(oneunit(T)) * (eps(real(one(T))))^(real(T)(0.8))
+    return T(η)
+end
diff --git a/lib/SimpleNonlinearSolve/test/cuda.jl b/lib/SimpleNonlinearSolve/test/cuda.jl
index 95ee60fb6..fddf751de 100644
--- a/lib/SimpleNonlinearSolve/test/cuda.jl
+++ b/lib/SimpleNonlinearSolve/test/cuda.jl
@@ -37,7 +37,7 @@ f!(du, u, p) = du .= u .* u .- 2
 end
 
 function kernel_function(prob, alg)
-    solve(prob, alg; abstol = 1.0f-6, reltol = 1.0f-6)
+    solve(prob, alg)
     return nothing
 end
 

From a1ce1cfc6048c7c080de17345e6cc7b458c68dd8 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 04:55:41 -0500
Subject: [PATCH 16/20] Add alpha scaling

---
 lib/SimpleNonlinearSolve/src/linesearch.jl    |  3 +-
 .../src/nlsolve/broyden.jl                    | 33 +++++++---
 .../src/nlsolve/lbroyden.jl                   | 60 ++++++++++++-------
 lib/SimpleNonlinearSolve/src/utils.jl         | 10 ++--
 4 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 13f0c28af..fadf69cdc 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -112,11 +112,10 @@ function (cache::StaticLiFukushimaLineSearchCache)(u, δu)
     # Early Terminate based on Eq. 2.7
     fx_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u, cache.p))
     du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
-    fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ cache.λ₀ .* δu, cache.p))
+    fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ δu, cache.p))
     fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return T(true)
 
     λ₂, λ₁ = cache.λ₀, cache.λ₀
-    fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
 
     for i in 1:(cache.maxiters)
         fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 327f2e2fb..d670c0556 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -1,12 +1,16 @@
 """
-    SimpleBroyden(; linesearch = Val(false))
+    SimpleBroyden(; linesearch = Val(false), alpha = nothing)
 
 A low-overhead implementation of Broyden. This method is non-allocating on scalar
 and static array problems.
 
-If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
-no line search is used. For advanced customization of the line search, use the
-[`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
+### Keyword Arguments
+
+  * `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
+    [1] line search else no line search is used. For advanced customization of the line
+    search, use the [`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
+  * `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
 
 ### References
 
@@ -14,9 +18,13 @@ no line search is used. For advanced customization of the line search, use the
 of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
 (2000): 181-201.
 """
-struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end
 
-SimpleBroyden(; linesearch = Val(false)) = SimpleBroyden{_unwrap_val(linesearch)}()
+function SimpleBroyden(; linesearch = Val(false), alpha = nothing)
+    return SimpleBroyden{_unwrap_val(linesearch)}(alpha)
+end
 
 __get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)
 
@@ -25,13 +33,22 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
+    T = promote_type(eltype(x), eltype(fx))
 
     @bb xo = copy(x)
     @bb δx = copy(x)
     @bb δf = copy(fx)
     @bb fprev = copy(fx)
 
-    J⁻¹ = __init_identity_jacobian(fx, x)
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
+    J⁻¹ = __init_identity_jacobian(fx, x, init_α)
     @bb J⁻¹δf = copy(x)
     @bb xᵀJ⁻¹ = copy(x)
     @bb δJ⁻¹n = copy(x)
@@ -47,7 +64,7 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         @bb δx = J⁻¹ × vec(fprev)
         @bb δx .*= -1
 
-        α = ls_cache === nothing ? true : ls_cache(x, δx)
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
         @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fprev
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index c04fa20ee..c35dec388 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -1,15 +1,20 @@
 """
-    SimpleLimitedMemoryBroyden(; threshold::Int = 27, linesearch = Val(false))
-    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(false))
+    SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(false), alpha = nothing)
 
 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
 Broyden's method.
 
 If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.
 
-If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
-no line search is used. For advanced customization of the line search, use the
-[`LimitedMemoryBroyden`](@ref) algorithm in `NonlinearSolve.jl`.
+### Keyword Arguments:
+
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the
+    `LiFukushimaLineSearch` [1] line search else no line search is used. For advanced
+    customization of the line search, use the [`LimitedMemoryBroyden`](@ref) algorithm in
+    `NonlinearSolve.jl`.
+  - `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
 
 ### References
 
@@ -17,20 +22,22 @@ no line search is used. For advanced customization of the line search, use the
 of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
 (2000): 181-201.
 """
-struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
-       AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
+                 AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end
 
 __get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold)
 __use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)
 
 function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
-        linesearch = Val(false))
-    return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}()
+        linesearch = Val(false), alpha = nothing)
+    return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}(alpha)
 end
 
+# Don't resolve the `abstol` and `reltol` here
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
         alg::SimpleLimitedMemoryBroyden, args...; kwargs...)
-    # Don't resolve the `abstol` and `reltol` here
     return SciMLBase.__solve(prob, alg, args...; kwargs...)
 end
 
@@ -133,8 +140,17 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
     ls_cache = __use_linesearch(alg) === Val(true) ?
                LiFukushimaLineSearch()(prob, fx, x) : nothing
 
+    T = promote_type(eltype(x), eltype(fx))
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
     converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
-        threshold, ls_cache)
+        threshold, ls_cache, init_α)
 
     converged &&
         return build_solution(prob, alg, res.x, res.fx; retcode = ReturnCode.Success)
@@ -150,8 +166,8 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
         maximum(abs, fx) ≤ abstol &&
             return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)
 
-        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx)))
-        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf)))
+        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx), init_α))
+        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf), init_α))
 
         d = dot(vᵀ, δf)
         δx = @. (δx - mvec) / d
@@ -159,7 +175,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
         U = Base.setindex(U, vec(δx), mod1(i, _unwrap_val(threshold)))
         Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, _unwrap_val(threshold)))
 
-        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
+        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx), init_α))
 
         xo = x
         fo = fx
@@ -169,7 +185,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 end
 
 @generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
-        Vᵀ, ::Val{threshold}, ls_cache) where {threshold}
+        Vᵀ, ::Val{threshold}, ls_cache, init_α) where {threshold}
     calls = []
     for i in 1:threshold
         static_idx, static_idx_p1 = Val(i - 1), Val(i)
@@ -185,8 +201,8 @@ end
                 _U = __first_n_getindex(U, $(static_idx))
                 _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx))
 
-                vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx)))
-                mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf)))
+                vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx), init_α))
+                mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf), init_α))
 
                 d = dot(vᵀ, δf)
                 δx = @. (δx - mvec) / d
@@ -196,7 +212,7 @@ end
 
                 _U = __first_n_getindex(U, $(static_idx_p1))
                 _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx_p1))
-                δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx)))
+                δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx), init_α))
 
                 xo = x
                 fo = fx
@@ -226,8 +242,8 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
     return y
 end
 
-@inline _rmatvec!!(::Nothing, Vᵀ, x) = -x
-@inline _rmatvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, U), Vᵀ) .- x
+@inline _rmatvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _rmatvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, U), Vᵀ) .- x .* init_α
 
 function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     # (-I + UVᵀ) × x
@@ -244,8 +260,8 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     return y
 end
 
-@inline _matvec!!(::Nothing, Vᵀ, x) = -x
-@inline _matvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, Vᵀ), U) .- x
+@inline _matvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _matvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, Vᵀ), U) .- x .* init_α
 
 function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
     return map(Base.Fix1(dot, x), Y)
diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl
index de23e9c58..c150522da 100644
--- a/lib/SimpleNonlinearSolve/src/utils.jl
+++ b/lib/SimpleNonlinearSolve/src/utils.jl
@@ -214,12 +214,12 @@ function compute_jacobian_and_hessian(ad::AutoFiniteDiff, prob, fx, x)
     end
 end
 
-__init_identity_jacobian(u::Number, _) = one(u)
+__init_identity_jacobian(u::Number, fu, α = true) = oftype(u, α)
 __init_identity_jacobian!!(J::Number) = one(J)
-function __init_identity_jacobian(u, fu)
+function __init_identity_jacobian(u, fu, α = true)
     J = similar(u, promote_type(eltype(u), eltype(fu)), length(fu), length(u))
     fill!(J, zero(eltype(J)))
-    J[diagind(J)] .= one(eltype(J))
+    J[diagind(J)] .= eltype(J)(α)
     return J
 end
 function __init_identity_jacobian!!(J)
@@ -231,9 +231,9 @@ function __init_identity_jacobian!!(J::AbstractVector)
     fill!(J, one(eltype(J)))
     return J
 end
-function __init_identity_jacobian(u::StaticArray, fu)
+function __init_identity_jacobian(u::StaticArray, fu, α = true)
     S1, S2 = length(fu), length(u)
-    J = SMatrix{S1, S2, eltype(u)}(I)
+    J = SMatrix{S1, S2, eltype(u)}(I * α)
     return J
 end
 function __init_identity_jacobian!!(J::SMatrix{S1, S2}) where {S1, S2}

From ba289bedc8bea004dcf785ffa1143965a58d707d Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 05:50:30 -0500
Subject: [PATCH 17/20] Resolve ambiguities

---
 .../src/SimpleNonlinearSolve.jl                   |  9 ---------
 lib/SimpleNonlinearSolve/src/ad.jl                | 15 ---------------
 lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl   |  4 ++--
 lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl  |  6 ------
 lib/SimpleNonlinearSolve/src/utils.jl             |  4 ++--
 5 files changed, 4 insertions(+), 34 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index 8a4f2fe27..73eda9977 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -65,15 +65,6 @@ function SciMLBase.solve(prob::NonlinearProblem, alg::AbstractSimpleNonlinearSol
     return SciMLBase.__solve(prob, alg, args...; kwargs...)
 end
 
-function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
-        alg::AbstractSimpleNonlinearSolveAlgorithm, args...; abstol = nothing,
-        reltol = nothing, kwargs...)
-    _abstol = __get_tolerance(prob.u0, abstol, eltype(prob.u0))
-    _reltol = __get_tolerance(prob.u0, reltol, eltype(prob.u0))
-    return SciMLBase.__solve(prob, alg, args...; abstol = _abstol, reltol = _reltol,
-        kwargs...)
-end
-
 @setup_workload begin
     for T in (Float32, Float64)
         prob_no_brack_scalar = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
diff --git a/lib/SimpleNonlinearSolve/src/ad.jl b/lib/SimpleNonlinearSolve/src/ad.jl
index 11aa95476..a4a777be6 100644
--- a/lib/SimpleNonlinearSolve/src/ad.jl
+++ b/lib/SimpleNonlinearSolve/src/ad.jl
@@ -7,21 +7,6 @@ function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, <:AbstractArray}
         sol.original)
 end
 
-# Handle Ambiguities
-for algType in (SimpleNewtonRaphson, SimpleDFSane, SimpleTrustRegion, SimpleBroyden,
-    SimpleLimitedMemoryBroyden, SimpleKlement, SimpleHalley)
-    @eval begin
-        function SciMLBase.solve(prob::NonlinearProblem{uType, iip,
-                    <:Union{<:Dual{T, V, P}, <:AbstractArray{<:Dual{T, V, P}}}},
-                alg::$(algType), args...; kwargs...) where {uType, T, V, P, iip}
-            sol, partials = __nlsolve_ad(prob, alg, args...; kwargs...)
-            dual_soln = __nlsolve_dual_soln(sol.u, partials, prob.p)
-            return SciMLBase.build_solution(prob, alg, dual_soln, sol.resid; sol.retcode,
-                sol.stats, sol.original)
-        end
-    end
-end
-
 for algType in (Bisection, Brent, Alefeld, Falsi, ITP, Ridder)
     @eval begin
         function SciMLBase.solve(prob::IntervalNonlinearProblem{uType, iip,
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index d670c0556..ad19d765b 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -6,10 +6,10 @@ and static array problems.
 
 ### Keyword Arguments
 
-  * `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
     [1] line search else no line search is used. For advanced customization of the line
     search, use the [`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
-  * `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
+  - `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
     will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
 
 ### References
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index c35dec388..eb7b1bbaf 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -35,12 +35,6 @@ function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
     return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}(alpha)
 end
 
-# Don't resolve the `abstol` and `reltol` here
-function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
-        alg::SimpleLimitedMemoryBroyden, args...; kwargs...)
-    return SciMLBase.__solve(prob, alg, args...; kwargs...)
-end
-
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
         args...; termination_condition = nothing, kwargs...)
     if prob.u0 isa SArray
diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl
index c150522da..7390f5e3c 100644
--- a/lib/SimpleNonlinearSolve/src/utils.jl
+++ b/lib/SimpleNonlinearSolve/src/utils.jl
@@ -282,8 +282,8 @@ function init_termination_cache(abstol, reltol, du, u, ::Nothing)
 end
 function init_termination_cache(abstol, reltol, du, u, tc::AbstractNonlinearTerminationMode)
     T = promote_type(eltype(du), eltype(u))
-    abstol !== nothing && (abstol = T(abstol))
-    reltol !== nothing && (reltol = T(reltol))
+    abstol = __get_tolerance(u, abstol, T)
+    reltol = __get_tolerance(u, reltol, T)
     tc_cache = init(du, u, tc; abstol, reltol)
     return DiffEqBase.get_abstol(tc_cache), DiffEqBase.get_reltol(tc_cache), tc_cache
 end

From 4d35fc65b31b362a3114828ddc1bfaf5da0fb7cc Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 06:04:49 -0500
Subject: [PATCH 18/20] More of the 23 tests are working

---
 lib/SimpleNonlinearSolve/test/23_test_problems.jl | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/SimpleNonlinearSolve/test/23_test_problems.jl b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
index bc82145a9..18ac5e9bd 100644
--- a/lib/SimpleNonlinearSolve/test/23_test_problems.jl
+++ b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
@@ -70,12 +70,9 @@ end
     alg_ops = (SimpleBroyden(),)
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 4, 5, 6, 11, 12, 13, 14]
+    broken_tests[alg_ops[1]] = [1, 5, 11]
 
-    skip_tests = Dict(alg => Int[] for alg in alg_ops)
-    skip_tests[alg_ops[1]] = [2, 22]
-
-    test_on_library(problems, dicts, alg_ops, broken_tests; skip_tests)
+    test_on_library(problems, dicts, alg_ops, broken_tests)
 end
 
 @testset "SimpleKlement 23 Test Problems" begin

From 7e2fc08021ccab73af2b716ddaecb68891f608dc Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 09:26:45 -0500
Subject: [PATCH 19/20] Update src/linesearch.jl

Co-authored-by: Vaibhav Kumar Dixit <vaibhavyashdixit@gmail.com>
---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index fadf69cdc..235ee0bba 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -51,7 +51,7 @@ function __generic_init(alg::LiFukushimaLineSearch, prob, fu, u)
     T = promote_type(eltype(fu), eltype(u))
 
     ϕ = @closure (u, δu, α) -> begin
-        u_cache = @. u + α * δu
+        @. u_cache = u + α * δu
         return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
     end
 

From 34289efb5e348c1e23f5d7933712a01ef10aee58 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sun, 14 Jan 2024 09:27:34 -0500
Subject: [PATCH 20/20] Update src/linesearch.jl

---
 lib/SimpleNonlinearSolve/src/linesearch.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
index 235ee0bba..c33253f63 100644
--- a/lib/SimpleNonlinearSolve/src/linesearch.jl
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -51,7 +51,7 @@ function __generic_init(alg::LiFukushimaLineSearch, prob, fu, u)
     T = promote_type(eltype(fu), eltype(u))
 
     ϕ = @closure (u, δu, α) -> begin
-        @. u_cache = u + α * δu
+        @bb @. u_cache = u + α * δu
         return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
     end