diff --git a/lib/SimpleNonlinearSolve/Project.toml b/lib/SimpleNonlinearSolve/Project.toml
index d518e1686..493766ead 100644
--- a/lib/SimpleNonlinearSolve/Project.toml
+++ b/lib/SimpleNonlinearSolve/Project.toml
@@ -1,13 +1,14 @@
 name = "SimpleNonlinearSolve"
 uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7"
 authors = ["SciML"]
-version = "1.2.1"
+version = "1.3.0"

 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -17,17 +18,20 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"

-[extensions]
-SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
-
 [weakdeps]
 PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[extensions]
+SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
+SimpleNonlinearSolveStaticArraysExt = "StaticArrays"

 [compat]
 ADTypes = "0.2.6"
 ArrayInterface = "7"
 ConcreteStructs = "0.2"
 DiffEqBase = "6.126"
+FastClosures = "0.3"
 FiniteDiff = "2"
 ForwardDiff = "0.10.3"
 LinearAlgebra = "1.9"
@@ -36,4 +40,5 @@ PrecompileTools = "1"
 Reexport = "1"
 SciMLBase = "2.7"
 StaticArraysCore = "1.4"
+StaticArrays = "1"
 julia = "1.9"
diff --git a/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
new file mode 100644
index 000000000..90318a82a
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
@@ -0,0 +1,7 @@
+module SimpleNonlinearSolveStaticArraysExt
+
+using SimpleNonlinearSolve
+
+@inline SimpleNonlinearSolve.__is_extension_loaded(::Val{:StaticArrays}) = true
+
+end
diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index 356c0b51e..73eda9977 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -3,17 +3,16 @@ module SimpleNonlinearSolve
 import PrecompileTools: @compile_workload, @setup_workload, @recompile_invalidations

 @recompile_invalidations begin
-    using ADTypes,
-        ArrayInterface, ConcreteStructs, DiffEqBase, Reexport, LinearAlgebra, SciMLBase
+    using ADTypes, ArrayInterface, ConcreteStructs, DiffEqBase, FastClosures, FiniteDiff,
+        ForwardDiff, Reexport, LinearAlgebra, SciMLBase

     import DiffEqBase: AbstractNonlinearTerminationMode,
         AbstractSafeNonlinearTerminationMode, AbstractSafeBestNonlinearTerminationMode,
         NonlinearSafeTerminationReturnCode, get_termination_mode,
-        NONLINEARSOLVE_DEFAULT_NORM, _get_tolerance
-    using FiniteDiff, ForwardDiff
+        NONLINEARSOLVE_DEFAULT_NORM
     import ForwardDiff: Dual
     import MaybeInplace: @bb, setindex_trait, CanSetindex, CannotSetindex
-    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace
+    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace, _unwrap_val
     import StaticArraysCore: StaticArray, SVector, SMatrix, SArray, MArray, MMatrix, Size
 end

@@ -26,6 +25,7 @@ abstract type AbstractNewtonAlgorithm <: AbstractSimpleNonlinearSolveAlgorithm e
 @inline __is_extension_loaded(::Val) = false

 include("utils.jl")
+include("linesearch.jl")

 ## Nonlinear Solvers
 include("nlsolve/raphson.jl")
diff --git a/lib/SimpleNonlinearSolve/src/ad.jl b/lib/SimpleNonlinearSolve/src/ad.jl
index 574904bcc..a4a777be6 100644
--- a/lib/SimpleNonlinearSolve/src/ad.jl
+++ b/lib/SimpleNonlinearSolve/src/ad.jl
@@ -7,7 +7,6 @@ function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, <:AbstractArray}
         sol.original)
 end

-# Handle Ambiguities
 for algType in (Bisection, Brent, Alefeld, Falsi, ITP, Ridder)
     @eval begin
         function SciMLBase.solve(prob::IntervalNonlinearProblem{uType, iip,
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
index 66418b3e0..38f9cb9eb 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
@@ -26,7 +26,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Bisection, args...
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
index 75497f379..f37c45e4a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
@@ -13,7 +13,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Brent, args...;
     fl, fr = f(left), f(right)
     ϵ = eps(convert(typeof(fl), 1))

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
index 9db7d6cf1..86086f81a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Falsi, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
index fd46de6c3..cce5eafea 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
@@ -58,7 +58,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::ITP, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
index 20e0db489..cd18060d5 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Ridder, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
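The new package extension above does nothing beyond flipping a feature flag that the solvers can query. A rough sketch of the intended behaviour (`__is_extension_loaded` is internal API, shown here only for illustration):

using SimpleNonlinearSolve
SimpleNonlinearSolve.__is_extension_loaded(Val(:StaticArrays))  # false: generic fallback

using StaticArrays  # loads SimpleNonlinearSolveStaticArraysExt
SimpleNonlinearSolve.__is_extension_loaded(Val(:StaticArrays))  # true: extension method wins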
diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
new file mode 100644
index 000000000..c33253f63
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -0,0 +1,128 @@
+# This is a copy of the version in NonlinearSolve.jl. Temporarily kept here till we move
+# line searches into a dedicated package.
+@kwdef @concrete struct LiFukushimaLineSearch
+    lambda_0 = 1
+    beta = 0.5
+    sigma_1 = 0.001
+    sigma_2 = 0.001
+    eta = 0.1
+    rho = 0.1
+    nan_maxiters = missing
+    maxiters::Int = 100
+end
+
+@concrete mutable struct LiFukushimaLineSearchCache{T <: Union{Nothing, Int}}
+    ϕ
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    α
+    nan_maxiters::T
+    maxiters::Int
+end
+
+@concrete struct StaticLiFukushimaLineSearchCache
+    f
+    p
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    maxiters::Int
+end
+
+(alg::LiFukushimaLineSearch)(prob, fu, u) = __generic_init(alg, prob, fu, u)
+function (alg::LiFukushimaLineSearch)(prob, fu::Union{Number, SArray},
+        u::Union{Number, SArray})
+    (alg.nan_maxiters === missing || alg.nan_maxiters === nothing) &&
+        return __static_init(alg, prob, fu, u)
+    @warn "`LiFukushimaLineSearch` with NaN checking is not non-allocating" maxlog=1
+    return __generic_init(alg, prob, fu, u)
+end
+
+function __generic_init(alg::LiFukushimaLineSearch, prob, fu, u)
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
+    T = promote_type(eltype(fu), eltype(u))
+
+    ϕ = @closure (u, δu, α) -> begin
+        @bb @. u_cache = u + α * δu
+        return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
+    end
+
+    nan_maxiters = ifelse(alg.nan_maxiters === missing, 5, alg.nan_maxiters)
+
+    return LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
+        T(alg.sigma_2), T(alg.eta), T(alg.rho), T(true), nan_maxiters, alg.maxiters)
+end
+
+function __static_init(alg::LiFukushimaLineSearch, prob, fu, u)
+    T = promote_type(eltype(fu), eltype(u))
+    return StaticLiFukushimaLineSearchCache(prob.f, prob.p, T(alg.lambda_0), T(alg.beta),
+        T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), T(alg.rho), alg.maxiters)
+end
+
+function (cache::LiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+    ϕ = @closure α -> cache.ϕ(u, δu, α)
+    fx_norm = ϕ(T(0))
+
+    # Non-Blocking exit if the norm is NaN or Inf
+    DiffEqBase.NAN_CHECK(fx_norm) && return cache.α
+
+    # Early Terminate based on Eq. 2.7
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = ϕ(cache.α)
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+    fxλp_norm = ϕ(λ₂)
+
+    if cache.nan_maxiters !== nothing
+        if DiffEqBase.NAN_CHECK(fxλp_norm)
+            nan_converged = false
+            for _ in 1:(cache.nan_maxiters)
+                λ₁, λ₂ = λ₂, cache.β * λ₂
+                fxλp_norm = ϕ(λ₂)
+                nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
+                nan_converged && break
+            end
+            nan_converged || return cache.α
+        end
+    end
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = ϕ(λ₂)
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return cache.α
+end
+
+function (cache::StaticLiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+
+    # Early Terminate based on Eq. 2.7
+    fx_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u, cache.p))
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ δu, cache.p))
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return T(true)
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return T(true)
+end
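The line-search objects above are used in two steps by the solvers that follow: specialize on a problem, then query a step length each iteration. A rough sketch (here `prob`, `fu`, `u`, and `δu` stand for a nonlinear problem, its residual, the current iterate, and a proposed step):

ls_cache = LiFukushimaLineSearch()(prob, fu, u)  # static, non-allocating cache for Number/SArray inputs
α = ls_cache(u, δu)                              # damping factor for this step
# next iterate: u .+ α .* δu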
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 5f2dccdc7..ad19d765b 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -1,23 +1,54 @@
 """
-    SimpleBroyden()
+    SimpleBroyden(; linesearch = Val(false), alpha = nothing)

 A low-overhead implementation of Broyden. This method is non-allocating on scalar and
 static array problems.
+
+### Keyword Arguments
+
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
+    [1] line search; otherwise, no line search is used. For advanced customization of the
+    line search, use the [`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
+  - `alpha`: Scale the initial Jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
+
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleBroyden <: AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end
+
+function SimpleBroyden(; linesearch = Val(false), alpha = nothing)
+    return SimpleBroyden{_unwrap_val(linesearch)}(alpha)
+end
+
+__get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)

 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
+    T = promote_type(eltype(x), eltype(fx))

     @bb xo = copy(x)
     @bb δx = copy(x)
     @bb δf = copy(fx)
     @bb fprev = copy(fx)

-    J⁻¹ = __init_identity_jacobian(fx, x)
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
+    J⁻¹ = __init_identity_jacobian(fx, x, init_α)
     @bb J⁻¹δf = copy(x)
     @bb xᵀJ⁻¹ = copy(x)
     @bb δJ⁻¹n = copy(x)
@@ -26,9 +57,15 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
     abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
         termination_condition)

+    ls_cache = __get_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for _ in 1:maxiters
         @bb δx = J⁻¹ × vec(fprev)
-        @bb @. x = xo - δx
+        @bb δx .*= -1
+
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fprev
@@ -37,7 +74,6 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         tc_sol !== nothing && return tc_sol

         @bb J⁻¹δf = J⁻¹ × vec(δf)
-        @bb δx .*= -1
         d = dot(δx, J⁻¹δf)
         @bb xᵀJ⁻¹ = transpose(J⁻¹) × vec(δx)
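A hedged usage sketch of the new keyword (the problem, starting point, and tolerance below are illustrative, mirroring the test suite rather than the patch itself):

using SimpleNonlinearSolve, StaticArrays

quadratic_f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(quadratic_f, @SVector[1.0, 1.0], 2.0)

solve(prob, SimpleBroyden(); abstol = 1e-9)                          # plain Broyden
solve(prob, SimpleBroyden(; linesearch = Val(true)); abstol = 1e-9)  # with Li–Fukushima line search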
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
index c6111b38c..6931e6101 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
@@ -9,21 +9,21 @@ see the paper [1].

 ### Keyword Arguments

-  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e-10`.
-  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e10`.
+  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e-10`.
+  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e10`.
   - `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
     size in the algorithm.. Defaults to `1.0`.
   - `M`: The monotonicity of the algorithm is determined by a this positive integer.
     A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
-    of the function `f`. However, higher values allow for more flexibility in this reduction.
-    Despite this, the algorithm still ensures global convergence through the use of a
-    non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+    of the function `f`. However, higher values allow for more flexibility in this
+    reduction. Despite this, the algorithm still ensures global convergence through the use
+    of a non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
     condition. Values in the range of 5 to 20 are usually sufficient, but some cases may
     call for a higher value of `M`. The default setting is 10.
-  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
-    will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of
+    `γ` will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
   - `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
     parameter is the minimum value of that factor. Defaults to `0.1`.
   - `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
@@ -31,7 +31,7 @@ see the paper [1].
   - `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
     `nexp ∈ {1,2}`. Defaults to `2`.
   - `η_strategy`: function to determine the parameter `η_k`, which enables growth
-    of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+    of ``||F||^2``. Called as `η_k = η_strategy(f_1, k, x, F)` with `f_1` initialized as
     ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
     `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
     ``||F||^2 / k^2``.
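For instance, the documented default strategy could be supplied explicitly via the keyword (illustrative only; `my_eta` is a hypothetical name):

my_eta(f_1, k, x, F) = f_1 / k^2  # satisfies η_k > 0 and Σₖ ηₖ < ∞
alg = SimpleDFSane(; M = Val(10), η_strategy = my_eta)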
@@ -56,7 +56,7 @@ end
 function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
         M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
         nexp::Int = 2, η_strategy::F = (f_1, k, x, F) -> f_1 ./ k^2) where {F}
-    return SimpleDFSane{SciMLBase._unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
+    return SimpleDFSane{_unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
         η_strategy)
 end
@@ -83,7 +83,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane{M}, args...
     α_1 = one(T)
     f_1 = fx_norm

-    history_f_k = if x isa SArray
+    history_f_k = if x isa SArray ||
+                     (x isa Number && __is_extension_loaded(Val(:StaticArrays)))
         ones(SVector{M, T}) * fx_norm
     else
         fill(fx_norm, M)
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index 3a0c11ded..eb7b1bbaf 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -1,24 +1,38 @@
 """
-    SimpleLimitedMemoryBroyden(; threshold::Int = 27)
-    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27))
+    SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(false), alpha = nothing)

 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
-Broyden's method. This Alogrithm unfortunately cannot non-allocating for StaticArrays
-without compromising on the "simple" aspect.
+Broyden's method.

 If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.

-!!! warning
+### Keyword Arguments

-    This method is not very stable and can diverge even for very simple problems. This has
-    mostly been tested for neural networks in DeepEquilibriumNetworks.jl.
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the
+    `LiFukushimaLineSearch` [1] line search; otherwise, no line search is used. For
+    advanced customization of the line search, use the [`LimitedMemoryBroyden`](@ref)
+    algorithm in `NonlinearSolve.jl`.
+  - `alpha`: Scale the initial Jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
+
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleLimitedMemoryBroyden{threshold} <: AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
+                 AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end

 __get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold)
+__use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)

-function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27))
-    return SimpleLimitedMemoryBroyden{SciMLBase._unwrap_val(threshold)}()
+function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(false), alpha = nothing)
+    return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}(alpha)
 end

 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
@@ -41,12 +55,12 @@ end
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     threshold = __get_threshold(alg)
-    η = min(SciMLBase._unwrap_val(threshold), maxiters)
+    η = min(_unwrap_val(threshold), maxiters)

     # For scalar problems / if the threshold is larger than problem size just use Broyden
     if x isa Number || length(x) ≤ η
-        return SciMLBase.__solve(prob, SimpleBroyden(), args...;
-            abstol, reltol, maxiters, termination_condition, kwargs...)
+        return SciMLBase.__solve(prob, SimpleBroyden(; linesearch = __use_linesearch(alg)),
+            args...; abstol, reltol, maxiters, termination_condition, kwargs...)
     end

     fx = _get_fx(prob, x)
@@ -66,8 +80,12 @@ end
     Tcache = __lbroyden_threshold_cache(x, x isa StaticArray ? threshold : Val(η))
     @bb mat_cache = copy(x)

+    ls_cache = __use_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for i in 1:maxiters
-        @bb @. x = xo + δx
+        α = ls_cache === nothing ? true : ls_cache(x, δx)
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fo
@@ -109,36 +127,49 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
     U, Vᵀ = __init_low_rank_jacobian(vec(x), vec(fx), threshold)

-    abstol = DiffEqBase._get_tolerance(abstol, eltype(x))
+    abstol = __get_tolerance(x, abstol, eltype(x))

     xo, δx, fo, δf = x, -fx, fx, fx

+    ls_cache = __use_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
+    T = promote_type(eltype(x), eltype(fx))
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
     converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
-        threshold)
+        threshold, ls_cache, init_α)

     converged && return build_solution(prob, alg, res.x, res.fx;
         retcode = ReturnCode.Success)

     xo, fo, δx = res.x, res.fx, res.δx

-    for i in 1:(maxiters - SciMLBase._unwrap_val(threshold))
-        x = xo .+ δx
+    for i in 1:(maxiters - _unwrap_val(threshold))
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
+        x = xo .+ α .* δx
         fx = prob.f(x, prob.p)
         δf = fx - fo

         maximum(abs, fx) ≤ abstol &&
             return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)

-        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx)))
-        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf)))
+        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx), init_α))
+        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf), init_α))

         d = dot(vᵀ, δf)
         δx = @. (δx - mvec) / d

-        U = Base.setindex(U, vec(δx), mod1(i, SciMLBase._unwrap_val(threshold)))
-        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, SciMLBase._unwrap_val(threshold)))
+        U = Base.setindex(U, vec(δx), mod1(i, _unwrap_val(threshold)))
+        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, _unwrap_val(threshold)))

-        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
+        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx), init_α))

         xo = x
         fo = fx
@@ -148,13 +179,14 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 end

 @generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
-        Vᵀ, ::Val{threshold}) where {threshold}
+        Vᵀ, ::Val{threshold}, ls_cache, init_α) where {threshold}
     calls = []
     for i in 1:threshold
         static_idx, static_idx_p1 = Val(i - 1), Val(i)
         push!(calls, quote
-            x = xo .+ δx
+            α = ls_cache === nothing ? true : ls_cache(xo, δx)
+            x = xo .+ α .* δx
             fx = prob.f(x, prob.p)
             δf = fx - fo
@@ -163,8 +195,8 @@ end
             _U = __first_n_getindex(U, $(static_idx))
             _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx))

-            vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx)))
-            mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf)))
+            vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx), init_α))
+            mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf), init_α))

             d = dot(vᵀ, δf)
             δx = @. (δx - mvec) / d
@@ -174,7 +206,7 @@ end
             _U = __first_n_getindex(U, $(static_idx_p1))
             _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx_p1))

-            δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx)))
+            δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx), init_α))

             xo = x
             fo = fx
@@ -204,8 +236,8 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
     return y
 end

-@inline _rmatvec!!(::Nothing, Vᵀ, x) = -x
-@inline _rmatvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, U), Vᵀ) .- x
+@inline _rmatvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _rmatvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, U), Vᵀ) .- x .* init_α

 function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     # (-I + UVᵀ) × x
@@ -222,8 +254,8 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     return y
 end

-@inline _matvec!!(::Nothing, Vᵀ, x) = -x
-@inline _matvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, Vᵀ), U) .- x
+@inline _matvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _matvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, Vᵀ), U) .- x .* init_α

 function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
     return map(Base.Fix1(dot, x), Y)
diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl
index c6b9f2004..7390f5e3c 100644
--- a/lib/SimpleNonlinearSolve/src/utils.jl
+++ b/lib/SimpleNonlinearSolve/src/utils.jl
@@ -214,12 +214,12 @@ function compute_jacobian_and_hessian(ad::AutoFiniteDiff, prob, fx, x)
     end
 end

-__init_identity_jacobian(u::Number, _) = one(u)
+__init_identity_jacobian(u::Number, fu, α = true) = oftype(u, α)
 __init_identity_jacobian!!(J::Number) = one(J)
-function __init_identity_jacobian(u, fu)
+function __init_identity_jacobian(u, fu, α = true)
     J = similar(u, promote_type(eltype(u), eltype(fu)), length(fu), length(u))
     fill!(J, zero(eltype(J)))
-    J[diagind(J)] .= one(eltype(J))
+    J[diagind(J)] .= eltype(J)(α)
     return J
 end
 function __init_identity_jacobian!!(J)
@@ -231,9 +231,9 @@ function __init_identity_jacobian!!(J::AbstractVector)
     fill!(J, one(eltype(J)))
     return J
 end
-function __init_identity_jacobian(u::StaticArray, fu)
+function __init_identity_jacobian(u::StaticArray, fu, α = true)
     S1, S2 = length(fu), length(u)
-    J = SMatrix{S1, S2, eltype(u)}(I)
+    J = SMatrix{S1, S2, eltype(u)}(I * α)
     return J
 end
 function __init_identity_jacobian!!(J::SMatrix{S1, S2}) where {S1, S2}
@@ -282,8 +282,8 @@ function init_termination_cache(abstol, reltol, du, u, ::Nothing)
 end
 function init_termination_cache(abstol, reltol, du, u, tc::AbstractNonlinearTerminationMode)
     T = promote_type(eltype(du), eltype(u))
-    abstol !== nothing && (abstol = T(abstol))
-    reltol !== nothing && (reltol = T(reltol))
+    abstol = __get_tolerance(u, abstol, T)
+    reltol = __get_tolerance(u, reltol, T)
     tc_cache = init(du, u, tc; abstol, reltol)
     return DiffEqBase.get_abstol(tc_cache), DiffEqBase.get_reltol(tc_cache), tc_cache
 end
@@ -370,3 +370,10 @@ end
 @inline __reshape(x::Number, args...) = x
 @inline __reshape(x::AbstractArray, args...) = reshape(x, args...)
+
+# Override cases which might be used in a kernel launch
+__get_tolerance(x, η, ::Type{T}) where {T} = DiffEqBase._get_tolerance(η, T)
+function __get_tolerance(x::Union{SArray, Number}, ::Nothing, ::Type{T}) where {T}
+    η = real(oneunit(T)) * (eps(real(one(T))))^(real(T)(0.8))
+    return T(η)
+end
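The kernel-friendly default added above is the usual `eps(T)^(4 // 5)` heuristic; roughly, for the common element types (values approximate, `default_tol` is an illustrative name):

default_tol(T) = real(oneunit(T)) * (eps(real(one(T))))^(real(T)(0.8))
default_tol(Float64)  # ≈ 3.0e-13
default_tol(Float32)  # ≈ 2.9f-6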
diff --git a/lib/SimpleNonlinearSolve/test/23_test_problems.jl b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
index bc82145a9..18ac5e9bd 100644
--- a/lib/SimpleNonlinearSolve/test/23_test_problems.jl
+++ b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
@@ -70,12 +70,9 @@ end
     alg_ops = (SimpleBroyden(),)

     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 4, 5, 6, 11, 12, 13, 14]
+    broken_tests[alg_ops[1]] = [1, 5, 11]

-    skip_tests = Dict(alg => Int[] for alg in alg_ops)
-    skip_tests[alg_ops[1]] = [2, 22]
-
-    test_on_library(problems, dicts, alg_ops, broken_tests; skip_tests)
+    test_on_library(problems, dicts, alg_ops, broken_tests)
 end

 @testset "SimpleKlement 23 Test Problems" begin
diff --git a/lib/SimpleNonlinearSolve/test/Project.toml b/lib/SimpleNonlinearSolve/test/Project.toml
index 4442a15a6..cbf18f66c 100644
--- a/lib/SimpleNonlinearSolve/test/Project.toml
+++ b/lib/SimpleNonlinearSolve/test/Project.toml
@@ -1,6 +1,5 @@
 [deps]
 AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index 7da944e7b..5938197c7 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -1,4 +1,4 @@
-using AllocCheck, BenchmarkTools, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
+using AllocCheck, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
     LinearAlgebra, Test, ForwardDiff, DiffEqBase
 import PolyesterForwardDiff

@@ -57,13 +57,6 @@ const TERMINATION_CONDITIONS = [
         end
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0; autodiff = AutoForwardDiff())) < 200
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -89,11 +82,6 @@ end
         end
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -105,7 +93,8 @@ end
 # --- SimpleBroyden / SimpleKlement / SimpleLimitedMemoryBroyden tests ---

 @testset "$(_nameof(alg))" for alg in [SimpleBroyden(), SimpleKlement(), SimpleDFSane(),
-    SimpleLimitedMemoryBroyden()]
+    SimpleLimitedMemoryBroyden(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true))]
     function benchmark_nlsolve_oop(f, u0, p = 2.0)
         prob = NonlinearProblem{false}(f, u0, p)
         return solve(prob, alg, abstol = 1e-9)
@@ -128,13 +117,6 @@ end
         @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0)) < 200
-        allocs = alg isa SimpleDFSane ? 144 : 0
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0)) == allocs
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -164,7 +146,8 @@ end
 ## SimpleDFSane needs to allocate a history vector
 @testset "Allocation Checks: $(_nameof(alg))" for alg in (SimpleNewtonRaphson(),
     SimpleHalley(), SimpleBroyden(), SimpleKlement(), SimpleLimitedMemoryBroyden(),
-    SimpleTrustRegion(), SimpleDFSane())
+    SimpleTrustRegion(), SimpleDFSane(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
     @check_allocs nlsolve(prob, alg) = SciMLBase.solve(prob, alg; abstol = 1e-9)

     nlprob_scalar = NonlinearProblem{false}(quadratic_f, 1.0, 2.0)
@@ -175,8 +158,7 @@ end
         @test true
     catch e
         @error e
-        # History Vector Allocates
-        @test false broken=(alg isa SimpleDFSane)
+        @test false
     end

     # ForwardDiff allocates for hessian since we don't propagate the chunksize
diff --git a/lib/SimpleNonlinearSolve/test/cuda.jl b/lib/SimpleNonlinearSolve/test/cuda.jl
index 9b984bc70..fddf751de 100644
--- a/lib/SimpleNonlinearSolve/test/cuda.jl
+++ b/lib/SimpleNonlinearSolve/test/cuda.jl
@@ -7,7 +7,9 @@ f!(du, u, p) = du .= u .* u .- 2

 @testset "Solving on GPUs" begin
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
         @info "Testing $alg on CUDA"

         # Static Arrays
@@ -35,7 +37,7 @@ f!(du, u, p) = du .= u .* u .- 2
 end

 function kernel_function(prob, alg)
-    solve(prob, alg; abstol = 1.0f-6, reltol = 1.0f-6)
+    solve(prob, alg)
     return nothing
 end

@@ -43,7 +45,9 @@ end
     prob = NonlinearProblem{false}(f, @SVector[1.0f0, 1.0f0])

     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
        @test begin
            try
                @cuda kernel_function(prob, alg)
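Taken together with the kernel-friendly tolerance defaults above, a GPU kernel can now call `solve` without passing tolerances. A rough sketch of the pattern the CUDA test exercises (assumes a CUDA-capable device and the packages used in the test file):

using CUDA, StaticArrays, SimpleNonlinearSolve

f(u, p) = u .* u .- 2
prob = NonlinearProblem{false}(f, @SVector[1.0f0, 1.0f0])

function kernel_function(prob, alg)
    solve(prob, alg)  # defaults resolved via the kernel-safe __get_tolerance overloads
    return nothing
end

@cuda kernel_function(prob, SimpleBroyden(; linesearch = Val(true)))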