diff --git a/lib/SimpleNonlinearSolve/Project.toml b/lib/SimpleNonlinearSolve/Project.toml
index d518e1686..493766ead 100644
--- a/lib/SimpleNonlinearSolve/Project.toml
+++ b/lib/SimpleNonlinearSolve/Project.toml
@@ -1,13 +1,14 @@
 name = "SimpleNonlinearSolve"
 uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7"
 authors = ["SciML"]
-version = "1.2.1"
+version = "1.3.0"

 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -17,17 +18,20 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"

-[extensions]
-SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
-
 [weakdeps]
 PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[extensions]
+SimpleNonlinearSolvePolyesterForwardDiffExt = "PolyesterForwardDiff"
+SimpleNonlinearSolveStaticArraysExt = "StaticArrays"

 [compat]
 ADTypes = "0.2.6"
 ArrayInterface = "7"
 ConcreteStructs = "0.2"
 DiffEqBase = "6.126"
+FastClosures = "0.3"
 FiniteDiff = "2"
 ForwardDiff = "0.10.3"
 LinearAlgebra = "1.9"
@@ -36,4 +40,5 @@ PrecompileTools = "1"
 Reexport = "1"
 SciMLBase = "2.7"
 StaticArraysCore = "1.4"
+StaticArrays = "1"
 julia = "1.9"
diff --git a/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
new file mode 100644
index 000000000..90318a82a
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/ext/SimpleNonlinearSolveStaticArraysExt.jl
@@ -0,0 +1,7 @@
+module SimpleNonlinearSolveStaticArraysExt
+
+using SimpleNonlinearSolve
+
+@inline SimpleNonlinearSolve.__is_extension_loaded(::Val{:StaticArrays}) = true
+
+end
diff --git a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
index 356c0b51e..73eda9977 100644
--- a/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
+++ b/lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl
@@ -3,17 +3,16 @@ module SimpleNonlinearSolve
 import PrecompileTools: @compile_workload, @setup_workload, @recompile_invalidations

 @recompile_invalidations begin
-    using ADTypes,
-        ArrayInterface, ConcreteStructs, DiffEqBase, Reexport, LinearAlgebra, SciMLBase
+    using ADTypes, ArrayInterface, ConcreteStructs, DiffEqBase, FastClosures, FiniteDiff,
+        ForwardDiff, Reexport, LinearAlgebra, SciMLBase

     import DiffEqBase: AbstractNonlinearTerminationMode,
         AbstractSafeNonlinearTerminationMode, AbstractSafeBestNonlinearTerminationMode,
         NonlinearSafeTerminationReturnCode, get_termination_mode,
-        NONLINEARSOLVE_DEFAULT_NORM, _get_tolerance
-    using FiniteDiff, ForwardDiff
+        NONLINEARSOLVE_DEFAULT_NORM
     import ForwardDiff: Dual
     import MaybeInplace: @bb, setindex_trait, CanSetindex, CannotSetindex
-    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace
+    import SciMLBase: AbstractNonlinearAlgorithm, build_solution, isinplace, _unwrap_val
     import StaticArraysCore: StaticArray, SVector, SMatrix, SArray, MArray, MMatrix, Size
 end

@@ -26,6 +25,7 @@ abstract type AbstractNewtonAlgorithm <: AbstractSimpleNonlinearSolveAlgorithm e
 @inline __is_extension_loaded(::Val) = false

 include("utils.jl")
+include("linesearch.jl")

 ## Nonlinear Solvers
 include("nlsolve/raphson.jl")
diff --git a/lib/SimpleNonlinearSolve/src/ad.jl b/lib/SimpleNonlinearSolve/src/ad.jl
index 574904bcc..a4a777be6 100644
--- a/lib/SimpleNonlinearSolve/src/ad.jl
+++ b/lib/SimpleNonlinearSolve/src/ad.jl
@@ -7,7 +7,6 @@ function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, <:AbstractArray}
         sol.original)
 end

-# Handle Ambiguities
 for algType in (Bisection, Brent, Alefeld, Falsi, ITP, Ridder)
     @eval begin
         function SciMLBase.solve(prob::IntervalNonlinearProblem{uType, iip,
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
index 66418b3e0..38f9cb9eb 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/bisection.jl
@@ -26,7 +26,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Bisection, args...
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
index 75497f379..f37c45e4a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/brent.jl
@@ -13,7 +13,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Brent, args...;
     fl, fr = f(left), f(right)
     ϵ = eps(convert(typeof(fl), 1))

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
index 9db7d6cf1..86086f81a 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/falsi.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Falsi, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
index fd46de6c3..cce5eafea 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/itp.jl
@@ -58,7 +58,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::ITP, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
diff --git a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
index 20e0db489..cd18060d5 100644
--- a/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
+++ b/lib/SimpleNonlinearSolve/src/bracketing/ridder.jl
@@ -12,7 +12,7 @@ function SciMLBase.solve(prob::IntervalNonlinearProblem, alg::Ridder, args...;
     left, right = prob.tspan
     fl, fr = f(left), f(right)

-    abstol = _get_tolerance(abstol,
+    abstol = __get_tolerance(nothing, abstol,
         promote_type(eltype(first(prob.tspan)), eltype(last(prob.tspan))))

     if iszero(fl)
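The new package extension above does nothing beyond flipping a feature flag that the solvers can query. A rough sketch of the intended behaviour (`__is_extension_loaded` is internal API, shown here only for illustration):

using SimpleNonlinearSolve
SimpleNonlinearSolve.__is_extension_loaded(Val(:StaticArrays))  # false: generic fallback

using StaticArrays  # loads SimpleNonlinearSolveStaticArraysExt
SimpleNonlinearSolve.__is_extension_loaded(Val(:StaticArrays))  # true: extension method wins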
diff --git a/lib/SimpleNonlinearSolve/src/linesearch.jl b/lib/SimpleNonlinearSolve/src/linesearch.jl
new file mode 100644
index 000000000..c33253f63
--- /dev/null
+++ b/lib/SimpleNonlinearSolve/src/linesearch.jl
@@ -0,0 +1,128 @@
+# This is a copy of the version in NonlinearSolve.jl. Temporarily kept here till we move
+# line searches into a dedicated package.
+@kwdef @concrete struct LiFukushimaLineSearch
+    lambda_0 = 1
+    beta = 0.5
+    sigma_1 = 0.001
+    sigma_2 = 0.001
+    eta = 0.1
+    rho = 0.1
+    nan_maxiters = missing
+    maxiters::Int = 100
+end
+
+@concrete mutable struct LiFukushimaLineSearchCache{T <: Union{Nothing, Int}}
+    ϕ
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    α
+    nan_maxiters::T
+    maxiters::Int
+end
+
+@concrete struct StaticLiFukushimaLineSearchCache
+    f
+    p
+    λ₀
+    β
+    σ₁
+    σ₂
+    η
+    ρ
+    maxiters::Int
+end
+
+(alg::LiFukushimaLineSearch)(prob, fu, u) = __generic_init(alg, prob, fu, u)
+function (alg::LiFukushimaLineSearch)(prob, fu::Union{Number, SArray},
+        u::Union{Number, SArray})
+    (alg.nan_maxiters === missing || alg.nan_maxiters === nothing) &&
+        return __static_init(alg, prob, fu, u)
+    @warn "`LiFukushimaLineSearch` with NaN checking is not non-allocating" maxlog=1
+    return __generic_init(alg, prob, fu, u)
+end
+
+function __generic_init(alg::LiFukushimaLineSearch, prob, fu, u)
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
+    T = promote_type(eltype(fu), eltype(u))
+
+    ϕ = @closure (u, δu, α) -> begin
+        @bb @. u_cache = u + α * δu
+        return NONLINEARSOLVE_DEFAULT_NORM(__eval_f(prob, fu_cache, u_cache))
+    end
+
+    nan_maxiters = ifelse(alg.nan_maxiters === missing, 5, alg.nan_maxiters)
+
+    return LiFukushimaLineSearchCache(ϕ, T(alg.lambda_0), T(alg.beta), T(alg.sigma_1),
+        T(alg.sigma_2), T(alg.eta), T(alg.rho), T(true), nan_maxiters, alg.maxiters)
+end
+
+function __static_init(alg::LiFukushimaLineSearch, prob, fu, u)
+    T = promote_type(eltype(fu), eltype(u))
+    return StaticLiFukushimaLineSearchCache(prob.f, prob.p, T(alg.lambda_0), T(alg.beta),
+        T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), T(alg.rho), alg.maxiters)
+end
+
+function (cache::LiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+    ϕ = @closure α -> cache.ϕ(u, δu, α)
+    fx_norm = ϕ(T(0))
+
+    # Non-Blocking exit if the norm is NaN or Inf
+    DiffEqBase.NAN_CHECK(fx_norm) && return cache.α
+
+    # Early Terminate based on Eq. 2.7
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = ϕ(cache.α)
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return cache.α
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+    fxλp_norm = ϕ(λ₂)
+
+    if cache.nan_maxiters !== nothing
+        if DiffEqBase.NAN_CHECK(fxλp_norm)
+            nan_converged = false
+            for _ in 1:(cache.nan_maxiters)
+                λ₁, λ₂ = λ₂, cache.β * λ₂
+                fxλp_norm = ϕ(λ₂)
+                nan_converged = DiffEqBase.NAN_CHECK(fxλp_norm)::Bool
+                nan_converged && break
+            end
+            nan_converged || return cache.α
+        end
+    end
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = ϕ(λ₂)
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return cache.α
+end
+
+function (cache::StaticLiFukushimaLineSearchCache)(u, δu)
+    T = promote_type(eltype(u), eltype(δu))
+
+    # Early Terminate based on Eq. 2.7
+    fx_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u, cache.p))
+    du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
+    fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ δu, cache.p))
+    fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return T(true)
+
+    λ₂, λ₁ = cache.λ₀, cache.λ₀
+
+    for i in 1:(cache.maxiters)
+        fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
+        converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2
+        converged && return λ₂
+        λ₁, λ₂ = λ₂, cache.β * λ₂
+    end
+
+    return T(true)
+end
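The line-search objects above are used in two steps by the solvers that follow: specialize on a problem, then query a step length each iteration. A rough sketch (here `prob`, `fu`, `u`, and `δu` stand for a nonlinear problem, its residual, the current iterate, and a proposed step):

ls_cache = LiFukushimaLineSearch()(prob, fu, u)  # static, non-allocating cache for Number/SArray inputs
α = ls_cache(u, δu)                              # damping factor for this step
# next iterate: u .+ α .* δu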
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
index 5f2dccdc7..ad19d765b 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/broyden.jl
@@ -1,23 +1,54 @@
 """
-    SimpleBroyden()
+    SimpleBroyden(; linesearch = Val(false), alpha = nothing)

 A low-overhead implementation of Broyden. This method is non-allocating on scalar and
 static array problems.
+
+### Keyword Arguments
+
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
+    [1] line search; otherwise, no line search is used. For advanced customization of the
+    line search, use the [`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
+  - `alpha`: Scale the initial Jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
+
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleBroyden <: AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end
+
+function SimpleBroyden(; linesearch = Val(false), alpha = nothing)
+    return SimpleBroyden{_unwrap_val(linesearch)}(alpha)
+end
+
+__get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)

 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
+    T = promote_type(eltype(x), eltype(fx))

     @bb xo = copy(x)
     @bb δx = copy(x)
     @bb δf = copy(fx)
     @bb fprev = copy(fx)

-    J⁻¹ = __init_identity_jacobian(fx, x)
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
+    J⁻¹ = __init_identity_jacobian(fx, x, init_α)
     @bb J⁻¹δf = copy(x)
     @bb xᵀJ⁻¹ = copy(x)
     @bb δJ⁻¹n = copy(x)
@@ -26,9 +57,15 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
     abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
         termination_condition)

+    ls_cache = __get_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for _ in 1:maxiters
         @bb δx = J⁻¹ × vec(fprev)
-        @bb @. x = xo - δx
+        @bb δx .*= -1
+
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fprev
@@ -37,7 +74,6 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
         tc_sol !== nothing && return tc_sol

         @bb J⁻¹δf = J⁻¹ × vec(δf)
-        @bb δx .*= -1
         d = dot(δx, J⁻¹δf)
         @bb xᵀJ⁻¹ = transpose(J⁻¹) × vec(δx)
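A hedged usage sketch of the new keyword (the problem, starting point, and tolerance below are illustrative, mirroring the test suite rather than the patch itself):

using SimpleNonlinearSolve, StaticArrays

quadratic_f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(quadratic_f, @SVector[1.0, 1.0], 2.0)

solve(prob, SimpleBroyden(); abstol = 1e-9)                          # plain Broyden
solve(prob, SimpleBroyden(; linesearch = Val(true)); abstol = 1e-9)  # with Li–Fukushima line search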
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
index c6111b38c..6931e6101 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/dfsane.jl
@@ -9,21 +9,21 @@ see the paper [1].

 ### Keyword Arguments

-  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e-10`.
-  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
-    size in the algorithm. Defaults to `1e10`.
+  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e-10`.
+  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the
+    step size in the algorithm. Defaults to `1e10`.
   - `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
     size in the algorithm.. Defaults to `1.0`.
   - `M`: The monotonicity of the algorithm is determined by a this positive integer.
     A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
-    of the function `f`. However, higher values allow for more flexibility in this reduction.
-    Despite this, the algorithm still ensures global convergence through the use of a
-    non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+    of the function `f`. However, higher values allow for more flexibility in this
+    reduction. Despite this, the algorithm still ensures global convergence through the use
+    of a non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
     condition. Values in the range of 5 to 20 are usually sufficient, but some cases may
     call for a higher value of `M`. The default setting is 10.
-  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
-    will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of
+    `γ` will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
   - `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
     parameter is the minimum value of that factor. Defaults to `0.1`.
   - `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
@@ -31,7 +31,7 @@ see the paper [1].
   - `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
     `nexp ∈ {1,2}`. Defaults to `2`.
   - `η_strategy`: function to determine the parameter `η_k`, which enables growth
-    of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+    of ``||F||^2``. Called as `η_k = η_strategy(f_1, k, x, F)` with `f_1` initialized as
     ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
     `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
     ``||F||^2 / k^2``.
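For instance, the documented default strategy could be supplied explicitly via the keyword (illustrative only; `my_eta` is a hypothetical name):

my_eta(f_1, k, x, F) = f_1 / k^2  # satisfies η_k > 0 and Σₖ ηₖ < ∞
alg = SimpleDFSane(; M = Val(10), η_strategy = my_eta)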
@@ -56,7 +56,7 @@ end
 function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
         M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
         nexp::Int = 2, η_strategy::F = (f_1, k, x, F) -> f_1 ./ k^2) where {F}
-    return SimpleDFSane{SciMLBase._unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
+    return SimpleDFSane{_unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp,
         η_strategy)
 end
@@ -83,7 +83,8 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane{M}, args...
     α_1 = one(T)
     f_1 = fx_norm

-    history_f_k = if x isa SArray
+    history_f_k = if x isa SArray ||
+                     (x isa Number && __is_extension_loaded(Val(:StaticArrays)))
         ones(SVector{M, T}) * fx_norm
     else
         fill(fx_norm, M)
diff --git a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
index 3a0c11ded..eb7b1bbaf 100644
--- a/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
+++ b/lib/SimpleNonlinearSolve/src/nlsolve/lbroyden.jl
@@ -1,24 +1,38 @@
 """
-    SimpleLimitedMemoryBroyden(; threshold::Int = 27)
-    SimpleLimitedMemoryBroyden(; threshold::Val = Val(27))
+    SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(false), alpha = nothing)

 A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
-Broyden's method. This Alogrithm unfortunately cannot non-allocating for StaticArrays
-without compromising on the "simple" aspect.
+Broyden's method.

 If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.

-!!! warning
+### Keyword Arguments

-    This method is not very stable and can diverge even for very simple problems. This has
-    mostly been tested for neural networks in DeepEquilibriumNetworks.jl.
+  - `linesearch`: If `linesearch` is `Val(true)`, then we use the
+    `LiFukushimaLineSearch` [1] line search; otherwise, no line search is used. For
+    advanced customization of the line search, use the [`LimitedMemoryBroyden`](@ref)
+    algorithm in `NonlinearSolve.jl`.
+  - `alpha`: Scale the initial Jacobian initialization with `alpha`. If it is `nothing`, we
+    will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
+
+### References
+
+[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
+(2000): 181-201.
 """
-struct SimpleLimitedMemoryBroyden{threshold} <: AbstractSimpleNonlinearSolveAlgorithm end
+@concrete struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
+                 AbstractSimpleNonlinearSolveAlgorithm
+    alpha
+end

 __get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold)
+__use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)

-function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27))
-    return SimpleLimitedMemoryBroyden{SciMLBase._unwrap_val(threshold)}()
+function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
+        linesearch = Val(false), alpha = nothing)
+    return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}(alpha)
 end

 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
@@ -41,12 +55,12 @@ end
         termination_condition = nothing, kwargs...)
     x = __maybe_unaliased(prob.u0, alias_u0)
     threshold = __get_threshold(alg)
-    η = min(SciMLBase._unwrap_val(threshold), maxiters)
+    η = min(_unwrap_val(threshold), maxiters)

     # For scalar problems / if the threshold is larger than problem size just use Broyden
     if x isa Number || length(x) ≤ η
-        return SciMLBase.__solve(prob, SimpleBroyden(), args...;
-            abstol, reltol, maxiters, termination_condition, kwargs...)
+        return SciMLBase.__solve(prob, SimpleBroyden(; linesearch = __use_linesearch(alg)),
+            args...; abstol, reltol, maxiters, termination_condition, kwargs...)
     end

     fx = _get_fx(prob, x)
@@ -66,8 +80,12 @@ end
     Tcache = __lbroyden_threshold_cache(x, x isa StaticArray ? threshold : Val(η))
     @bb mat_cache = copy(x)

+    ls_cache = __use_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
     for i in 1:maxiters
-        @bb @. x = xo + δx
+        α = ls_cache === nothing ? true : ls_cache(x, δx)
+        @bb @. x = xo + α * δx
         fx = __eval_f(prob, fx, x)
         @bb @. δf = fx - fo
@@ -109,36 +127,49 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
     U, Vᵀ = __init_low_rank_jacobian(vec(x), vec(fx), threshold)

-    abstol = DiffEqBase._get_tolerance(abstol, eltype(x))
+    abstol = __get_tolerance(x, abstol, eltype(x))

     xo, δx, fo, δf = x, -fx, fx, fx

+    ls_cache = __use_linesearch(alg) === Val(true) ?
+               LiFukushimaLineSearch()(prob, fx, x) : nothing
+
+    T = promote_type(eltype(x), eltype(fx))
+    if alg.alpha === nothing
+        fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
+        x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
+        init_α = ifelse(fx_norm ≥ 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
+    else
+        init_α = inv(alg.alpha)
+    end
+
     converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
-        threshold)
+        threshold, ls_cache, init_α)

     converged && return build_solution(prob, alg, res.x, res.fx;
         retcode = ReturnCode.Success)

     xo, fo, δx = res.x, res.fx, res.δx

-    for i in 1:(maxiters - SciMLBase._unwrap_val(threshold))
-        x = xo .+ δx
+    for i in 1:(maxiters - _unwrap_val(threshold))
+        α = ls_cache === nothing ? true : ls_cache(xo, δx)
+        x = xo .+ α .* δx
         fx = prob.f(x, prob.p)
         δf = fx - fo

         maximum(abs, fx) ≤ abstol &&
             return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)

-        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx)))
-        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf)))
+        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx), init_α))
+        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf), init_α))

         d = dot(vᵀ, δf)
         δx = @. (δx - mvec) / d

-        U = Base.setindex(U, vec(δx), mod1(i, SciMLBase._unwrap_val(threshold)))
-        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, SciMLBase._unwrap_val(threshold)))
+        U = Base.setindex(U, vec(δx), mod1(i, _unwrap_val(threshold)))
+        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, _unwrap_val(threshold)))

-        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
+        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx), init_α))

         xo = x
         fo = fx
@@ -148,13 +179,14 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
 end

 @generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
-        Vᵀ, ::Val{threshold}) where {threshold}
+        Vᵀ, ::Val{threshold}, ls_cache, init_α) where {threshold}
     calls = []
     for i in 1:threshold
         static_idx, static_idx_p1 = Val(i - 1), Val(i)
         push!(calls, quote
-            x = xo .+ δx
+            α = ls_cache === nothing ? true : ls_cache(xo, δx)
+            x = xo .+ α .* δx
             fx = prob.f(x, prob.p)
             δf = fx - fo
@@ -163,8 +195,8 @@ end
             _U = __first_n_getindex(U, $(static_idx))
             _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx))

-            vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx)))
-            mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf)))
+            vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx), init_α))
+            mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf), init_α))

             d = dot(vᵀ, δf)
             δx = @. (δx - mvec) / d
@@ -174,7 +206,7 @@ end
             _U = __first_n_getindex(U, $(static_idx_p1))
             _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx_p1))

-            δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx)))
+            δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx), init_α))

             xo = x
             fo = fx
@@ -204,8 +236,8 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
     return y
 end

-@inline _rmatvec!!(::Nothing, Vᵀ, x) = -x
-@inline _rmatvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, U), Vᵀ) .- x
+@inline _rmatvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _rmatvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, U), Vᵀ) .- x .* init_α

 function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     # (-I + UVᵀ) × x
@@ -222,8 +254,8 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     return y
 end

-@inline _matvec!!(::Nothing, Vᵀ, x) = -x
-@inline _matvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, Vᵀ), U) .- x
+@inline _matvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
+@inline _matvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, Vᵀ), U) .- x .* init_α

 function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
     return map(Base.Fix1(dot, x), Y)
diff --git a/lib/SimpleNonlinearSolve/src/utils.jl b/lib/SimpleNonlinearSolve/src/utils.jl
index c6b9f2004..7390f5e3c 100644
--- a/lib/SimpleNonlinearSolve/src/utils.jl
+++ b/lib/SimpleNonlinearSolve/src/utils.jl
@@ -214,12 +214,12 @@ function compute_jacobian_and_hessian(ad::AutoFiniteDiff, prob, fx, x)
     end
 end

-__init_identity_jacobian(u::Number, _) = one(u)
+__init_identity_jacobian(u::Number, fu, α = true) = oftype(u, α)
 __init_identity_jacobian!!(J::Number) = one(J)
-function __init_identity_jacobian(u, fu)
+function __init_identity_jacobian(u, fu, α = true)
     J = similar(u, promote_type(eltype(u), eltype(fu)), length(fu), length(u))
     fill!(J, zero(eltype(J)))
-    J[diagind(J)] .= one(eltype(J))
+    J[diagind(J)] .= eltype(J)(α)
     return J
 end
 function __init_identity_jacobian!!(J)
@@ -231,9 +231,9 @@ function __init_identity_jacobian!!(J::AbstractVector)
     fill!(J, one(eltype(J)))
     return J
 end
-function __init_identity_jacobian(u::StaticArray, fu)
+function __init_identity_jacobian(u::StaticArray, fu, α = true)
     S1, S2 = length(fu), length(u)
-    J = SMatrix{S1, S2, eltype(u)}(I)
+    J = SMatrix{S1, S2, eltype(u)}(I * α)
     return J
 end
 function __init_identity_jacobian!!(J::SMatrix{S1, S2}) where {S1, S2}
@@ -282,8 +282,8 @@ function init_termination_cache(abstol, reltol, du, u, ::Nothing)
 end
 function init_termination_cache(abstol, reltol, du, u, tc::AbstractNonlinearTerminationMode)
     T = promote_type(eltype(du), eltype(u))
-    abstol !== nothing && (abstol = T(abstol))
-    reltol !== nothing && (reltol = T(reltol))
+    abstol = __get_tolerance(u, abstol, T)
+    reltol = __get_tolerance(u, reltol, T)
     tc_cache = init(du, u, tc; abstol, reltol)
     return DiffEqBase.get_abstol(tc_cache), DiffEqBase.get_reltol(tc_cache), tc_cache
 end
@@ -370,3 +370,10 @@ end
 @inline __reshape(x::Number, args...) = x
 @inline __reshape(x::AbstractArray, args...) = reshape(x, args...)
+
+# Override cases which might be used in a kernel launch
+__get_tolerance(x, η, ::Type{T}) where {T} = DiffEqBase._get_tolerance(η, T)
+function __get_tolerance(x::Union{SArray, Number}, ::Nothing, ::Type{T}) where {T}
+    η = real(oneunit(T)) * (eps(real(one(T))))^(real(T)(0.8))
+    return T(η)
+end
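The kernel-friendly default added above is the usual `eps(T)^(4 // 5)` heuristic; roughly, for the common element types (values approximate, `default_tol` is an illustrative name):

default_tol(T) = real(oneunit(T)) * (eps(real(one(T))))^(real(T)(0.8))
default_tol(Float64)  # ≈ 3.0e-13
default_tol(Float32)  # ≈ 2.9f-6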
diff --git a/lib/SimpleNonlinearSolve/test/23_test_problems.jl b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
index bc82145a9..18ac5e9bd 100644
--- a/lib/SimpleNonlinearSolve/test/23_test_problems.jl
+++ b/lib/SimpleNonlinearSolve/test/23_test_problems.jl
@@ -70,12 +70,9 @@ end
     alg_ops = (SimpleBroyden(),)

     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 4, 5, 6, 11, 12, 13, 14]
+    broken_tests[alg_ops[1]] = [1, 5, 11]

-    skip_tests = Dict(alg => Int[] for alg in alg_ops)
-    skip_tests[alg_ops[1]] = [2, 22]
-
-    test_on_library(problems, dicts, alg_ops, broken_tests; skip_tests)
+    test_on_library(problems, dicts, alg_ops, broken_tests)
 end

 @testset "SimpleKlement 23 Test Problems" begin
diff --git a/lib/SimpleNonlinearSolve/test/Project.toml b/lib/SimpleNonlinearSolve/test/Project.toml
index 4442a15a6..cbf18f66c 100644
--- a/lib/SimpleNonlinearSolve/test/Project.toml
+++ b/lib/SimpleNonlinearSolve/test/Project.toml
@@ -1,6 +1,5 @@
 [deps]
 AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/lib/SimpleNonlinearSolve/test/basictests.jl b/lib/SimpleNonlinearSolve/test/basictests.jl
index 7da944e7b..5938197c7 100644
--- a/lib/SimpleNonlinearSolve/test/basictests.jl
+++ b/lib/SimpleNonlinearSolve/test/basictests.jl
@@ -1,4 +1,4 @@
-using AllocCheck, BenchmarkTools, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
+using AllocCheck, LinearSolve, SimpleNonlinearSolve, StaticArrays, Random,
     LinearAlgebra, Test, ForwardDiff, DiffEqBase
 import PolyesterForwardDiff

@@ -57,13 +57,6 @@ const TERMINATION_CONDITIONS = [
         end
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0; autodiff = AutoForwardDiff())) < 200
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -89,11 +82,6 @@ end
         end
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0;
-            autodiff = AutoForwardDiff())) == 0
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -105,7 +93,8 @@ end
 # --- SimpleBroyden / SimpleKlement / SimpleLimitedMemoryBroyden tests ---

 @testset "$(_nameof(alg))" for alg in [SimpleBroyden(), SimpleKlement(), SimpleDFSane(),
-    SimpleLimitedMemoryBroyden()]
+    SimpleLimitedMemoryBroyden(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true))]
     function benchmark_nlsolve_oop(f, u0, p = 2.0)
         prob = NonlinearProblem{false}(f, u0, p)
         return solve(prob, alg, abstol = 1e-9)
@@ -128,13 +117,6 @@ end
         @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9)
     end

-    @testset "Allocations: Static Array and Scalars" begin
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, $(@SVector[1.0, 1.0]),
-            2.0)) < 200
-        allocs = alg isa SimpleDFSane ? 144 : 0
-        @test (@ballocated $(benchmark_nlsolve_oop)($quadratic_f, 1.0, 2.0)) == allocs
-    end
-
     @testset "Termination condition: $(termination_condition) u0: $(_nameof(u0))" for termination_condition in TERMINATION_CONDITIONS,
         u0 in (1.0, [1.0, 1.0], @SVector[1.0, 1.0])

@@ -164,7 +146,8 @@ end
 ## SimpleDFSane needs to allocate a history vector
 @testset "Allocation Checks: $(_nameof(alg))" for alg in (SimpleNewtonRaphson(),
     SimpleHalley(), SimpleBroyden(), SimpleKlement(), SimpleLimitedMemoryBroyden(),
-    SimpleTrustRegion(), SimpleDFSane())
+    SimpleTrustRegion(), SimpleDFSane(), SimpleBroyden(; linesearch = Val(true)),
+    SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
     @check_allocs nlsolve(prob, alg) = SciMLBase.solve(prob, alg; abstol = 1e-9)

     nlprob_scalar = NonlinearProblem{false}(quadratic_f, 1.0, 2.0)
@@ -175,8 +158,7 @@ end
         @test true
     catch e
         @error e
-        # History Vector Allocates
-        @test false broken=(alg isa SimpleDFSane)
+        @test false
     end

     # ForwardDiff allocates for hessian since we don't propagate the chunksize
diff --git a/lib/SimpleNonlinearSolve/test/cuda.jl b/lib/SimpleNonlinearSolve/test/cuda.jl
index 9b984bc70..fddf751de 100644
--- a/lib/SimpleNonlinearSolve/test/cuda.jl
+++ b/lib/SimpleNonlinearSolve/test/cuda.jl
@@ -7,7 +7,9 @@ f!(du, u, p) = du .= u .* u .- 2

 @testset "Solving on GPUs" begin
     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
         @info "Testing $alg on CUDA"

         # Static Arrays
@@ -35,7 +37,7 @@ f!(du, u, p) = du .= u .* u .- 2
 end

 function kernel_function(prob, alg)
-    solve(prob, alg; abstol = 1.0f-6, reltol = 1.0f-6)
+    solve(prob, alg)
     return nothing
 end

@@ -43,7 +45,9 @@ end
     prob = NonlinearProblem{false}(f, @SVector[1.0f0, 1.0f0])

     for alg in (SimpleNewtonRaphson(), SimpleDFSane(), SimpleTrustRegion(), SimpleBroyden(),
-        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley())
+        SimpleLimitedMemoryBroyden(), SimpleKlement(), SimpleHalley(),
+        SimpleBroyden(; linesearch = Val(true)),
+        SimpleLimitedMemoryBroyden(; linesearch = Val(true)))
        @test begin
            try
                @cuda kernel_function(prob, alg)
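Taken together with the kernel-friendly tolerance defaults above, a GPU kernel can now call `solve` without passing tolerances. A rough sketch of the pattern the CUDA test exercises (assumes a CUDA-capable device and the packages used in the test file):

using CUDA, StaticArrays, SimpleNonlinearSolve

f(u, p) = u .* u .- 2
prob = NonlinearProblem{false}(f, @SVector[1.0f0, 1.0f0])

function kernel_function(prob, alg)
    solve(prob, alg)  # defaults resolved via the kernel-safe __get_tolerance overloads
    return nothing
end

@cuda kernel_function(prob, SimpleBroyden(; linesearch = Val(true)))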