From 9c42b4fe063705269f0bc62a771d4a944ba217a8 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 13:24:26 -0400 Subject: [PATCH 1/9] Formatting --- docs/pages.jl | 3 +-- docs/src/tutorials/code_optimization.md | 18 +++++++++--------- docs/src/tutorials/large_systems.md | 10 +++++----- docs/src/tutorials/modelingtoolkit.md | 2 +- docs/src/tutorials/small_compile.md | 20 ++++++++++---------- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/docs/pages.jl b/docs/pages.jl index 8564261bd..93ad7b391 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -2,8 +2,7 @@ pages = ["index.md", "Getting Started with Nonlinear Rootfinding in Julia" => "tutorials/getting_started.md", - "Tutorials" => Any[ - "Code Optimization for Small Nonlinear Systems" => "tutorials/code_optimization.md", + "Tutorials" => Any["Code Optimization for Small Nonlinear Systems" => "tutorials/code_optimization.md", "Handling Large Ill-Conditioned and Sparse Systems" => "tutorials/large_systems.md", "Symbolic System Definition and Acceleration via ModelingToolkit" => "tutorials/modelingtoolkit.md", "tutorials/small_compile.md", diff --git a/docs/src/tutorials/code_optimization.md b/docs/src/tutorials/code_optimization.md index daefcb855..cd4089d5d 100644 --- a/docs/src/tutorials/code_optimization.md +++ b/docs/src/tutorials/code_optimization.md @@ -34,7 +34,7 @@ Take for example a prototypical small nonlinear solver code in its out-of-place ```@example small_opt using NonlinearSolve -function f(u, p) +function f(u, p) u .* u .- p end u0 = [1.0, 1.0] @@ -54,7 +54,7 @@ using BenchmarkTools Note that this way of writing the function is a shorthand for: ```@example small_opt -function f(u, p) +function f(u, p) [u[1] * u[1] - p, u[2] * u[2] - p] end ``` @@ -69,25 +69,25 @@ In order to avoid this issue, we can use a non-allocating "in-place" approach. W this looks like: ```@example small_opt -function f(du, u, p) +function f(du, u, p) du[1] = u[1] * u[1] - p du[2] = u[2] * u[2] - p nothing end prob = NonlinearProblem(f, u0, p) -@btime sol = solve(prob, NewtonRaphson()) +@btime sol = solve(prob, NewtonRaphson()) ``` Notice how much faster this already runs! We can make this code even simpler by using the `.=` in-place broadcasting. ```@example small_opt -function f(du, u, p) +function f(du, u, p) du .= u .* u .- p end -@btime sol = solve(prob, NewtonRaphson()) +@btime sol = solve(prob, NewtonRaphson()) ``` ## Further Optimizations for Small Nonlinear Solves with Static Arrays and SimpleNonlinearSolve @@ -140,7 +140,7 @@ want to use the out-of-place allocating form, but this time we want to output a static array. Doing it with broadcasting looks like: ```@example small_opt -function f_SA(u, p) +function f_SA(u, p) u .* u .- p end u0 = SA[1.0, 1.0] @@ -153,7 +153,7 @@ Note that only change here is that `u0` is made into a StaticArray! If we needed for a more complex nonlinear case, then we'd simply do the following: ```@example small_opt -function f_SA(u, p) +function f_SA(u, p) SA[u[1] * u[1] - p, u[2] * u[2] - p] end @@ -170,4 +170,4 @@ which are designed for these small-scale static examples. Let's now use `SimpleN @btime solve(prob, SimpleNewtonRaphson()) ``` -And there we go, around 100ns from our starting point of almost 6μs! \ No newline at end of file +And there we go, around 100ns from our starting point of almost 6μs! diff --git a/docs/src/tutorials/large_systems.md b/docs/src/tutorials/large_systems.md index c1936932e..8e53d193c 100644 --- a/docs/src/tutorials/large_systems.md +++ b/docs/src/tutorials/large_systems.md @@ -1,8 +1,8 @@ # [Efficiently Solving Large Sparse Ill-Conditioned Nonlinear Systems in Julia](@id large_systems) -This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems -requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` -linear solve and the ``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of +This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems +requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` +linear solve and the ``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential equation (BRUSS) using NonlinearSolve.jl. ## Definition of the Brusselator Equation @@ -182,8 +182,8 @@ nothing # hide Notice that this acceleration does not require the definition of a sparsity pattern, and can thus be an easier way to scale for large problems. For more -information on linear solver choices, see the -[linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear). +information on linear solver choices, see the +[linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear). `linsolve` choices are any valid [LinearSolve.jl](https://linearsolve.sciml.ai/dev/) solver. !!! note diff --git a/docs/src/tutorials/modelingtoolkit.md b/docs/src/tutorials/modelingtoolkit.md index 7d26cb30c..3ee7d8e75 100644 --- a/docs/src/tutorials/modelingtoolkit.md +++ b/docs/src/tutorials/modelingtoolkit.md @@ -120,4 +120,4 @@ sol[u5] If you're interested in building models in a component or block based form, such as seen in systems like Simulink or Modelica, take a deeper look at [ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which goes into -detail on such features. \ No newline at end of file +detail on such features. diff --git a/docs/src/tutorials/small_compile.md b/docs/src/tutorials/small_compile.md index d7d2c2311..4f4c56b14 100644 --- a/docs/src/tutorials/small_compile.md +++ b/docs/src/tutorials/small_compile.md @@ -19,18 +19,18 @@ sol = solve(prob, SimpleNewtonRaphson()) However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to note: -1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have - the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features - which are required to scale for very large systems of equations. -2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus - these methods are missing some flexibility and give worse hints for debugging. -3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support, - but it's designed for simple smaller systems and so some optimizations are missing. + 1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have + the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features + which are required to scale for very large systems of equations. + 2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus + these methods are missing some flexibility and give worse hints for debugging. + 3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support, + but it's designed for simple smaller systems and so some optimizations are missing. However, the major upsides of SimpleNonlinearSolve.jl are: -1. The methods are optimized and non-allocating on StaticArrays -2. The methods are minimal in compilation + 1. The methods are optimized and non-allocating on StaticArrays + 2. The methods are minimal in compilation As such, you can use the code as shown above to have very low startup with good methods, but for more scaling and debuggability we recommend the full NonlinearSolve.jl. But that said, @@ -51,4 +51,4 @@ is not only sufficient but optimal. Julia has tools for building small binaries via static compilation with [StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl). However, these tools are currently limited to type-stable non-allocating functions. That said, SimpleNonlinearSolve.jl's solvers are -precisely the subset of NonlinearSolve.jl which are compatible with static compilation. \ No newline at end of file +precisely the subset of NonlinearSolve.jl which are compatible with static compilation. From 6b8305463a39d935adc286d2e17f495971457cd6 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 15:51:05 -0400 Subject: [PATCH 2/9] Implement IIP Version of Broyden --- src/NonlinearSolve.jl | 10 +++-- src/broyden.jl | 85 +++++++++++++++++++++++++++++++++++++++++++ src/default.jl | 2 +- src/klement.jl | 0 src/linesearch.jl | 3 +- 5 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 src/broyden.jl create mode 100644 src/klement.jl diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 08e5a7e79..6a25ebde8 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -69,18 +69,21 @@ include("levenberg.jl") include("gaussnewton.jl") include("dfsane.jl") include("pseudotransient.jl") +include("broyden.jl") +include("klement.jl") include("jacobian.jl") include("ad.jl") include("default.jl") import PrecompileTools -@static if VERSION >= v"1.10" +@static if VERSION ≥ v"1.10-" PrecompileTools.@compile_workload begin for T in (Float32, Float64) prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) - precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) + precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), + nothing) for alg in precompile_algs solve(prob, alg, abstol = T(1e-2)) @@ -97,7 +100,8 @@ end export RadiusUpdateSchemes -export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient +export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient, + GeneralBroyden, GeneralKlement export LeastSquaresOptimJL, FastLevenbergMarquardtJL export RobustMultiNewton, FastShortcutNonlinearPolyalg diff --git a/src/broyden.jl b/src/broyden.jl new file mode 100644 index 000000000..6daf9ef1e --- /dev/null +++ b/src/broyden.jl @@ -0,0 +1,85 @@ +# Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl +""" + GeneralBroyden(max_resets) + GeneralBroyden(; max_resets = 3) + +An implementation of `Broyden` with support for caching! + +## Arguments + + - `max_resets`: the maximum number of resets to perform. Defaults to `3`. +""" +struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing} + max_resets::Int +end + +GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets) + +@concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} + f + alg + u + du + fu + fu2 + dfu + p + J⁻¹ + J⁻¹₂ + J⁻¹df + force_stop::Bool + resets::Int + max_rests::Int + maxiters::Int + internalnorm + retcode::ReturnCode.T + abstol + prob + stats::NLStats +end + +get_fu(cache::GeneralBroydenCache) = cache.fu + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyden, args...; + alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, + kwargs...) where {uType, iip} + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) + fu = evaluate_f(prob, u) + J⁻¹ = convert(parameterless_type(_mutable(u)), + Matrix{eltype(u)}(I, length(fu), length(u))) + return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu), + similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets, + maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0)) +end + +function perform_step!(cache::GeneralBroydenCache{true}) + @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache + T = eltype(u) + + mul!(du, J⁻¹, -fu) + u .+= du + f(fu2, u, p) + + cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the inverse jacobian + dfu .= fu2 .- fu + if cache.resets < cache.max_rests && + (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu)) + fill!(J⁻¹, 0) + J⁻¹[diagind(J⁻¹)] .= T(1) + cache.resets += 1 + else + mul!(J⁻¹df, J⁻¹, dfu) + mul!(J⁻¹₂, du', J⁻¹) + du .= (du .- J⁻¹df) ./ (dot(du, J⁻¹df) .+ T(1e-5)) + mul!(J⁻¹, reshape(du, :, 1), J⁻¹₂, 1, 1) + end + fu .= fu2 + + return nothing +end diff --git a/src/default.jl b/src/default.jl index 913f994fe..76289cc1c 100644 --- a/src/default.jl +++ b/src/default.jl @@ -162,7 +162,7 @@ end # FIXME: Broyden and Klement are type unstable # (upstream SimpleNonlinearSolve.jl issue) !iip ? :(Klement()) : nothing, # Klement not yet implemented for IIP - !iip ? :(Broyden()) : nothing, # Broyden not yet implemented for IIP + :(GeneralBroyden()), :(NewtonRaphson(; linsolve, precs, adkwargs...)), :(NewtonRaphson(; linsolve, precs, linesearch = BackTracking(), adkwargs...)), :(TrustRegion(; linsolve, precs, adkwargs...)), diff --git a/src/klement.jl b/src/klement.jl new file mode 100644 index 000000000..e69de29bb diff --git a/src/linesearch.jl b/src/linesearch.jl index 30861e14b..1ef36206a 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -92,7 +92,8 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip g₀ = _mutable_zero(u) autodiff = if iip && (ls.autodiff isa AutoZygote || ls.autodiff isa AutoSparseZygote) - @warn "Attempting to use Zygote.jl for linesearch on an in-place problem. Falling back to finite differencing." + @warn "Attempting to use Zygote.jl for linesearch on an in-place problem. Falling \ + back to finite differencing." AutoFiniteDiff() else ls.autodiff From c6992a58674ce620fc1fab8c4c147ca996ffa382 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 18:20:46 -0400 Subject: [PATCH 3/9] Broyden with LineSearch --- src/NonlinearSolve.jl | 4 +- src/broyden.jl | 91 +++++++++++++++++++++---- src/klement.jl | 1 + src/linesearch.jl | 143 ++++++++++++++++++++++++++++++++++++--- src/raphson.jl | 5 +- src/utils.jl | 14 +++- test/23_test_problems.jl | 41 ++++++++--- test/basictests.jl | 90 ++++++++++++++++++++++++ 8 files changed, 352 insertions(+), 37 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 6a25ebde8..5e21cdf44 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -26,6 +26,8 @@ import UnPack: @unpack const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences, ADTypes.AbstractSparseForwardMode, ADTypes.AbstractSparseReverseMode} +abstract type AbstractNonlinearSolveLineSearchAlgorithm end + abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end @@ -105,6 +107,6 @@ export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, Pseu export LeastSquaresOptimJL, FastLevenbergMarquardtJL export RobustMultiNewton, FastShortcutNonlinearPolyalg -export LineSearch +export LineSearch, LiFukushimaLineSearch end # module diff --git a/src/broyden.jl b/src/broyden.jl index 6daf9ef1e..121a2ca71 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -1,19 +1,28 @@ # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl """ - GeneralBroyden(max_resets) - GeneralBroyden(; max_resets = 3) + GeneralBroyden(max_resets, linesearch) + GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) -An implementation of `Broyden` with support for caching! +An implementation of `Broyden` with reseting and line search. ## Arguments - `max_resets`: the maximum number of resets to perform. Defaults to `3`. + - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), + which means that no line search is performed. Algorithms from `LineSearches.jl` can be + used here directly, and they will be converted to the correct `LineSearch`. It is + recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch + specifically designed for Broyden's method. """ -struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing} +@concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing} max_resets::Int + linesearch end -GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets) +function GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) + linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) + return GeneralBroyden(max_resets, linesearch) +end @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} f @@ -29,13 +38,14 @@ GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets) J⁻¹df force_stop::Bool resets::Int - max_rests::Int + max_resets::Int maxiters::Int internalnorm retcode::ReturnCode.T abstol prob stats::NLStats + lscache end get_fu(cache::GeneralBroydenCache) = cache.fu @@ -46,11 +56,11 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde @unpack f, u0, p = prob u = alias_u0 ? u0 : deepcopy(u0) fu = evaluate_f(prob, u) - J⁻¹ = convert(parameterless_type(_mutable(u)), - Matrix{eltype(u)}(I, length(fu), length(u))) - return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu), - similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets, - maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0)) + J⁻¹ = __init_identity_jacobian(u, fu) + return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu), + zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets, + maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), + init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end function perform_step!(cache::GeneralBroydenCache{true}) @@ -58,7 +68,8 @@ function perform_step!(cache::GeneralBroydenCache{true}) T = eltype(u) mul!(du, J⁻¹, -fu) - u .+= du + α = perform_linesearch!(cache.lscache, u, du) + axpy!(α, du, u) f(fu2, u, p) cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true) @@ -68,7 +79,7 @@ function perform_step!(cache::GeneralBroydenCache{true}) # Update the inverse jacobian dfu .= fu2 .- fu - if cache.resets < cache.max_rests && + if cache.resets < cache.max_resets && (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu)) fill!(J⁻¹, 0) J⁻¹[diagind(J⁻¹)] .= T(1) @@ -83,3 +94,57 @@ function perform_step!(cache::GeneralBroydenCache{true}) return nothing end + +function perform_step!(cache::GeneralBroydenCache{false}) + @unpack f, p = cache + T = eltype(cache.u) + + cache.du = cache.J⁻¹ * -cache.fu + α = perform_linesearch!(cache.lscache, cache.u, cache.du) + cache.u = cache.u .+ α * cache.du + cache.fu2 = f(cache.u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the inverse jacobian + cache.dfu = cache.fu2 .- cache.fu + if cache.resets < cache.max_resets && + (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu)) + J⁻¹ = similar(cache.J⁻¹) + fill!(J⁻¹, 0) + J⁻¹[diagind(J⁻¹)] .= T(1) + cache.J⁻¹ = J⁻¹ + cache.resets += 1 + else + cache.J⁻¹df = cache.J⁻¹ * cache.dfu + cache.J⁻¹₂ = cache.du' * cache.J⁻¹ + cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5)) + cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂ + end + cache.fu = cache.fu2 + + return nothing +end + +function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = cache.p, + abstol = cache.abstol, maxiters = cache.maxiters) where {iip} + cache.p = p + if iip + recursivecopy!(cache.u, u0) + cache.f(cache.fu, cache.u, p) + else + # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter + cache.u = u0 + cache.fu = cache.f(cache.u, p) + end + cache.abstol = abstol + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.force_stop = false + cache.retcode = ReturnCode.Default + return cache +end diff --git a/src/klement.jl b/src/klement.jl index e69de29bb..8b1378917 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -0,0 +1 @@ + diff --git a/src/linesearch.jl b/src/linesearch.jl index 1ef36206a..1f63d695c 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -26,7 +26,15 @@ function LineSearch(; method = Static(), autodiff = AutoFiniteDiff(), alpha = tr return LineSearch(method, autodiff, alpha) end -@concrete mutable struct LineSearchCache +@inline function init_linesearch_cache(ls::LineSearch, args...) + return init_linesearch_cache(ls.method, ls, args...) +end + +# LineSearches.jl doesn't have a supertype so default to that +init_linesearch_cache(_, ls, f, u, p, fu, iip) = LineSearchesJLCache(ls, f, u, p, fu, iip) + +# Wrapper over LineSearches.jl algorithms +@concrete mutable struct LineSearchesJLCache f ϕ dϕ @@ -35,11 +43,11 @@ end ls end -function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false}) +function LineSearchesJLCache(ls::LineSearch, f, u::Number, p, _, ::Val{false}) eval_f(u, du, α) = eval_f(u - α * du) eval_f(u) = f(u, p) - ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing, + ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing, convert(typeof(u), ls.α), ls) g(u, fu) = last(value_derivative(Base.Fix2(f, p), u)) * fu @@ -73,11 +81,11 @@ function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false}) return ϕdϕ_internal end - return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls) + return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls) end -function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip} - fu = iip ? fu1 : nothing +function LineSearchesJLCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip} + fu = iip ? deepcopy(fu1) : nothing u_ = _mutable_zero(u) function eval_f(u, du, α) @@ -86,7 +94,7 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip end eval_f(u) = evaluate_f(f, u, p, IIP; fu) - ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing, + ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing, convert(eltype(u), ls.α), ls) g₀ = _mutable_zero(u) @@ -138,10 +146,10 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip return ϕdϕ_internal end - return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls) + return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls) end -function perform_linesearch!(cache::LineSearchCache, u, du) +function perform_linesearch!(cache::LineSearchesJLCache, u, du) cache.ls.method isa Static && return cache.α ϕ = cache.ϕ(u, du) @@ -155,3 +163,120 @@ function perform_linesearch!(cache::LineSearchCache, u, du) return first(cache.ls.method(ϕ, cache.dϕ(u, du), cache.ϕdϕ(u, du), cache.α, ϕ₀, dϕ₀)) end + +""" + LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.5, sigma_1 = 0.001, + eta = 0.1, nan_max_iter = 5, maxiters = 50) + +A derivative-free line search and global convergence of Broyden-like method for nonlinear +equations by Dong-Hui Li & Masao Fukushima. For more details see +https://doi.org/10.1080/10556780008805782 +""" +struct LiFukushimaLineSearch{T} <: AbstractNonlinearSolveLineSearchAlgorithm + λ₀::T + β::T + σ₁::T + σ₂::T + η::T + ρ::T + nan_max_iter::Int + maxiters::Int +end + +function LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001, + sigma_2 = 0.001, eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50) + T = promote_type(typeof(lambda_0), typeof(beta), typeof(sigma_1), typeof(eta), + typeof(rho), typeof(sigma_2)) + return LiFukushimaLineSearch{T}(lambda_0, beta, sigma_1, sigma_2, eta, rho, + nan_max_iter, maxiters) +end + +@concrete mutable struct LiFukushimaLineSearchCache{iip} + f + p + u_cache + fu_cache + alg + α +end + +function init_linesearch_cache(alg::LiFukushimaLineSearch, ls::LineSearch, f, _u, p, _fu, + ::Val{iip}) where {iip} + fu = iip ? deepcopy(_fu) : nothing + u = iip ? deepcopy(_u) : nothing + return LiFukushimaLineSearchCache{iip}(f, p, u, fu, alg, ls.α) +end + +function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) where {iip} + (; β, σ₁, σ₂, η, λ₀, ρ, nan_max_iter, maxiters) = cache.alg + λ₂ = λ₀ + λ₁ = λ₂ + + if iip + cache.f(cache.fu_cache, u, cache.p) + fx_norm = norm(cache.fu_cache, 2) + else + fx_norm = norm(cache.f(u, cache.p), 2) + end + + # Non-Blocking exit if the norm is NaN or Inf + !isfinite(fx_norm) && return cache.α + + # Early Terminate based on Eq. 2.7 + if iip + cache.u_cache .= u .+ du + cache.f(cache.fu_cache, cache.u_cache, cache.p) + fxλ_norm = norm(cache.fu_cache, 2) + else + fxλ_norm = norm(cache.f(u .+ du, cache.p), 2) + end + + fxλ_norm ≤ ρ * fx_norm - σ₂ * norm(du, 2)^2 && return cache.α + + if iip + cache.u_cache .= u .+ λ₂ .* du + cache.f(cache.fu_cache, cache.u_cache, cache.p) + fxλp_norm = norm(cache.fu_cache, 2) + else + fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2) + end + + if !isfinite(fxλp_norm) + # Backtrack a finite number of steps + nan_converged = false + for _ in 1:nan_max_iter + λ₁, λ₂ = λ₂, β * λ₂ + + if iip + cache.u_cache .= u .+ λ₂ .* du + cache.f(cache.fu_cache, cache.u_cache, cache.p) + fxλp_norm = norm(cache.fu_cache, 2) + else + fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2) + end + + nan_converged = isfinite(fxλp_norm) + nan_converged && break + end + + # Non-Blocking exit if the norm is still NaN or Inf + !nan_converged && return cache.α + end + + for _ in 1:maxiters + if iip + cache.u_cache .= u .+ λ₂ .* du + cache.f(cache.fu_cache, cache.u_cache, cache.p) + fxλp_norm = norm(cache.fu_cache, 2) + else + fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2) + end + + converged = fxλp_norm ≤ (1 + η) * fx_norm - σ₁ * λ₂^2 * norm(du, 2)^2 + + converged && break + λ₁, λ₂ = λ₂, β * λ₂ + end + + return λ₂ +end diff --git a/src/raphson.jl b/src/raphson.jl index 642c7ee36..a3d9bbc6d 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -82,7 +82,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso return NewtonRaphsonCache{iip}(f, alg, u, fu1, fu2, du, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, - NLStats(1, 0, 0, 0, 0), LineSearchCache(alg.linesearch, f, u, p, fu1, Val(iip))) + NLStats(1, 0, 0, 0, 0), + init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip))) end function perform_step!(cache::NewtonRaphsonCache{true}) @@ -96,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true}) # Line Search α = perform_linesearch!(cache.lscache, u, du) - @. u = u - α * du + axpy!(α, du, u) f(cache.fu1, u, p) cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true) diff --git a/src/utils.jl b/src/utils.jl index fb8c59850..d33dc292d 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -35,8 +35,8 @@ function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing, if chunk_size !== missing || standardtag !== missing || diff_type !== missing || autodiff !== missing Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \ - `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed in\ - v3. Update your code to directly specify autodiff=", + `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed \ + in v3. Update your code to directly specify autodiff=", :default_adargs_to_adtype) end chunk_size === missing && (chunk_size = Val{0}()) @@ -211,3 +211,13 @@ function __get_concrete_algorithm(alg, prob) end return set_ad(alg, ad) end + +__init_identity_jacobian(u::Number, _) = u +function __init_identity_jacobian(u, fu) + return convert(parameterless_type(_mutable(u)), + Matrix{eltype(u)}(I, length(fu), length(u))) +end +function __init_identity_jacobian(u::StaticArray, fu) + return convert(MArray{Tuple{length(fu), length(u)}}, + Matrix{eltype(u)}(I, length(fu), length(u))) +end \ No newline at end of file diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 5e0df8352..8c739c085 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -8,19 +8,28 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4) x = dict["start"] res = similar(x) nlprob = NonlinearProblem(problem, x) - @testset "$(dict["title"])" begin + @testset "$idx: $(dict["title"])" begin for alg in alg_ops - sol = solve(nlprob, alg, abstol = 1e-18, reltol = 1e-18) - problem(res, sol.u, nothing) - broken = idx in broken_tests[alg] ? true : false - @test norm(res)≤ϵ broken=broken + try + sol = solve(nlprob, alg, abstol = 1e-18, reltol = 1e-18) + problem(res, sol.u, nothing) + broken = idx in broken_tests[alg] ? true : false + @test norm(res)≤ϵ broken=broken + catch + broken = idx in broken_tests[alg] ? true : false + if broken + @test false broken=true + else + @test 1 == 2 + end + end end end end end # NewtonRaphson -@testset "NewtonRaphson test problem library" begin +@testset "NewtonRaphson 23 Test Problems" begin alg_ops = (NewtonRaphson(),) # dictionary with indices of test problems where method does not converge to small residual @@ -30,7 +39,7 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end -@testset "TrustRegion test problem library" begin +@testset "TrustRegion 23 Test Problems" begin alg_ops = (TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Simple), TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Fan), TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Hei), @@ -50,7 +59,7 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end -@testset "TrustRegion test problem library" begin +@testset "LevenbergMarquardt 23 Test Problems" begin alg_ops = (LevenbergMarquardt(; linsolve = NormalCholeskyFactorization()), LevenbergMarquardt(; α_geodesic = 0.1, linsolve = NormalCholeskyFactorization())) @@ -62,8 +71,7 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end -# DFSane -@testset "DFSane test problem library" begin +@testset "DFSane 23 Test Problems" begin alg_ops = (DFSane(),) broken_tests = Dict(alg => Int[] for alg in alg_ops) @@ -71,3 +79,16 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end + +@testset "GeneralBroyden 23 Test Problems" begin + alg_ops = (GeneralBroyden(), + GeneralBroyden(; linesearch = LiFukushimaLineSearch(; beta = 0.1)), + GeneralBroyden(; linesearch = BackTracking())) + + broken_tests = Dict(alg => Int[] for alg in alg_ops) + broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21] + broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22] + broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21] + + test_on_library(problems, dicts, alg_ops, broken_tests) +end diff --git a/test/basictests.jl b/test/basictests.jl index f20417785..22ba9ab25 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -664,3 +664,93 @@ end @test all(abs.(newton_fails(sol.u, p)) .< 1e-10) end end + +# --- GeneralBroyden tests --- + +@testset "GeneralBroyden" begin + function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{false}(f, u0, p) + return solve(prob, GeneralBroyden(; linesearch), abstol = 1e-9) + end + + function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{true}(f, u0, p) + return solve(prob, GeneralBroyden(; linesearch), abstol = 1e-9) + end + + @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(), + StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente(), + LiFukushimaLineSearch()), + ad in (AutoFiniteDiff(), AutoZygote()) + + linesearch = LineSearch(; method = lsmethod, autodiff = ad) + u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) + + @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s + sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), + GeneralBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) < 200 + end + + @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],) + ad isa AutoZygote && continue + sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), + GeneralBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) ≤ 64 + end + end + + @testset "[OOP] [Immutable AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p) + res_true = sqrt(p) + all(res.u .≈ res_true) + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, + @SVector[1.0, 1.0], p).u[end], p) ≈ 1 / (2 * sqrt(p)) + end + end + + @testset "[OOP] [Scalar AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, 1.0, p) + res_true = sqrt(p) + res.u ≈ res_true + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u, + p) ≈ 1 / (2 * sqrt(p)) + end + end + + t = (p) -> [sqrt(p[2] / p[1])] + p = [0.9, 50.0] + @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1]) + @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u], + p) ≈ ForwardDiff.jacobian(t, p) + + # Iterator interface + function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} + probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) + cache = init(probN, GeneralBroyden(); maxiters = 100, abstol = 1e-10) + sols = zeros(length(p_range)) + for (i, p) in enumerate(p_range) + reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p) + sol = solve!(cache) + sols[i] = iip ? sol.u[1] : sol.u + end + return sols + end + p = range(0.01, 2, length = 200) + @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) + @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) +end From 4c0f62872800fd7d4fd1c0f6167835f786d86ace Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 18:27:35 -0400 Subject: [PATCH 4/9] Add in documentation --- docs/pages.jl | 3 ++- docs/src/solvers/LineSearch.md | 14 ++++++++++++++ docs/src/solvers/NonlinearSystemSolvers.md | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 docs/src/solvers/LineSearch.md diff --git a/docs/pages.jl b/docs/pages.jl index 93ad7b391..f1b239bc9 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -17,7 +17,8 @@ pages = ["index.md", "Solver Summaries and Recommendations" => Any["solvers/NonlinearSystemSolvers.md", "solvers/BracketingSolvers.md", "solvers/SteadyStateSolvers.md", - "solvers/NonlinearLeastSquaresSolvers.md"], + "solvers/NonlinearLeastSquaresSolvers.md", + "solvers/LineSearch.md"], "Detailed Solver APIs" => Any["api/nonlinearsolve.md", "api/simplenonlinearsolve.md", "api/minpack.md", diff --git a/docs/src/solvers/LineSearch.md b/docs/src/solvers/LineSearch.md new file mode 100644 index 000000000..5d09301e2 --- /dev/null +++ b/docs/src/solvers/LineSearch.md @@ -0,0 +1,14 @@ +# [Line Search](@id linesearch) + +A convenience wrapper over `LineSearches.jl` and some native Line Search methods, powered +internally with fast automatic differentiation. + +```@docs +LineSearch +``` + +## Native Line Search Methods + +```@docs +LiFukushimaLineSearch +``` diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md index de2d89a4e..a39ab597e 100644 --- a/docs/src/solvers/NonlinearSystemSolvers.md +++ b/docs/src/solvers/NonlinearSystemSolvers.md @@ -65,6 +65,8 @@ features, but have a bit of overhead on very small problems. - `FastShortcutNonlinearPolyalg()`: The default method. A polyalgorithm that mixes fast methods with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing robustnes on the hard problems. + - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and + Automatic Jacobian Resetting. This is a fast method but unstable for most problems! ### SimpleNonlinearSolve.jl From 1e4cfde6d7aac4439f77893ce11668df2b57cc69 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 18:41:50 -0400 Subject: [PATCH 5/9] Broyden supports matrices --- src/broyden.jl | 18 +++++++++--------- src/levenberg.jl | 8 +++++--- src/pseudotransient.jl | 15 +++------------ src/raphson.jl | 2 +- src/trustRegion.jl | 3 ++- src/utils.jl | 6 +++--- test/matrix_resizing.jl | 12 +++++++----- 7 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/broyden.jl b/src/broyden.jl index 121a2ca71..1f8bc73bc 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -58,7 +58,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde fu = evaluate_f(prob, u) J⁻¹ = __init_identity_jacobian(u, fu) return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu), - zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets, + zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end @@ -67,7 +67,7 @@ function perform_step!(cache::GeneralBroydenCache{true}) @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache T = eltype(u) - mul!(du, J⁻¹, -fu) + mul!(_vec(du), J⁻¹, -_vec(fu)) α = perform_linesearch!(cache.lscache, u, du) axpy!(α, du, u) f(fu2, u, p) @@ -85,10 +85,10 @@ function perform_step!(cache::GeneralBroydenCache{true}) J⁻¹[diagind(J⁻¹)] .= T(1) cache.resets += 1 else - mul!(J⁻¹df, J⁻¹, dfu) - mul!(J⁻¹₂, du', J⁻¹) + mul!(_vec(J⁻¹df), J⁻¹, _vec(dfu)) + mul!(J⁻¹₂, _vec(du)', J⁻¹) du .= (du .- J⁻¹df) ./ (dot(du, J⁻¹df) .+ T(1e-5)) - mul!(J⁻¹, reshape(du, :, 1), J⁻¹₂, 1, 1) + mul!(J⁻¹, _vec(du), J⁻¹₂, 1, 1) end fu .= fu2 @@ -99,7 +99,7 @@ function perform_step!(cache::GeneralBroydenCache{false}) @unpack f, p = cache T = eltype(cache.u) - cache.du = cache.J⁻¹ * -cache.fu + cache.du = _restructure(cache.du, cache.J⁻¹ * -_vec(cache.fu)) α = perform_linesearch!(cache.lscache, cache.u, cache.du) cache.u = cache.u .+ α * cache.du cache.fu2 = f(cache.u, p) @@ -119,10 +119,10 @@ function perform_step!(cache::GeneralBroydenCache{false}) cache.J⁻¹ = J⁻¹ cache.resets += 1 else - cache.J⁻¹df = cache.J⁻¹ * cache.dfu - cache.J⁻¹₂ = cache.du' * cache.J⁻¹ + cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu)) + cache.J⁻¹₂ = _vec(cache.du)' * cache.J⁻¹ cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5)) - cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂ + cache.J⁻¹ = cache.J⁻¹ .+ _vec(cache.du) * cache.J⁻¹₂ end cache.fu = cache.fu2 diff --git a/src/levenberg.jl b/src/levenberg.jl index 016a12f31..fbfcc6a9a 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -301,7 +301,9 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) else linres = dolinsolve(alg.precs, linsolve; b = _mutable(_vec(J' * #((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))), - _vec(((2 / h) .* ((_vec(f(u .+ h .* _restructure(u,v), p)) .- _vec(fu1)) ./ h .- J * _vec(v)))))), + _vec(((2 / h) .* + ((_vec(f(u .+ h .* _restructure(u, v), p)) .- + _vec(fu1)) ./ h .- J * _vec(v)))))), linu = _vec(cache.a), p, reltol = cache.abstol) cache.linsolve = linres.cache end @@ -311,7 +313,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) # Require acceptable steps to satisfy the following condition. norm_v = norm(v) if 2 * norm(cache.a) ≤ α_geodesic * norm_v - cache.δ = _restructure(cache.δ,_vec(v) .+ _vec(cache.a) ./ 2) + cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2) @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache fu_new = f(u .+ δ, p) cache.stats.nf += 1 @@ -327,7 +329,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) return nothing end cache.fu1 = fu_new - cache.v_old = _restructure(cache.v_old,v) + cache.v_old = _restructure(cache.v_old, v) cache.norm_v_old = norm_v cache.loss_old = loss cache.λ_factor = 1 / cache.damping_decrease_factor diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 8e70f2350..a041c258c 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -29,10 +29,6 @@ SIAM Journal on Scientific Computing,25, 553-569.](https://doi.org/10.1137/S1064 [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). - `alpha_initial` : the initial pseudo time step. it defaults to 1e-3. If it is small, you are going to need more iterations to converge but it can be more stable. - - - - """ @concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} ad::AD @@ -92,19 +88,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::PseudoTransi else fu1 = _mutable(f(u, p)) end - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, - f, - u, - p, - Val(iip); + uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) alpha = convert(eltype(u), alg.alpha_initial) res_norm = internalnorm(fu1) return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf, - linsolve, J, - jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, - NLStats(1, 0, 0, 0, 0)) + linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, + prob, NLStats(1, 0, 0, 0, 0)) end function perform_step!(cache::PseudoTransientCache{true}) diff --git a/src/raphson.jl b/src/raphson.jl index a3d9bbc6d..db9e9e322 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -97,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true}) # Line Search α = perform_linesearch!(cache.lscache, u, du) - axpy!(α, du, u) + axpy!(-α, du, u) f(cache.fu1, u, p) cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true) diff --git a/src/trustRegion.jl b/src/trustRegion.jl index f97b63637..8094dc09c 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -419,7 +419,8 @@ function trust_region_step!(cache::TrustRegionCache) cache.loss_new = get_loss(fu_new) # Compute the ratio of the actual reduction to the predicted reduction. - cache.r = -(loss - cache.loss_new) / (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2) + cache.r = -(loss - cache.loss_new) / + (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2) @unpack r = cache if radius_update_scheme === RadiusUpdateSchemes.Simple diff --git a/src/utils.jl b/src/utils.jl index d33dc292d..7c6413937 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -74,8 +74,8 @@ end @inline _vec(v::Number) = v @inline _vec(v::AbstractVector) = v -@inline _restructure(y,x) = restructure(y,x) -@inline _restructure(y::Number,x::Number) = x +@inline _restructure(y, x) = restructure(y, x) +@inline _restructure(y::Number, x::Number) = x DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing @@ -220,4 +220,4 @@ end function __init_identity_jacobian(u::StaticArray, fu) return convert(MArray{Tuple{length(fu), length(u)}}, Matrix{eltype(u)}(I, length(fu), length(u))) -end \ No newline at end of file +end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index f8ad423e5..1aa60fa05 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -1,21 +1,23 @@ using NonlinearSolve, Test ff(u, p) = u .* u .- p -u0 = rand(2,2) +u0 = rand(2, 2) p = 2.0 vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) -for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg()) +for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end fiip(du, u, p) = (du .= u .* u .- p) -u0 = rand(2,2) +u0 = rand(2, 2) p = 2.0 vecprob = NonlinearProblem(fiip, vec(u0), p) prob = NonlinearProblem(fiip, u0, p) -for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg()) +for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u -end \ No newline at end of file +end From 00852f02f04549357f3b0976338866fa0c3aa157 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 20:30:30 -0400 Subject: [PATCH 6/9] Fast General Klement Implementation --- docs/src/solvers/NonlinearSystemSolvers.md | 2 + src/NonlinearSolve.jl | 13 +- src/broyden.jl | 5 +- src/default.jl | 4 +- src/klement.jl | 190 +++++++++++++++++++++ test/23_test_problems.jl | 11 ++ test/basictests.jl | 89 ++++++++++ 7 files changed, 302 insertions(+), 12 deletions(-) diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md index a39ab597e..89cde3513 100644 --- a/docs/src/solvers/NonlinearSystemSolvers.md +++ b/docs/src/solvers/NonlinearSystemSolvers.md @@ -67,6 +67,8 @@ features, but have a bit of overhead on very small problems. robustnes on the hard problems. - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and Automatic Jacobian Resetting. This is a fast method but unstable for most problems! + - `GeneralKlement()`: Generalization of Klement's Quasi-Newton Method with Line Search and + Automatic Jacobian Resetting. This is a fast method but unstable for most problems! ### SimpleNonlinearSolve.jl diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 5e21cdf44..438cea870 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -52,10 +52,13 @@ function SciMLBase.solve!(cache::AbstractNonlinearSolveCache) cache.stats.nsteps += 1 end - if cache.stats.nsteps == cache.maxiters - cache.retcode = ReturnCode.MaxIters - else - cache.retcode = ReturnCode.Success + # The solver might have set a different `retcode` + if cache.retcode == ReturnCode.Default + if cache.stats.nsteps == cache.maxiters + cache.retcode = ReturnCode.MaxIters + else + cache.retcode = ReturnCode.Success + end end return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, get_fu(cache); @@ -85,7 +88,7 @@ import PrecompileTools prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), - nothing) + PseudoTransient(), GeneralBroyden(), GeneralKlement(), nothing) for alg in precompile_algs solve(prob, alg, abstol = T(1e-2)) diff --git a/src/broyden.jl b/src/broyden.jl index 1f8bc73bc..6a767a8af 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -113,10 +113,7 @@ function perform_step!(cache::GeneralBroydenCache{false}) cache.dfu = cache.fu2 .- cache.fu if cache.resets < cache.max_resets && (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu)) - J⁻¹ = similar(cache.J⁻¹) - fill!(J⁻¹, 0) - J⁻¹[diagind(J⁻¹)] .= T(1) - cache.J⁻¹ = J⁻¹ + cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu) cache.resets += 1 else cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu)) diff --git a/src/default.jl b/src/default.jl index 76289cc1c..07effeecb 100644 --- a/src/default.jl +++ b/src/default.jl @@ -159,10 +159,8 @@ end ] else [ - # FIXME: Broyden and Klement are type unstable - # (upstream SimpleNonlinearSolve.jl issue) - !iip ? :(Klement()) : nothing, # Klement not yet implemented for IIP :(GeneralBroyden()), + :(GeneralKlement()), :(NewtonRaphson(; linsolve, precs, adkwargs...)), :(NewtonRaphson(; linsolve, precs, linesearch = BackTracking(), adkwargs...)), :(TrustRegion(; linsolve, precs, adkwargs...)), diff --git a/src/klement.jl b/src/klement.jl index 8b1378917..42cac5bbe 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -1 +1,191 @@ +@concrete struct GeneralKlement <: AbstractNewtonAlgorithm{false, Nothing} + max_resets::Int + linsolve + precs + linesearch + singular_tolerance +end +function GeneralKlement(; max_resets::Int = 5, linsolve = nothing, + linesearch = LineSearch(), precs = DEFAULT_PRECS, singular_tolerance = nothing) + linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) + return GeneralKlement(max_resets, linsolve, precs, linesearch, singular_tolerance) +end + +@concrete mutable struct GeneralKlementCache{iip} <: AbstractNonlinearSolveCache{iip} + f + alg + u + fu + fu2 + du + p + linsolve + J + J_cache + J_cache2 + Jᵀ²du + Jdu + resets + singular_tolerance + force_stop + maxiters::Int + internalnorm + retcode::ReturnCode.T + abstol + prob + stats::NLStats + lscache +end + +get_fu(cache::GeneralKlementCache) = cache.fu + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralKlement, args...; + alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, + linsolve_kwargs = (;), kwargs...) where {uType, iip} + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) + fu = evaluate_f(prob, u) + J = __init_identity_jacobian(u, fu) + + if u isa Number + linsolve = nothing + else + weight = similar(u) + recursivefill!(weight, true) + Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing, + nothing)..., weight) + linprob = LinearProblem(J, _vec(fu); u0 = _vec(fu)) + linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr, + linsolve_kwargs...) + end + + singular_tolerance = alg.singular_tolerance === nothing ? inv(sqrt(eps(eltype(u)))) : + eltype(u)(alg.singular_tolerance) + + return GeneralKlementCache{iip}(f, alg, u, fu, zero(fu), _mutable_zero(u), p, linsolve, + J, zero(J), zero(J), zero(fu), zero(fu), 0, singular_tolerance, false, + maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), + init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) +end + +function perform_step!(cache::GeneralKlementCache{true}) + @unpack u, fu, f, p, alg, J, linsolve, du = cache + T = eltype(J) + + # FIXME: How can we do this faster? + if cond(J) > cache.singular_tolerance + if cache.resets == alg.max_resets + cache.force_stop = true + cache.retcode = ReturnCode.Unstable + return nothing + end + fill!(J, zero(T)) + J[diagind(J)] .= T(1) + cache.resets += 1 + end + + # u = u - J \ fu + linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), linu = _vec(du), + p, reltol = cache.abstol) + cache.linsolve = linres.cache + + # Line Search + α = perform_linesearch!(cache.lscache, u, du) + axpy!(α, du, u) + f(cache.fu2, u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + cache.stats.nsolve += 1 + cache.stats.nfactors += 1 + + cache.force_stop && return nothing + + # Update the Jacobian + cache.J_cache .= cache.J' .^ 2 + cache.Jdu .= _vec(du) .^ 2 + mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu) + mul!(cache.Jdu, J, _vec(du)) + cache.fu .= cache.fu2 .- cache.fu + cache.fu .= (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T)) + mul!(cache.J_cache, _vec(cache.fu), _vec(du)') + cache.J_cache .*= J + mul!(cache.J_cache2, cache.J_cache, J) + J .+= cache.J_cache2 + + cache.fu .= cache.fu2 + + return nothing +end + +function perform_step!(cache::GeneralKlementCache{false}) + @unpack fu, f, p, alg, J, linsolve = cache + T = eltype(J) + + # FIXME: How can we do this faster? + if cond(J) > cache.singular_tolerance + if cache.resets == alg.max_resets + cache.force_stop = true + cache.retcode = ReturnCode.Unstable + return nothing + end + cache.J = __init_identity_jacobian(u, fu) + cache.resets += 1 + end + + # u = u - J \ fu + if linsolve === nothing + cache.du = -fu / cache.J + else + linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), + linu = _vec(cache.du), p, reltol = cache.abstol) + cache.linsolve = linres.cache + end + + # Line Search + α = perform_linesearch!(cache.lscache, cache.u, cache.du) + cache.u = @. cache.u + α * cache.du # `u` might not support mutation + cache.fu2 = f(cache.u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + cache.stats.nsolve += 1 + cache.stats.nfactors += 1 + + cache.force_stop && return nothing + + # Update the Jacobian + cache.J_cache = cache.J' .^ 2 + cache.Jdu = _vec(cache.du) .^ 2 + cache.Jᵀ²du = cache.J_cache * cache.Jdu + cache.Jdu = J * _vec(cache.du) + cache.fu = cache.fu2 .- cache.fu + cache.fu = (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T)) + cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J + cache.J = J .+ cache.J_cache + + cache.fu = cache.fu2 + + return nothing +end + +function SciMLBase.reinit!(cache::GeneralKlementCache{iip}, u0 = cache.u; p = cache.p, + abstol = cache.abstol, maxiters = cache.maxiters) where {iip} + cache.p = p + if iip + recursivecopy!(cache.u, u0) + cache.f(cache.fu, cache.u, p) + else + # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter + cache.u = u0 + cache.fu = cache.f(cache.u, p) + end + cache.abstol = abstol + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.force_stop = false + cache.retcode = ReturnCode.Default + return cache +end diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 8c739c085..39ae155a3 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -92,3 +92,14 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end + +@testset "GeneralKlement 23 Test Problems" begin + alg_ops = (GeneralKlement(), + GeneralKlement(; linesearch = BackTracking())) + + broken_tests = Dict(alg => Int[] for alg in alg_ops) + broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 13, 22] + broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 22] + + test_on_library(problems, dicts, alg_ops, broken_tests) +end diff --git a/test/basictests.jl b/test/basictests.jl index 22ba9ab25..a39d25656 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -754,3 +754,92 @@ end @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) end + +# --- GeneralKlement tests --- + +@testset "GeneralKlement" begin + function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{false}(f, u0, p) + return solve(prob, GeneralKlement(; linesearch), abstol = 1e-9) + end + + function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{true}(f, u0, p) + return solve(prob, GeneralKlement(; linesearch), abstol = 1e-9) + end + + @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(), + StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente()), + ad in (AutoFiniteDiff(), AutoZygote()) + + linesearch = LineSearch(; method = lsmethod, autodiff = ad) + u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) + + @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s + sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), + GeneralKlement(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) < 200 + end + + @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],) + ad isa AutoZygote && continue + sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), + GeneralKlement(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) ≤ 64 + end + end + + @testset "[OOP] [Immutable AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p) + res_true = sqrt(p) + all(res.u .≈ res_true) + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, + @SVector[1.0, 1.0], p).u[end], p) ≈ 1 / (2 * sqrt(p)) + end + end + + @testset "[OOP] [Scalar AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, 1.0, p) + res_true = sqrt(p) + res.u ≈ res_true + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u, + p) ≈ 1 / (2 * sqrt(p)) + end + end + + t = (p) -> [sqrt(p[2] / p[1])] + p = [0.9, 50.0] + @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1]) + @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u], + p) ≈ ForwardDiff.jacobian(t, p) + + # Iterator interface + function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} + probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) + cache = init(probN, GeneralKlement(); maxiters = 100, abstol = 1e-10) + sols = zeros(length(p_range)) + for (i, p) in enumerate(p_range) + reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p) + sol = solve!(cache) + sols[i] = iip ? sol.u[1] : sol.u + end + return sols + end + p = range(0.01, 2, length = 200) + @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) + @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) +end From ee5e5b83849e5aaf79323fa4144879c8549a8e63 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sat, 21 Oct 2023 22:17:07 -0400 Subject: [PATCH 7/9] Reuse LU Factorization to check for singular matrix --- docs/src/api/nonlinearsolve.md | 3 ++ src/NonlinearSolve.jl | 2 +- src/dfsane.jl | 1 - src/klement.jl | 66 +++++++++++++++++++++++----------- src/trustRegion.jl | 1 - src/utils.jl | 25 +++++++++++++ test/23_test_problems.jl | 10 +++--- test/matrix_resizing.jl | 4 +-- 8 files changed, 83 insertions(+), 29 deletions(-) diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md index e4554f3e6..5da204f07 100644 --- a/docs/src/api/nonlinearsolve.md +++ b/docs/src/api/nonlinearsolve.md @@ -8,6 +8,9 @@ These are the native solvers of NonlinearSolve.jl. NewtonRaphson TrustRegion PseudoTransient +DFSane +GeneralBroyden +GeneralKlement ``` ## Polyalgorithms diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 438cea870..8d3a8ca2b 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -9,7 +9,7 @@ import ArrayInterface: restructure import ForwardDiff import ADTypes: AbstractFiniteDifferencesMode -import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable +import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable, issingular import ConcreteStructs: @concrete import EnumX: @enumx import ForwardDiff: Dual diff --git a/src/dfsane.jl b/src/dfsane.jl index 13de5ff6a..f5bf69eca 100644 --- a/src/dfsane.jl +++ b/src/dfsane.jl @@ -43,7 +43,6 @@ See also the implementation in [SimpleNonlinearSolve.jl](https://github.com/SciM - `max_inner_iterations`: the maximum number of iterations allowed for the inner loop of the algorithm. Defaults to `1000`. """ - struct DFSane{T, F} <: AbstractNonlinearSolveAlgorithm σ_min::T σ_max::T diff --git a/src/klement.jl b/src/klement.jl index 42cac5bbe..e4d398445 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -1,15 +1,36 @@ +""" + GeneralKlement(; max_resets = 5, linsolve = nothing, + linesearch = LineSearch(), precs = DEFAULT_PRECS) + +An implementation of `Klement` with line search, preconditioning and customizable linear +solves. + +## Keyword Arguments + + - `max_resets`: the maximum number of resets to perform. Defaults to `5`. + - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the + linear solves within the Newton method. Defaults to `nothing`, which means it uses the + LinearSolve.jl default algorithm choice. For more information on available algorithm + choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + - `precs`: the choice of preconditioners for the linear solver. Defaults to using no + preconditioners. For more information on specifying preconditioners for LinearSolve + algorithms, consult the + [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), + which means that no line search is performed. Algorithms from `LineSearches.jl` can be + used here directly, and they will be converted to the correct `LineSearch`. +""" @concrete struct GeneralKlement <: AbstractNewtonAlgorithm{false, Nothing} max_resets::Int linsolve precs linesearch - singular_tolerance end function GeneralKlement(; max_resets::Int = 5, linsolve = nothing, - linesearch = LineSearch(), precs = DEFAULT_PRECS, singular_tolerance = nothing) + linesearch = LineSearch(), precs = DEFAULT_PRECS) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) - return GeneralKlement(max_resets, linsolve, precs, linesearch, singular_tolerance) + return GeneralKlement(max_resets, linsolve, precs, linesearch) end @concrete mutable struct GeneralKlementCache{iip} <: AbstractNonlinearSolveCache{iip} @@ -27,7 +48,6 @@ end Jᵀ²du Jdu resets - singular_tolerance force_stop maxiters::Int internalnorm @@ -51,20 +71,20 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralKlemen if u isa Number linsolve = nothing else + # For General Julia Arrays default to LU Factorization + linsolve_alg = alg.linsolve === nothing && u isa Array ? LUFactorization() : + nothing weight = similar(u) recursivefill!(weight, true) Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing, nothing)..., weight) linprob = LinearProblem(J, _vec(fu); u0 = _vec(fu)) - linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr, + linsolve = init(linprob, linsolve_alg; alias_A = true, alias_b = true, Pl, Pr, linsolve_kwargs...) end - singular_tolerance = alg.singular_tolerance === nothing ? inv(sqrt(eps(eltype(u)))) : - eltype(u)(alg.singular_tolerance) - return GeneralKlementCache{iip}(f, alg, u, fu, zero(fu), _mutable_zero(u), p, linsolve, - J, zero(J), zero(J), zero(fu), zero(fu), 0, singular_tolerance, false, + J, zero(J), zero(J), _vec(zero(fu)), _vec(zero(fu)), 0, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end @@ -73,21 +93,23 @@ function perform_step!(cache::GeneralKlementCache{true}) @unpack u, fu, f, p, alg, J, linsolve, du = cache T = eltype(J) - # FIXME: How can we do this faster? - if cond(J) > cache.singular_tolerance + singular, fact_done = _try_factorize_and_check_singular!(linsolve, J) + + if singular if cache.resets == alg.max_resets cache.force_stop = true cache.retcode = ReturnCode.Unstable return nothing end + fact_done = false fill!(J, zero(T)) J[diagind(J)] .= T(1) cache.resets += 1 end # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), linu = _vec(du), - p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J), + b = -_vec(fu), linu = _vec(du), p, reltol = cache.abstol) cache.linsolve = linres.cache # Line Search @@ -108,7 +130,8 @@ function perform_step!(cache::GeneralKlementCache{true}) mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu) mul!(cache.Jdu, J, _vec(du)) cache.fu .= cache.fu2 .- cache.fu - cache.fu .= (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T)) + cache.fu .= _restructure(cache.fu, + (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T))) mul!(cache.J_cache, _vec(cache.fu), _vec(du)') cache.J_cache .*= J mul!(cache.J_cache2, cache.J_cache, J) @@ -123,14 +146,16 @@ function perform_step!(cache::GeneralKlementCache{false}) @unpack fu, f, p, alg, J, linsolve = cache T = eltype(J) - # FIXME: How can we do this faster? - if cond(J) > cache.singular_tolerance + singular, fact_done = _try_factorize_and_check_singular!(linsolve, J) + + if singular if cache.resets == alg.max_resets cache.force_stop = true cache.retcode = ReturnCode.Unstable return nothing end - cache.J = __init_identity_jacobian(u, fu) + fact_done = false + cache.J = __init_identity_jacobian(cache.u, fu) cache.resets += 1 end @@ -138,8 +163,8 @@ function perform_step!(cache::GeneralKlementCache{false}) if linsolve === nothing cache.du = -fu / cache.J else - linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), - linu = _vec(cache.du), p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J), + b = -_vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol) cache.linsolve = linres.cache end @@ -161,7 +186,8 @@ function perform_step!(cache::GeneralKlementCache{false}) cache.Jᵀ²du = cache.J_cache * cache.Jdu cache.Jdu = J * _vec(cache.du) cache.fu = cache.fu2 .- cache.fu - cache.fu = (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T)) + cache.fu = _restructure(cache.fu, + (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T))) cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J cache.J = J .+ cache.J_cache diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 8094dc09c..8e7118cc6 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -81,7 +81,6 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: end """ -```julia TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1, diff --git a/src/utils.jl b/src/utils.jl index 7c6413937..3369e5c01 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -221,3 +221,28 @@ function __init_identity_jacobian(u::StaticArray, fu) return convert(MArray{Tuple{length(fu), length(u)}}, Matrix{eltype(u)}(I, length(fu), length(u))) end + +# Check Singular Matrix +_issingular(x::Number) = iszero(x) +@generated function _issingular(x::T) where {T} + hasmethod(issingular, Tuple{T}) && return :(issingular(x)) + return :(__issingular(x)) +end +__issingular(x::AbstractMatrix{T}) where {T} = cond(x) > inv(sqrt(eps(T))) +__issingular(x) = false ## If SciMLOperator and such + +# If factorization is LU then perform that and update the linsolve cache +# else check if the matrix is singular +function _try_factorize_and_check_singular!(linsolve, X) + if linsolve.cacheval isa LU + # LU Factorization was used + linsolve.A = X + linsolve.cacheval = LinearSolve.do_factorization(linsolve.alg, X, linsolve.b, + linsolve.u) + linsolve.isfresh = false + + return !issuccess(linsolve.cacheval), true + end + return _issingular(X), false +end +_try_factorize_and_check_singular!(::Nothing, x) = _issingular(x), false diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 39ae155a3..25bb8a68a 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -87,7 +87,7 @@ end broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21] - broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22] + broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 16, 21, 22] broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21] test_on_library(problems, dicts, alg_ops, broken_tests) @@ -95,11 +95,13 @@ end @testset "GeneralKlement 23 Test Problems" begin alg_ops = (GeneralKlement(), - GeneralKlement(; linesearch = BackTracking())) + GeneralKlement(; linesearch = BackTracking()), + GeneralKlement(; linesearch = HagerZhang())) broken_tests = Dict(alg => Int[] for alg in alg_ops) - broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 13, 22] - broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 22] + broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22] + broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 13, 22] + broken_tests[alg_ops[3]] = [1, 2, 5, 6, 11, 12, 13, 22] test_on_library(problems, dicts, alg_ops, broken_tests) end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index 1aa60fa05..9c16fe0fe 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -7,7 +7,7 @@ vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end @@ -18,6 +18,6 @@ vecprob = NonlinearProblem(fiip, vec(u0), p) prob = NonlinearProblem(fiip, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end From 611477c83c41f4d5cd3ea4b075203126be2792fb Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 23 Oct 2023 01:10:03 -0400 Subject: [PATCH 8/9] Add Limited Memory Broyden --- .github/workflows/CI.yml | 3 +- src/NonlinearSolve.jl | 3 +- src/broyden.jl | 37 ++++-- src/lbroyden.jl | 260 +++++++++++++++++++++++++++++++++++++++ src/utils.jl | 12 ++ test/23_test_problems.jl | 13 +- test/basictests.jl | 90 ++++++++++++++ test/matrix_resizing.jl | 3 +- 8 files changed, 403 insertions(+), 18 deletions(-) create mode 100644 src/lbroyden.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index d3232532a..0d3ee1419 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -16,7 +16,8 @@ jobs: strategy: matrix: group: - - All + - Core + - 23TestProblems version: - '1' steps: diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 8d3a8ca2b..2b26b3721 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -76,6 +76,7 @@ include("dfsane.jl") include("pseudotransient.jl") include("broyden.jl") include("klement.jl") +include("lbroyden.jl") include("jacobian.jl") include("ad.jl") include("default.jl") @@ -106,7 +107,7 @@ end export RadiusUpdateSchemes export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient, - GeneralBroyden, GeneralKlement + GeneralBroyden, GeneralKlement, LimitedMemoryBroyden export LeastSquaresOptimJL, FastLevenbergMarquardtJL export RobustMultiNewton, FastShortcutNonlinearPolyalg diff --git a/src/broyden.jl b/src/broyden.jl index 6a767a8af..3232a2d9f 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -1,13 +1,14 @@ # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl """ - GeneralBroyden(max_resets, linesearch) - GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) + GeneralBroyden(; max_resets = 3, linesearch = LineSearch(), reset_tolerance = nothing) An implementation of `Broyden` with reseting and line search. ## Arguments - `max_resets`: the maximum number of resets to perform. Defaults to `3`. + - `reset_tolerance`: the tolerance for the reset check. Defaults to + `sqrt(eps(eltype(u)))`. - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), which means that no line search is performed. Algorithms from `LineSearches.jl` can be used here directly, and they will be converted to the correct `LineSearch`. It is @@ -16,12 +17,14 @@ An implementation of `Broyden` with reseting and line search. """ @concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing} max_resets::Int + reset_tolerance linesearch end -function GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) +function GeneralBroyden(; max_resets = 3, linesearch = LineSearch(), + reset_tolerance = nothing) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) - return GeneralBroyden(max_resets, linesearch) + return GeneralBroyden(max_resets, reset_tolerance, linesearch) end @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} @@ -43,6 +46,8 @@ end internalnorm retcode::ReturnCode.T abstol + reset_tolerance + reset_check prob stats::NLStats lscache @@ -57,9 +62,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde u = alias_u0 ? u0 : deepcopy(u0) fu = evaluate_f(prob, u) J⁻¹ = __init_identity_jacobian(u, fu) + reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) : + alg.reset_tolerance + reset_check = x -> abs(x) ≤ reset_tolerance return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu), zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets, - maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), + maxiters, internalnorm, ReturnCode.Default, abstol, reset_tolerance, + reset_check, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end @@ -79,8 +88,13 @@ function perform_step!(cache::GeneralBroydenCache{true}) # Update the inverse jacobian dfu .= fu2 .- fu - if cache.resets < cache.max_resets && - (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu)) + + if all(cache.reset_check, du) || all(cache.reset_check, dfu) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end fill!(J⁻¹, 0) J⁻¹[diagind(J⁻¹)] .= T(1) cache.resets += 1 @@ -111,8 +125,12 @@ function perform_step!(cache::GeneralBroydenCache{false}) # Update the inverse jacobian cache.dfu = cache.fu2 .- cache.fu - if cache.resets < cache.max_resets && - (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu)) + if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu) cache.resets += 1 else @@ -141,6 +159,7 @@ function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = ca cache.maxiters = maxiters cache.stats.nf = 1 cache.stats.nsteps = 1 + cache.resets = 0 cache.force_stop = false cache.retcode = ReturnCode.Default return cache diff --git a/src/lbroyden.jl b/src/lbroyden.jl new file mode 100644 index 000000000..458db8104 --- /dev/null +++ b/src/lbroyden.jl @@ -0,0 +1,260 @@ +""" + LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(), + threshold::Int = 10, reset_tolerance = nothing) + +An implementation of `LimitedMemoryBroyden` with reseting and line search. + +## Arguments + + - `max_resets`: the maximum number of resets to perform. Defaults to `3`. + - `reset_tolerance`: the tolerance for the reset check. Defaults to + `sqrt(eps(eltype(u)))`. + - `threshold`: the number of vectors to store in the low rank approximation. Defaults + to `10`. + - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), + which means that no line search is performed. Algorithms from `LineSearches.jl` can be + used here directly, and they will be converted to the correct `LineSearch`. It is + recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch + specifically designed for Broyden's method. +""" +@concrete struct LimitedMemoryBroyden <: AbstractNewtonAlgorithm{false, Nothing} + max_resets::Int + threshold::Int + linesearch + reset_tolerance +end + +function LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(), + threshold::Int = 10, reset_tolerance = nothing) + linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) + return LimitedMemoryBroyden(max_resets, threshold, linesearch, reset_tolerance) +end + +@concrete mutable struct LimitedMemoryBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} + f + alg + u + du + fu + fu2 + dfu + p + U + Vᵀ + Ux + xᵀVᵀ + u_cache + vᵀ_cache + force_stop::Bool + resets::Int + iterations_since_reset::Int + max_resets::Int + maxiters::Int + internalnorm + retcode::ReturnCode.T + abstol + reset_tolerance + reset_check + prob + stats::NLStats + lscache +end + +get_fu(cache::LimitedMemoryBroydenCache) = cache.fu + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemoryBroyden, + args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, + kwargs...) where {uType, iip} + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) + if u isa Number + # If u is a number then we simply use Broyden + return SciMLBase.__init(prob, + GeneralBroyden(; alg.max_resets, alg.reset_tolerance, + alg.linesearch), args...; alias_u0, maxiters, abstol, internalnorm, kwargs...) + end + fu = evaluate_f(prob, u) + threshold = min(alg.threshold, maxiters) + U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold) + du = -fu + reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) : + alg.reset_tolerance + reset_check = x -> abs(x) ≤ reset_tolerance + return LimitedMemoryBroydenCache{iip}(f, alg, u, du, fu, zero(fu), + zero(fu), p, U, Vᵀ, similar(u, threshold), similar(u, 1, threshold), + zero(u), zero(u), false, 0, 0, alg.max_resets, maxiters, internalnorm, + ReturnCode.Default, abstol, reset_tolerance, reset_check, prob, + NLStats(1, 0, 0, 0, 0), + init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) +end + +function perform_step!(cache::LimitedMemoryBroydenCache{true}) + @unpack f, p, du, u = cache + T = eltype(u) + + α = perform_linesearch!(cache.lscache, u, du) + axpy!(α, du, u) + f(cache.fu2, u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the Inverse Jacobian Approximation + cache.dfu .= cache.fu2 .- cache.fu + + # Only try to reset if we have enough iterations since last reset + if cache.iterations_since_reset > size(cache.U, 1) && + (all(cache.reset_check, du) || all(cache.reset_check, cache.dfu)) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end + cache.iterations_since_reset = 0 + cache.resets += 1 + cache.du .= -cache.fu + else + idx = min(cache.iterations_since_reset, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + + __lbroyden_matvec!(_vec(cache.vᵀ_cache), cache.Ux, U_part, Vᵀ_part, _vec(cache.du)) + __lbroyden_rmatvec!(_vec(cache.u_cache), cache.xᵀVᵀ, U_part, Vᵀ_part, + _vec(cache.dfu)) + cache.u_cache .= (du .- cache.u_cache) ./ + (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5)) + + idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) + selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) + selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) + + idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + __lbroyden_matvec!(_vec(cache.du), cache.Ux, U_part, Vᵀ_part, _vec(cache.fu2)) + cache.du .*= -1 + cache.iterations_since_reset += 1 + end + + cache.fu .= cache.fu2 + + return nothing +end + +function perform_step!(cache::LimitedMemoryBroydenCache{false}) + @unpack f, p = cache + T = eltype(cache.u) + + α = perform_linesearch!(cache.lscache, cache.u, cache.du) + cache.u = cache.u .+ α * cache.du + cache.fu2 = f(cache.u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the Inverse Jacobian Approximation + cache.dfu .= cache.fu2 .- cache.fu + + # Only try to reset if we have enough iterations since last reset + if cache.iterations_since_reset > size(cache.U, 1) && + (all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu)) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end + cache.iterations_since_reset = 0 + cache.resets += 1 + cache.du = -cache.fu + else + idx = min(cache.iterations_since_reset, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + + cache.vᵀ_cache = _restructure(cache.vᵀ_cache, + __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.du))) + cache.u_cache = _restructure(cache.u_cache, + __lbroyden_rmatvec(U_part, Vᵀ_part, _vec(cache.dfu))) + cache.u_cache = (cache.du .- cache.u_cache) ./ + (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5)) + + idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) + selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) + selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) + + idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + cache.du = _restructure(cache.du, + -__lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2))) + cache.iterations_since_reset += 1 + end + + cache.fu = cache.fu2 + + return nothing +end + +function SciMLBase.reinit!(cache::LimitedMemoryBroydenCache{iip}, u0 = cache.u; p = cache.p, + abstol = cache.abstol, maxiters = cache.maxiters) where {iip} + cache.p = p + if iip + recursivecopy!(cache.u, u0) + cache.f(cache.fu, cache.u, p) + else + # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter + cache.u = u0 + cache.fu = cache.f(cache.u, p) + end + cache.abstol = abstol + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.resets = 0 + cache.iterations_since_reset = 0 + cache.force_stop = false + cache.retcode = ReturnCode.Default + return cache +end + +@views function __lbroyden_matvec!(y::AbstractVector, Ux::AbstractVector, + U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes Vᵀ × U × x + η = size(U, 1) + if η == 0 + y .= x + return nothing + end + mul!(Ux[1:η], U, x) + mul!(y, Vᵀ[:, 1:η], Ux[1:η]) + return nothing +end + +@views function __lbroyden_matvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes Vᵀ × U × x + size(U, 1) == 0 && return x + return Vᵀ * (U * x) +end + +@views function __lbroyden_rmatvec!(y::AbstractVector, xᵀVᵀ::AbstractMatrix, + U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes xᵀ × Vᵀ × U + η = size(U, 1) + if η == 0 + y .= x + return nothing + end + mul!(xᵀVᵀ[:, 1:η], x', Vᵀ) + mul!(y', xᵀVᵀ[:, 1:η], U) + return nothing +end + +@views function __lbroyden_rmatvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes xᵀ × Vᵀ × U + size(U, 1) == 0 && return x + return (x' * Vᵀ) * U +end diff --git a/src/utils.jl b/src/utils.jl index 3369e5c01..c99b05bf4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -222,6 +222,18 @@ function __init_identity_jacobian(u::StaticArray, fu) Matrix{eltype(u)}(I, length(fu), length(u))) end +function __init_low_rank_jacobian(u::StaticArray, fu, threshold::Int) + Vᵀ = convert(MArray{Tuple{length(u), threshold}}, + zeros(eltype(u), length(u), threshold)) + U = convert(MArray{Tuple{threshold, length(u)}}, zeros(eltype(u), threshold, length(u))) + return U, Vᵀ +end +function __init_low_rank_jacobian(u, fu, threshold::Int) + Vᵀ = convert(parameterless_type(_mutable(u)), zeros(eltype(u), length(u), threshold)) + U = convert(parameterless_type(_mutable(u)), zeros(eltype(u), threshold, length(u))) + return U, Vᵀ +end + # Check Singular Matrix _issingular(x::Number) = iszero(x) @generated function _issingular(x::T) where {T} diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 25bb8a68a..2bdbecffb 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -28,7 +28,6 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4) end end -# NewtonRaphson @testset "NewtonRaphson 23 Test Problems" begin alg_ops = (NewtonRaphson(),) @@ -86,9 +85,9 @@ end GeneralBroyden(; linesearch = BackTracking())) broken_tests = Dict(alg => Int[] for alg in alg_ops) - broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21] - broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 16, 21, 22] - broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21] + broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 11, 12, 13, 14, 21] + broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22] + broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 11, 12, 13, 14, 21] test_on_library(problems, dicts, alg_ops, broken_tests) end @@ -100,8 +99,10 @@ end broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22] - broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 13, 22] - broken_tests[alg_ops[3]] = [1, 2, 5, 6, 11, 12, 13, 22] + broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 13, 22] + broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 22] test_on_library(problems, dicts, alg_ops, broken_tests) end + +# NOTE: Not adding LimitedMemoryBroyden here since it fails on most of the preoblems diff --git a/test/basictests.jl b/test/basictests.jl index a39d25656..681d506de 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -843,3 +843,93 @@ end @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) end + +# --- LimitedMemoryBroyden tests --- + +@testset "LimitedMemoryBroyden" begin + function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{false}(f, u0, p) + return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + end + + function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{true}(f, u0, p) + return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + end + + @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(), + StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente(), + LiFukushimaLineSearch()), + ad in (AutoFiniteDiff(), AutoZygote()) + + linesearch = LineSearch(; method = lsmethod, autodiff = ad) + u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) + + @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s + sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), + LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) < 200 + end + + @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],) + ad isa AutoZygote && continue + sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), + LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) ≤ 64 + end + end + + @testset "[OOP] [Immutable AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p) + res_true = sqrt(p) + all(((x, y),) -> isapprox(x, y; atol = 1e-3), zip(res.u, res_true)) + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, + @SVector[1.0, 1.0], p).u[end], p)≈1 / (2 * sqrt(p)) atol=1e-3 + end + end + + @testset "[OOP] [Scalar AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, 1.0, p) + res_true = sqrt(p) + res.u ≈ res_true + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u, + p) ≈ 1 / (2 * sqrt(p)) + end + end + + t = (p) -> [sqrt(p[2] / p[1])] + p = [0.9, 50.0] + @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1]) + @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u], + p) ≈ ForwardDiff.jacobian(t, p) + + # Iterator interface + function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} + probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) + cache = init(probN, LimitedMemoryBroyden(); maxiters = 100, abstol = 1e-10) + sols = zeros(length(p_range)) + for (i, p) in enumerate(p_range) + reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p) + sol = solve!(cache) + sols[i] = iip ? sol.u[1] : sol.u + end + return sols + end + p = range(0.01, 2, length = 200) + @test nlprob_iterator_interface(quadratic_f, p, Val(false))≈sqrt.(p) atol=1e-2 + @test nlprob_iterator_interface(quadratic_f!, p, Val(true))≈sqrt.(p) atol=1e-2 +end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index 9c16fe0fe..1d9462fa1 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -7,7 +7,8 @@ vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement()) + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), + LimitedMemoryBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end From e1940d4926628366ccdffe37a9d78b4516f47e3a Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 25 Oct 2023 16:35:06 +0200 Subject: [PATCH 9/9] Update docs/src/solvers/NonlinearSystemSolvers.md --- docs/src/solvers/NonlinearSystemSolvers.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md index 89cde3513..7664bce10 100644 --- a/docs/src/solvers/NonlinearSystemSolvers.md +++ b/docs/src/solvers/NonlinearSystemSolvers.md @@ -66,9 +66,11 @@ features, but have a bit of overhead on very small problems. with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing robustnes on the hard problems. - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and - Automatic Jacobian Resetting. This is a fast method but unstable for most problems! + Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of + the Jacobian matrix is sufficiently large. - `GeneralKlement()`: Generalization of Klement's Quasi-Newton Method with Line Search and - Automatic Jacobian Resetting. This is a fast method but unstable for most problems! + Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of + the Jacobian matrix is sufficiently large. ### SimpleNonlinearSolve.jl