From c35f0f4414aa7b6fb90ff661a604034fc61a11bc Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 4 Oct 2024 12:11:39 -0400 Subject: [PATCH] refactor: migrate to LineSearch.jl (#461) * refactor: migrate to LineSearch.jl * fix: forward reinit_cache to SciMLBase.reinit * fix: remaining tests from the migration * chore: bump min versions * fix: final set of bumps * fix: stop using deprecated API in default solvers * docs: fix references --- Project.toml | 8 +- docs/make.jl | 7 +- docs/src/devdocs/internal_interfaces.md | 7 - docs/src/native/globalization.md | 9 +- docs/src/native/solvers.md | 2 +- src/NonlinearSolve.jl | 7 +- src/abstract_types.jl | 22 +- src/algorithms/klement.jl | 2 +- src/algorithms/pseudo_transient.jl | 7 +- src/core/approximate_jacobian.jl | 10 +- src/core/generalized_first_order.jl | 20 +- src/core/spectral_methods.jl | 14 +- src/default.jl | 22 +- src/globalization/line_search.jl | 449 ++---------------------- src/internal/jacobian.jl | 3 +- src/internal/termination.jl | 4 +- test/gpu/core_tests.jl | 16 +- test/misc/other_tests.jl | 10 - 18 files changed, 105 insertions(+), 514 deletions(-) delete mode 100644 test/misc/other_tests.jl diff --git a/Project.toml b/Project.toml index 69dd08304..d8ec44ac1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "3.15.0-DEV" +version = "3.15.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -14,6 +14,7 @@ FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" +LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" @@ -79,11 +80,12 @@ Hwloc = "3" InteractiveUtils = "<0.0.1, 1" LazyArrays = "1.8.2, 2" LeastSquaresOptim = "0.8.5" -LineSearches = "7.2" +LineSearch = "0.1.2" +LineSearches = "7.3" LinearAlgebra = "1.10" LinearSolve = "2.35" MINPACK = "1.2" -MaybeInplace = "0.1.3" +MaybeInplace = "0.1.4" ModelingToolkit = "9.41.0" NLSolvers = "0.5" NLsolve = "4.5" diff --git a/docs/make.jl b/docs/make.jl index 3e931a227..eec25f4f7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -14,9 +14,11 @@ bib = CitationBibliography(joinpath(@__DIR__, "src", "refs.bib")) interlinks = InterLinks( "ADTypes" => "https://sciml.github.io/ADTypes.jl/stable/", + "LineSearch" => "https://sciml.github.io/LineSearch.jl/dev/" ) -makedocs(; sitename = "NonlinearSolve.jl", +makedocs(; + sitename = "NonlinearSolve.jl", authors = "Chris Rackauckas", modules = [NonlinearSolve, SimpleNonlinearSolve, SteadyStateDiffEq, Sundials, DiffEqBase, SciMLBase, SciMLJacobianOperators], @@ -30,6 +32,7 @@ makedocs(; sitename = "NonlinearSolve.jl", plugins = [bib, interlinks], format = Documenter.HTML(assets = ["assets/favicon.ico", "assets/citations.css"], canonical = "https://docs.sciml.ai/NonlinearSolve/stable/"), - pages) + pages +) deploydocs(repo = "github.com/SciML/NonlinearSolve.jl.git"; push_preview = true) diff --git a/docs/src/devdocs/internal_interfaces.md b/docs/src/devdocs/internal_interfaces.md index 91b9562a8..53e20d754 100644 --- a/docs/src/devdocs/internal_interfaces.md +++ b/docs/src/devdocs/internal_interfaces.md @@ -38,13 +38,6 @@ NonlinearSolve.AbstractDampingFunction NonlinearSolve.AbstractDampingFunctionCache ``` -## Line Search - -```@docs -NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm -NonlinearSolve.AbstractNonlinearSolveLineSearchCache -``` - ## Trust Region ```@docs diff --git a/docs/src/native/globalization.md b/docs/src/native/globalization.md index d7ff7d684..de163c6d1 100644 --- a/docs/src/native/globalization.md +++ b/docs/src/native/globalization.md @@ -8,12 +8,9 @@ Pages = ["globalization.md"] ## [Line Search Algorithms](@id line-search) -```@docs -LiFukushimaLineSearch -LineSearchesJL -RobustNonMonotoneLineSearch -NoLineSearch -``` +Line Searches have been moved to an external package. Take a look at the +[LineSearch.jl](https://github.com/SciML/LineSearch.jl) package and its +[documentation](https://sciml.github.io/LineSearch.jl/dev/). ## Radius Update Schemes for Trust Region diff --git a/docs/src/native/solvers.md b/docs/src/native/solvers.md index d2c0fc6e5..a5deca141 100644 --- a/docs/src/native/solvers.md +++ b/docs/src/native/solvers.md @@ -22,7 +22,7 @@ documentation. preconditioners. For more information on specifying preconditioners for LinearSolve algorithms, consult the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). - - `linesearch`: the line search algorithm to use. Defaults to [`NoLineSearch()`](@ref), + - `linesearch`: the line search algorithm to use. Defaults to [`NoLineSearch()`](@extref LineSearch.NoLineSearch), which means that no line search is performed. Algorithms from [`LineSearches.jl`](https://github.com/JuliaNLSolvers/LineSearches.jl/) must be wrapped in [`LineSearchesJL`](@ref) before being supplied. For a detailed documentation diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 50051fcb0..6babbdbdc 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -22,6 +22,8 @@ using LazyArrays: LazyArrays, ApplyArray, cache using LinearAlgebra: LinearAlgebra, ColumnNorm, Diagonal, I, LowerTriangular, Symmetric, UpperTriangular, axpy!, cond, diag, diagind, dot, issuccess, istril, istriu, lu, mul!, norm, pinv, tril!, triu! +using LineSearch: LineSearch, AbstractLineSearchAlgorithm, AbstractLineSearchCache, + NoLineSearch, RobustNonMonotoneLineSearch using LineSearches: LineSearches using LinearSolve: LinearSolve, LUFactorization, QRFactorization, needs_concrete_A, AbstractFactorization, @@ -172,8 +174,9 @@ export NewtonDescent, SteepestDescent, Dogleg, DampedNewtonDescent, GeodesicAcce # Globalization ## Line Search Algorithms -export LineSearchesJL, NoLineSearch, RobustNonMonotoneLineSearch, LiFukushimaLineSearch -export Static, HagerZhang, MoreThuente, StrongWolfe, BackTracking +export LineSearchesJL, LiFukushimaLineSearch # FIXME: deprecated. use LineSearch.jl directly +export Static, HagerZhang, MoreThuente, StrongWolfe, BackTracking # FIXME: deprecated +export NoLineSearch, RobustNonMonotoneLineSearch ## Trust Region Algorithms export RadiusUpdateSchemes diff --git a/src/abstract_types.jl b/src/abstract_types.jl index 2fe2fd071..0bb2e13d2 100644 --- a/src/abstract_types.jl +++ b/src/abstract_types.jl @@ -106,22 +106,6 @@ function last_step_accepted(cache::AbstractDescentCache) return true end -""" - AbstractNonlinearSolveLineSearchAlgorithm - -Abstract Type for all Line Search Algorithms used in NonlinearSolve.jl. - -### `__internal_init` specification - -```julia -__internal_init( - prob::AbstractNonlinearProblem, alg::AbstractNonlinearSolveLineSearchAlgorithm, f::F, - fu, u, p, args...; internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} --> -AbstractNonlinearSolveLineSearchCache -``` -""" -abstract type AbstractNonlinearSolveLineSearchAlgorithm end - """ AbstractNonlinearSolveLineSearchCache @@ -512,9 +496,9 @@ SciMLBase.isinplace(::AbstractNonlinearSolveJacobianCache{iip}) where {iip} = ii abstract type AbstractNonlinearSolveTraceLevel end # Default Printing -for aType in (AbstractTrustRegionMethod, AbstractNonlinearSolveLineSearchAlgorithm, - AbstractResetCondition, AbstractApproximateJacobianUpdateRule, - AbstractDampingFunction, AbstractNonlinearSolveExtensionAlgorithm) +for aType in (AbstractTrustRegionMethod, AbstractResetCondition, + AbstractApproximateJacobianUpdateRule, AbstractDampingFunction, + AbstractNonlinearSolveExtensionAlgorithm) @eval function Base.show(io::IO, alg::$(aType)) print(io, "$(nameof(typeof(alg)))()") end diff --git a/src/algorithms/klement.jl b/src/algorithms/klement.jl index 66daec1b7..5e911d8c0 100644 --- a/src/algorithms/klement.jl +++ b/src/algorithms/klement.jl @@ -27,7 +27,7 @@ over this. function Klement(; max_resets::Int = 100, linsolve = nothing, alpha = nothing, linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, init_jacobian::Val{IJ} = Val(:identity)) where {IJ} - if !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + if !(linesearch isa AbstractLineSearchAlgorithm) Base.depwarn( "Passing in a `LineSearches.jl` algorithm directly is deprecated. \ Please use `LineSearchesJL` instead.", :Klement) diff --git a/src/algorithms/pseudo_transient.jl b/src/algorithms/pseudo_transient.jl index fda39a3b9..0da85dd94 100644 --- a/src/algorithms/pseudo_transient.jl +++ b/src/algorithms/pseudo_transient.jl @@ -1,7 +1,6 @@ """ PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing) An implementation of PseudoTransient Method [coffey2003pseudotransient](@cite) that is used to solve steady state problems in an accelerated manner. It uses an adaptive time-stepping @@ -16,8 +15,8 @@ This implementation specifically uses "switched evolution relaxation" you are going to need more iterations to converge but it can be more stable. """ function PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing, alpha_initial = 1e-3) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, + alpha_initial = 1e-3) descent = DampedNewtonDescent(; linsolve, precs, initial_damping = alpha_initial, damping_fn = SwitchedEvolutionRelaxation()) return GeneralizedFirstOrderAlgorithm(; diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index e61f34032..6484c0408 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -59,7 +59,7 @@ function ApproximateJacobianSolveAlgorithm{concrete_jac, name}(; linesearch = missing, trustregion = missing, descent, update_rule, reinit_rule, initialization, max_resets::Int = typemax(Int), max_shrink_times::Int = typemax(Int)) where {concrete_jac, name} - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + if linesearch !== missing && !(linesearch isa AbstractLineSearchAlgorithm) Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ Please use `LineSearchesJL` instead.", :GeneralizedFirstOrderAlgorithm) @@ -199,8 +199,8 @@ function SciMLBase.__init( if alg.linesearch !== missing supports_line_search(alg.descent) || error("Line Search not supported by \ $(alg.descent).") - linesearch_cache = __internal_init( - prob, alg.linesearch, f, fu, u, p; stats, internalnorm, kwargs...) + linesearch_cache = init( + prob, alg.linesearch, fu, u; stats, internalnorm, kwargs...) GB = :LineSearch end @@ -317,7 +317,9 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip}; if descent_result.success if GB === :LineSearch @static_timeit cache.timer "linesearch" begin - needs_reset, α = __internal_solve!(cache.linesearch_cache, cache.u, δu) + linesearch_sol = solve!(cache.linesearch_cache, cache.u, δu) + needs_reset = !SciMLBase.successful_retcode(linesearch_sol.retcode) + α = linesearch_sol.step_size end if needs_reset && cache.steps_since_last_reset > 5 # Reset after a burn-in period cache.force_reinit = true diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index 38c3786f5..a485c7c65 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -66,7 +66,7 @@ function GeneralizedFirstOrderAlgorithm{concrete_jac, name}(; jacobian_ad !== nothing && ADTypes.mode(jacobian_ad) isa ADTypes.ReverseMode, jacobian_ad, nothing)) - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + if linesearch !== missing && !(linesearch isa AbstractLineSearchAlgorithm) Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ Please use `LineSearchesJL` instead.", :GeneralizedFirstOrderAlgorithm) @@ -199,8 +199,17 @@ function SciMLBase.__init( if alg.linesearch !== missing supports_line_search(alg.descent) || error("Line Search not supported by \ $(alg.descent).") - linesearch_cache = __internal_init( - prob, alg.linesearch, f, fu, u, p; stats, internalnorm, kwargs...) + linesearch_ad = alg.forward_ad === nothing ? + (alg.reverse_ad === nothing ? alg.jacobian_ad : + alg.reverse_ad) : alg.forward_ad + if linesearch_ad !== nothing && iip && !DI.check_inplace(linesearch_ad) + @warn "$(linesearch_ad) doesn't support in-place problems." + linesearch_ad = nothing + end + linesearch_ad = get_concrete_forward_ad( + linesearch_ad, prob, False; check_forward_mode = false) + linesearch_cache = init( + prob, alg.linesearch, fu, u; stats, autodiff = linesearch_ad, kwargs...) GB = :LineSearch end @@ -264,8 +273,9 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; cache.make_new_jacobian = true if GB === :LineSearch @static_timeit cache.timer "linesearch" begin - linesearch_failed, α = __internal_solve!( - cache.linesearch_cache, cache.u, δu) + linesearch_sol = solve!(cache.linesearch_cache, cache.u, δu) + linesearch_failed = !SciMLBase.successful_retcode(linesearch_sol.retcode) + α = linesearch_sol.step_size end if linesearch_failed cache.retcode = ReturnCode.InternalLineSearchFailed diff --git a/src/core/spectral_methods.jl b/src/core/spectral_methods.jl index 85e58c1a3..d141a627b 100644 --- a/src/core/spectral_methods.jl +++ b/src/core/spectral_methods.jl @@ -9,9 +9,8 @@ Method. ### Arguments - - `linesearch`: Globalization using a Line Search Method. This needs to follow the - [`NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm`](@ref) interface. This - is not optional currently, but that restriction might be lifted in the future. + - `linesearch`: Globalization using a Line Search Method. This is not optional currently, + but that restriction might be lifted in the future. - `σ_min`: The minimum spectral parameter allowed. This is used to ensure that the spectral parameter is not too small. - `σ_max`: The maximum spectral parameter allowed. This is used to ensure that the @@ -119,7 +118,7 @@ end function SciMLBase.__init(prob::AbstractNonlinearProblem, alg::GeneralizedDFSane, args...; stats = empty_nlstats(), alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, - internalnorm::F = DEFAULT_NORM, maxtime = nothing, kwargs...) where {F} + maxtime = nothing, kwargs...) timer = get_timer_output() @static_timeit timer "cache construction" begin u = __maybe_unaliased(prob.u0, alias_u0) @@ -130,8 +129,7 @@ function SciMLBase.__init(prob::AbstractNonlinearProblem, alg::GeneralizedDFSane fu = evaluate_f(prob, u) @bb fu_cache = copy(fu) - linesearch_cache = __internal_init(prob, alg.linesearch, prob.f, fu, u, prob.p; - stats, maxiters, internalnorm, kwargs...) + linesearch_cache = init(prob, alg.linesearch, fu, u; stats, kwargs...) abstol, reltol, tc_cache = init_termination_cache( prob, abstol, reltol, fu, u_cache, termination_condition) @@ -167,7 +165,9 @@ function __step!(cache::GeneralizedDFSaneCache{iip}; end @static_timeit cache.timer "linesearch" begin - linesearch_failed, α = __internal_solve!(cache.linesearch_cache, cache.u, cache.du) + linesearch_sol = solve!(cache.linesearch_cache, cache.u, cache.du) + linesearch_failed = !SciMLBase.successful_retcode(linesearch_sol.retcode) + α = linesearch_sol.step_size end if linesearch_failed diff --git a/src/default.jl b/src/default.jl index 8a9aac6af..5623c553f 100644 --- a/src/default.jl +++ b/src/default.jl @@ -364,7 +364,9 @@ function RobustMultiNewton(::Type{T} = Float64; concrete_jac = nothing, linsolve radius_update_scheme = RadiusUpdateSchemes.Bastin), NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, - linesearch = LineSearchesJL(; method = BackTracking()), autodiff), + linesearch = LineSearch.LineSearchesJL(; + method = LineSearches.BackTracking()), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.NLsolve, autodiff), TrustRegion(; concrete_jac, linsolve, precs, @@ -405,7 +407,9 @@ function FastShortcutNonlinearPolyalg( else algs = (NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, - linesearch = LineSearchesJL(; method = BackTracking()), autodiff), + linesearch = LineSearch.LineSearchesJL(; + method = LineSearches.BackTracking()), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, autodiff), TrustRegion(; concrete_jac, linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) @@ -426,7 +430,9 @@ function FastShortcutNonlinearPolyalg( SimpleKlement(), NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, - linesearch = LineSearchesJL(; method = BackTracking()), autodiff), + linesearch = LineSearch.LineSearchesJL(; + method = LineSearches.BackTracking()), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) end @@ -439,12 +445,15 @@ function FastShortcutNonlinearPolyalg( else # TODO: This number requires a bit rigorous testing start_index = u0_len !== nothing ? (u0_len ≤ 25 ? 4 : 1) : 1 - algs = (Broyden(; autodiff), + algs = ( + Broyden(; autodiff), Broyden(; init_jacobian = Val(:true_jacobian), autodiff), Klement(; linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, - linesearch = LineSearchesJL(; method = BackTracking()), autodiff), + linesearch = LineSearch.LineSearchesJL(; + method = LineSearches.BackTracking()), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, autodiff), TrustRegion(; concrete_jac, linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) @@ -480,7 +489,8 @@ function FastShortcutNLLSPolyalg( linsolve, precs, disable_geodesic = Val(true), autodiff, kwargs...), TrustRegion(; concrete_jac, linsolve, precs, autodiff, kwargs...), GaussNewton(; concrete_jac, linsolve, precs, - linesearch = LineSearchesJL(; method = BackTracking()), + linesearch = LineSearch.LineSearchesJL(; + method = LineSearches.BackTracking()), autodiff, kwargs...), TrustRegion(; concrete_jac, linsolve, precs, radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff, kwargs...), diff --git a/src/globalization/line_search.jl b/src/globalization/line_search.jl index 323c49258..c7c342bee 100644 --- a/src/globalization/line_search.jl +++ b/src/globalization/line_search.jl @@ -1,440 +1,33 @@ -""" - NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - -Don't perform a line search. Just return the initial step length of `1`. -""" -struct NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm end - -@concrete mutable struct NoLineSearchCache <: AbstractNonlinearSolveLineSearchCache - α -end - -function __internal_init(prob::AbstractNonlinearProblem, alg::NoLineSearch, - f::F, fu, u, p, args...; kwargs...) where {F} - return NoLineSearchCache(promote_type(eltype(fu), eltype(u))(true)) -end - -reinit_cache!(cache::NoLineSearchCache, args...; p = cache.p, kwargs...) = nothing - -__internal_solve!(cache::NoLineSearchCache, u, du) = false, cache.α - -""" - LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - -Wrapper over algorithms from -[LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl/). Allows automatic -construction of the objective functions for the line search algorithms utilizing automatic -differentiation for fast Vector Jacobian Products. - -### Arguments - - - `method`: the line search algorithm to use. Defaults to - `method = LineSearches.Static()`, which means that the step size is fixed to the value - of `alpha`. - - `autodiff`: the automatic differentiation backend to use for the line search. Using a - reverse mode automatic differentiation backend if recommended. - - `α`: the initial step size to use. Defaults to `true` (which is equivalent to `1`). -""" -@concrete struct LineSearchesJL <: AbstractNonlinearSolveLineSearchAlgorithm - method - initial_alpha - autodiff -end - -function Base.show(io::IO, alg::LineSearchesJL) - str = "$(nameof(typeof(alg)))(" - modifiers = String[] - __is_present(alg.autodiff) && - push!(modifiers, "autodiff = $(nameof(typeof(alg.autodiff)))()") - alg.initial_alpha != true && push!(modifiers, "initial_alpha = $(alg.initial_alpha)") - push!(modifiers, "method = $(nameof(typeof(alg.method)))()") - print(io, str, join(modifiers, ", "), ")") -end - LineSearchesJL(method; kwargs...) = LineSearchesJL(; method, kwargs...) function LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - if method isa LineSearchesJL # Prevent breaking old code - return LineSearchesJL(method.method, α, autodiff) - end + Base.depwarn("`LineSearchesJL(...)` is deprecated. Please use `LineSearchesJL` from \ + LineSearch.jl instead.", + :LineSearchesJL) - if method isa AbstractNonlinearSolveLineSearchAlgorithm - Base.depwarn("Passing a native NonlinearSolve line search algorithm to \ - `LineSearchesJL` or `LineSearch` is deprecated. Pass the method \ - directly instead.", - :LineSearchesJL) - return method - end - return LineSearchesJL(method, α, autodiff) + # Prevent breaking old code + method isa LineSearch.LineSearchesJL && + return LineSearch.LineSearchesJL(method.method, α, autodiff) + method isa AbstractLineSearchAlgorithm && return method + return LineSearch.LineSearchesJL(method, α, autodiff) end -Base.@deprecate_binding LineSearch LineSearchesJL true - -Static(args...; kwargs...) = LineSearchesJL(LineSearches.Static(args...; kwargs...)) -HagerZhang(args...; kwargs...) = LineSearchesJL(LineSearches.HagerZhang(args...; kwargs...)) -function MoreThuente(args...; kwargs...) - return LineSearchesJL(LineSearches.MoreThuente(args...; kwargs...)) -end -function BackTracking(args...; kwargs...) - return LineSearchesJL(LineSearches.BackTracking(args...; kwargs...)) -end -function StrongWolfe(args...; kwargs...) - return LineSearchesJL(LineSearches.StrongWolfe(args...; kwargs...)) -end - -# Wrapper over LineSearches.jl algorithms -@concrete mutable struct LineSearchesJLCache <: AbstractNonlinearSolveLineSearchCache - f - p - ϕ - dϕ - ϕdϕ - method - alpha - deriv_op - u_cache - fu_cache - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LineSearchesJL, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - T = promote_type(eltype(fu), eltype(u)) - if u isa Number - autodiff = get_dense_ad(get_concrete_forward_ad( - alg.autodiff, prob; check_forward_mode = true)) - if !(autodiff isa AutoForwardDiff || - autodiff isa AutoPolyesterForwardDiff || - autodiff isa AutoFiniteDiff) - autodiff = AutoFiniteDiff() - # Other cases are not properly supported so we fallback to finite differencing - @warn "Scalar AD is supported only for AutoForwardDiff and AutoFiniteDiff. \ - Detected $(autodiff). Falling back to AutoFiniteDiff." - end - deriv_op = @closure (du, u, fu, p) -> begin - # Temporary solution, we are anyways moving to LineSearch.jl - return DI.derivative(f, autodiff, u, Constant(p)) * fu * du - end - else - # Both forward and reverse AD can be used for line-search. - # We prefer forward AD for better performance, however, reverse AD is also supported if user explicitly requests it. - # 1. If jvp is available, we use forward AD; - # 2. If vjp is available, we use reverse AD; - # 3. If reverse type is requested, we use reverse AD; - # 4. Finally, we use forward AD. - if alg.autodiff isa AutoFiniteDiff - deriv_op = nothing - elseif SciMLBase.has_jvp(f) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> begin - f.jvp(jvp_cache, du, u, p) - dot(fu, jvp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, f.jvp(du, u, p)) - end - elseif SciMLBase.has_vjp(f) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> begin - f.vjp(vjp_cache, fu, u, p) - dot(du, vjp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(du, f.vjp(fu, u, p)) - end - elseif alg.autodiff !== nothing && - ADTypes.mode(alg.autodiff) isa ADTypes.ReverseMode - autodiff = get_concrete_reverse_ad( - alg.autodiff, prob; check_reverse_mode = true) - vjp_op = VecJacOperator(prob, fu, u; autodiff) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(vjp_cache, fu, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(fu, u, p)) - end - else - autodiff = get_concrete_forward_ad( - alg.autodiff, prob; check_forward_mode = true) - jvp_op = JacVecOperator(prob, fu, u; autodiff) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(jvp_cache, du, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(du, u, p)) - end - end - end - - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return @fastmath internalnorm(fu_cache)^2 / 2 +for alg in (:Static, :HagerZhang, :MoreThuente, :BackTracking, :StrongWolfe) + depmsg = "`$(alg)(args...; kwargs...)` is deprecated. Please use `LineSearchesJL(; \ + method = $(alg)(args...; kwargs...))` instead." + @eval function $(alg)(args...; autodiff = nothing, initial_alpha = true, kwargs...) + Base.depwarn($(depmsg), $(Meta.quot(alg))) + return LineSearch.LineSearchesJL(; + method = LineSearches.$(alg)(args...; kwargs...), autodiff, initial_alpha) end - - dϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return deriv_op(du, u_cache, fu_cache, p) - end - - ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - deriv = deriv_op(du, u_cache, fu_cache, p) - obj = @fastmath internalnorm(fu_cache)^2 / 2 - return obj, deriv - end - - return LineSearchesJLCache(f, p, ϕ, dϕ, ϕdϕ, alg.method, T(alg.initial_alpha), - deriv_op, u_cache, fu_cache, stats) end -function __internal_solve!(cache::LineSearchesJLCache, u, du; kwargs...) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - if cache.deriv_op !== nothing - dϕ = @closure α -> cache.dϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - ϕdϕ = @closure α -> cache.ϕdϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - else - dϕ = @closure α -> FiniteDiff.finite_difference_derivative(ϕ, α) - ϕdϕ = @closure α -> (ϕ(α), FiniteDiff.finite_difference_derivative(ϕ, α)) - end - - ϕ₀, dϕ₀ = ϕdϕ(zero(eltype(u))) - - # Here we should be resetting the search direction for some algorithms especially - # if we start mixing in jacobian reuse and such - dϕ₀ ≥ 0 && return (true, one(eltype(u))) +Base.@deprecate LiFukushimaLineSearch(; nan_max_iter::Int = 5, kwargs...) LineSearch.LiFukushimaLineSearch(; + nan_maxiters = nan_max_iter, kwargs...) - # We can technically reduce 1 axpy by reusing the returned value from cache.method - # but it's not worth the extra complexity - cache.alpha = first(cache.method(ϕ, dϕ, ϕdϕ, cache.alpha, ϕ₀, dϕ₀)) - return (false, cache.alpha) +function callback_into_cache!(topcache, cache::AbstractLineSearchCache, args...) + LineSearch.callback_into_cache!(cache, get_fu(topcache)) end -""" - RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10, - tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100, - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2) - -Robust NonMonotone Line Search is a derivative free line search method from DF Sane -[la2006spectral](@cite). - -### Keyword Arguments - - - `M`: The monotonicity of the algorithm is determined by a this positive integer. - A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm - of the function `f`. However, higher values allow for more flexibility in this reduction. - Despite this, the algorithm still ensures global convergence through the use of a - non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi - condition. Values in the range of 5 to 20 are usually sufficient, but some cases may - call for a higher value of `M`. The default setting is 10. - - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value - of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to - `1e-4`. - - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the minimum value of that factor. Defaults to `0.1`. - - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the maximum value of that factor. Defaults to `0.5`. - - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses - `n_exp ∈ {1, 2}`. Defaults to `2`. - - `η_strategy`: function to determine the parameter `η`, which enables growth - of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized - as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current - `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. - Defaults to ``fn_1 / n^2``. - - `maxiters`: the maximum number of iterations allowed for the inner loop of the - algorithm. Defaults to `100`. -""" -@kwdef @concrete struct RobustNonMonotoneLineSearch <: - AbstractNonlinearSolveLineSearchAlgorithm - gamma = 1 // 10000 - sigma_1 = 1 - M::Int = 10 - tau_min = 1 // 10 - tau_max = 1 // 2 - n_exp::Int = 2 - maxiters::Int = 100 - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2 -end - -@concrete mutable struct RobustNonMonotoneLineSearchCache <: - AbstractNonlinearSolveLineSearchCache - f - p - ϕ - u_cache - fu_cache - internalnorm - maxiters::Int - history - γ - σ₁ - M::Int - τ_min - τ_max - nsteps::Int - η_strategy - n_exp::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u, - p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache)^alg.n_exp - end - - fn₁ = internalnorm(fu)^alg.n_exp - η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ) - - return RobustNonMonotoneLineSearchCache( - f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters, - fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M, - T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats) -end - -function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - f_norm_old = ϕ(eltype(u)(0)) - α₊, α₋ = T(cache.σ₁), T(cache.σ₁) - η = cache.η_strategy(cache.nsteps, u, f_norm_old) - f_bar = maximum(cache.history) - - for k in 1:(cache.maxiters) - f_norm = ϕ(α₊) - f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊) - - α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) - - f_norm = ϕ(-α₋) - f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋) - - α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) - end - - return true, T(cache.σ₁) -end - -function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...) - fu = get_fu(topcache) - cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp - cache.nsteps += 1 - return -end - -""" - LiFukushimaLineSearch(; lambda_0 = 1, beta = 1 // 2, sigma_1 = 1 // 1000, - sigma_2 = 1 // 1000, eta = 1 // 10, nan_max_iter::Int = 5, maxiters::Int = 100) - -A derivative-free line search and global convergence of Broyden-like method for nonlinear -equations [li2000derivative](@cite). -""" -@kwdef @concrete struct LiFukushimaLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - lambda_0 = 1 - beta = 1 // 2 - sigma_1 = 1 // 1000 - sigma_2 = 1 // 1000 - eta = 1 // 10 - rho = 9 // 10 - nan_max_iter::Int = 5 # TODO (breaking): Change this to nan_maxiters for uniformity - maxiters::Int = 100 -end - -@concrete mutable struct LiFukushimaLineSearchCache <: AbstractNonlinearSolveLineSearchCache - ϕ - f - p - internalnorm - u_cache - fu_cache - λ₀ - β - σ₁ - σ₂ - η - ρ - α - nan_maxiters::Int - maxiters::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LiFukushimaLineSearch, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache) - end - - return LiFukushimaLineSearchCache( - ϕ, f, p, internalnorm, u_cache, fu_cache, T(alg.lambda_0), - T(alg.beta), T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), - T(alg.rho), T(true), alg.nan_max_iter, alg.maxiters, stats) -end - -function __internal_solve!(cache::LiFukushimaLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - - fx_norm = ϕ(T(0)) - - # Non-Blocking exit if the norm is NaN or Inf - !isfinite(fx_norm) && return (true, cache.α) - - # Early Terminate based on Eq. 2.7 - du_norm = cache.internalnorm(du) - fxλ_norm = ϕ(cache.α) - fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return (false, cache.α) - - λ₂, λ₁ = cache.λ₀, cache.λ₀ - fxλp_norm = ϕ(λ₂) - - if !isfinite(fxλp_norm) - nan_converged = false - for _ in 1:(cache.nan_maxiters) - λ₁, λ₂ = λ₂, cache.β * λ₂ - fxλp_norm = ϕ(λ₂) - nan_converged = isfinite(fxλp_norm) - nan_converged && break - end - nan_converged || return (true, cache.α) - end - - for i in 1:(cache.maxiters) - fxλp_norm = ϕ(λ₂) - converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2 - converged && return (false, λ₂) - λ₁, λ₂ = λ₂, cache.β * λ₂ - end - - return true, cache.α +function reinit_cache!(cache::AbstractLineSearchCache, args...; kwargs...) + return SciMLBase.reinit!(cache, args...; kwargs...) end diff --git a/src/internal/jacobian.jl b/src/internal/jacobian.jl index 93d10f98b..13feb8253 100644 --- a/src/internal/jacobian.jl +++ b/src/internal/jacobian.jl @@ -25,8 +25,7 @@ Construct a cache for the Jacobian of `f` w.r.t. `u`. - `jvp_autodiff`: Automatic Differentiation or Finite Differencing backend for computing the Jacobian-vector product. - `linsolve`: Linear Solver Algorithm used to determine if we need a concrete jacobian - or if possible we can just use a [`SciMLJacobianOperators.JacobianOperator`](@ref) - instead. + or if possible we can just use a `SciMLJacobianOperators.JacobianOperator` instead. """ @concrete mutable struct JacobianCache{iip} <: AbstractNonlinearSolveJacobianCache{iip} J diff --git a/src/internal/termination.jl b/src/internal/termination.jl index 570bae110..ef3f7c4c0 100644 --- a/src/internal/termination.jl +++ b/src/internal/termination.jl @@ -45,12 +45,12 @@ function update_from_termination_cache!(tc_cache, cache, u = get_u(cache)) end function update_from_termination_cache!( - tc_cache, cache, mode::AbstractNonlinearTerminationMode, u = get_u(cache)) + tc_cache, cache, ::AbstractNonlinearTerminationMode, u = get_u(cache)) evaluate_f!(cache, u, cache.p) end function update_from_termination_cache!( - tc_cache, cache, mode::AbstractSafeBestNonlinearTerminationMode, u = get_u(cache)) + tc_cache, cache, ::AbstractSafeBestNonlinearTerminationMode, u = get_u(cache)) if isinplace(cache) copyto!(get_u(cache), tc_cache.u) else diff --git a/test/gpu/core_tests.jl b/test/gpu/core_tests.jl index d6d38ee1a..75087aa30 100644 --- a/test/gpu/core_tests.jl +++ b/test/gpu/core_tests.jl @@ -12,13 +12,19 @@ prob = NonlinearProblem(linear_f, u0) - SOLVERS = (NewtonRaphson(), LevenbergMarquardt(; linsolve = QRFactorization()), - LevenbergMarquardt(; linsolve = KrylovJL_GMRES()), PseudoTransient(), - Klement(), Broyden(; linesearch = LiFukushimaLineSearch()), + SOLVERS = ( + NewtonRaphson(), + LevenbergMarquardt(; linsolve = QRFactorization()), + LevenbergMarquardt(; linsolve = KrylovJL_GMRES()), + PseudoTransient(), + Klement(), + Broyden(; linesearch = LiFukushimaLineSearch()), LimitedMemoryBroyden(; threshold = 2, linesearch = LiFukushimaLineSearch()), - DFSane(), TrustRegion(; linsolve = QRFactorization()), + DFSane(), + TrustRegion(; linsolve = QRFactorization()), TrustRegion(; linsolve = KrylovJL_GMRES(), concrete_jac = true), # Needed if Zygote not loaded - nothing) + nothing + ) @testset "[IIP] GPU Solvers" begin for alg in SOLVERS diff --git a/test/misc/other_tests.jl b/test/misc/other_tests.jl deleted file mode 100644 index 765177c38..000000000 --- a/test/misc/other_tests.jl +++ /dev/null @@ -1,10 +0,0 @@ -@testitem "No warning tests" tags=[:misc] begin - using NonlinearSolve - - f(u, p) = u .* u .- p - u0 = [1.0, 1.0] - p = 2.0 - prob = NonlinearProblem(f, u0, p) - @test_nowarn solve( - prob, NewtonRaphson(autodiff = AutoFiniteDiff(), linesearch = LineSearchesJL())) -end