From f2f50354bbd5d5fef7be2e281de7f17b0bd43a06 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 23 Apr 2024 14:33:13 -0400 Subject: [PATCH 1/7] Add a result type for Descent Algorithms --- Project.toml | 4 ++-- src/NonlinearSolve.jl | 1 + src/abstract_types.jl | 9 ++------- src/core/approximate_jacobian.jl | 7 ++++--- src/core/generalized_first_order.jl | 7 ++++--- src/descent/common.jl | 26 ++++++++++++++++++++++++++ src/descent/damped_newton.jl | 4 ++-- src/descent/dogleg.jl | 6 +++--- src/descent/geodesic_acceleration.jl | 4 ++-- src/descent/newton.jl | 8 ++++---- src/descent/steepest.jl | 2 +- 11 files changed, 51 insertions(+), 27 deletions(-) create mode 100644 src/descent/common.jl diff --git a/Project.toml b/Project.toml index 99a50b257..f5e8d10ac 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "3.10.1" +version = "3.11.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -76,7 +76,7 @@ LazyArrays = "1.8.2" LeastSquaresOptim = "0.8.5" LineSearches = "7.2" LinearAlgebra = "1.10" -LinearSolve = "2.21" +LinearSolve = "2.29" MINPACK = "1.2" MaybeInplace = "0.1.1" NLSolvers = "0.5" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index ffa792ce9..9261f259f 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -50,6 +50,7 @@ include("adtypes.jl") include("timer_outputs.jl") include("internal/helpers.jl") +include("descent/common.jl") include("descent/newton.jl") include("descent/steepest.jl") include("descent/dogleg.jl") diff --git a/src/abstract_types.jl b/src/abstract_types.jl index c1ee79b20..c5e5d990b 100644 --- a/src/abstract_types.jl +++ b/src/abstract_types.jl @@ -66,7 +66,7 @@ Abstract Type for all Descent Caches. ### `__internal_solve!` specification ```julia -δu, success, intermediates = __internal_solve!( +descent_result = __internal_solve!( cache::AbstractDescentCache, J, fu, u, idx::Val; skip_solve::Bool = false, kwargs...) ``` @@ -81,12 +81,7 @@ Abstract Type for all Descent Caches. #### Returned values - - `δu`: the descent direction. - - `success`: Certain Descent Algorithms can reject a descent direction for example - `GeodesicAcceleration`. - - `intermediates`: A named tuple containing intermediates computed during the solve. - For example, `GeodesicAcceleration` returns `NamedTuple{(:v, :a)}` containing the - "velocity" and "acceleration" terms. + - `descent_result`: the result of the descent solve, packaged as a [`DescentResult`](@ref).
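For orientation, here is a minimal sketch of the new return convention. The constructor and field names are those of the `DescentResult` type introduced in `src/descent/common.jl` below; the type is internal to the patched package, hence the module qualification, and the argument values are made up for illustration:

```julia
using NonlinearSolve

# Descent solves now return one result object instead of a 3-tuple.
res = NonlinearSolve.DescentResult(; δu = [0.1, -0.2], extras = (; v = [0.1, -0.2]))

res.success  # `true` by default; `false` when the direction is rejected
res.δu       # the descent direction (previously the first tuple element)
res.extras   # solver-specific intermediates, e.g. `(; v, a)` for GeodesicAcceleration
```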
### Interface Functions diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index d5329f1c8..6e8f93ba6 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -282,16 +282,17 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip}; @static_timeit cache.timer "descent" begin if cache.trustregion_cache !== nothing && hasfield(typeof(cache.trustregion_cache), :trust_region) - δu, descent_success, descent_intermediates = __internal_solve!( + descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian, trust_region = cache.trustregion_cache.trust_region) else - δu, descent_success, descent_intermediates = __internal_solve!( + descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian) end end + δu, descent_intermediates = descent_result.δu, descent_result.extras - if descent_success + if descent_result.success if GB === :LineSearch @static_timeit cache.timer "linesearch" begin needs_reset, α = __internal_solve!(cache.linesearch_cache, cache.u, δu) diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index 4733e3d3e..710787840 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -221,16 +221,17 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; @static_timeit cache.timer "descent" begin if cache.trustregion_cache !== nothing && hasfield(typeof(cache.trustregion_cache), :trust_region) - δu, descent_success, descent_intermediates = __internal_solve!( + descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian, trust_region = cache.trustregion_cache.trust_region) else - δu, descent_success, descent_intermediates = __internal_solve!( + descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian) end end + δu, descent_intermediates = descent_result.δu, descent_result.extras - if descent_success + if descent_result.success cache.make_new_jacobian = true if GB === :LineSearch @static_timeit cache.timer "linesearch" begin diff --git a/src/descent/common.jl b/src/descent/common.jl new file mode 100644 index 000000000..2a614d84a --- /dev/null +++ b/src/descent/common.jl @@ -0,0 +1,26 @@ +""" + DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;)) + +Construct a `DescentResult` object. + +### Keyword Arguments + + - `δu`: The descent direction. + - `u`: The new iterate. This is provided only for multi-step methods currently. + - `success`: Certain Descent Algorithms can reject a descent direction, for example + [`GeodesicAcceleration`](@ref). + - `extras`: A named tuple containing intermediates computed during the solve. + For example, [`GeodesicAcceleration`](@ref) returns `NamedTuple{(:v, :a)}` containing + the "velocity" and "acceleration" terms. +""" +@concrete struct DescentResult + δu + u + success::Bool + extras +end + +function DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;)) + @assert δu !== missing || u !== missing + return DescentResult(δu, u, success, extras) +end diff --git a/src/descent/damped_newton.jl b/src/descent/damped_newton.jl index 3e21cc2d1..aaabc6c4e 100644 --- a/src/descent/damped_newton.jl +++ b/src/descent/damped_newton.jl @@ -135,7 +135,7 @@ function __internal_solve!(cache::DampedNewtonDescentCache{INV, mode}, J, fu, u, idx::Val{N} = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true, kwargs...)
where {INV, N, mode} δu = get_du(cache, idx) - skip_solve && return δu, true, (;) + skip_solve && return DescentResult(; δu) recompute_A = idx === Val(1) @@ -208,7 +208,7 @@ function __internal_solve!(cache::DampedNewtonDescentCache{INV, mode}, J, fu, @bb @. δu *= -1 set_du!(cache, δu, idx) - return δu, true, (;) + return DescentResult(; δu) end # Define special concatenation for certain Array combinations diff --git a/src/descent/dogleg.jl b/src/descent/dogleg.jl index ee725988b..f3e1d9cc0 100644 --- a/src/descent/dogleg.jl +++ b/src/descent/dogleg.jl @@ -88,7 +88,7 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} = if cache.internalnorm(δu_newton) ≤ trust_region @bb copyto!(δu, δu_newton) set_du!(cache, δu, idx) - return δu, true, (; δuJᵀJδu = T(NaN)) + return DescentResult(; δu, extras = (; δuJᵀJδu = T(NaN))) end # Take intersection of steepest descent direction and trust region if Cauchy point lies @@ -115,7 +115,7 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} = λ = trust_region / l_grad @bb @. δu = λ * δu_cauchy set_du!(cache, δu, idx) - return δu, true, (; δuJᵀJδu = λ^2 * δuJᵀJδu) + return DescentResult(; δu, extras = (; δuJᵀJδu = λ^2 * δuJᵀJδu)) end # FIXME: For anything other than 2-norm a quadratic root will give incorrect results @@ -133,5 +133,5 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} = @bb @. δu = cache.δu_cache_1 + τ * cache.δu_cache_2 set_du!(cache, δu, idx) - return δu, true, (; δuJᵀJδu = T(NaN)) + return DescentResult(; δu, extras = (; δuJᵀJδu = T(NaN))) end diff --git a/src/descent/geodesic_acceleration.jl b/src/descent/geodesic_acceleration.jl index 6cba1202e..d72cd2828 100644 --- a/src/descent/geodesic_acceleration.jl +++ b/src/descent/geodesic_acceleration.jl @@ -105,7 +105,7 @@ end function __internal_solve!(cache::GeodesicAccelerationCache, J, fu, u, idx::Val{N} = Val(1); skip_solve::Bool = false, kwargs...) where {N} a, v, δu = get_acceleration(cache, idx), get_velocity(cache, idx), get_du(cache, idx) - skip_solve && return δu, true, (; a, v) + skip_solve && return DescentResult(; δu, extras = (; a, v)) v, _, _ = __internal_solve!( cache.descent_cache, J, fu, u, Val(2N - 1); skip_solve, kwargs...) @@ -130,5 +130,5 @@ function __internal_solve!(cache::GeodesicAccelerationCache, J, fu, u, idx::Val{ cache.last_step_accepted = false end - return δu, cache.last_step_accepted, (; a, v) + return DescentResult(; δu, success = cache.last_step_accepted, extras = (; a, v)) end diff --git a/src/descent/newton.jl b/src/descent/newton.jl index e79120a5e..bcb83686f 100644 --- a/src/descent/newton.jl +++ b/src/descent/newton.jl @@ -75,7 +75,7 @@ function __internal_solve!( cache::NewtonDescentCache{INV, false}, J, fu, u, idx::Val = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true, kwargs...) where {INV} δu = get_du(cache, idx) - skip_solve && return δu, true, (;) + skip_solve && return DescentResult(; δu) if INV @assert J!==nothing "`J` must be provided when `pre_inverted = Val(true)`." @bb δu = J × vec(fu) @@ -89,14 +89,14 @@ function __internal_solve!( end @bb @. δu *= -1 set_du!(cache, δu, idx) - return δu, true, (;) + return DescentResult(; δu) end function __internal_solve!( cache::NewtonDescentCache{false, true}, J, fu, u, idx::Val = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true, kwargs...) 
δu = get_du(cache, idx) - skip_solve && return δu, true, (;) + skip_solve && return DescentResult(; δu) if idx === Val(1) @bb cache.JᵀJ_cache = transpose(J) × J end @@ -109,5 +109,5 @@ function __internal_solve!( end @bb @. δu *= -1 set_du!(cache, δu, idx) - return δu, true, (;) + return DescentResult(; δu) end diff --git a/src/descent/steepest.jl b/src/descent/steepest.jl index 40919cc6e..4d4877a1b 100644 --- a/src/descent/steepest.jl +++ b/src/descent/steepest.jl @@ -65,5 +65,5 @@ function __internal_solve!(cache::SteepestDescentCache{INV}, J, fu, u, idx::Val end @bb @. δu *= -1 set_du!(cache, δu, idx) - return δu, true, (;) + return DescentResult(; δu) end From bcb98c5eeb5c3b7bfadbe36ed3850ed6ae07c12f Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 23 Apr 2024 16:06:19 -0400 Subject: [PATCH 2/7] Switch to QR Pivoted on linear solve failure --- src/descent/dogleg.jl | 8 ++--- src/descent/geodesic_acceleration.jl | 8 ++--- src/internal/linear_solve.jl | 48 ++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/descent/dogleg.jl b/src/descent/dogleg.jl index f3e1d9cc0..5d0bb1c7c 100644 --- a/src/descent/dogleg.jl +++ b/src/descent/dogleg.jl @@ -81,8 +81,8 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} = want to use a Trust Region." δu = get_du(cache, idx) T = promote_type(eltype(u), eltype(fu)) - δu_newton, _, _ = __internal_solve!( - cache.newton_cache, J, fu, u, idx; skip_solve, kwargs...) + δu_newton = __internal_solve!( + cache.newton_cache, J, fu, u, idx; skip_solve, kwargs...).δu # Newton's Step within the trust region if cache.internalnorm(δu_newton) ≤ trust_region @@ -102,8 +102,8 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} = @bb cache.δu_cache_mul = JᵀJ × vec(δu_cauchy) δuJᵀJδu = __dot(δu_cauchy, cache.δu_cache_mul) else - δu_cauchy, _, _ = __internal_solve!( - cache.cauchy_cache, J, fu, u, idx; skip_solve, kwargs...) + δu_cauchy = __internal_solve!( + cache.cauchy_cache, J, fu, u, idx; skip_solve, kwargs...).δu J_ = INV ? inv(J) : J l_grad = cache.internalnorm(δu_cauchy) @bb cache.JᵀJ_cache = J × vec(δu_cauchy) # TODO: Rename diff --git a/src/descent/geodesic_acceleration.jl b/src/descent/geodesic_acceleration.jl index d72cd2828..e2d8612e5 100644 --- a/src/descent/geodesic_acceleration.jl +++ b/src/descent/geodesic_acceleration.jl @@ -106,8 +106,8 @@ function __internal_solve!(cache::GeodesicAccelerationCache, J, fu, u, idx::Val{ skip_solve::Bool = false, kwargs...) where {N} a, v, δu = get_acceleration(cache, idx), get_velocity(cache, idx), get_du(cache, idx) skip_solve && return DescentResult(; δu, extras = (; a, v)) - v, _, _ = __internal_solve!( - cache.descent_cache, J, fu, u, Val(2N - 1); skip_solve, kwargs...) + v = __internal_solve!( + cache.descent_cache, J, fu, u, Val(2N - 1); skip_solve, kwargs...).δu @bb @. cache.u_cache = u + cache.h * v cache.fu_cache = evaluate_f!!(cache.f, cache.fu_cache, cache.u_cache, cache.p) @@ -116,8 +116,8 @@ function __internal_solve!(cache::GeodesicAccelerationCache, J, fu, u, idx::Val{ Jv = _restructure(cache.fu_cache, cache.Jv) @bb @. 
cache.fu_cache = (2 / cache.h) * ((cache.fu_cache - fu) / cache.h - Jv) - a, _, _ = __internal_solve!(cache.descent_cache, J, cache.fu_cache, u, Val(2N); - skip_solve, kwargs..., reuse_A_if_factorization = true) + a = __internal_solve!(cache.descent_cache, J, cache.fu_cache, u, Val(2N); + skip_solve, kwargs..., reuse_A_if_factorization = true).δu norm_v = cache.internalnorm(v) norm_a = cache.internalnorm(a) diff --git a/src/internal/linear_solve.jl b/src/internal/linear_solve.jl index 261d9e8dc..36d8f1928 100644 --- a/src/internal/linear_solve.jl +++ b/src/internal/linear_solve.jl @@ -1,5 +1,8 @@ import LinearSolve: AbstractFactorization, DefaultAlgorithmChoice, DefaultLinearSolver +const LinearSolveFailureCode = isdefined(ReturnCode, :InternalLinearSolveFailure) ? + ReturnCode.InternalLinearSolveFailure : ReturnCode.Failure + """ LinearSolverCache(alg, linsolve, A, b, u; kwargs...) @@ -23,6 +26,15 @@ handled: Returns the solution of the system `u` and stores the updated cache in `cache.lincache`. +#### Special Handling for Rank-deficient Matrix `A` + +If we detect a failure in the linear solve (mostly due to using an algorithm that doesn't +support rank-deficient matrices), we emit a warning and attempt to solve the problem using +Pivoted QR factorization. This is quite efficient if only a few rank-deficient matrices +originate in the problem. However, if such matrices are frequent for the main nonlinear +system, then it is recommended to use a different linear solver that supports rank-deficient +matrices. + #### Keyword Arguments - `reuse_A_if_factorization`: If `true`, then the factorization of `A` is reused if @@ -36,6 +48,7 @@ not mutated, we do this by copying over `A` to a preconstructed cache. @concrete mutable struct LinearSolverCache <: AbstractLinearSolverCache lincache linsolve + additional_lincache::Any A b precs @@ -71,7 +84,7 @@ function LinearSolverCache(alg, linsolve, A, b, u; kwargs...) (linsolve === nothing && A isa SMatrix) || (A isa Diagonal) || (linsolve isa typeof(\)) - return LinearSolverCache(nothing, nothing, A, b, nothing, 0, 0) + return LinearSolverCache(nothing, nothing, nothing, A, b, nothing, 0, 0) end @bb u_ = copy(u_fixed) linprob = LinearProblem(A, b; u0 = u_, kwargs...) @@ -89,7 +102,7 @@ function LinearSolverCache(alg, linsolve, A, b, u; kwargs...) # Unalias here, we will later use these as caches lincache = init(linprob, linsolve; alias_A = false, alias_b = false, Pl, Pr) - return LinearSolverCache(lincache, linsolve, nothing, nothing, precs, 0, 0) + return LinearSolverCache(lincache, linsolve, nothing, nothing, nothing, precs, 0, 0) end # Direct Linear Solve Case without Caching @@ -108,6 +121,7 @@ function (cache::LinearSolverCache{Nothing})(; end return res end + # Use LinearSolve.jl function (cache::LinearSolverCache)(; A = nothing, b = nothing, linu = nothing, du = nothing, p = nothing, weight = nothing, cachedata = nothing, reuse_A_if_factorization = false, kwargs...) @@ -139,8 +153,38 @@ function (cache::LinearSolverCache)(; cache.lincache.Pr = Pr end + # display(A) linres = solve!(cache.lincache) + # @show cache.lincache.cacheval + # @show LinearAlgebra.issuccess(cache.lincache.cacheval) cache.lincache = linres.cache + # Unfortunately LinearSolve.jl doesn't have the most uniform ReturnCode handling + if linres.retcode === ReturnCode.Failure + # TODO: We need to guard this somehow because this will surely fail if A is on GPU + # TODO: or some fancy Matrix type + if !(cache.linsolve isa QRFactorization{ColumnNorm}) + @warn "Potential Rank Deficient Matrix Detected. Attempting to solve using \ Pivoted QR Factorization."
+ @assert (A !== nothing)&&(b !== nothing) "This case is not yet supported. \ Please open an issue at \ https://github.com/SciML/NonlinearSolve.jl" + if cache.additional_lincache === nothing # First time + linprob = LinearProblem(A, b; u0 = linres.u) + cache.additional_lincache = init( + linprob, QRFactorization(ColumnNorm()); alias_u0 = false, + alias_A = false, alias_b = false, cache.lincache.Pl, cache.lincache.Pr) + else + cache.additional_lincache.A = A + cache.additional_lincache.b = b + cache.additional_lincache.Pl = cache.lincache.Pl + cache.additional_lincache.Pr = cache.lincache.Pr + end + linres = solve!(cache.additional_lincache) + cache.additional_lincache = linres.cache + return linres.u + end + end return linres.u end From dc539b975dd9dd1d8e5c75f91fe793bb004a5cb1 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 25 Apr 2024 16:21:48 -0400 Subject: [PATCH 3/7] Propagate the verbose kwarg --- src/NonlinearSolve.jl | 4 ++-- src/core/approximate_jacobian.jl | 7 ++++--- src/core/generalized_first_order.jl | 7 ++++--- src/core/spectral_methods.jl | 3 ++- src/internal/linear_solve.jl | 9 ++++++--- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 9261f259f..9b6b9ec04 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -136,10 +136,10 @@ include("default.jl") @compile_workload begin for prob in probs_nls, alg in nls_algs - solve(prob, alg; abstol = 1e-2) + solve(prob, alg; abstol = 1e-2, verbose = false) end for prob in probs_nlls, alg in nlls_algs - solve(prob, alg; abstol = 1e-2) + solve(prob, alg; abstol = 1e-2, verbose = false) end end end diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index 6e8f93ba6..ac649a107 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -114,6 +114,7 @@ end retcode::ReturnCode.T force_stop::Bool force_reinit::Bool + kwargs end store_inverse_jacobian(::ApproximateJacobianSolveCache{INV}) where {INV} = INV @@ -214,7 +215,7 @@ function SciMLBase.__init( descent_cache, linesearch_cache, trustregion_cache, update_rule_cache, reinit_rule_cache, inv_workspace, 0, 0, 0, alg.max_resets, maxiters, maxtime, alg.max_shrink_times, 0, timer, - 0.0, termination_cache, trace, ReturnCode.Default, false, false) + 0.0, termination_cache, trace, ReturnCode.Default, false, false, kwargs) end end @@ -284,10 +285,10 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip}; hasfield(typeof(cache.trustregion_cache), :trust_region) descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian, - trust_region = cache.trustregion_cache.trust_region) + trust_region = cache.trustregion_cache.trust_region, cache.kwargs...) else descent_result = __internal_solve!( - cache.descent_cache, J, cache.fu, cache.u; new_jacobian) + cache.descent_cache, J, cache.fu, cache.u; new_jacobian, cache.kwargs...)
end end δu, descent_intermediates = descent_result.δu, descent_result.extras diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index 710787840..e3a4b8941 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -111,6 +111,7 @@ concrete_jac(::GeneralizedFirstOrderAlgorithm{CJ}) where {CJ} = CJ trace retcode::ReturnCode.T force_stop::Bool + kwargs end SymbolicIndexingInterface.state_values(cache::GeneralizedFirstOrderAlgorithmCache) = cache.u @@ -202,7 +203,7 @@ function SciMLBase.__init( return GeneralizedFirstOrderAlgorithmCache{iip, GB, maxtime !== nothing}( fu, u, u_cache, p, du, J, alg, prob, jac_cache, descent_cache, linesearch_cache, trustregion_cache, 0, 0, maxiters, maxtime, alg.max_shrink_times, - timer, 0.0, true, termination_cache, trace, ReturnCode.Default, false) + timer, 0.0, true, termination_cache, trace, ReturnCode.Default, false, kwargs) end end @@ -223,10 +224,10 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; hasfield(typeof(cache.trustregion_cache), :trust_region) descent_result = __internal_solve!( cache.descent_cache, J, cache.fu, cache.u; new_jacobian, - trust_region = cache.trustregion_cache.trust_region) + trust_region = cache.trustregion_cache.trust_region, cache.kwargs...) else descent_result = __internal_solve!( - cache.descent_cache, J, cache.fu, cache.u; new_jacobian) + cache.descent_cache, J, cache.fu, cache.u; new_jacobian, cache.kwargs...) end end δu, descent_intermediates = descent_result.δu, descent_result.extras diff --git a/src/core/spectral_methods.jl b/src/core/spectral_methods.jl index d9d271d78..3d9a03e50 100644 --- a/src/core/spectral_methods.jl +++ b/src/core/spectral_methods.jl @@ -74,6 +74,7 @@ concrete_jac(::GeneralizedDFSane) = nothing trace retcode::ReturnCode.T force_stop::Bool + kwargs end function __reinit_internal!( @@ -150,7 +151,7 @@ function SciMLBase.__init(prob::AbstractNonlinearProblem, alg::GeneralizedDFSane return GeneralizedDFSaneCache{isinplace(prob), maxtime !== nothing}( fu, fu_cache, u, u_cache, prob.p, du, alg, prob, σ_n, T(alg.σ_min), T(alg.σ_max), linesearch_cache, 0, 0, maxiters, maxtime, - timer, 0.0, tc_cache, trace, ReturnCode.Default, false) + timer, 0.0, tc_cache, trace, ReturnCode.Default, false, kwargs) end end diff --git a/src/internal/linear_solve.jl b/src/internal/linear_solve.jl index 36d8f1928..909950a07 100644 --- a/src/internal/linear_solve.jl +++ b/src/internal/linear_solve.jl @@ -125,7 +125,8 @@ end # Use LinearSolve.jl function (cache::LinearSolverCache)(; A = nothing, b = nothing, linu = nothing, du = nothing, p = nothing, - weight = nothing, cachedata = nothing, reuse_A_if_factorization = false, kwargs...) + weight = nothing, cachedata = nothing, reuse_A_if_factorization = false, + verbose = true, kwargs...) cache.nsolve += 1 __update_A!(cache, A, reuse_A_if_factorization) @@ -164,8 +165,10 @@ function (cache::LinearSolverCache)(; # TODO: We need to guard this somehow because this will surely fail if A is on GPU # TODO: or some fancy Matrix type if !(cache.linsolve isa QRFactorization{ColumnNorm}) - @warn "Potential Rank Deficient Matrix Detected. Attempting to solve using \ - Pivoted QR Factorization." + if verbose + @warn "Potential Rank Deficient Matrix Detected. Attempting to solve using \ + Pivoted QR Factorization." + end @assert (A !== nothing)&&(b !== nothing) "This case is not yet supported. 
\ Please open an issue at \ https://github.com/SciML/NonlinearSolve.jl" if cache.additional_lincache === nothing # First time linprob = LinearProblem(A, b; u0 = linres.u) cache.additional_lincache = init( From dd4a2fbd21b214e0f1750e087e3a87fdcfe5cb1b Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 25 Apr 2024 16:23:07 -0400 Subject: [PATCH 4/7] Add to docs --- Project.toml | 2 +- docs/src/devdocs/internal_interfaces.md | 6 ++++++ src/core/approximate_jacobian.jl | 8 ++++---- src/core/generalized_first_order.jl | 4 ++-- src/internal/linear_solve.jl | 6 +++--- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Project.toml b/Project.toml index f5e8d10ac..e9fab016e 100644 --- a/Project.toml +++ b/Project.toml @@ -60,7 +60,7 @@ NonlinearSolveZygoteExt = "Zygote" ADTypes = "0.2.6" Aqua = "0.8" ArrayInterface = "7.9" -BandedMatrices = "1.4" +BandedMatrices = "1.5" BenchmarkTools = "1.4" CUDA = "5.2" ConcreteStructs = "0.2.3" diff --git a/docs/src/devdocs/internal_interfaces.md b/docs/src/devdocs/internal_interfaces.md index 843054cc8..91b9562a8 100644 --- a/docs/src/devdocs/internal_interfaces.md +++ b/docs/src/devdocs/internal_interfaces.md @@ -15,6 +15,12 @@ NonlinearSolve.AbstractDescentAlgorithm NonlinearSolve.AbstractDescentCache ``` +## Descent Results + +```@docs +NonlinearSolve.DescentResult +``` + ## Approximate Jacobian ```@docs diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index ac649a107..e5e6960e2 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -212,10 +212,10 @@ function SciMLBase.__init( return ApproximateJacobianSolveCache{INV, GB, iip, maxtime !== nothing}( fu, u, u_cache, p, du, J, alg, prob, initialization_cache, - descent_cache, linesearch_cache, trustregion_cache, - update_rule_cache, reinit_rule_cache, inv_workspace, 0, 0, 0, - alg.max_resets, maxiters, maxtime, alg.max_shrink_times, 0, timer, - 0.0, termination_cache, trace, ReturnCode.Default, false, false, kwargs) + descent_cache, linesearch_cache, trustregion_cache, update_rule_cache, + reinit_rule_cache, inv_workspace, 0, 0, 0, alg.max_resets, + maxiters, maxtime, alg.max_shrink_times, 0, timer, 0.0, + termination_cache, trace, ReturnCode.Default, false, false, kwargs) end end diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index e3a4b8941..fd1eb5b3f 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -202,8 +202,8 @@ function SciMLBase.__init( return GeneralizedFirstOrderAlgorithmCache{iip, GB, maxtime !== nothing}( fu, u, u_cache, p, du, J, alg, prob, jac_cache, descent_cache, linesearch_cache, - trustregion_cache, 0, 0, maxiters, maxtime, alg.max_shrink_times, - timer, 0.0, true, termination_cache, trace, ReturnCode.Default, false, kwargs) + trustregion_cache, 0, 0, maxiters, maxtime, alg.max_shrink_times, timer, + 0.0, true, termination_cache, trace, ReturnCode.Default, false, kwargs) end end diff --git a/src/internal/linear_solve.jl b/src/internal/linear_solve.jl index 909950a07..080de05ef 100644 --- a/src/internal/linear_solve.jl +++ b/src/internal/linear_solve.jl @@ -124,9 +124,9 @@ function (cache::LinearSolverCache)(; - A = nothing, b = nothing, linu = nothing, du = nothing, p = nothing, - weight = nothing, cachedata = nothing, reuse_A_if_factorization = false, - verbose = true, kwargs...) + A = nothing, b = nothing, linu = nothing, du = nothing, + p = nothing, weight = nothing, cachedata = nothing, + reuse_A_if_factorization = false, verbose = true, kwargs...)
cache.nsolve += 1 __update_A!(cache, A, reuse_A_if_factorization) From 74505908e7fb1dc0d54717dd858859c24c6e0d46 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 25 Apr 2024 17:20:47 -0400 Subject: [PATCH 5/7] Integrate the linear solve better into the algorithm --- src/core/approximate_jacobian.jl | 21 +++++++++++++++++++++ src/core/generalized_first_order.jl | 21 +++++++++++++++++++++ src/descent/common.jl | 10 +++++++--- src/descent/damped_newton.jl | 8 ++++++-- src/descent/newton.jl | 17 +++++++++++++---- src/descent/steepest.jl | 8 ++++++-- src/internal/linear_solve.jl | 18 +++++++++++------- 7 files changed, 85 insertions(+), 18 deletions(-) diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index e5e6960e2..4328af345 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -291,6 +291,27 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip}; cache.descent_cache, J, cache.fu, cache.u; new_jacobian, cache.kwargs...) end end + + if !descent_result.linsolve_success + if new_jacobian && cache.steps_since_last_reset == 0 + # Extremely pathological case. Jacobian was just reset and linear solve + # failed. Should ideally never happen in practice unless true Jacobian init + # is used. + cache.retcode = LinearSolveFailureCode + cache.force_stop = true + return + else + # Force a reinit because the problem is currently unsolvable + if !haskey(cache.kwargs, :verbose) || cache.kwargs[:verbose] + @warn "Linear Solve Failed but Jacobian Information is not current. \ Retrying with reinitialized Approximate Jacobian." + end + cache.force_reinit = true + __step!(cache; recompute_jacobian = true) + return + end + end + δu, descent_intermediates = descent_result.δu, descent_result.extras if descent_result.success diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index fd1eb5b3f..7d20b0bfc 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -230,6 +230,27 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; cache.descent_cache, J, cache.fu, cache.u; new_jacobian, cache.kwargs...) end end + + if !descent_result.linsolve_success + if new_jacobian + # Jacobian information is current and the linear solve failed; terminate the solve + cache.retcode = LinearSolveFailureCode + cache.force_stop = true + return + else + # Jacobian information is not current and the linear solve failed; recompute + # the Jacobian + if !haskey(cache.kwargs, :verbose) || cache.kwargs[:verbose] + @warn "Linear Solve Failed but Jacobian Information is not current. \ Retrying with updated Jacobian." + end + # In the 2nd call the `new_jacobian` is guaranteed to be `true`. + cache.make_new_jacobian = true + __step!(cache; recompute_jacobian = true, kwargs...) + return + end + end + δu, descent_intermediates = descent_result.δu, descent_result.extras if descent_result.success diff --git a/src/descent/common.jl b/src/descent/common.jl index 2a614d84a..3d53573ea 100644 --- a/src/descent/common.jl +++ b/src/descent/common.jl @@ -1,5 +1,6 @@ """ - DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;)) + DescentResult(; δu = missing, u = missing, success::Bool = true, + linsolve_success::Bool = true, extras = (;)) Construct a `DescentResult` object. ### Keyword Arguments - `δu`: The descent direction. - `u`: The new iterate. This is provided only for multi-step methods currently.
- `success`: Certain Descent Algorithms can reject a descent direction, for example [`GeodesicAcceleration`](@ref). + - `linsolve_success`: Whether the linear solve was successful. - `extras`: A named tuple containing intermediates computed during the solve. For example, [`GeodesicAcceleration`](@ref) returns `NamedTuple{(:v, :a)}` containing the "velocity" and "acceleration" terms. """ @concrete struct DescentResult δu u success::Bool + linsolve_success::Bool extras end -function DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;)) +function DescentResult(; δu = missing, u = missing, success::Bool = true, + linsolve_success::Bool = true, extras = (;)) @assert δu !== missing || u !== missing - return DescentResult(δu, u, success, extras) + return DescentResult(δu, u, success, linsolve_success, extras) end diff --git a/src/descent/damped_newton.jl b/src/descent/damped_newton.jl index aaabc6c4e..792f56773 100644 --- a/src/descent/damped_newton.jl +++ b/src/descent/damped_newton.jl @@ -200,10 +200,14 @@ function __internal_solve!(cache::DampedNewtonDescentCache{INV, mode}, J, fu, end @static_timeit cache.timer "linear solve" begin - δu = cache.lincache(; + linres = cache.lincache(; A, b, reuse_A_if_factorization = !new_jacobian && !recompute_A, kwargs..., linu = _vec(δu)) - δu = _restructure(get_du(cache, idx), δu) + δu = _restructure(get_du(cache, idx), linres.u) + if !linres.success + set_du!(cache, δu, idx) + return DescentResult(; δu, success = false, linsolve_success = false) + end end @bb @. δu *= -1 diff --git a/src/descent/newton.jl b/src/descent/newton.jl index bcb83686f..36edbf86c 100644 --- a/src/descent/newton.jl +++ b/src/descent/newton.jl @@ -81,10 +81,14 @@ function __internal_solve!( @bb δu = J × vec(fu) else @static_timeit cache.timer "linear solve" begin - δu = cache.lincache(; + linres = cache.lincache(; A = J, b = _vec(fu), kwargs..., linu = _vec(δu), du = _vec(δu), reuse_A_if_factorization = !new_jacobian || (idx !== Val(1))) - δu = _restructure(get_du(cache, idx), δu) + δu = _restructure(get_du(cache, idx), linres.u) + if !linres.success + set_du!(cache, δu, idx) + return DescentResult(; δu, success = false, linsolve_success = false) + end end end @bb @. δu *= -1 @@ -102,10 +106,15 @@ function __internal_solve!( end @bb cache.Jᵀfu_cache = transpose(J) × vec(fu) @static_timeit cache.timer "linear solve" begin - δu = cache.lincache(; A = __maybe_symmetric(cache.JᵀJ_cache), b = cache.Jᵀfu_cache, + linres = cache.lincache(; + A = __maybe_symmetric(cache.JᵀJ_cache), b = cache.Jᵀfu_cache, kwargs..., linu = _vec(δu), du = _vec(δu), reuse_A_if_factorization = !new_jacobian || (idx !== Val(1))) - δu = _restructure(get_du(cache, idx), δu) + δu = _restructure(get_du(cache, idx), linres.u) + if !linres.success + set_du!(cache, δu, idx) + return DescentResult(; δu, success = false, linsolve_success = false) + end end @bb @. δu *= -1 set_du!(cache, δu, idx) diff --git a/src/descent/steepest.jl b/src/descent/steepest.jl index 4d4877a1b..227fdb436 100644 --- a/src/descent/steepest.jl +++ b/src/descent/steepest.jl @@ -54,10 +54,14 @@ function __internal_solve!(cache::SteepestDescentCache{INV}, J, fu, u, idx::Val if INV A = J === nothing ?
nothing : transpose(J) @static_timeit cache.timer "linear solve" begin - δu = cache.lincache(; + linres = cache.lincache(; A, b = _vec(fu), kwargs..., linu = _vec(δu), du = _vec(δu), reuse_A_if_factorization = !new_jacobian || idx !== Val(1)) - δu = _restructure(get_du(cache, idx), δu) + δu = _restructure(get_du(cache, idx), linres.u) + if !linres.success + set_du!(cache, δu, idx) + return DescentResult(; δu, success = false, linsolve_success = false) + end end else @assert J!==nothing "`J` must be provided when `pre_inverted = Val(false)`." diff --git a/src/internal/linear_solve.jl b/src/internal/linear_solve.jl index 080de05ef..f2277a165 100644 --- a/src/internal/linear_solve.jl +++ b/src/internal/linear_solve.jl @@ -105,6 +105,11 @@ function LinearSolverCache(alg, linsolve, A, b, u; kwargs...) return LinearSolverCache(lincache, linsolve, nothing, nothing, nothing, precs, 0, 0) end +@kwdef @concrete struct LinearSolveResult + u + success::Bool = true +end + # Direct Linear Solve Case without Caching function (cache::LinearSolverCache{Nothing})(; A = nothing, b = nothing, linu = nothing, kwargs...) @@ -119,7 +124,7 @@ function (cache::LinearSolverCache{Nothing})(; else res = cache.A \ cache.b end - return res + return LinearSolveResult(; u = res) end # Use LinearSolve.jl function (cache::LinearSolverCache)(; @@ -154,11 +159,7 @@ function (cache::LinearSolverCache)(; cache.lincache.Pr = Pr end - # display(A) linres = solve!(cache.lincache) - # @show cache.lincache.cacheval - # @show LinearAlgebra.issuccess(cache.lincache.cacheval) cache.lincache = linres.cache # Unfortunately LinearSolve.jl doesn't have the most uniform ReturnCode handling if linres.retcode === ReturnCode.Failure @@ -185,11 +186,14 @@ function (cache::LinearSolverCache)(; end linres = solve!(cache.additional_lincache) cache.additional_lincache = linres.cache - return linres.u + linres.retcode === ReturnCode.Failure && + return LinearSolveResult(; u = linres.u, success = false) + return LinearSolveResult(; u = linres.u) end + return LinearSolveResult(; u = linres.u, success = false) end - return linres.u + return LinearSolveResult(; u = linres.u) end @inline __update_A!(cache::LinearSolverCache, ::Nothing, reuse) = cache From b2a231666fbeea63fb4fa5aa0b67bd6b8574b848 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 25 Apr 2024 17:41:02 -0400 Subject: [PATCH 6/7] Add tests --- test/misc/linsolve_switching_tests.jl | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 test/misc/linsolve_switching_tests.jl diff --git a/test/misc/linsolve_switching_tests.jl b/test/misc/linsolve_switching_tests.jl new file mode 100644 index 000000000..521e417cc --- /dev/null +++ b/test/misc/linsolve_switching_tests.jl @@ -0,0 +1,28 @@ +@testitem "Singular Systems -- Auto Linear Solve Switching" begin + using LinearSolve, NonlinearSolve + + function f!(du, u, p) + du[1] = 2u[1] - 2 + du[2] = (u[1] - 4u[2])^2 + 0.1 + end + + u0 = [0.0, 0.0] # Singular Jacobian at u0 + + prob = NonlinearProblem(f!, u0) + + sol = solve(prob) # This doesn't have a root so let's just test the switching + @test sol.u≈[1.0, 0.25] atol=1e-3 rtol=1e-3 + + function nlls!(du, u, p) + du[1] = 2u[1] - 2 + du[2] = (u[1] - 4u[2])^2 + 0.1 + du[3] = 0 + end + + u0 = [0.0, 0.0] + + prob = NonlinearProblem(NonlinearFunction(nlls!, resid_prototype = zeros(3)), u0) + + sol = solve(prob) + @test sol.u≈[1.0, 0.25] atol=1e-3 rtol=1e-3 end From 914f556739c6ca8a046e03b8187b04cecef715ef Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 26 Apr 2024 15:50:03 -0400 Subject: [PATCH 7/7] Safeguard
against cases which might not support rank-deficient matrices --- src/NonlinearSolve.jl | 4 ++-- src/internal/linear_solve.jl | 22 +++++++++++++++++++--- test/misc/polyalg_tests.jl | 11 +++-------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 9b6b9ec04..f6f6aee1d 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -12,8 +12,8 @@ import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_work LinearAlgebra, LinearSolve, MaybeInplace, Preferences, Printf, SciMLBase, SimpleNonlinearSolve, SparseArrays, SparseDiffTools - import ArrayInterface: undefmatrix, can_setindex, restructure, fast_scalar_indexing, - ismutable + import ArrayInterface: ArrayInterface, undefmatrix, can_setindex, restructure, + fast_scalar_indexing, ismutable import DiffEqBase: AbstractNonlinearTerminationMode, AbstractSafeNonlinearTerminationMode, AbstractSafeBestNonlinearTerminationMode, diff --git a/src/internal/linear_solve.jl b/src/internal/linear_solve.jl index f2277a165..783bf4956 100644 --- a/src/internal/linear_solve.jl +++ b/src/internal/linear_solve.jl @@ -163,9 +163,11 @@ function (cache::LinearSolverCache)(; cache.lincache = linres.cache # Unfortunately LinearSolve.jl doesn't have the most uniform ReturnCode handling if linres.retcode === ReturnCode.Failure - # TODO: We need to guard this somehow because this will surely fail if A is on GPU - # TODO: or some fancy Matrix type - if !(cache.linsolve isa QRFactorization{ColumnNorm}) + structured_mat = ArrayInterface.isstructured(cache.lincache.A) + is_gpuarray = ArrayInterface.device(cache.lincache.A) isa ArrayInterface.GPU + if !(cache.linsolve isa QRFactorization{ColumnNorm}) && + !is_gpuarray && + !structured_mat if verbose @warn "Potential Rank Deficient Matrix Detected. Attempting to solve using \ Pivoted QR Factorization." @@ -189,6 +191,20 @@ function (cache::LinearSolverCache)(; linres.retcode === ReturnCode.Failure && return LinearSolveResult(; u = linres.u, success = false) return LinearSolveResult(; u = linres.u) + elseif !(cache.linsolve isa QRFactorization{ColumnNorm}) + if verbose + if structured_mat + @warn "Potential Rank Deficient Matrix Detected. But Matrix is \ Structured. Currently, we don't attempt to solve Rank Deficient \ Structured Matrices. Please open an issue at \ https://github.com/SciML/NonlinearSolve.jl" + elseif is_gpuarray + @warn "Potential Rank Deficient Matrix Detected. But Matrix is on GPU. \ Currently, we don't attempt to solve Rank Deficient GPU \ Matrices. Please open an issue at \ https://github.com/SciML/NonlinearSolve.jl" + end + end end return LinearSolveResult(; u = linres.u, success = false) end diff --git a/test/misc/polyalg_tests.jl b/test/misc/polyalg_tests.jl index 666db02ab..c2864759c 100644 --- a/test/misc/polyalg_tests.jl +++ b/test/misc/polyalg_tests.jl @@ -250,12 +250,7 @@ end u0 = @SVector [0.0, 0.0, 0.0] prob = NonlinearProblem(f1_infeasible, u0) - try - sol = solve(prob) - @test all(!isnan, sol.u) - @test !SciMLBase.successful_retcode(sol.retcode) - @inferred solve(prob) - catch err - @test err isa LinearAlgebra.SingularException - end + sol = solve(prob) + @test all(!isnan, sol.u) + @test !SciMLBase.successful_retcode(sol.retcode) end
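For a standalone picture of the fallback this series implements, the sketch below exercises the same `QRFactorization(ColumnNorm())` algorithm that `LinearSolverCache` switches to when the primary solve reports `ReturnCode.Failure`. The 2×2 matrix and right-hand side are made-up illustrations:

```julia
using LinearAlgebra, LinearSolve

# Rank-deficient but consistent system: the second row is 2× the first,
# so LU-based defaults can fail while pivoted QR still produces a
# least-squares solution.
A = [1.0 2.0; 2.0 4.0]
b = [1.0, 2.0]

sol = solve(LinearProblem(A, b), QRFactorization(ColumnNorm()))
@assert A * sol.u ≈ b  # the system is consistent, so the residual vanishes
```

Because PATCH 3/7 stores solver keyword arguments in `cache.kwargs` and forwards them to the linear solver, passing `verbose = false` to `solve` should also silence the rank-deficiency warnings emitted by this fallback; the structured-matrix and GPU guards added in PATCH 7/7 skip the fallback entirely in those cases.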