From d2251ba72c7ae3b7dbd4c80855594895a61697d3 Mon Sep 17 00:00:00 2001
From: Avik Pal <avikpal@mit.edu>
Date: Sat, 21 Oct 2023 22:17:07 -0400
Subject: [PATCH] Reuse LU Factorization to check for singular matrix

---
 docs/src/api/nonlinearsolve.md |  3 ++
 src/NonlinearSolve.jl          |  2 +-
 src/dfsane.jl                  |  1 -
 src/klement.jl                 | 63 +++++++++++++++++++++++-----------
 src/trustRegion.jl             |  1 -
 src/utils.jl                   | 25 ++++++++++++++
 test/23_test_problems.jl       | 10 +++---
 test/matrix_resizing.jl        |  4 +--
 8 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md
index e4554f3e6..5da204f07 100644
--- a/docs/src/api/nonlinearsolve.md
+++ b/docs/src/api/nonlinearsolve.md
@@ -8,6 +8,9 @@ These are the native solvers of NonlinearSolve.jl.
 NewtonRaphson
 TrustRegion
 PseudoTransient
+DFSane
+GeneralBroyden
+GeneralKlement
 ```
 
 ## Polyalgorithms
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 438cea870..8d3a8ca2b 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -9,7 +9,7 @@ import ArrayInterface: restructure
 import ForwardDiff
 
 import ADTypes: AbstractFiniteDifferencesMode
-import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable
+import ArrayInterface: undefmatrix, matrix_colors, parameterless_type, ismutable, issingular
 import ConcreteStructs: @concrete
 import EnumX: @enumx
 import ForwardDiff: Dual
diff --git a/src/dfsane.jl b/src/dfsane.jl
index 13de5ff6a..f5bf69eca 100644
--- a/src/dfsane.jl
+++ b/src/dfsane.jl
@@ -43,7 +43,6 @@ See also the implementation in [SimpleNonlinearSolve.jl](https://github.com/SciM
 - `max_inner_iterations`: the maximum number of iterations allowed for the inner loop of the
   algorithm. Defaults to `1000`.
 """
-
 struct DFSane{T, F} <: AbstractNonlinearSolveAlgorithm
     σ_min::T
     σ_max::T
diff --git a/src/klement.jl b/src/klement.jl
index 42cac5bbe..5674b1757 100644
--- a/src/klement.jl
+++ b/src/klement.jl
@@ -1,15 +1,36 @@
+"""
+    GeneralKlement(; max_resets = 5, linsolve = LUFactorization(),
+                     linesearch = LineSearch(), precs = DEFAULT_PRECS)
+
+An implementation of `Klement` with line search, preconditioning and customizable linear
+solves.
+
+## Keyword Arguments
+
+  - `max_resets`: the maximum number of resets to perform. Defaults to `5`.
+  - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
+    which means that no line search is performed. Algorithms from `LineSearches.jl` can be
+    used here directly, and they will be converted to the correct `LineSearch`.
+"""
 @concrete struct GeneralKlement <: AbstractNewtonAlgorithm{false, Nothing}
     max_resets::Int
     linsolve
     precs
     linesearch
-    singular_tolerance
 end
 
-function GeneralKlement(; max_resets::Int = 5, linsolve = nothing,
-    linesearch = LineSearch(), precs = DEFAULT_PRECS, singular_tolerance = nothing)
+function GeneralKlement(; max_resets::Int = 5, linsolve = LUFactorization(),
+    linesearch = LineSearch(), precs = DEFAULT_PRECS)
     linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
-    return GeneralKlement(max_resets, linsolve, precs, linesearch, singular_tolerance)
+    return GeneralKlement(max_resets, linsolve, precs, linesearch)
 end
 
 @concrete mutable struct GeneralKlementCache{iip} <: AbstractNonlinearSolveCache{iip}
@@ -27,7 +48,6 @@ end
     Jᵀ²du
     Jdu
     resets
-    singular_tolerance
     force_stop
     maxiters::Int
     internalnorm
@@ -60,11 +80,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralKlemen
             linsolve_kwargs...)
     end
 
-    singular_tolerance = alg.singular_tolerance === nothing ? inv(sqrt(eps(eltype(u)))) :
-                         eltype(u)(alg.singular_tolerance)
-
     return GeneralKlementCache{iip}(f, alg, u, fu, zero(fu), _mutable_zero(u), p, linsolve,
-        J, zero(J), zero(J), zero(fu), zero(fu), 0, singular_tolerance, false,
+        J, zero(J), zero(J), _vec(zero(fu)), _vec(zero(fu)), 0, false,
         maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
         init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
@@ -73,21 +90,23 @@ function perform_step!(cache::GeneralKlementCache{true})
     @unpack u, fu, f, p, alg, J, linsolve, du = cache
     T = eltype(J)
 
-    # FIXME: How can we do this faster?
-    if cond(J) > cache.singular_tolerance
+    singular, fact_done = _try_factorize_and_check_singular!(linsolve, J)
+
+    if singular
         if cache.resets == alg.max_resets
             cache.force_stop = true
             cache.retcode = ReturnCode.Unstable
             return nothing
         end
+        fact_done = false
         fill!(J, zero(T))
         J[diagind(J)] .= T(1)
         cache.resets += 1
     end
 
     # u = u - J \ fu
-    linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu), linu = _vec(du),
-        p, reltol = cache.abstol)
+    linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
+        b = -_vec(fu), linu = _vec(du), p, reltol = cache.abstol)
     cache.linsolve = linres.cache
 
     # Line Search
@@ -108,7 +127,8 @@ function perform_step!(cache::GeneralKlementCache{true})
     mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu)
     mul!(cache.Jdu, J, _vec(du))
     cache.fu .= cache.fu2 .- cache.fu
-    cache.fu .= (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    cache.fu .= _restructure(cache.fu,
+        (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T)))
     mul!(cache.J_cache, _vec(cache.fu), _vec(du)')
     cache.J_cache .*= J
     mul!(cache.J_cache2, cache.J_cache, J)
@@ -123,14 +143,16 @@ function perform_step!(cache::GeneralKlementCache{false})
     @unpack fu, f, p, alg, J, linsolve = cache
     T = eltype(J)
 
-    # FIXME: How can we do this faster?
-    if cond(J) > cache.singular_tolerance
+    singular, fact_done = _try_factorize_and_check_singular!(linsolve, J)
+
+    if singular
         if cache.resets == alg.max_resets
             cache.force_stop = true
             cache.retcode = ReturnCode.Unstable
             return nothing
         end
-        cache.J = __init_identity_jacobian(u, fu)
+        fact_done = false
+        cache.J = __init_identity_jacobian(cache.u, fu)
         cache.resets += 1
     end
 
@@ -138,8 +160,8 @@ function perform_step!(cache::GeneralKlementCache{false})
     if linsolve === nothing
         cache.du = -fu / cache.J
     else
-        linres = dolinsolve(alg.precs, linsolve; A = J, b = -_vec(fu),
-            linu = _vec(cache.du), p, reltol = cache.abstol)
+        linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
+            b = -_vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
 
@@ -161,7 +183,8 @@ function perform_step!(cache::GeneralKlementCache{false})
     cache.Jᵀ²du = cache.J_cache * cache.Jdu
     cache.Jdu = J * _vec(cache.du)
     cache.fu = cache.fu2 .- cache.fu
-    cache.fu = (cache.fu .- _restructure(cache.fu, cache.Jdu)) ./ max.(cache.Jᵀ²du, eps(T))
+    cache.fu = _restructure(cache.fu,
+        (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(T)))
     cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J
     cache.J = J .+ cache.J_cache
 
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 8094dc09c..8e7118cc6 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -81,7 +81,6 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
 end
 
 """
-```julia
     TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
         radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple,
         max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
diff --git a/src/utils.jl b/src/utils.jl
index 7c6413937..3369e5c01 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -221,3 +221,28 @@ function __init_identity_jacobian(u::StaticArray, fu)
     return convert(MArray{Tuple{length(fu), length(u)}},
         Matrix{eltype(u)}(I, length(fu), length(u)))
 end
+
+# Check Singular Matrix
+_issingular(x::Number) = iszero(x)
+@generated function _issingular(x::T) where {T}
+    hasmethod(issingular, Tuple{T}) && return :(issingular(x))
+    return :(__issingular(x))
+end
+__issingular(x::AbstractMatrix{T}) where {T} = cond(x) > inv(sqrt(eps(T)))
+__issingular(x) = false ## If SciMLOperator and such
+
+# If factorization is LU then perform that and update the linsolve cache
+# else check if the matrix is singular
+function _try_factorize_and_check_singular!(linsolve, X)
+    if linsolve.cacheval isa LU
+        # LU Factorization was used
+        linsolve.A = X
+        linsolve.cacheval = LinearSolve.do_factorization(linsolve.alg, X, linsolve.b,
+            linsolve.u)
+        linsolve.isfresh = false
+
+        return !issuccess(linsolve.cacheval), true
+    end
+    return _issingular(X), false
+end
+_try_factorize_and_check_singular!(::Nothing, x) = _issingular(x), false
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 39ae155a3..25bb8a68a 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -87,7 +87,7 @@ end
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
     broken_tests[alg_ops[1]] = [1, 3, 4, 5, 6, 8, 11, 12, 13, 14, 21]
-    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 3, 4, 5, 6, 9, 11, 13, 16, 21, 22]
     broken_tests[alg_ops[3]] = [1, 2, 4, 5, 6, 8, 11, 12, 13, 14, 21]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
@@ -95,11 +95,13 @@ end
 
 @testset "GeneralKlement 23 Test Problems" begin
     alg_ops = (GeneralKlement(),
-        GeneralKlement(; linesearch = BackTracking()))
+        GeneralKlement(; linesearch = BackTracking()),
+        GeneralKlement(; linesearch = HagerZhang()))
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 13, 22]
-    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 12, 22]
+    broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
+    broken_tests[alg_ops[2]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
+    broken_tests[alg_ops[3]] = [1, 2, 5, 6, 11, 12, 13, 22]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl
index 1aa60fa05..9c16fe0fe 100644
--- a/test/matrix_resizing.jl
+++ b/test/matrix_resizing.jl
@@ -7,7 +7,7 @@ vecprob = NonlinearProblem(ff, vec(u0), p)
 prob = NonlinearProblem(ff, u0, p)
 
 for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
-    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end
 
@@ -18,6 +18,6 @@ vecprob = NonlinearProblem(fiip, vec(u0), p)
 prob = NonlinearProblem(fiip, u0, p)
 
 for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
-    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden())
+    RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement())
     @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
 end