Skip to content

Commit

Permalink
Broyden with LineSearch
Browse files Browse the repository at this point in the history
  • Loading branch information
avik-pal committed Oct 20, 2023
1 parent 6b83054 commit c6992a5
Show file tree
Hide file tree
Showing 8 changed files with 352 additions and 37 deletions.
4 changes: 3 additions & 1 deletion src/NonlinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import UnPack: @unpack
const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences,
ADTypes.AbstractSparseForwardMode, ADTypes.AbstractSparseReverseMode}

abstract type AbstractNonlinearSolveLineSearchAlgorithm end

abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end
abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end

Expand Down Expand Up @@ -105,6 +107,6 @@ export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, Pseu
export LeastSquaresOptimJL, FastLevenbergMarquardtJL
export RobustMultiNewton, FastShortcutNonlinearPolyalg

export LineSearch
export LineSearch, LiFukushimaLineSearch

end # module
91 changes: 78 additions & 13 deletions src/broyden.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
# Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
"""
GeneralBroyden(max_resets)
GeneralBroyden(; max_resets = 3)
GeneralBroyden(max_resets, linesearch)
GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
An implementation of `Broyden` with support for caching!
An implementation of `Broyden` with resetting and line search.
## Arguments
- `max_resets`: the maximum number of resets to perform. Defaults to `3`.
- `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
which means that no line search is performed. Algorithms from `LineSearches.jl` can be
used here directly, and they will be converted to the correct `LineSearch`. It is
recommended to use [`LiFukushimaLineSearch`](@ref) -- a derivative free linesearch
specifically designed for Broyden's method.
"""
struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
@concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
max_resets::Int
linesearch
end

GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
# Keyword constructor. Anything that is not already a `LineSearch` is treated as a
# line search method and wrapped via `LineSearch(; method = ...)`.
function GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
    if linesearch isa LineSearch
        wrapped = linesearch
    else
        wrapped = LineSearch(; method = linesearch)
    end
    return GeneralBroyden(max_resets, wrapped)
end

@concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
f
Expand All @@ -29,13 +38,14 @@ GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
J⁻¹df
force_stop::Bool
resets::Int
max_rests::Int
max_resets::Int
maxiters::Int
internalnorm
retcode::ReturnCode.T
abstol
prob
stats::NLStats
lscache
end

get_fu(cache::GeneralBroydenCache) = cache.fu
Expand All @@ -46,19 +56,20 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
@unpack f, u0, p = prob
u = alias_u0 ? u0 : deepcopy(u0)
fu = evaluate_f(prob, u)
J⁻¹ = convert(parameterless_type(_mutable(u)),
Matrix{eltype(u)}(I, length(fu), length(u)))
return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu),
similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets,
maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0))
J⁻¹ = __init_identity_jacobian(u, fu)
return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu),
zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets,
maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
end

function perform_step!(cache::GeneralBroydenCache{true})
@unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache
T = eltype(u)

mul!(du, J⁻¹, -fu)
u .+= du
α = perform_linesearch!(cache.lscache, u, du)
axpy!(α, du, u)
f(fu2, u, p)

cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true)
Expand All @@ -68,7 +79,7 @@ function perform_step!(cache::GeneralBroydenCache{true})

# Update the inverse jacobian
dfu .= fu2 .- fu
if cache.resets < cache.max_rests &&
if cache.resets < cache.max_resets &&
(all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu))
fill!(J⁻¹, 0)
J⁻¹[diagind(J⁻¹)] .= T(1)
Expand All @@ -83,3 +94,57 @@ function perform_step!(cache::GeneralBroydenCache{true})

return nothing
end

"""
    perform_step!(cache::GeneralBroydenCache{false})

Advance the out-of-place Broyden iteration held in `cache` by one step.

The step direction is `du = J⁻¹ * (-fu)`, the step length `α` comes from
`perform_linesearch!`, and the iterate becomes `u + α * du`. If the residual norm at the
new iterate is below `cache.abstol`, `cache.force_stop` is set and the inverse-Jacobian
update is skipped. Otherwise `J⁻¹` is either reset to the identity (when the step or the
residual change is numerically zero and fewer than `cache.max_resets` resets have been
used) or updated with a rank-one correction.

All results are stored back into `cache`; the function returns `nothing`.
"""
function perform_step!(cache::GeneralBroydenCache{false})
    @unpack f, p = cache
    T = eltype(cache.u)

    # Quasi-Newton direction from the current inverse-Jacobian approximation
    cache.du = cache.J⁻¹ * -cache.fu
    α = perform_linesearch!(cache.lscache, cache.u, cache.du)
    cache.u = cache.u .+ α * cache.du
    cache.fu2 = f(cache.u, p)

    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
    cache.stats.nf += 1

    cache.force_stop && return nothing

    # Update the inverse jacobian
    cache.dfu = cache.fu2 .- cache.fu
    if cache.resets < cache.max_resets &&
       (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu))
        # Stagnation: throw the approximation away and restart from the identity
        J⁻¹ = similar(cache.J⁻¹)
        fill!(J⁻¹, 0)
        J⁻¹[diagind(J⁻¹)] .= T(1)
        cache.J⁻¹ = J⁻¹
        cache.resets += 1
    else
        # Rank-one update; the `T(1e-5)` term keeps the denominator away from zero
        cache.J⁻¹df = cache.J⁻¹ * cache.dfu
        cache.J⁻¹₂ = cache.du' * cache.J⁻¹
        cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5))
        cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂
    end
    cache.fu = cache.fu2

    return nothing
end

# Prepare `cache` for a fresh solve starting at `u0` with parameters `p`.
#
# Besides the iterate, residual, tolerances and bookkeeping flags, the Broyden-specific
# state is restored as well: the inverse-Jacobian approximation goes back to the identity
# and the reset counter back to zero. Without this a reused cache would start from the
# previous solve's approximation with its reset budget already spent.
#
# NOTE(review): `cache.lscache` is not rebuilt here. `LiFukushimaLineSearchCache` stores
# the `p` it was initialised with, so a line search cache created before a `reinit!` with
# a different `p` presumably keeps evaluating with the old parameters -- confirm.
function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = cache.p,
    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
    cache.p = p
    if iip
        recursivecopy!(cache.u, u0)
        cache.f(cache.fu, cache.u, p)
        # Reset the approximation in place, as the in-place `perform_step!` does
        fill!(cache.J⁻¹, 0)
        cache.J⁻¹[diagind(cache.J⁻¹)] .= one(eltype(cache.u))
    else
        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
        cache.u = u0
        cache.fu = cache.f(cache.u, p)
        # Same construction `__init` uses for the initial approximation
        cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu)
    end
    cache.resets = 0
    cache.abstol = abstol
    cache.maxiters = maxiters
    cache.stats.nf = 1
    cache.stats.nsteps = 1
    cache.force_stop = false
    cache.retcode = ReturnCode.Default
    return cache
end
1 change: 1 addition & 0 deletions src/klement.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

143 changes: 134 additions & 9 deletions src/linesearch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,15 @@ function LineSearch(; method = Static(), autodiff = AutoFiniteDiff(), alpha = tr
return LineSearch(method, autodiff, alpha)
end

@concrete mutable struct LineSearchCache
# Entry point used by the solvers: re-dispatch on the type of `ls.method`, passing the
# `LineSearch` wrapper along so the selected method can still read its other fields.
@inline init_linesearch_cache(ls::LineSearch, args...) =
    init_linesearch_cache(ls.method, ls, args...)

# LineSearches.jl algorithms share no supertype, so every `method` without a more
# specific `init_linesearch_cache` overload falls through to the LineSearches.jl wrapper.
function init_linesearch_cache(_, ls, f, u, p, fu, iip)
    return LineSearchesJLCache(ls, f, u, p, fu, iip)
end

# Wrapper over LineSearches.jl algorithms
@concrete mutable struct LineSearchesJLCache
f
ϕ
Expand All @@ -35,11 +43,11 @@ end
ls
end

function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
function LineSearchesJLCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
eval_f(u, du, α) = eval_f(u - α * du)
eval_f(u) = f(u, p)

ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
convert(typeof(u), ls.α), ls)

g(u, fu) = last(value_derivative(Base.Fix2(f, p), u)) * fu
Expand Down Expand Up @@ -73,11 +81,11 @@ function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
return ϕdϕ_internal
end

return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
end

function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
fu = iip ? fu1 : nothing
function LineSearchesJLCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
fu = iip ? deepcopy(fu1) : nothing
u_ = _mutable_zero(u)

function eval_f(u, du, α)
Expand All @@ -86,7 +94,7 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
end
eval_f(u) = evaluate_f(f, u, p, IIP; fu)

ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
convert(eltype(u), ls.α), ls)

g₀ = _mutable_zero(u)
Expand Down Expand Up @@ -138,10 +146,10 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
return ϕdϕ_internal
end

return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
end

function perform_linesearch!(cache::LineSearchCache, u, du)
function perform_linesearch!(cache::LineSearchesJLCache, u, du)
cache.ls.method isa Static && return cache.α

ϕ = cache.ϕ(u, du)
Expand All @@ -155,3 +163,120 @@ function perform_linesearch!(cache::LineSearchCache, u, du)

return first(cache.ls.method(ϕ, cache.dϕ(u, du), cache.ϕdϕ(u, du), cache.α, ϕ₀, dϕ₀))
end

"""
    LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001,
        sigma_2 = 0.001, eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50)
A derivative-free line search and global convergence of Broyden-like method for nonlinear
equations by Dong-Hui Li & Masao Fukushima. For more details see
https://doi.org/10.1080/10556780008805782
"""
struct LiFukushimaLineSearch{T} <: AbstractNonlinearSolveLineSearchAlgorithm
    # Initial trial step length (keyword `lambda_0`)
    λ₀::T
    # Factor the trial step length is multiplied by on each backtrack (keyword `beta`)
    β::T
    # Weight of the `λ² ‖du‖²` term in the backtracking acceptance test (keyword `sigma_1`)
    σ₁::T
    # Weight of the `‖du‖²` term in the full-step acceptance test (keyword `sigma_2`)
    σ₂::T
    # Relative slack on the current residual norm in the backtracking test (keyword `eta`)
    η::T
    # Required residual-norm reduction factor in the full-step test (keyword `rho`)
    ρ::T
    # Maximum number of backtracks while the trial residual norm is NaN or Inf
    nan_max_iter::Int
    # Maximum number of backtracking iterations in the main search loop
    maxiters::Int
end

# Keyword constructor: the six real-valued parameters are stored with their common
# promoted type `T`; the two iteration limits are passed through unchanged.
function LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001,
    sigma_2 = 0.001, eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50)
    param_types = (typeof(lambda_0), typeof(beta), typeof(sigma_1), typeof(eta),
        typeof(rho), typeof(sigma_2))
    T = promote_type(param_types...)
    alg = LiFukushimaLineSearch{T}(lambda_0, beta, sigma_1, sigma_2, eta, rho,
        nan_max_iter, maxiters)
    return alg
end

# Working state for `LiFukushimaLineSearch`; `iip` records whether `f` is called in its
# in-place form `f(fu, u, p)` or its out-of-place form `f(u, p)`.
@concrete mutable struct LiFukushimaLineSearchCache{iip}
    # Residual function of the nonlinear problem
    f
    # Parameters passed to `f` on every evaluation
    p
    # Buffer for the trial point `u + λ * du` (in-place problems; `nothing` otherwise)
    u_cache
    # Buffer for the residual at the trial point (in-place problems; `nothing` otherwise)
    fu_cache
    # The `LiFukushimaLineSearch` holding the search parameters
    alg
    # Step length returned when the search exits early (taken from `LineSearch.α`)
    α
end

# Build the cache for the Li-Fukushima search. Only in-place problems get scratch
# buffers (independent copies of `_u` and `_fu`); out-of-place problems store `nothing`.
function init_linesearch_cache(alg::LiFukushimaLineSearch, ls::LineSearch, f, _u, p, _fu,
    ::Val{iip}) where {iip}
    if iip
        fu_buf = deepcopy(_fu)
        u_buf = deepcopy(_u)
    else
        fu_buf = nothing
        u_buf = nothing
    end
    return LiFukushimaLineSearchCache{iip}(f, p, u_buf, fu_buf, alg, ls.α)
end

"""
    perform_linesearch!(cache::LiFukushimaLineSearchCache, u, du)

Return a step length `λ` for the update `u + λ * du`, chosen by the derivative-free
backtracking rule configured in `cache.alg`.

The search proceeds as follows:

 1. If the residual norm at `u` is NaN or Inf, return `cache.α` without searching.
 2. If the full step satisfies `‖f(u + du)‖ ≤ ρ ‖f(u)‖ - σ₂ ‖du‖²`, return `cache.α`.
 3. Starting from `λ = λ₀`, multiply `λ` by `β` up to `nan_max_iter` times while the
    trial residual norm is NaN or Inf; if it never becomes finite, return `cache.α`.
 4. Multiply `λ` by `β` (at most `maxiters` times) until
    `‖f(u + λ du)‖ ≤ (1 + η) ‖f(u)‖ - σ₁ λ² ‖du‖²`, then return `λ`. If the limit is
    reached, the last shrunken `λ` is returned.

For in-place problems the trial point and its residual are written to `cache.u_cache`
and `cache.fu_cache`; this function does not write to `u` or `du`.
"""
function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) where {iip}
    (; β, σ₁, σ₂, η, λ₀, ρ, nan_max_iter, maxiters) = cache.alg

    # Residual 2-norm at the trial point `u + step * du`
    function trial_norm(step)
        if iip
            cache.u_cache .= u .+ step .* du
            cache.f(cache.fu_cache, cache.u_cache, cache.p)
            return norm(cache.fu_cache, 2)
        end
        return norm(cache.f(u .+ step .* du, cache.p), 2)
    end

    if iip
        cache.f(cache.fu_cache, u, cache.p)
        fx_norm = norm(cache.fu_cache, 2)
    else
        fx_norm = norm(cache.f(u, cache.p), 2)
    end

    # Non-Blocking exit if the norm is NaN or Inf
    !isfinite(fx_norm) && return cache.α

    du_sqnorm = norm(du, 2)^2

    # Early Terminate based on Eq. 2.7 (`true .* du` is exactly `du`)
    trial_norm(true) ≤ ρ * fx_norm - σ₂ * du_sqnorm && return cache.α

    λ = λ₀
    fxλ_norm = trial_norm(λ)

    if !isfinite(fxλ_norm)
        # Backtrack a finite number of steps
        nan_converged = false
        for _ in 1:nan_max_iter
            λ = β * λ
            fxλ_norm = trial_norm(λ)
            nan_converged = isfinite(fxλ_norm)
            nan_converged && break
        end

        # Non-Blocking exit if the norm is still NaN or Inf
        !nan_converged && return cache.α
    end

    # `fxλ_norm` always holds the residual norm at the current `λ`, so each trial point
    # is evaluated once.
    for _ in 1:maxiters
        fxλ_norm ≤ (1 + η) * fx_norm - σ₁ * λ^2 * du_sqnorm && break
        λ = β * λ
        fxλ_norm = trial_norm(λ)
    end

    return λ
end
5 changes: 3 additions & 2 deletions src/raphson.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso

return NewtonRaphsonCache{iip}(f, alg, u, fu1, fu2, du, p, uf, linsolve, J,
jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
NLStats(1, 0, 0, 0, 0), LineSearchCache(alg.linesearch, f, u, p, fu1, Val(iip)))
NLStats(1, 0, 0, 0, 0),
init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)))
end

function perform_step!(cache::NewtonRaphsonCache{true})
Expand All @@ -96,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true})

# Line Search
α = perform_linesearch!(cache.lscache, u, du)
@. u = u - α * du
axpy!(α, du, u)
f(cache.fu1, u, p)

cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true)
Expand Down
14 changes: 12 additions & 2 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing,
if chunk_size !== missing || standardtag !== missing || diff_type !== missing ||
autodiff !== missing
Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \
`autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed in\
v3. Update your code to directly specify autodiff=<ADTypes>",
`autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed \
in v3. Update your code to directly specify autodiff=<ADTypes>",
:default_adargs_to_adtype)
end
chunk_size === missing && (chunk_size = Val{0}())
Expand Down Expand Up @@ -211,3 +211,13 @@ function __get_concrete_algorithm(alg, prob)
end
return set_ad(alg, ad)
end

# Scalar problems: the "identity matrix" is the multiplicative identity of `u`'s type,
# not `u` itself.
__init_identity_jacobian(u::Number, _) = one(u)
# General arrays: build a dense `length(fu) × length(u)` identity with `u`'s element
# type, then convert it to the parameterless container type of the mutable form of `u`.
function __init_identity_jacobian(u, fu)
    nrows, ncols = length(fu), length(u)
    dense_eye = Matrix{eltype(u)}(I, nrows, ncols)
    container = parameterless_type(_mutable(u))
    return convert(container, dense_eye)
end
# Static arrays: same dense identity, converted to an `MArray` whose size is
# `length(fu) × length(u)`.
function __init_identity_jacobian(u::StaticArray, fu)
    nrows, ncols = length(fu), length(u)
    dense_eye = Matrix{eltype(u)}(I, nrows, ncols)
    return convert(MArray{Tuple{nrows, ncols}}, dense_eye)
end
Loading

0 comments on commit c6992a5

Please sign in to comment.