From f58002a1a9c2ad2bd4afbb3d88c8d196fdadd996 Mon Sep 17 00:00:00 2001
From: Alberto De Marchi
Date: Fri, 8 Mar 2024 16:43:37 +0100
Subject: [PATCH] Regret factor in ForwardBackward (#91)

This PR adds the possibility to specify a regret factor that increases the stepsize gamma at every iteration, if adaptive, before backtracking. Recent results provide convergence guarantees even without a maximum value on gamma [Theorem 14, [arXiv:2208.00799v2](https://arxiv.org/abs/2208.00799)]. The feature is implemented for ForwardBackward and FastForwardBackward. It does not seem a good fit for accelerated methods like ZeroFPR, PANOC, and PANOCplus, at least when based on quasi-Newton directions, and it is unclear whether the FastForwardBackward solver actually benefits from it; see discussion in #86. Practical performance seems to improve with values of `regret_gamma` close to 1. Tests and references have been updated accordingly.
---
 docs/references.bib                         |   8 ++
 docs/src/guide/implemented_algorithms.md    |   2 +-
 src/algorithms/fast_forward_backward.jl     |   3 +
 src/algorithms/forward_backward.jl          |   5 +
 test/problems/test_lasso_small.jl           |  22 ++++
 .../test_lasso_small_strongly_convex.jl     |  36 ++++++
 test/problems/test_sparse_logistic_small.jl | 118 +++++++++++-------
 7 files changed, 145 insertions(+), 49 deletions(-)

diff --git a/docs/references.bib b/docs/references.bib
index 2ab7e30..d8c3987 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -185,3 +185,11 @@ @article{DeMarchi2022
   year={2022},
   url={https://doi.org/10.48550/arXiv.2112.13000}
 }
+
+@article{DeMarchi2024,
+  title={An interior proximal gradient method for nonconvex optimization},
+  author={De Marchi, Alberto and Themelis, Andreas},
+  journal={arXiv:2208.00799v2},
+  year={2024},
+  url={https://doi.org/10.48550/arXiv.2208.00799}
+}
\ No newline at end of file
diff --git a/docs/src/guide/implemented_algorithms.md b/docs/src/guide/implemented_algorithms.md
index a297a22..a14fe6b 100644
--- a/docs/src/guide/implemented_algorithms.md
+++ b/docs/src/guide/implemented_algorithms.md
@@ -24,7 +24,7 @@ This is the most popular model, by far the most thoroughly studied, and an abund
 Algorithm | Assumptions | Oracle | Implementation | References
 ----------|-------------|--------|----------------|-----------
-Proximal gradient | ``f`` smooth | ``\nabla f``, ``\operatorname{prox}_{\gamma g}`` | [`ForwardBackward`](@ref) | [Lions1979](@cite)
+Proximal gradient | ``f`` locally smooth | ``\nabla f``, ``\operatorname{prox}_{\gamma g}`` | [`ForwardBackward`](@ref) | [Lions1979](@cite), [DeMarchi2024](@cite)
 Douglas-Rachford | | ``\operatorname{prox}_{\gamma f}``, ``\operatorname{prox}_{\gamma g}`` | [`DouglasRachford`](@ref) | [Eckstein1992](@cite)
 Fast proximal gradient | ``f`` convex, smooth, ``g`` convex | ``\nabla f``, ``\operatorname{prox}_{\gamma g}`` | [`FastForwardBackward`](@ref) | [Tseng2008](@cite), [Beck2009](@cite)
 PANOC | ``f`` smooth | ``\nabla f``, ``\operatorname{prox}_{\gamma g}`` | [`PANOC`](@ref) | [Stella2017](@cite)
diff --git a/src/algorithms/fast_forward_backward.jl b/src/algorithms/fast_forward_backward.jl
index 006ebad..244148b 100644
--- a/src/algorithms/fast_forward_backward.jl
+++ b/src/algorithms/fast_forward_backward.jl
@@ -33,6 +33,7 @@ See also: [`FastForwardBackward`](@ref).
 - `gamma=nothing`: stepsize, defaults to `1/Lf` if `Lf` is set, and `nothing` otherwise.
 - `adaptive=true`: makes `gamma` adaptively adjust during the iterations; this is by default `gamma === nothing`.
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. +- `regret_gamma=1.0`: factor to enlarge `gamma` in case `adaptive == true`, before backtracking. - `extrapolation_sequence=nothing`: sequence (iterator) of extrapolation coefficients to use for acceleration. # References @@ -48,6 +49,7 @@ Base.@kwdef struct FastForwardBackwardIteration{R,Tx,Tf,Tg,TLf,Tgamma,Textr} gamma::Tgamma = Lf === nothing ? nothing : (1 / Lf) adaptive::Bool = gamma === nothing minimum_gamma::R = real(eltype(x0))(1e-7) + regret_gamma::R = real(eltype(x0))(1.0) extrapolation_sequence::Textr = nothing end @@ -105,6 +107,7 @@ function Base.iterate( state::FastForwardBackwardState{R,Tx}, ) where {R,Tx} state.gamma = if iter.adaptive == true + state.gamma *= iter.regret_gamma gamma, state.g_z = backtrack_stepsize!( state.gamma, iter.f, diff --git a/src/algorithms/forward_backward.jl b/src/algorithms/forward_backward.jl index d21ee7a..3e52241 100644 --- a/src/algorithms/forward_backward.jl +++ b/src/algorithms/forward_backward.jl @@ -28,9 +28,11 @@ See also: [`ForwardBackward`](@ref). - `gamma=nothing`: stepsize to use, defaults to `1/Lf` if not set (but `Lf` is). - `adaptive=false`: forces the method stepsize to be adaptively adjusted. - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. +- `regret_gamma=1.0`: factor to enlarge `gamma` in case `adaptive == true`, before backtracking. # References 1. Lions, Mercier, “Splitting algorithms for the sum of two nonlinear operators,” SIAM Journal on Numerical Analysis, vol. 16, pp. 964–979 (1979). +2. De Marchi, Themelis, "An interior proximal gradient method for nonconvex optimization," arXiv:2208.00799v2 (2024). """ Base.@kwdef struct ForwardBackwardIteration{R,Tx,Tf,Tg,TLf,Tgamma} f::Tf = Zero() @@ -40,6 +42,7 @@ Base.@kwdef struct ForwardBackwardIteration{R,Tx,Tf,Tg,TLf,Tgamma} gamma::Tgamma = Lf === nothing ? nothing : (1 / Lf) adaptive::Bool = gamma === nothing minimum_gamma::R = real(eltype(x0))(1e-7) + regret_gamma::R = real(eltype(x0))(1.0) end Base.IteratorSize(::Type{<:ForwardBackwardIteration}) = Base.IsInfinite() @@ -84,6 +87,7 @@ function Base.iterate( state::ForwardBackwardState{R,Tx}, ) where {R,Tx} if iter.adaptive == true + state.gamma *= iter.regret_gamma state.gamma, state.g_z, state.f_x = backtrack_stepsize!( state.gamma, iter.f, @@ -150,6 +154,7 @@ See also: [`ForwardBackwardIteration`](@ref), [`IterativeAlgorithm`](@ref). # References 1. Lions, Mercier, “Splitting algorithms for the sum of two nonlinear operators,” SIAM Journal on Numerical Analysis, vol. 16, pp. 964–979 (1979). +2. De Marchi, Themelis, "An interior proximal gradient method for nonconvex optimization," arXiv:2208.00799v2 (2024). 
""" ForwardBackward(; maxit = 10_000, diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index 19a3c97..a40324a 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -65,6 +65,17 @@ using ProximalAlgorithms: @test x0 == x0_backup end + @testset "ForwardBackward (adaptive step, regret)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true, regret_gamma=R(1.01)) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 150 + @test x0 == x0_backup + end + @testset "FastForwardBackward (fixed step)" begin x0 = zeros(T, n) x0_backup = copy(x0) @@ -87,6 +98,17 @@ using ProximalAlgorithms: @test x0 == x0_backup end + @testset "FastForwardBackward (adaptive step, regret)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true, regret_gamma=R(1.01)) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 100 + @test x0 == x0_backup + end + @testset "FastForwardBackward (custom extrapolation)" begin x0 = zeros(T, n) x0_backup = copy(x0) diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 678eb45..e45931d 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -70,6 +70,24 @@ using ProximalAlgorithms @test it < 110 @test x0 == x0_backup end + + @testset "ForwardBackward (adaptive step)" begin + solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) + y, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 300 + @test x0 == x0_backup + end + + @testset "ForwardBackward (adaptive step, regret)" begin + solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true, regret_gamma=T(1.01)) + y, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 80 + @test x0 == x0_backup + end @testset "FastForwardBackward" begin solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) @@ -80,6 +98,24 @@ using ProximalAlgorithms @test x0 == x0_backup end + @testset "FastForwardBackward (adaptive step)" begin + solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) + y, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 100 + @test x0 == x0_backup + end + + @testset "FastForwardBackward (adaptive step, regret)" begin + solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true, regret_gamma=T(1.01)) + y, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 100 + @test x0 == x0_backup + end + @testset "FastForwardBackward (custom extrapolation)" begin solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) y, it = solver( diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 70f876f..4b4b78b 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -35,59 +35,81 @@ using LinearAlgebra TOL = R(1e-6) - # Nonfast/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ForwardBackward(tol 
= TOL, adaptive = true) - x, it = solver(x0 = x0, f = fA_autodiff, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= 1e-4 - @test it < 1100 - @test x0 == x0_backup - - # Fast/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = fA_autodiff, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= 1e-4 - @test it < 500 - @test x0 == x0_backup + @testset "ForwardBackward (adaptive step)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 1100 + @test x0 == x0_backup + end - # ZeroFPR/Adaptive + @testset "ForwardBackward (adaptive step, regret)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true, regret_gamma=R(1.01)) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 500 + @test x0 == x0_backup + end - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= 1e-4 - @test it < 25 - @test x0 == x0_backup + @testset "FastForwardBackward (adaptive step)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 500 + @test x0 == x0_backup + end - # PANOC/Adaptive + @testset "FastForwardBackward (adaptive step, regret)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true, regret_gamma=R(1.01)) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 200 + @test x0 == x0_backup + end - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= 1e-4 - @test it < 50 - @test x0 == x0_backup + @testset "ZeroFPR (adaptive step)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 25 + @test x0 == x0_backup + end - # PANOCplus/Adaptive + @testset "PANOC (adaptive step)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 50 + @test x0 == x0_backup + end - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= 1e-4 - @test it < 50 - @test x0 == x0_backup + @testset "PANOCplus (adaptive step)" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) + @test 
eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 50 + @test x0 == x0_backup + end end
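
For quick reference, here is a minimal usage sketch of the new keyword, in the spirit of the lasso tests above. The problem data, the regularization weight, and the use of `LeastSquares`/`NormL1` from ProximalOperators are illustrative assumptions and not part of this patch; `regret_gamma = 1.01` mirrors the value used in the tests.

```julia
using LinearAlgebra
using ProximalOperators: LeastSquares, NormL1
using ProximalAlgorithms

# Small illustrative lasso instance (data chosen arbitrarily).
A = [1.0 2.0; 3.0 4.0; 5.0 6.0]
b = [1.0, 2.0, 3.0]
f = LeastSquares(A, b)                 # smooth term: 0.5 * ||A x - b||^2
g = NormL1(0.1 * norm(A' * b, Inf))    # nonsmooth term: lambda * ||x||_1

# Adaptive stepsize with a regret factor: gamma is enlarged by regret_gamma
# at every iteration before the backtracking line search shrinks it if needed.
solver = ProximalAlgorithms.ForwardBackward(tol = 1e-6, adaptive = true, regret_gamma = 1.01)
x, it = solver(x0 = zeros(size(A, 2)), f = f, g = g)
```

As the description above notes, values of `regret_gamma` close to 1 appear to work best in practice.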
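To make the role of `regret_gamma` concrete, the following is a schematic sketch of an adaptive forward-backward stepsize update with a regret factor. It is not the package's `backtrack_stepsize!` routine; the sufficient-decrease test, the halving factor, and the helper names (`grad_f`, `prox_g`, `regret_backtrack`) are assumptions for illustration only.

```julia
using LinearAlgebra: dot, norm

# Schematic only: enlarge gamma by the regret factor, then backtrack until the
# usual quadratic upper-bound (sufficient-decrease) condition holds or the
# lower bound minimum_gamma is reached.
function regret_backtrack(f, grad_f, prox_g, x, gamma; regret_gamma = 1.0, minimum_gamma = 1e-7)
    gamma *= regret_gamma                    # "regret" step: try a larger stepsize first
    fx, gfx = f(x), grad_f(x)
    while true
        z = prox_g(x - gamma * gfx, gamma)   # forward-backward step with the current gamma
        if f(z) <= fx + dot(gfx, z - x) + norm(z - x)^2 / (2 * gamma) || gamma <= minimum_gamma
            return gamma, z
        end
        gamma /= 2                           # backtrack: shrink gamma and retry
    end
end
```

With `regret_gamma = 1.0` (the default) this reduces to plain backtracking, so existing behavior is unchanged.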