Skip to content

Commit

Permalink
Add implementation of PANOC+ (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
aldma authored Jan 5, 2022
1 parent de2260e commit 41cb172
Show file tree
Hide file tree
Showing 11 changed files with 401 additions and 0 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu
Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl)
Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl)
PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl)
PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/panocplus.jl)
ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl)
Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl)

Expand Down Expand Up @@ -65,3 +66,5 @@ Contributions are welcome in the form of [issues notification](https://github.co
[^themelis_2018]: Themelis, Stella, Patrinos, *Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms*, SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274–2303 (2018). [link](https://epubs.siam.org/doi/10.1137/16M1080240)

[^themelis_2020]: Themelis, Stella, Patrinos, *Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type algorithms*, arXiv preprint (2020). [link](https://arxiv.org/abs/2005.10230)

[^demarchi_2021]: De Marchi, Themelis, *Proximal gradient algorithms under local Lipschitz gradient continuity: a convergence and robustness analysis of PANOC*, arXiv preprint (2021). [link](https://arxiv.org/abs/2112.13000)
1 change: 1 addition & 0 deletions src/ProximalAlgorithms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ include("algorithms/primal_dual.jl")
include("algorithms/davis_yin.jl")
include("algorithms/li_lin.jl")
include("algorithms/fista.jl")
include("algorithms/panocplus.jl")

end # module
223 changes: 223 additions & 0 deletions src/algorithms/panocplus.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
# De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz
# gradient continuity: a convergence and robustness analysis of PANOC",
# arXiv:2112.13000 (2021).

using Base.Iterators
using ProximalAlgorithms.IterationTools
using ProximalOperators: Zero
using LinearAlgebra
using Printf

"""
PANOCplusIteration(; <keyword-arguments>)
Instantiate the PANOCplus algorithm (see [1]) for solving optimization problems
of the form
minimize f(Ax) + g(x),
where `f` is locally smooth and `A` is a linear mapping (for example, a matrix).
# Arguments
- `x0`: initial point.
- `f=Zero()`: smooth objective term.
- `A=I`: linear operator (e.g. a matrix).
- `g=Zero()`: proximable objective term.
- `Lf=nothing`: Lipschitz constant of the gradient of x ↦ f(Ax).
- `gamma=nothing`: stepsize to use, defaults to `1/Lf` if not set (but `Lf` is).
- `adaptive=false`: forces the method stepsize to be adaptively adjusted.
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
# References
- [1] De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz
gradient continuity: a convergence and robustness analysis of PANOC",
arXiv:2112.13000 (2021).
"""

Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
f::Tf = Zero()
A::TA = I
g::Tg = Zero()
x0::Tx
alpha::R = real(eltype(x0))(0.95)
beta::R = real(eltype(x0))(0.5)
Lf::TLf = nothing
gamma::Tgamma = Lf === nothing ? nothing : (alpha / Lf)
adaptive::Bool = gamma === nothing
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
end

Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite()

Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH}
x::Tx # iterate
Ax::TAx # A times x
f_Ax::R # value of smooth term
grad_f_Ax::TAx # gradient of f at Ax
At_grad_f_Ax::Tx # gradient of smooth term
gamma::R # stepsize parameter of forward and backward steps
y::Tx # forward point
z::Tx # forward-backward point
g_z::R # value of nonsmooth term (at z)
res::Tx # fixed-point residual at iterate (= x - z)
H::TH # variable metric
tau::R = zero(gamma)
x_prev::Tx = similar(x)
res_prev::Tx = similar(x)
d::Tx = similar(x)
Ad::TAx = similar(Ax)
x_d::Tx = similar(x)
Ax_d::TAx = similar(Ax)
f_Ax_d::R = zero(real(eltype(x)))
grad_f_Ax_d::TAx = similar(Ax)
At_grad_f_Ax_d::Tx = similar(x)
z_curr::Tx = similar(x)
Az::TAx = similar(Ax)
grad_f_Az::TAx = similar(Ax)
At_grad_f_Az::Tx = similar(x)
end

f_model(iter::PANOCplusIteration, state::PANOCplusState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma)

function Base.iterate(iter::PANOCplusIteration{R}) where {R}
x = copy(iter.x0)
Ax = iter.A * x
grad_f_Ax, f_Ax = gradient(iter.f, Ax)
gamma = iter.gamma === nothing ? iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma
At_grad_f_Ax = iter.A' * grad_f_Ax
y = x - gamma .* At_grad_f_Ax
z, g_z = prox(iter.g, y, gamma)
state = PANOCplusState(
x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax,
gamma=gamma, y=y, z=z, g_z=g_z, res=x-z, H=initialize(iter.directions, x),
)
if (iter.gamma === nothing || iter.adaptive == true)
state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!(
state.gamma, iter.f, iter.A, iter.g,
state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res,
state.Az, state.grad_f_Az,
alpha = iter.alpha, minimum_gamma = iter.minimum_gamma,
)
end
return state, state
end

set_next_direction!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = mul!(state.d, state.H, -state.res)
set_next_direction!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = state.d .= .-state.res
set_next_direction!(iter::PANOCplusIteration, state::PANOCplusState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state)

update_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev)
update_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return
update_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

reset_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = reset!(state.H)
reset_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return
reset_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where R
f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf)

# store iterate and residual for metric update later on
state.x_prev .= state.x
state.res_prev .= state.res

# compute FBE
FBE_x = f_model(iter, state) + state.g_z

sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol

tau_backtracks = 0
can_update_direction = true

while true

if can_update_direction
# compute direction
set_next_direction!(iter, state)
# backtrack tau 1 → 0
state.tau = R(1)
state.x .= state.x_prev .+ state.d
else
state.x .= (1 - state.tau) * (state.x_prev .- state.res_prev) + state.tau * (state.x_prev .+ state.d)
end

mul!(state.Ax, iter.A, state.x)
state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax)
mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax)

state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
state.res .= state.x .- state.z

f_Az_upp = f_model(iter, state)

if (iter.gamma === nothing || iter.adaptive == true)
mul!(state.Az, iter.A, state.z)
f_Az = gradient!(state.grad_f_Az, iter.f, state.Az)
tol = 10 * eps(R) * (1 + abs(f_Az))
if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma
state.gamma *= 0.5
if state.gamma < iter.minimum_gamma
@warn "stepsize `gamma` became too small ($(state.gamma))"
end
can_update_direction = true
reset_direction_state!(iter, state)
continue
end
end

FBE_x_new = f_Az_upp + state.g_z
if FBE_x_new <= threshold
# update metric
update_direction_state!(iter, state)
return state, state
end
state.tau *= 0.5
if tau_backtracks > iter.max_backtracks
@warn "stepsize `tau` became too small ($(state.tau))"
return nothing
end
tau_backtracks += 1
can_update_direction = false

end

end

# Solver

struct PANOCplus{R, K}
maxit::Int
tol::R
verbose::Bool
freq::Int
kwargs::K
end

function (solver::PANOCplus)(x0; kwargs...)
stop(state::PANOCplusState) = norm(state.res, Inf) / state.gamma <= solver.tol
disp((it, state)) = @printf(
"%5d | %.3e | %.3e | %.3e\n",
it,
state.gamma,
norm(state.res, Inf) / state.gamma,
state.tau,
)
iter = PANOCplusIteration(; x0=x0, solver.kwargs..., kwargs...)
iter = take(halt(iter, stop), solver.maxit)
iter = enumerate(iter)
if solver.verbose
iter = tee(sample(iter, solver.freq), disp)
end
num_iters, state_final = loop(iter)
return state_final.z, num_iters
end

PANOCplus(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) =
PANOCplus(maxit, tol, verbose, freq, kwargs)
29 changes: 29 additions & 0 deletions test/problems/test_equivalence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using ProximalOperators
using ProximalAlgorithms:
DouglasRachfordIteration, DRLSIteration,
ForwardBackwardIteration, PANOCIteration,
PANOCplusIteration,
NoAcceleration

@testset "DR/DRLS equivalence ($T)" for T in [Float32, Float64]
Expand Down Expand Up @@ -62,3 +63,31 @@ end
@test isapprox(fb_state.z, panoc_state.z)
end
end

@testset "PANOC/PANOCplus equivalence ($T)" for T in [Float32, Float64]
A = T[
1.0 -2.0 3.0 -4.0 5.0
2.0 -1.0 0.0 -1.0 3.0
-1.0 0.0 4.0 -3.0 2.0
-1.0 -1.0 -1.0 1.0 3.0
]
b = T[1.0, 2.0, 3.0, 4.0]

m, n = size(A)

R = real(T)

lam = R(0.1) * norm(A' * b, Inf)

f = LeastSquares(A, b)
g = NormL1(lam)

x0 = zeros(R, n)

panoc_iter = PANOCIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2)
panocplus_iter = PANOCplusIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2)

for (panoc_state, panocplus_state) in Iterators.take(zip(panoc_iter, panocplus_iter), 10)
@test isapprox(panoc_state.z, panocplus_state.z)
end
end
22 changes: 22 additions & 0 deletions test/problems/test_lasso_small.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,28 @@ using ProximalAlgorithms:
@test x0 == x0_backup
end

@testset "PANOCplus (fixed step)" begin
x0 = zeros(T, n)
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(tol = TOL)
x, it = @inferred solver(x0, f = f, A = A, g = g, Lf = Lf)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup
end

@testset "PANOCplus (adaptive step)" begin
x0 = zeros(T, n)
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL)
x, it = @inferred solver(x0, f = f, A = A, g = g)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup
end

@testset "DouglasRachford" begin
x0 = zeros(T, n)
x0_backup = copy(x0)
Expand Down
26 changes: 26 additions & 0 deletions test/problems/test_lasso_small_h_split.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,30 @@

end

@testset "PANOCplus" begin

# PANOCplus/Nonadaptive

x0 = ArrayPartition(zeros(T, n1), zeros(T, n2))
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(tol = TOL)
x, it = solver(x0, f = f, A = A, g = g, Lf = Lf)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup

## PANOCplus/Adaptive

x0 = ArrayPartition(zeros(T, n1), zeros(T, n2))
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL)
x, it = solver(x0, f = f, A = A, g = g)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup

end

end
18 changes: 18 additions & 0 deletions test/problems/test_lasso_small_strongly_convex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,22 @@ using ProximalAlgorithms
@test x0 == x0_backup
end

@testset "PANOC" begin
solver = ProximalAlgorithms.PANOC(tol = TOL)
y, it = solver(x0, f = f, g = h, Lf = Lf)
@test eltype(y) == T
@test norm(y - x_star, Inf) <= TOL
@test it < 45
@test x0 == x0_backup
end

@testset "PANOCplus" begin
solver = ProximalAlgorithms.PANOCplus(tol = TOL)
y, it = solver(x0, f = f, g = h, Lf = Lf)
@test eltype(y) == T
@test norm(y - x_star, Inf) <= TOL
@test it < 45
@test x0 == x0_backup
end

end
26 changes: 26 additions & 0 deletions test/problems/test_lasso_small_v_split.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,30 @@

end

@testset "PANOCplus" begin

# PANOCplus/Nonadaptive

x0 = zeros(T, n)
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(tol = TOL)
x, it = solver(x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup

## PANOCplus/Adaptive

x0 = zeros(T, n)
x0_backup = copy(x0)
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL)
x, it = solver(x0, f = f, A = A, g = g)
@test eltype(x) == T
@test norm(x - x_star, Inf) <= TOL
@test it < 20
@test x0 == x0_backup

end

end
Loading

0 comments on commit 41cb172

Please sign in to comment.