From aa218762610fbbac73bf29b9c3390b0d3244ccab Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 15 Nov 2021 21:10:03 +0100 Subject: [PATCH 1/8] init NOLIP, adjusted tests --- src/ProximalAlgorithms.jl | 1 + src/algorithms/nolip.jl | 236 ++++++++++++++++++++ test/problems/test_lasso_small.jl | 26 +++ test/problems/test_lasso_small_h_split.jl | 26 +++ test/problems/test_lasso_small_v_split.jl | 26 +++ test/problems/test_nonconvex_qp.jl | 20 ++ test/problems/test_sparse_logistic_small.jl | 11 + test/problems/test_verbose.jl | 22 ++ 8 files changed, 368 insertions(+) create mode 100644 src/algorithms/nolip.jl diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index ea42d89..251e9b1 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -32,5 +32,6 @@ include("algorithms/primal_dual.jl") include("algorithms/davis_yin.jl") include("algorithms/li_lin.jl") include("algorithms/fista.jl") +include("algorithms/nolip.jl") end # module diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl new file mode 100644 index 0000000..2424525 --- /dev/null +++ b/src/algorithms/nolip.jl @@ -0,0 +1,236 @@ +# Themelis, De Marchi, "A linesearch splitting algorithm for structured optimization +# without global smoothness" (2021). + +using Base.Iterators +using ProximalAlgorithms.IterationTools +using ProximalOperators: Zero +using LinearAlgebra +using Printf + +""" + NOLIPIteration(; ) + +Instantiate the NOLIP algorithm (see [1]) for solving optimization problems +of the form + + minimize f(Ax) + g(x), + +where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). + +# Arguments +- `x0`: initial point. +- `f=Zero()`: smooth objective term. +- `A=I`: linear operator (e.g. a matrix). +- `g=Zero()`: proximable objective term. +- `Lf=nothing`: Lipschitz constant of the gradient of x ↦ f(Ax). +- `gamma=nothing`: stepsize to use, defaults to `1/Lf` if not set (but `Lf` is). +- `adaptive=false`: forces the method stepsize to be adaptively adjusted. +- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. +- `max_backtracks=20`: maximum number of line-search backtracks. +- `H=LBFGS(x0, 5)`: variable metric to use to compute line-search directions. + +# References +- [1] Themelis, De Marchi, "A linesearch splitting algorithm for structured +optimization without global smoothness", (2021). +""" + +Base.@kwdef struct NOLIPIteration{R,C<:Union{R,Complex{R}},Tx<:AbstractArray{C},Tf,TA,Tg,TH} + f::Tf = Zero() + A::TA = I + g::Tg = Zero() + x0::Tx + alpha::R = real(eltype(x0))(0.95) + beta::R = real(eltype(x0))(0.5) + Lf::Maybe{R} = nothing + gamma::Maybe{R} = Lf === nothing ? 
nothing : (alpha / Lf) + adaptive::Bool = false + minimum_gamma::R = real(eltype(x0))(1e-7) + max_backtracks::Int = 20 + H::TH = LBFGS(x0, 5) +end + +Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() + +Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} + x::Tx # iterate + Ax::TAx # A times x + f_Ax::R # value of smooth term + grad_f_Ax::TAx # gradient of f at Ax + At_grad_f_Ax::Tx # gradient of smooth term + gamma::R # stepsize parameter of forward and backward steps + y::Tx # forward point + z::Tx # forward-backward point + g_z::R # value of nonsmooth term (at z) + res::Tx # fixed-point residual at iterate (= x - z) + H::TH # variable metric + tau::Maybe{R} = nothing + x_prev::Tx = zero(x) + res_prev::Tx = zero(x) + d::Tx = zero(x) + Ad::TAx = zero(Ax) + x_d::Tx = zero(x) + Ax_d::TAx = zero(Ax) + f_Ax_d::R = zero(real(eltype(x))) + grad_f_Ax_d::TAx = zero(Ax) + At_grad_f_Ax_d::Tx = zero(x) + z_curr::Tx = zero(x) +end + +f_model(iter::NOLIPIteration, state::NOLIPState) = + f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) + +function Base.iterate(iter::NOLIPIteration{R}) where {R} + x = copy(iter.x0) + Ax = iter.A * x + grad_f_Ax, f_Ax = gradient(iter.f, Ax) + + gamma = iter.gamma + if gamma === nothing + gamma = iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) + end + + At_grad_f_Ax = iter.A' * grad_f_Ax + y = x - gamma .* At_grad_f_Ax + z, g_z = prox(iter.g, y, gamma) + + state = NOLIPState( + x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, + gamma=gamma, y=y, z=z, g_z=g_z, res=x - z, H=iter.H, + ) + + return state, state +end + +function Base.iterate( + iter::NOLIPIteration{R}, + state::NOLIPState{R,Tx,TAx}, +) where {R,Tx,TAx} + Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing + a, b, c = nothing, nothing, nothing + + f_Az_upp = if (iter.gamma === nothing || iter.adaptive == true) + gamma_prev = state.gamma + state.gamma, state.g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( + state.gamma, iter.f, iter.A, iter.g, + state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, + alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, + ) + if state.gamma != gamma_prev + reset!(state.H) + end + f_Az_upp + else + f_model(iter, state) + end + + # compute FBE + FBE_x = f_Az_upp + state.g_z + + # compute direction + mul!(state.d, state.H, -state.res) + + # store iterate and residual for metric update later on + state.x_prev .= state.x + state.res_prev .= state.res + + # backtrack tau 1 → 0 + tau = R(1) + mul!(state.Ad, iter.A, state.d) + + state.x_d .= state.x .+ state.d + state.Ax_d .= state.Ax .+ state.Ad + state.f_Ax_d = gradient!(state.grad_f_Ax_d, iter.f, state.Ax_d) + mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) + + state.x .= state.x_d + state.Ax .= state.Ax_d + state.f_Ax = state.f_Ax_d + state.grad_f_Ax .= state.grad_f_Ax_d + state.At_grad_f_Ax .= state.At_grad_f_Ax_d + + copyto!(state.z_curr, state.z) + + sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha) + tol = 10 * eps(R) * (1 + abs(FBE_x)) + threshold = FBE_x - sigma * norm(state.res)^2 + tol + + for _ in 1:iter.max_backtracks + state.y .= state.x .- state.gamma .* state.At_grad_f_Ax + state.g_z = prox!(state.z, iter.g, state.y, state.gamma) + state.res .= state.x .- state.z + FBE_x_new = f_model(iter, state) + state.g_z + + if FBE_x_new <= threshold + # update metric + update!(state.H, state.x - state.x_prev, state.res - state.res_prev) + state.tau = tau + 
return state, state + end + + if Az === nothing + Az = iter.A * state.z_curr + end + + tau *= 0.5 + state.x .= tau .* state.x_d .+ (1 - tau) .* state.z_curr + state.Ax .= tau .* state.Ax_d .+ (1 - tau) .* Az + + if ProximalOperators.is_quadratic(iter.f) + # in case f is quadratic, we can compute its value and gradient + # along a line using interpolation and linear combinations + # this allows saving operations + if grad_f_Az === nothing + grad_f_Az, f_Az = gradient(iter.f, Az) + end + if At_grad_f_Az === nothing + At_grad_f_Az = iter.A' * grad_f_Az + c = f_Az + b = real(dot(state.Ax_d .- Az, grad_f_Az)) + a = state.f_Ax_d - b - c + end + state.f_Ax = a * tau^2 + b * tau + c + state.grad_f_Ax .= tau .* state.grad_f_Ax_d .+ (1 - tau) .* grad_f_Az + state.At_grad_f_Ax .= tau .* state.At_grad_f_Ax_d .+ (1 - tau) .* At_grad_f_Az + else + # otherwise, in the general case where f is only smooth, we compute + # one gradient and matvec per backtracking step + state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + end + end + + @warn "stepsize `tau` became too small ($(tau)), stopping the iterations" + return nothing +end + +# Solver + +struct NOLIP{R, K} + maxit::Int + tol::R + verbose::Bool + freq::Int + kwargs::K +end + +function (solver::NOLIP)(x0; kwargs...) + stop(state::NOLIPState) = norm(state.res, Inf) / state.gamma <= solver.tol + disp((it, state)) = @printf( + "%5d | %.3e | %.3e | %.3e\n", + it, + state.gamma, + norm(state.res, Inf) / state.gamma, + (state.tau === nothing ? 0.0 : state.tau) + ) + iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose + iter = tee(sample(iter, solver.freq), disp) + end + num_iters, state_final = loop(iter) + return state_final.z, num_iters +end + +NOLIP(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) 
= + NOLIP(maxit, tol, verbose, freq, kwargs) diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index 0a00219..623b9d8 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -130,6 +130,32 @@ using ProximalAlgorithms end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + @testset "DouglasRachford" begin # Douglas-Rachford diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl index 41a4c8e..44be73d 100644 --- a/test/problems/test_lasso_small_h_split.jl +++ b/test/problems/test_lasso_small_h_split.jl @@ -142,4 +142,30 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + end diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl index fdf28c9..55c0f2e 100644 --- a/test/problems/test_lasso_small_v_split.jl +++ b/test/problems/test_lasso_small_v_split.jl @@ -135,4 +135,30 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + end diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index fdc6aed..4f7085a 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -28,6 +28,16 @@ using Test @test x0 == x0_backup end + @testset "NOLIP" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP() + x, it = solver(x0, f = f, g = g) + z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) + @test norm(x - z, Inf) / gamma <= solver.tol + @test x0 == x0_backup + end + @testset "ZeroFPR" begin x0 = zeros(T, n) x0_backup = copy(x0) @@ -82,6 +92,16 @@ end @test x0 == x0_backup end + @testset "NOLIP" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, g = g) + z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) + @test norm(x - z, Inf) / gamma <= solver.tol + @test x0 == 
x0_backup + end + @testset "ZeroFPR" begin x0 = zeros(T, n) x0_backup = copy(x0) diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 12531cd..afd2d0d 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -68,4 +68,15 @@ @test it < 50 @test x0 == x0_backup + # NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 50 + @test x0 == x0_backup + end diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index abab201..27ffbbd 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -120,6 +120,28 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.NOLIP(tol = TOL, verbose = true) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL, verbose = true) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + end + @testset "DouglasRachford" begin # Douglas-Rachford From 654674104f527567d26f87c9c49cdb6fcf342add Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 15 Nov 2021 22:26:55 +0100 Subject: [PATCH 2/8] NOLIP functional --- src/algorithms/nolip.jl | 133 ++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 74 deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 2424525..dfe3f09 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -92,10 +92,20 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) + res = x - z + + if (iter.gamma === nothing || iter.adaptive == true) + gamma_prev = gamma + gamma, g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( + gamma, iter.f, iter.A, iter.g, + x, f_Ax, At_grad_f_Ax, y, z, g_z, res, + alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, + ) + end state = NOLIPState( x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, - gamma=gamma, y=y, z=z, g_z=g_z, res=x - z, H=iter.H, + gamma=gamma, y=y, z=z, g_z=g_z, res=res, H=iter.H, ) return state, state @@ -108,99 +118,74 @@ function Base.iterate( Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing a, b, c = nothing, nothing, nothing - f_Az_upp = if (iter.gamma === nothing || iter.adaptive == true) - gamma_prev = state.gamma - state.gamma, state.g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( - state.gamma, iter.f, iter.A, iter.g, - state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, - alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, - ) - if state.gamma != gamma_prev - reset!(state.H) - end - f_Az_upp - else - f_model(iter, state) - end - - # compute FBE - FBE_x = f_Az_upp + state.g_z - - # compute direction - mul!(state.d, state.H, -state.res) - # store iterate and residual for metric update later on state.x_prev .= state.x state.res_prev .= state.res - # backtrack tau 1 → 0 - tau = R(1) - mul!(state.Ad, iter.A, state.d) - - state.x_d .= state.x .+ state.d - state.Ax_d .= state.Ax .+ state.Ad - state.f_Ax_d = 
gradient!(state.grad_f_Ax_d, iter.f, state.Ax_d) - mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) - - state.x .= state.x_d - state.Ax .= state.Ax_d - state.f_Ax = state.f_Ax_d - state.grad_f_Ax .= state.grad_f_Ax_d - state.At_grad_f_Ax .= state.At_grad_f_Ax_d - - copyto!(state.z_curr, state.z) + # compute FBE + FBE_x = f_model(iter, state) + state.g_z sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha) tol = 10 * eps(R) * (1 + abs(FBE_x)) threshold = FBE_x - sigma * norm(state.res)^2 + tol - for _ in 1:iter.max_backtracks + tau_backtracks = 0 + can_update_direction = true + + while true + + if can_update_direction + # compute direction + mul!(state.d, state.H, -state.res) + # backtrack tau 1 → 0 + state.tau = R(1) + state.x .= state.x_prev .+ state.d + else + state.x .= (1 - state.tau) * (state.x_prev .- state.res_prev) + state.tau * (state.x_prev .+ state.d) + end + + mul!(state.Ax, iter.A, state.x) + state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + state.y .= state.x .- state.gamma .* state.At_grad_f_Ax state.g_z = prox!(state.z, iter.g, state.y, state.gamma) state.res .= state.x .- state.z - FBE_x_new = f_model(iter, state) + state.g_z - if FBE_x_new <= threshold + f_Az_upp = f_model(iter, state) + + if (iter.gamma === nothing || iter.adaptive == true) + Az = iter.A * state.z + grad_f_Az, f_Az = gradient(iter.f, Az) + tol = 10 * eps(R) * (1 + abs(f_Az)) + if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma + state.gamma /= 2 + if state.gamma < iter.minimum_gamma + @warn "stepsize `gamma` became too small ($(state.gamma))" + end + can_update_direction = true + reset!(state.H) + continue + end + end + + FBE_x = f_Az_upp + state.g_z + if FBE_x <= threshold # update metric update!(state.H, state.x - state.x_prev, state.res - state.res_prev) - state.tau = tau return state, state - end - - if Az === nothing - Az = iter.A * state.z_curr - end - - tau *= 0.5 - state.x .= tau .* state.x_d .+ (1 - tau) .* state.z_curr - state.Ax .= tau .* state.Ax_d .+ (1 - tau) .* Az - - if ProximalOperators.is_quadratic(iter.f) - # in case f is quadratic, we can compute its value and gradient - # along a line using interpolation and linear combinations - # this allows saving operations - if grad_f_Az === nothing - grad_f_Az, f_Az = gradient(iter.f, Az) - end - if At_grad_f_Az === nothing - At_grad_f_Az = iter.A' * grad_f_Az - c = f_Az - b = real(dot(state.Ax_d .- Az, grad_f_Az)) - a = state.f_Ax_d - b - c - end - state.f_Ax = a * tau^2 + b * tau + c - state.grad_f_Ax .= tau .* state.grad_f_Ax_d .+ (1 - tau) .* grad_f_Az - state.At_grad_f_Ax .= tau .* state.At_grad_f_Ax_d .+ (1 - tau) .* At_grad_f_Az else - # otherwise, in the general case where f is only smooth, we compute - # one gradient and matvec per backtracking step - state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) - mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + state.tau *= 0.5 + if tau_backtracks >= iter.max_backtracks + @warn "parameter `tau` became too small ($(state.tau)), stopping the iterations" + return nothing + end + tau_backtracks += 1 + can_update_direction = false end + end - @warn "stepsize `tau` became too small ($(tau)), stopping the iterations" - return nothing end # Solver From 4d5ce5128c8b6e2e1a2f3981bea57e20f9c2afe6 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Thu, 30 Dec 2021 08:57:57 +0100 Subject: [PATCH 3/8] updated some tests and readme --- README.md | 3 +++ test/problems/test_lasso_small.jl | 
14 +++++--------- test/problems/test_lasso_small_strongly_convex.jl | 9 +++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 265c918..2f25113 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`NOLIP`](src/algorithms/nolip.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) @@ -65,3 +66,5 @@ Contributions are welcome in the form of [issues notification](https://github.co [^themelis_2018]: Themelis, Stella, Patrinos, *Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms*, SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274–2303 (2018). [link](https://epubs.siam.org/doi/10.1137/16M1080240) [^themelis_2020]: Themelis, Stella, Patrinos, *Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type algorithms*, arXiv preprint (2020). [link](https://arxiv.org/abs/2005.10230) + +[^demarchi_2021]: De Marchi, Themelis, *Proximal gradient algorithms under local Lipschitz gradient continuity: a convergence and robustness analysis of PANOC*, arXiv preprint (2021). [link](https://arxiv.org/abs/2112.13000) diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index dedc2ef..c8a9aa1 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -124,30 +124,26 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP" begin - - # NOLIP/Nonadaptive - + @testset "NOLIP (fixed step)" begin x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.NOLIP(tol = TOL) - x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + x, it = @inferred solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup + end - ## NOLIP/Adaptive - + @testset "NOLIP (adaptive step)" begin x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) - x, it = solver(x0, f = f, A = A, g = g) + x, it = @inferred solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - end @testset "DouglasRachford" begin diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 1962fa3..9b4c2c8 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -69,4 +69,13 @@ using ProximalAlgorithms @test x0 == x0_backup end + @testset "NOLIP" begin + solver = ProximalAlgorithms.NOLIP(tol = TOL) + y, it = solver(x0, f = f, g = h, Lf = Lf) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 45 + @test x0 == x0_backup + end + end From e1b8f05fe2c9c4a8711cd6aa3cd980c11a74d7f8 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Thu, 30 Dec 2021 09:27:50 +0100 Subject: [PATCH 4/8] minor fix NOLIP --- src/algorithms/nolip.jl | 128 +++++++++--------- .../test_lasso_small_strongly_convex.jl | 9 ++ 2 files changed, 74 insertions(+), 63 
deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index dfe3f09..4b21f88 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -1,5 +1,6 @@ -# Themelis, De Marchi, "A linesearch splitting algorithm for structured optimization -# without global smoothness" (2021). +# De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz +# gradient continuity: a convergence and robustness analysis of PANOC", +# preprint on arXiv:2112.13000 (2021). using Base.Iterators using ProximalAlgorithms.IterationTools @@ -27,26 +28,27 @@ where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). - `adaptive=false`: forces the method stepsize to be adaptively adjusted. - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. - `max_backtracks=20`: maximum number of line-search backtracks. -- `H=LBFGS(x0, 5)`: variable metric to use to compute line-search directions. +- `directions=LBFGS(5)`: strategy to use to compute line-search directions. # References -- [1] Themelis, De Marchi, "A linesearch splitting algorithm for structured -optimization without global smoothness", (2021). +- [1] De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz +gradient continuity: a convergence and robustness analysis of PANOC", +preprint on arXiv:2112.13000 (2021). """ -Base.@kwdef struct NOLIPIteration{R,C<:Union{R,Complex{R}},Tx<:AbstractArray{C},Tf,TA,Tg,TH} +Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} f::Tf = Zero() A::TA = I g::Tg = Zero() x0::Tx alpha::R = real(eltype(x0))(0.95) beta::R = real(eltype(x0))(0.5) - Lf::Maybe{R} = nothing - gamma::Maybe{R} = Lf === nothing ? nothing : (alpha / Lf) - adaptive::Bool = false + Lf::TLf = nothing + gamma::Tgamma = Lf === nothing ? nothing : (alpha / Lf) + adaptive::Bool = gamma === nothing minimum_gamma::R = real(eltype(x0))(1e-7) max_backtracks::Int = 20 - H::TH = LBFGS(x0, 5) + directions::D = LBFGS(5) end Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() @@ -63,60 +65,61 @@ Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} g_z::R # value of nonsmooth term (at z) res::Tx # fixed-point residual at iterate (= x - z) H::TH # variable metric - tau::Maybe{R} = nothing - x_prev::Tx = zero(x) - res_prev::Tx = zero(x) - d::Tx = zero(x) - Ad::TAx = zero(Ax) - x_d::Tx = zero(x) - Ax_d::TAx = zero(Ax) + tau::R = zero(gamma) + x_prev::Tx = similar(x) + res_prev::Tx = similar(x) + d::Tx = similar(x) + Ad::TAx = similar(Ax) + x_d::Tx = similar(x) + Ax_d::TAx = similar(Ax) f_Ax_d::R = zero(real(eltype(x))) - grad_f_Ax_d::TAx = zero(Ax) - At_grad_f_Ax_d::Tx = zero(x) - z_curr::Tx = zero(x) + grad_f_Ax_d::TAx = similar(Ax) + At_grad_f_Ax_d::Tx = similar(x) + z_curr::Tx = similar(x) + Az::TAx = similar(Ax) + grad_f_Az::TAx = similar(Ax) + At_grad_f_Az::Tx = similar(x) end -f_model(iter::NOLIPIteration, state::NOLIPState) = - f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) +f_model(iter::NOLIPIteration, state::NOLIPState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) function Base.iterate(iter::NOLIPIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x grad_f_Ax, f_Ax = gradient(iter.f, Ax) - - gamma = iter.gamma - if gamma === nothing - gamma = iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) - end - + gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) - res = x - z - + state = NOLIPState( + x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, + gamma=gamma, y=y, z=z, g_z=g_z, res=x-z, H=initialize(iter.directions, x), + ) if (iter.gamma === nothing || iter.adaptive == true) - gamma_prev = gamma - gamma, g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( - gamma, iter.f, iter.A, iter.g, - x, f_Ax, At_grad_f_Ax, y, z, g_z, res, + state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!( + state.gamma, iter.f, iter.A, iter.g, + state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, + state.Az, state.grad_f_Az, alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, ) end - - state = NOLIPState( - x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, - gamma=gamma, y=y, z=z, g_z=g_z, res=res, H=iter.H, - ) - return state, state end -function Base.iterate( - iter::NOLIPIteration{R}, - state::NOLIPState{R,Tx,TAx}, -) where {R,Tx,TAx} - Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing - a, b, c = nothing, nothing, nothing +set_next_direction!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = mul!(state.d, state.H, -state.res) +set_next_direction!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = state.d .= .-state.res +set_next_direction!(iter::NOLIPIteration, state::NOLIPState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) + +update_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) +update_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return +update_direction_state!(iter::NOLIPIteration, state::NOLIPState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) + +reset_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = reset!(state.H) +reset_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return +reset_direction_state!(iter::NOLIPIteration, state::NOLIPState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) + +function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R + f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf) # store iterate and residual for metric update later on state.x_prev .= state.x @@ -136,7 +139,7 @@ function Base.iterate( if can_update_direction # compute direction - mul!(state.d, state.H, -state.res) + set_next_direction!(iter, state) # backtrack tau 1 → 0 state.tau = R(1) state.x .= state.x_prev .+ state.d @@ -155,34 +158,33 @@ function Base.iterate( f_Az_upp = f_model(iter, state) if (iter.gamma === nothing || iter.adaptive == true) - Az = iter.A * state.z - grad_f_Az, f_Az = gradient(iter.f, Az) + mul!(state.Az, iter.A, state.z) + f_Az = gradient!(state.grad_f_Az, iter.f, state.Az) tol = 10 * eps(R) * (1 + abs(f_Az)) if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma - state.gamma /= 2 + state.gamma *= 0.5 if state.gamma < iter.minimum_gamma @warn "stepsize `gamma` became too small ($(state.gamma))" end can_update_direction = true - reset!(state.H) + reset_direction_state!(iter, state) continue end end - FBE_x = f_Az_upp + state.g_z - if FBE_x <= threshold + FBE_x_new = f_Az_upp + state.g_z + if FBE_x_new <= threshold # update 
metric - update!(state.H, state.x - state.x_prev, state.res - state.res_prev) + update_direction_state!(iter, state) return state, state - else - state.tau *= 0.5 - if tau_backtracks >= iter.max_backtracks - @warn "parameter `tau` became too small ($(state.tau)), stopping the iterations" - return nothing - end - tau_backtracks += 1 - can_update_direction = false end + state.tau *= 0.5 + if tau_backtracks > iter.max_backtracks + @warn "stepsize `tau` became too small ($(state.tau)), stopping the iterations" + return nothing + end + tau_backtracks += 1 + can_update_direction = false end @@ -205,7 +207,7 @@ function (solver::NOLIP)(x0; kwargs...) it, state.gamma, norm(state.res, Inf) / state.gamma, - (state.tau === nothing ? 0.0 : state.tau) + state.tau, ) iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) iter = take(halt(iter, stop), solver.maxit) diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 9b4c2c8..c1a3700 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -69,6 +69,15 @@ using ProximalAlgorithms @test x0 == x0_backup end + @testset "PANOC" begin + solver = ProximalAlgorithms.PANOC(tol = TOL) + y, it = solver(x0, f = f, g = h, Lf = Lf) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 45 + @test x0 == x0_backup + end + @testset "NOLIP" begin solver = ProximalAlgorithms.NOLIP(tol = TOL) y, it = solver(x0, f = f, g = h, Lf = Lf) From 2ea872cbf05f267034e317844c08269ee9cef0d6 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 10:20:41 +0100 Subject: [PATCH 5/8] minor changes comments/warnings --- src/algorithms/nolip.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 4b21f88..58cfb0c 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -1,6 +1,6 @@ # De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz # gradient continuity: a convergence and robustness analysis of PANOC", -# preprint on arXiv:2112.13000 (2021). +# arXiv:2112.13000 (2021). using Base.Iterators using ProximalAlgorithms.IterationTools @@ -33,7 +33,7 @@ where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). # References - [1] De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz gradient continuity: a convergence and robustness analysis of PANOC", -preprint on arXiv:2112.13000 (2021). +arXiv:2112.13000 (2021). 
""" Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} @@ -180,7 +180,7 @@ function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R end state.tau *= 0.5 if tau_backtracks > iter.max_backtracks - @warn "stepsize `tau` became too small ($(state.tau)), stopping the iterations" + @warn "stepsize `tau` became too small ($(state.tau))" return nothing end tau_backtracks += 1 From 66d449cfe14a6e7f722a8822d819a36612147111 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 16:19:38 +0100 Subject: [PATCH 6/8] added equivalence test between PANOC and NOLIP --- test/problems/test_equivalence.jl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index 318095e..aa8a6ed 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -5,6 +5,7 @@ using ProximalOperators using ProximalAlgorithms: DouglasRachfordIteration, DRLSIteration, ForwardBackwardIteration, PANOCIteration, + NOLIPIteration, NoAcceleration @testset "DR/DRLS equivalence ($T)" for T in [Float32, Float64] @@ -62,3 +63,31 @@ end @test isapprox(fb_state.z, panoc_state.z) end end + +@testset "PANOC/NOLIP equivalence ($T)" for T in [Float32, Float64] + A = T[ + 1.0 -2.0 3.0 -4.0 5.0 + 2.0 -1.0 0.0 -1.0 3.0 + -1.0 0.0 4.0 -3.0 2.0 + -1.0 -1.0 -1.0 1.0 3.0 + ] + b = T[1.0, 2.0, 3.0, 4.0] + + m, n = size(A) + + R = real(T) + + lam = R(0.1) * norm(A' * b, Inf) + + f = LeastSquares(A, b) + g = NormL1(lam) + + x0 = zeros(R, n) + + panoc_iter = PANOCIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + nolip_iter = NOLIPIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + + for (panoc_state, nolip_state) in Iterators.take(zip(panoc_iter, nolip_iter), 10) + @test isapprox(panoc_state.z, nolip_state.z) + end +end From 4cb836b26ebe1dc9452f957524926101a17ea902 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 17:17:28 +0100 Subject: [PATCH 7/8] changed name NOLIP to PANOCplus --- README.md | 2 +- src/algorithms/nolip.jl | 48 +++++++++---------- test/problems/test_equivalence.jl | 10 ++-- test/problems/test_lasso_small.jl | 8 ++-- test/problems/test_lasso_small_h_split.jl | 10 ++-- .../test_lasso_small_strongly_convex.jl | 4 +- test/problems/test_lasso_small_v_split.jl | 10 ++-- test/problems/test_nonconvex_qp.jl | 8 ++-- test/problems/test_sparse_logistic_small.jl | 4 +- test/problems/test_verbose.jl | 10 ++-- 10 files changed, 57 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 2f25113..581cd46 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) -PANOC+ (L-BFGS)[^demarchi_2021] | [`NOLIP`](src/algorithms/nolip.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/nolip.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 58cfb0c..b1625f5 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -9,9 +9,9 @@ using LinearAlgebra using Printf """ - NOLIPIteration(; ) + PANOCplusIteration(; ) -Instantiate 
the NOLIP algorithm (see [1]) for solving optimization problems +Instantiate the PANOCplus algorithm (see [1]) for solving optimization problems of the form minimize f(Ax) + g(x), @@ -36,7 +36,7 @@ gradient continuity: a convergence and robustness analysis of PANOC", arXiv:2112.13000 (2021). """ -Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} +Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} f::Tf = Zero() A::TA = I g::Tg = Zero() @@ -51,9 +51,9 @@ Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} directions::D = LBFGS(5) end -Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() +Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite() -Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} +Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH} x::Tx # iterate Ax::TAx # A times x f_Ax::R # value of smooth term @@ -81,9 +81,9 @@ Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} At_grad_f_Az::Tx = similar(x) end -f_model(iter::NOLIPIteration, state::NOLIPState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) +f_model(iter::PANOCplusIteration, state::PANOCplusState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) -function Base.iterate(iter::NOLIPIteration{R}) where {R} +function Base.iterate(iter::PANOCplusIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x grad_f_Ax, f_Ax = gradient(iter.f, Ax) @@ -91,7 +91,7 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) - state = NOLIPState( + state = PANOCplusState( x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, gamma=gamma, y=y, z=z, g_z=g_z, res=x-z, H=initialize(iter.directions, x), ) @@ -106,19 +106,19 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} return state, state end -set_next_direction!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = mul!(state.d, state.H, -state.res) -set_next_direction!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = state.d .= .-state.res -set_next_direction!(iter::NOLIPIteration, state::NOLIPState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) +set_next_direction!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = mul!(state.d, state.H, -state.res) +set_next_direction!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = state.d .= .-state.res +set_next_direction!(iter::PANOCplusIteration, state::PANOCplusState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) -update_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) -update_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return -update_direction_state!(iter::NOLIPIteration, state::NOLIPState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) +update_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) +update_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return +update_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) -reset_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, 
state::NOLIPState) = reset!(state.H) -reset_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return -reset_direction_state!(iter::NOLIPIteration, state::NOLIPState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) +reset_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = reset!(state.H) +reset_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return +reset_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) -function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R +function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where R f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf) # store iterate and residual for metric update later on @@ -192,7 +192,7 @@ end # Solver -struct NOLIP{R, K} +struct PANOCplus{R, K} maxit::Int tol::R verbose::Bool @@ -200,8 +200,8 @@ struct NOLIP{R, K} kwargs::K end -function (solver::NOLIP)(x0; kwargs...) - stop(state::NOLIPState) = norm(state.res, Inf) / state.gamma <= solver.tol +function (solver::PANOCplus)(x0; kwargs...) + stop(state::PANOCplusState) = norm(state.res, Inf) / state.gamma <= solver.tol disp((it, state)) = @printf( "%5d | %.3e | %.3e | %.3e\n", it, @@ -209,7 +209,7 @@ function (solver::NOLIP)(x0; kwargs...) norm(state.res, Inf) / state.gamma, state.tau, ) - iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) + iter = PANOCplusIteration(; x0=x0, solver.kwargs..., kwargs...) iter = take(halt(iter, stop), solver.maxit) iter = enumerate(iter) if solver.verbose @@ -219,5 +219,5 @@ function (solver::NOLIP)(x0; kwargs...) return state_final.z, num_iters end -NOLIP(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) = - NOLIP(maxit, tol, verbose, freq, kwargs) +PANOCplus(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) 
= + PANOCplus(maxit, tol, verbose, freq, kwargs) diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index aa8a6ed..004d618 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -5,7 +5,7 @@ using ProximalOperators using ProximalAlgorithms: DouglasRachfordIteration, DRLSIteration, ForwardBackwardIteration, PANOCIteration, - NOLIPIteration, + PANOCplusIteration, NoAcceleration @testset "DR/DRLS equivalence ($T)" for T in [Float32, Float64] @@ -64,7 +64,7 @@ end end end -@testset "PANOC/NOLIP equivalence ($T)" for T in [Float32, Float64] +@testset "PANOC/PANOCplus equivalence ($T)" for T in [Float32, Float64] A = T[ 1.0 -2.0 3.0 -4.0 5.0 2.0 -1.0 0.0 -1.0 3.0 @@ -85,9 +85,9 @@ end x0 = zeros(R, n) panoc_iter = PANOCIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) - nolip_iter = NOLIPIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + panocplus_iter = PANOCplusIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) - for (panoc_state, nolip_state) in Iterators.take(zip(panoc_iter, nolip_iter), 10) - @test isapprox(panoc_state.z, nolip_state.z) + for (panoc_state, panocplus_state) in Iterators.take(zip(panoc_iter, panocplus_iter), 10) + @test isapprox(panoc_state.z, panocplus_state.z) end end diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index c8a9aa1..311094a 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -124,10 +124,10 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP (fixed step)" begin + @testset "PANOCplus (fixed step)" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = @inferred solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @@ -135,10 +135,10 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP (adaptive step)" begin + @testset "PANOCplus (adaptive step)" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = @inferred solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl index 44be73d..8fc179a 100644 --- a/test/problems/test_lasso_small_h_split.jl +++ b/test/problems/test_lasso_small_h_split.jl @@ -142,24 +142,24 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index c1a3700..6de93a2 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ 
b/test/problems/test_lasso_small_strongly_convex.jl @@ -78,8 +78,8 @@ using ProximalAlgorithms @test x0 == x0_backup end - @testset "NOLIP" begin - solver = ProximalAlgorithms.NOLIP(tol = TOL) + @testset "PANOCplus" begin + solver = ProximalAlgorithms.PANOCplus(tol = TOL) y, it = solver(x0, f = f, g = h, Lf = Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl index 55c0f2e..5569adc 100644 --- a/test/problems/test_lasso_small_v_split.jl +++ b/test/problems/test_lasso_small_v_split.jl @@ -135,24 +135,24 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index 4f7085a..13ac2b5 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -28,10 +28,10 @@ using Test @test x0 == x0_backup end - @testset "NOLIP" begin + @testset "PANOCplus" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP() + solver = ProximalAlgorithms.PANOCplus() x, it = solver(x0, f = f, g = g) z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) @test norm(x - z, Inf) / gamma <= solver.tol @@ -92,10 +92,10 @@ end @test x0 == x0_backup end - @testset "NOLIP" begin + @testset "PANOCplus" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, g = g) z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) @test norm(x - z, Inf) / gamma <= solver.tol diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index afd2d0d..976f6d3 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -68,11 +68,11 @@ @test it < 50 @test x0 == x0_backup - # NOLIP/Adaptive + # PANOCplus/Adaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index 27ffbbd..46f2b9f 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -120,21 +120,21 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = zeros(T, n) - solver = ProximalAlgorithms.NOLIP(tol = TOL, verbose = true) + solver = ProximalAlgorithms.PANOCplus(tol = TOL, verbose = true) x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = zeros(T, n) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL, verbose 
= true) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, verbose = true) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL From 765f12df615d02d1a4368bf694aedc3c4ba77d3c Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 17:19:12 +0100 Subject: [PATCH 8/8] changed filename NOLIP to PANOCplus --- README.md | 2 +- src/ProximalAlgorithms.jl | 2 +- src/algorithms/{nolip.jl => panocplus.jl} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/algorithms/{nolip.jl => panocplus.jl} (100%) diff --git a/README.md b/README.md index 581cd46..7b40464 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) -PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/nolip.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/panocplus.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index a017396..03647ec 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -33,6 +33,6 @@ include("algorithms/primal_dual.jl") include("algorithms/davis_yin.jl") include("algorithms/li_lin.jl") include("algorithms/fista.jl") -include("algorithms/nolip.jl") +include("algorithms/panocplus.jl") end # module diff --git a/src/algorithms/nolip.jl b/src/algorithms/panocplus.jl similarity index 100% rename from src/algorithms/nolip.jl rename to src/algorithms/panocplus.jl
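
A minimal usage sketch of the PANOCplus solver introduced by this series, assembled from the calls exercised in the tests above (ProximalAlgorithms.PANOCplus with the f, g, Lf, adaptive, and tol keywords); the problem data, sizes, and tolerance below are illustrative placeholders, not values taken from the test suite:

    using LinearAlgebra
    using ProximalOperators
    using ProximalAlgorithms

    # Small lasso instance: minimize 0.5*||Ax - b||^2 + lam*||x||_1.
    A = [1.0 -2.0 3.0; 2.0 -1.0 0.0; -1.0 0.0 4.0; -1.0 -1.0 -1.0]
    b = [1.0, 2.0, 3.0, 4.0]
    lam = 0.1 * norm(A' * b, Inf)

    f = LeastSquares(A, b)  # smooth term; A is folded into f here, so the solver's A defaults to I
    g = NormL1(lam)         # proximable term

    x0 = zeros(size(A, 2))

    # Fixed stepsize: pass the Lipschitz constant of the gradient of f, here opnorm(A)^2.
    solver = ProximalAlgorithms.PANOCplus(tol = 1e-8)
    x, it = solver(x0, f = f, g = g, Lf = opnorm(A)^2)

    # Adaptive stepsize: omit Lf and let the backtracking line search pick gamma.
    solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = 1e-8)
    x, it = solver(x0, f = f, g = g)

Either call returns the proximal point of the final iterate together with the iteration count, matching `return state_final.z, num_iters` in the solver definition above.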