From aa218762610fbbac73bf29b9c3390b0d3244ccab Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 15 Nov 2021 21:10:03 +0100 Subject: [PATCH 1/8] init NOLIP, adjusted tests --- src/ProximalAlgorithms.jl | 1 + src/algorithms/nolip.jl | 236 ++++++++++++++++++++ test/problems/test_lasso_small.jl | 26 +++ test/problems/test_lasso_small_h_split.jl | 26 +++ test/problems/test_lasso_small_v_split.jl | 26 +++ test/problems/test_nonconvex_qp.jl | 20 ++ test/problems/test_sparse_logistic_small.jl | 11 + test/problems/test_verbose.jl | 22 ++ 8 files changed, 368 insertions(+) create mode 100644 src/algorithms/nolip.jl diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index ea42d89..251e9b1 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -32,5 +32,6 @@ include("algorithms/primal_dual.jl") include("algorithms/davis_yin.jl") include("algorithms/li_lin.jl") include("algorithms/fista.jl") +include("algorithms/nolip.jl") end # module diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl new file mode 100644 index 0000000..2424525 --- /dev/null +++ b/src/algorithms/nolip.jl @@ -0,0 +1,236 @@ +# Themelis, De Marchi, "A linesearch splitting algorithm for structured optimization +# without global smoothness" (2021). + +using Base.Iterators +using ProximalAlgorithms.IterationTools +using ProximalOperators: Zero +using LinearAlgebra +using Printf + +""" + NOLIPIteration(; ) + +Instantiate the NOLIP algorithm (see [1]) for solving optimization problems +of the form + + minimize f(Ax) + g(x), + +where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). + +# Arguments +- `x0`: initial point. +- `f=Zero()`: smooth objective term. +- `A=I`: linear operator (e.g. a matrix). +- `g=Zero()`: proximable objective term. +- `Lf=nothing`: Lipschitz constant of the gradient of x ↦ f(Ax). +- `gamma=nothing`: stepsize to use, defaults to `1/Lf` if not set (but `Lf` is). +- `adaptive=false`: forces the method stepsize to be adaptively adjusted. +- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. +- `max_backtracks=20`: maximum number of line-search backtracks. +- `H=LBFGS(x0, 5)`: variable metric to use to compute line-search directions. + +# References +- [1] Themelis, De Marchi, "A linesearch splitting algorithm for structured +optimization without global smoothness", (2021). +""" + +Base.@kwdef struct NOLIPIteration{R,C<:Union{R,Complex{R}},Tx<:AbstractArray{C},Tf,TA,Tg,TH} + f::Tf = Zero() + A::TA = I + g::Tg = Zero() + x0::Tx + alpha::R = real(eltype(x0))(0.95) + beta::R = real(eltype(x0))(0.5) + Lf::Maybe{R} = nothing + gamma::Maybe{R} = Lf === nothing ? 
nothing : (alpha / Lf) + adaptive::Bool = false + minimum_gamma::R = real(eltype(x0))(1e-7) + max_backtracks::Int = 20 + H::TH = LBFGS(x0, 5) +end + +Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() + +Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} + x::Tx # iterate + Ax::TAx # A times x + f_Ax::R # value of smooth term + grad_f_Ax::TAx # gradient of f at Ax + At_grad_f_Ax::Tx # gradient of smooth term + gamma::R # stepsize parameter of forward and backward steps + y::Tx # forward point + z::Tx # forward-backward point + g_z::R # value of nonsmooth term (at z) + res::Tx # fixed-point residual at iterate (= x - z) + H::TH # variable metric + tau::Maybe{R} = nothing + x_prev::Tx = zero(x) + res_prev::Tx = zero(x) + d::Tx = zero(x) + Ad::TAx = zero(Ax) + x_d::Tx = zero(x) + Ax_d::TAx = zero(Ax) + f_Ax_d::R = zero(real(eltype(x))) + grad_f_Ax_d::TAx = zero(Ax) + At_grad_f_Ax_d::Tx = zero(x) + z_curr::Tx = zero(x) +end + +f_model(iter::NOLIPIteration, state::NOLIPState) = + f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) + +function Base.iterate(iter::NOLIPIteration{R}) where {R} + x = copy(iter.x0) + Ax = iter.A * x + grad_f_Ax, f_Ax = gradient(iter.f, Ax) + + gamma = iter.gamma + if gamma === nothing + gamma = iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) + end + + At_grad_f_Ax = iter.A' * grad_f_Ax + y = x - gamma .* At_grad_f_Ax + z, g_z = prox(iter.g, y, gamma) + + state = NOLIPState( + x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, + gamma=gamma, y=y, z=z, g_z=g_z, res=x - z, H=iter.H, + ) + + return state, state +end + +function Base.iterate( + iter::NOLIPIteration{R}, + state::NOLIPState{R,Tx,TAx}, +) where {R,Tx,TAx} + Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing + a, b, c = nothing, nothing, nothing + + f_Az_upp = if (iter.gamma === nothing || iter.adaptive == true) + gamma_prev = state.gamma + state.gamma, state.g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( + state.gamma, iter.f, iter.A, iter.g, + state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, + alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, + ) + if state.gamma != gamma_prev + reset!(state.H) + end + f_Az_upp + else + f_model(iter, state) + end + + # compute FBE + FBE_x = f_Az_upp + state.g_z + + # compute direction + mul!(state.d, state.H, -state.res) + + # store iterate and residual for metric update later on + state.x_prev .= state.x + state.res_prev .= state.res + + # backtrack tau 1 → 0 + tau = R(1) + mul!(state.Ad, iter.A, state.d) + + state.x_d .= state.x .+ state.d + state.Ax_d .= state.Ax .+ state.Ad + state.f_Ax_d = gradient!(state.grad_f_Ax_d, iter.f, state.Ax_d) + mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) + + state.x .= state.x_d + state.Ax .= state.Ax_d + state.f_Ax = state.f_Ax_d + state.grad_f_Ax .= state.grad_f_Ax_d + state.At_grad_f_Ax .= state.At_grad_f_Ax_d + + copyto!(state.z_curr, state.z) + + sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha) + tol = 10 * eps(R) * (1 + abs(FBE_x)) + threshold = FBE_x - sigma * norm(state.res)^2 + tol + + for _ in 1:iter.max_backtracks + state.y .= state.x .- state.gamma .* state.At_grad_f_Ax + state.g_z = prox!(state.z, iter.g, state.y, state.gamma) + state.res .= state.x .- state.z + FBE_x_new = f_model(iter, state) + state.g_z + + if FBE_x_new <= threshold + # update metric + update!(state.H, state.x - state.x_prev, state.res - state.res_prev) + state.tau = tau + 
return state, state + end + + if Az === nothing + Az = iter.A * state.z_curr + end + + tau *= 0.5 + state.x .= tau .* state.x_d .+ (1 - tau) .* state.z_curr + state.Ax .= tau .* state.Ax_d .+ (1 - tau) .* Az + + if ProximalOperators.is_quadratic(iter.f) + # in case f is quadratic, we can compute its value and gradient + # along a line using interpolation and linear combinations + # this allows saving operations + if grad_f_Az === nothing + grad_f_Az, f_Az = gradient(iter.f, Az) + end + if At_grad_f_Az === nothing + At_grad_f_Az = iter.A' * grad_f_Az + c = f_Az + b = real(dot(state.Ax_d .- Az, grad_f_Az)) + a = state.f_Ax_d - b - c + end + state.f_Ax = a * tau^2 + b * tau + c + state.grad_f_Ax .= tau .* state.grad_f_Ax_d .+ (1 - tau) .* grad_f_Az + state.At_grad_f_Ax .= tau .* state.At_grad_f_Ax_d .+ (1 - tau) .* At_grad_f_Az + else + # otherwise, in the general case where f is only smooth, we compute + # one gradient and matvec per backtracking step + state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + end + end + + @warn "stepsize `tau` became too small ($(tau)), stopping the iterations" + return nothing +end + +# Solver + +struct NOLIP{R, K} + maxit::Int + tol::R + verbose::Bool + freq::Int + kwargs::K +end + +function (solver::NOLIP)(x0; kwargs...) + stop(state::NOLIPState) = norm(state.res, Inf) / state.gamma <= solver.tol + disp((it, state)) = @printf( + "%5d | %.3e | %.3e | %.3e\n", + it, + state.gamma, + norm(state.res, Inf) / state.gamma, + (state.tau === nothing ? 0.0 : state.tau) + ) + iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose + iter = tee(sample(iter, solver.freq), disp) + end + num_iters, state_final = loop(iter) + return state_final.z, num_iters +end + +NOLIP(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) 
= + NOLIP(maxit, tol, verbose, freq, kwargs) diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index 0a00219..623b9d8 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -130,6 +130,32 @@ using ProximalAlgorithms end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + @testset "DouglasRachford" begin # Douglas-Rachford diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl index 41a4c8e..44be73d 100644 --- a/test/problems/test_lasso_small_h_split.jl +++ b/test/problems/test_lasso_small_h_split.jl @@ -142,4 +142,30 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + end diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl index fdf28c9..55c0f2e 100644 --- a/test/problems/test_lasso_small_v_split.jl +++ b/test/problems/test_lasso_small_v_split.jl @@ -135,4 +135,30 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + @test x0 == x0_backup + + end + end diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index fdc6aed..4f7085a 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -28,6 +28,16 @@ using Test @test x0 == x0_backup end + @testset "NOLIP" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP() + x, it = solver(x0, f = f, g = g) + z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) + @test norm(x - z, Inf) / gamma <= solver.tol + @test x0 == x0_backup + end + @testset "ZeroFPR" begin x0 = zeros(T, n) x0_backup = copy(x0) @@ -82,6 +92,16 @@ end @test x0 == x0_backup end + @testset "NOLIP" begin + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(tol = TOL) + x, it = solver(x0, f = f, g = g) + z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) + @test norm(x - z, Inf) / gamma <= solver.tol + @test x0 == 
x0_backup + end + @testset "ZeroFPR" begin x0 = zeros(T, n) x0_backup = copy(x0) diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 12531cd..afd2d0d 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -68,4 +68,15 @@ @test it < 50 @test x0 == x0_backup + # NOLIP/Adaptive + + x0 = zeros(T, n) + x0_backup = copy(x0) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= 1e-4 + @test it < 50 + @test x0 == x0_backup + end diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index abab201..27ffbbd 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -120,6 +120,28 @@ end + @testset "NOLIP" begin + + # NOLIP/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.NOLIP(tol = TOL, verbose = true) + x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + ## NOLIP/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL, verbose = true) + x, it = solver(x0, f = f, A = A, g = g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + end + @testset "DouglasRachford" begin # Douglas-Rachford From 654674104f527567d26f87c9c49cdb6fcf342add Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 15 Nov 2021 22:26:55 +0100 Subject: [PATCH 2/8] NOLIP functional --- src/algorithms/nolip.jl | 133 ++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 74 deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 2424525..dfe3f09 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -92,10 +92,20 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) + res = x - z + + if (iter.gamma === nothing || iter.adaptive == true) + gamma_prev = gamma + gamma, g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( + gamma, iter.f, iter.A, iter.g, + x, f_Ax, At_grad_f_Ax, y, z, g_z, res, + alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, + ) + end state = NOLIPState( x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, - gamma=gamma, y=y, z=z, g_z=g_z, res=x - z, H=iter.H, + gamma=gamma, y=y, z=z, g_z=g_z, res=res, H=iter.H, ) return state, state @@ -108,99 +118,74 @@ function Base.iterate( Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing a, b, c = nothing, nothing, nothing - f_Az_upp = if (iter.gamma === nothing || iter.adaptive == true) - gamma_prev = state.gamma - state.gamma, state.g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( - state.gamma, iter.f, iter.A, iter.g, - state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, - alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, - ) - if state.gamma != gamma_prev - reset!(state.H) - end - f_Az_upp - else - f_model(iter, state) - end - - # compute FBE - FBE_x = f_Az_upp + state.g_z - - # compute direction - mul!(state.d, state.H, -state.res) - # store iterate and residual for metric update later on state.x_prev .= state.x state.res_prev .= state.res - # backtrack tau 1 → 0 - tau = R(1) - mul!(state.Ad, iter.A, state.d) - - state.x_d .= state.x .+ state.d - state.Ax_d .= state.Ax .+ state.Ad - state.f_Ax_d = 
gradient!(state.grad_f_Ax_d, iter.f, state.Ax_d) - mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) - - state.x .= state.x_d - state.Ax .= state.Ax_d - state.f_Ax = state.f_Ax_d - state.grad_f_Ax .= state.grad_f_Ax_d - state.At_grad_f_Ax .= state.At_grad_f_Ax_d - - copyto!(state.z_curr, state.z) + # compute FBE + FBE_x = f_model(iter, state) + state.g_z sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha) tol = 10 * eps(R) * (1 + abs(FBE_x)) threshold = FBE_x - sigma * norm(state.res)^2 + tol - for _ in 1:iter.max_backtracks + tau_backtracks = 0 + can_update_direction = true + + while true + + if can_update_direction + # compute direction + mul!(state.d, state.H, -state.res) + # backtrack tau 1 → 0 + state.tau = R(1) + state.x .= state.x_prev .+ state.d + else + state.x .= (1 - state.tau) * (state.x_prev .- state.res_prev) + state.tau * (state.x_prev .+ state.d) + end + + mul!(state.Ax, iter.A, state.x) + state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + state.y .= state.x .- state.gamma .* state.At_grad_f_Ax state.g_z = prox!(state.z, iter.g, state.y, state.gamma) state.res .= state.x .- state.z - FBE_x_new = f_model(iter, state) + state.g_z - if FBE_x_new <= threshold + f_Az_upp = f_model(iter, state) + + if (iter.gamma === nothing || iter.adaptive == true) + Az = iter.A * state.z + grad_f_Az, f_Az = gradient(iter.f, Az) + tol = 10 * eps(R) * (1 + abs(f_Az)) + if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma + state.gamma /= 2 + if state.gamma < iter.minimum_gamma + @warn "stepsize `gamma` became too small ($(state.gamma))" + end + can_update_direction = true + reset!(state.H) + continue + end + end + + FBE_x = f_Az_upp + state.g_z + if FBE_x <= threshold # update metric update!(state.H, state.x - state.x_prev, state.res - state.res_prev) - state.tau = tau return state, state - end - - if Az === nothing - Az = iter.A * state.z_curr - end - - tau *= 0.5 - state.x .= tau .* state.x_d .+ (1 - tau) .* state.z_curr - state.Ax .= tau .* state.Ax_d .+ (1 - tau) .* Az - - if ProximalOperators.is_quadratic(iter.f) - # in case f is quadratic, we can compute its value and gradient - # along a line using interpolation and linear combinations - # this allows saving operations - if grad_f_Az === nothing - grad_f_Az, f_Az = gradient(iter.f, Az) - end - if At_grad_f_Az === nothing - At_grad_f_Az = iter.A' * grad_f_Az - c = f_Az - b = real(dot(state.Ax_d .- Az, grad_f_Az)) - a = state.f_Ax_d - b - c - end - state.f_Ax = a * tau^2 + b * tau + c - state.grad_f_Ax .= tau .* state.grad_f_Ax_d .+ (1 - tau) .* grad_f_Az - state.At_grad_f_Ax .= tau .* state.At_grad_f_Ax_d .+ (1 - tau) .* At_grad_f_Az else - # otherwise, in the general case where f is only smooth, we compute - # one gradient and matvec per backtracking step - state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) - mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) + state.tau *= 0.5 + if tau_backtracks >= iter.max_backtracks + @warn "parameter `tau` became too small ($(state.tau)), stopping the iterations" + return nothing + end + tau_backtracks += 1 + can_update_direction = false end + end - @warn "stepsize `tau` became too small ($(tau)), stopping the iterations" - return nothing end # Solver From 4d5ce5128c8b6e2e1a2f3981bea57e20f9c2afe6 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Thu, 30 Dec 2021 08:57:57 +0100 Subject: [PATCH 3/8] updated some tests and readme --- README.md | 3 +++ test/problems/test_lasso_small.jl | 
14 +++++--------- test/problems/test_lasso_small_strongly_convex.jl | 9 +++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 265c918..2f25113 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`NOLIP`](src/algorithms/nolip.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) @@ -65,3 +66,5 @@ Contributions are welcome in the form of [issues notification](https://github.co [^themelis_2018]: Themelis, Stella, Patrinos, *Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms*, SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274–2303 (2018). [link](https://epubs.siam.org/doi/10.1137/16M1080240) [^themelis_2020]: Themelis, Stella, Patrinos, *Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type algorithms*, arXiv preprint (2020). [link](https://arxiv.org/abs/2005.10230) + +[^demarchi_2021]: De Marchi, Themelis, *Proximal gradient algorithms under local Lipschitz gradient continuity: a convergence and robustness analysis of PANOC*, arXiv preprint (2021). [link](https://arxiv.org/abs/2112.13000) diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index dedc2ef..c8a9aa1 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -124,30 +124,26 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP" begin - - # NOLIP/Nonadaptive - + @testset "NOLIP (fixed step)" begin x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.NOLIP(tol = TOL) - x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) + x, it = @inferred solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup + end - ## NOLIP/Adaptive - + @testset "NOLIP (adaptive step)" begin x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) - x, it = solver(x0, f = f, A = A, g = g) + x, it = @inferred solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - end @testset "DouglasRachford" begin diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 1962fa3..9b4c2c8 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -69,4 +69,13 @@ using ProximalAlgorithms @test x0 == x0_backup end + @testset "NOLIP" begin + solver = ProximalAlgorithms.NOLIP(tol = TOL) + y, it = solver(x0, f = f, g = h, Lf = Lf) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 45 + @test x0 == x0_backup + end + end From e1b8f05fe2c9c4a8711cd6aa3cd980c11a74d7f8 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Thu, 30 Dec 2021 09:27:50 +0100 Subject: [PATCH 4/8] minor fix NOLIP --- src/algorithms/nolip.jl | 128 +++++++++--------- .../test_lasso_small_strongly_convex.jl | 9 ++ 2 files changed, 74 insertions(+), 63 
deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index dfe3f09..4b21f88 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -1,5 +1,6 @@ -# Themelis, De Marchi, "A linesearch splitting algorithm for structured optimization -# without global smoothness" (2021). +# De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz +# gradient continuity: a convergence and robustness analysis of PANOC", +# preprint on arXiv:2112.13000 (2021). using Base.Iterators using ProximalAlgorithms.IterationTools @@ -27,26 +28,27 @@ where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). - `adaptive=false`: forces the method stepsize to be adaptively adjusted. - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`. - `max_backtracks=20`: maximum number of line-search backtracks. -- `H=LBFGS(x0, 5)`: variable metric to use to compute line-search directions. +- `directions=LBFGS(5)`: strategy to use to compute line-search directions. # References -- [1] Themelis, De Marchi, "A linesearch splitting algorithm for structured -optimization without global smoothness", (2021). +- [1] De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz +gradient continuity: a convergence and robustness analysis of PANOC", +preprint on arXiv:2112.13000 (2021). """ -Base.@kwdef struct NOLIPIteration{R,C<:Union{R,Complex{R}},Tx<:AbstractArray{C},Tf,TA,Tg,TH} +Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} f::Tf = Zero() A::TA = I g::Tg = Zero() x0::Tx alpha::R = real(eltype(x0))(0.95) beta::R = real(eltype(x0))(0.5) - Lf::Maybe{R} = nothing - gamma::Maybe{R} = Lf === nothing ? nothing : (alpha / Lf) - adaptive::Bool = false + Lf::TLf = nothing + gamma::Tgamma = Lf === nothing ? nothing : (alpha / Lf) + adaptive::Bool = gamma === nothing minimum_gamma::R = real(eltype(x0))(1e-7) max_backtracks::Int = 20 - H::TH = LBFGS(x0, 5) + directions::D = LBFGS(5) end Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() @@ -63,60 +65,61 @@ Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} g_z::R # value of nonsmooth term (at z) res::Tx # fixed-point residual at iterate (= x - z) H::TH # variable metric - tau::Maybe{R} = nothing - x_prev::Tx = zero(x) - res_prev::Tx = zero(x) - d::Tx = zero(x) - Ad::TAx = zero(Ax) - x_d::Tx = zero(x) - Ax_d::TAx = zero(Ax) + tau::R = zero(gamma) + x_prev::Tx = similar(x) + res_prev::Tx = similar(x) + d::Tx = similar(x) + Ad::TAx = similar(Ax) + x_d::Tx = similar(x) + Ax_d::TAx = similar(Ax) f_Ax_d::R = zero(real(eltype(x))) - grad_f_Ax_d::TAx = zero(Ax) - At_grad_f_Ax_d::Tx = zero(x) - z_curr::Tx = zero(x) + grad_f_Ax_d::TAx = similar(Ax) + At_grad_f_Ax_d::Tx = similar(x) + z_curr::Tx = similar(x) + Az::TAx = similar(Ax) + grad_f_Az::TAx = similar(Ax) + At_grad_f_Az::Tx = similar(x) end -f_model(iter::NOLIPIteration, state::NOLIPState) = - f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) +f_model(iter::NOLIPIteration, state::NOLIPState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) function Base.iterate(iter::NOLIPIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x grad_f_Ax, f_Ax = gradient(iter.f, Ax) - - gamma = iter.gamma - if gamma === nothing - gamma = iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) - end - + gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) - res = x - z - + state = NOLIPState( + x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, + gamma=gamma, y=y, z=z, g_z=g_z, res=x-z, H=initialize(iter.directions, x), + ) if (iter.gamma === nothing || iter.adaptive == true) - gamma_prev = gamma - gamma, g_z, Az, f_Az, grad_f_Az, f_Az_upp = backtrack_stepsize!( - gamma, iter.f, iter.A, iter.g, - x, f_Ax, At_grad_f_Ax, y, z, g_z, res, + state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!( + state.gamma, iter.f, iter.A, iter.g, + state.x, state.f_Ax, state.At_grad_f_Ax, state.y, state.z, state.g_z, state.res, + state.Az, state.grad_f_Az, alpha = iter.alpha, minimum_gamma = iter.minimum_gamma, ) end - - state = NOLIPState( - x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, - gamma=gamma, y=y, z=z, g_z=g_z, res=res, H=iter.H, - ) - return state, state end -function Base.iterate( - iter::NOLIPIteration{R}, - state::NOLIPState{R,Tx,TAx}, -) where {R,Tx,TAx} - Az, f_Az, grad_f_Az, At_grad_f_Az = nothing, nothing, nothing, nothing - a, b, c = nothing, nothing, nothing +set_next_direction!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = mul!(state.d, state.H, -state.res) +set_next_direction!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = state.d .= .-state.res +set_next_direction!(iter::NOLIPIteration, state::NOLIPState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) + +update_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) +update_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return +update_direction_state!(iter::NOLIPIteration, state::NOLIPState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) + +reset_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = reset!(state.H) +reset_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return +reset_direction_state!(iter::NOLIPIteration, state::NOLIPState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) + +function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R + f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf) # store iterate and residual for metric update later on state.x_prev .= state.x @@ -136,7 +139,7 @@ function Base.iterate( if can_update_direction # compute direction - mul!(state.d, state.H, -state.res) + set_next_direction!(iter, state) # backtrack tau 1 → 0 state.tau = R(1) state.x .= state.x_prev .+ state.d @@ -155,34 +158,33 @@ function Base.iterate( f_Az_upp = f_model(iter, state) if (iter.gamma === nothing || iter.adaptive == true) - Az = iter.A * state.z - grad_f_Az, f_Az = gradient(iter.f, Az) + mul!(state.Az, iter.A, state.z) + f_Az = gradient!(state.grad_f_Az, iter.f, state.Az) tol = 10 * eps(R) * (1 + abs(f_Az)) if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma - state.gamma /= 2 + state.gamma *= 0.5 if state.gamma < iter.minimum_gamma @warn "stepsize `gamma` became too small ($(state.gamma))" end can_update_direction = true - reset!(state.H) + reset_direction_state!(iter, state) continue end end - FBE_x = f_Az_upp + state.g_z - if FBE_x <= threshold + FBE_x_new = f_Az_upp + state.g_z + if FBE_x_new <= threshold # update 
metric - update!(state.H, state.x - state.x_prev, state.res - state.res_prev) + update_direction_state!(iter, state) return state, state - else - state.tau *= 0.5 - if tau_backtracks >= iter.max_backtracks - @warn "parameter `tau` became too small ($(state.tau)), stopping the iterations" - return nothing - end - tau_backtracks += 1 - can_update_direction = false end + state.tau *= 0.5 + if tau_backtracks > iter.max_backtracks + @warn "stepsize `tau` became too small ($(state.tau)), stopping the iterations" + return nothing + end + tau_backtracks += 1 + can_update_direction = false end @@ -205,7 +207,7 @@ function (solver::NOLIP)(x0; kwargs...) it, state.gamma, norm(state.res, Inf) / state.gamma, - (state.tau === nothing ? 0.0 : state.tau) + state.tau, ) iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) iter = take(halt(iter, stop), solver.maxit) diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 9b4c2c8..c1a3700 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -69,6 +69,15 @@ using ProximalAlgorithms @test x0 == x0_backup end + @testset "PANOC" begin + solver = ProximalAlgorithms.PANOC(tol = TOL) + y, it = solver(x0, f = f, g = h, Lf = Lf) + @test eltype(y) == T + @test norm(y - x_star, Inf) <= TOL + @test it < 45 + @test x0 == x0_backup + end + @testset "NOLIP" begin solver = ProximalAlgorithms.NOLIP(tol = TOL) y, it = solver(x0, f = f, g = h, Lf = Lf) From 2ea872cbf05f267034e317844c08269ee9cef0d6 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 10:20:41 +0100 Subject: [PATCH 5/8] minor changes comments/warnings --- src/algorithms/nolip.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 4b21f88..58cfb0c 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -1,6 +1,6 @@ # De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz # gradient continuity: a convergence and robustness analysis of PANOC", -# preprint on arXiv:2112.13000 (2021). +# arXiv:2112.13000 (2021). using Base.Iterators using ProximalAlgorithms.IterationTools @@ -33,7 +33,7 @@ where `f` is locally smooth and `A` is a linear mapping (for example, a matrix). # References - [1] De Marchi, Themelis, "Proximal gradient algorithms under local Lipschitz gradient continuity: a convergence and robustness analysis of PANOC", -preprint on arXiv:2112.13000 (2021). +arXiv:2112.13000 (2021). 
""" Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} @@ -180,7 +180,7 @@ function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R end state.tau *= 0.5 if tau_backtracks > iter.max_backtracks - @warn "stepsize `tau` became too small ($(state.tau)), stopping the iterations" + @warn "stepsize `tau` became too small ($(state.tau))" return nothing end tau_backtracks += 1 From 66d449cfe14a6e7f722a8822d819a36612147111 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 16:19:38 +0100 Subject: [PATCH 6/8] added equivalence test between PANOC and NOLIP --- test/problems/test_equivalence.jl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index 318095e..aa8a6ed 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -5,6 +5,7 @@ using ProximalOperators using ProximalAlgorithms: DouglasRachfordIteration, DRLSIteration, ForwardBackwardIteration, PANOCIteration, + NOLIPIteration, NoAcceleration @testset "DR/DRLS equivalence ($T)" for T in [Float32, Float64] @@ -62,3 +63,31 @@ end @test isapprox(fb_state.z, panoc_state.z) end end + +@testset "PANOC/NOLIP equivalence ($T)" for T in [Float32, Float64] + A = T[ + 1.0 -2.0 3.0 -4.0 5.0 + 2.0 -1.0 0.0 -1.0 3.0 + -1.0 0.0 4.0 -3.0 2.0 + -1.0 -1.0 -1.0 1.0 3.0 + ] + b = T[1.0, 2.0, 3.0, 4.0] + + m, n = size(A) + + R = real(T) + + lam = R(0.1) * norm(A' * b, Inf) + + f = LeastSquares(A, b) + g = NormL1(lam) + + x0 = zeros(R, n) + + panoc_iter = PANOCIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + nolip_iter = NOLIPIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + + for (panoc_state, nolip_state) in Iterators.take(zip(panoc_iter, nolip_iter), 10) + @test isapprox(panoc_state.z, nolip_state.z) + end +end From 4cb836b26ebe1dc9452f957524926101a17ea902 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 17:17:28 +0100 Subject: [PATCH 7/8] changed name NOLIP to PANOCplus --- README.md | 2 +- src/algorithms/nolip.jl | 48 +++++++++---------- test/problems/test_equivalence.jl | 10 ++-- test/problems/test_lasso_small.jl | 8 ++-- test/problems/test_lasso_small_h_split.jl | 10 ++-- .../test_lasso_small_strongly_convex.jl | 4 +- test/problems/test_lasso_small_v_split.jl | 10 ++-- test/problems/test_nonconvex_qp.jl | 8 ++-- test/problems/test_sparse_logistic_small.jl | 4 +- test/problems/test_verbose.jl | 10 ++-- 10 files changed, 57 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 2f25113..581cd46 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) -PANOC+ (L-BFGS)[^demarchi_2021] | [`NOLIP`](src/algorithms/nolip.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/nolip.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) diff --git a/src/algorithms/nolip.jl b/src/algorithms/nolip.jl index 58cfb0c..b1625f5 100644 --- a/src/algorithms/nolip.jl +++ b/src/algorithms/nolip.jl @@ -9,9 +9,9 @@ using LinearAlgebra using Printf """ - NOLIPIteration(; ) + PANOCplusIteration(; ) -Instantiate 
the NOLIP algorithm (see [1]) for solving optimization problems +Instantiate the PANOCplus algorithm (see [1]) for solving optimization problems of the form minimize f(Ax) + g(x), @@ -36,7 +36,7 @@ gradient continuity: a convergence and robustness analysis of PANOC", arXiv:2112.13000 (2021). """ -Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} +Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} f::Tf = Zero() A::TA = I g::Tg = Zero() @@ -51,9 +51,9 @@ Base.@kwdef struct NOLIPIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D} directions::D = LBFGS(5) end -Base.IteratorSize(::Type{<:NOLIPIteration}) = Base.IsInfinite() +Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite() -Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} +Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH} x::Tx # iterate Ax::TAx # A times x f_Ax::R # value of smooth term @@ -81,9 +81,9 @@ Base.@kwdef mutable struct NOLIPState{R,Tx,TAx,TH} At_grad_f_Az::Tx = similar(x) end -f_model(iter::NOLIPIteration, state::NOLIPState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) +f_model(iter::PANOCplusIteration, state::PANOCplusState) = f_model(state.f_Ax, state.At_grad_f_Ax, state.res, iter.alpha / state.gamma) -function Base.iterate(iter::NOLIPIteration{R}) where {R} +function Base.iterate(iter::PANOCplusIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x grad_f_Ax, f_Ax = gradient(iter.f, Ax) @@ -91,7 +91,7 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(iter.g, y, gamma) - state = NOLIPState( + state = PANOCplusState( x=x, Ax=Ax, f_Ax=f_Ax, grad_f_Ax=grad_f_Ax, At_grad_f_Ax=At_grad_f_Ax, gamma=gamma, y=y, z=z, g_z=g_z, res=x-z, H=initialize(iter.directions, x), ) @@ -106,19 +106,19 @@ function Base.iterate(iter::NOLIPIteration{R}) where {R} return state, state end -set_next_direction!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = mul!(state.d, state.H, -state.res) -set_next_direction!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = state.d .= .-state.res -set_next_direction!(iter::NOLIPIteration, state::NOLIPState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) +set_next_direction!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = mul!(state.d, state.H, -state.res) +set_next_direction!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = state.d .= .-state.res +set_next_direction!(iter::PANOCplusIteration, state::PANOCplusState) = set_next_direction!(acceleration_style(typeof(iter.directions)), iter, state) -update_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, state::NOLIPState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) -update_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return -update_direction_state!(iter::NOLIPIteration, state::NOLIPState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) +update_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = update!(state.H, state.x - state.x_prev, state.res - state.res_prev) +update_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return +update_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) -reset_direction_state!(::QuasiNewtonStyle, ::NOLIPIteration, 
state::NOLIPState) = reset!(state.H) -reset_direction_state!(::NoAccelerationStyle, ::NOLIPIteration, state::NOLIPState) = return -reset_direction_state!(iter::NOLIPIteration, state::NOLIPState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) +reset_direction_state!(::QuasiNewtonStyle, ::PANOCplusIteration, state::PANOCplusState) = reset!(state.H) +reset_direction_state!(::NoAccelerationStyle, ::PANOCplusIteration, state::PANOCplusState) = return +reset_direction_state!(iter::PANOCplusIteration, state::PANOCplusState) = reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state) -function Base.iterate(iter::NOLIPIteration{R}, state::NOLIPState) where R +function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where R f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf) # store iterate and residual for metric update later on @@ -192,7 +192,7 @@ end # Solver -struct NOLIP{R, K} +struct PANOCplus{R, K} maxit::Int tol::R verbose::Bool @@ -200,8 +200,8 @@ struct NOLIP{R, K} kwargs::K end -function (solver::NOLIP)(x0; kwargs...) - stop(state::NOLIPState) = norm(state.res, Inf) / state.gamma <= solver.tol +function (solver::PANOCplus)(x0; kwargs...) + stop(state::PANOCplusState) = norm(state.res, Inf) / state.gamma <= solver.tol disp((it, state)) = @printf( "%5d | %.3e | %.3e | %.3e\n", it, @@ -209,7 +209,7 @@ function (solver::NOLIP)(x0; kwargs...) norm(state.res, Inf) / state.gamma, state.tau, ) - iter = NOLIPIteration(; x0=x0, solver.kwargs..., kwargs...) + iter = PANOCplusIteration(; x0=x0, solver.kwargs..., kwargs...) iter = take(halt(iter, stop), solver.maxit) iter = enumerate(iter) if solver.verbose @@ -219,5 +219,5 @@ function (solver::NOLIP)(x0; kwargs...) return state_final.z, num_iters end -NOLIP(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) = - NOLIP(maxit, tol, verbose, freq, kwargs) +PANOCplus(; maxit=1_000, tol=1e-8, verbose=false, freq=10, kwargs...) 
= + PANOCplus(maxit, tol, verbose, freq, kwargs) diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index aa8a6ed..004d618 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -5,7 +5,7 @@ using ProximalOperators using ProximalAlgorithms: DouglasRachfordIteration, DRLSIteration, ForwardBackwardIteration, PANOCIteration, - NOLIPIteration, + PANOCplusIteration, NoAcceleration @testset "DR/DRLS equivalence ($T)" for T in [Float32, Float64] @@ -64,7 +64,7 @@ end end end -@testset "PANOC/NOLIP equivalence ($T)" for T in [Float32, Float64] +@testset "PANOC/PANOCplus equivalence ($T)" for T in [Float32, Float64] A = T[ 1.0 -2.0 3.0 -4.0 5.0 2.0 -1.0 0.0 -1.0 3.0 @@ -85,9 +85,9 @@ end x0 = zeros(R, n) panoc_iter = PANOCIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) - nolip_iter = NOLIPIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) + panocplus_iter = PANOCplusIteration(f=f, g=g, x0=x0, gamma=R(0.95) / opnorm(A)^2) - for (panoc_state, nolip_state) in Iterators.take(zip(panoc_iter, nolip_iter), 10) - @test isapprox(panoc_state.z, nolip_state.z) + for (panoc_state, panocplus_state) in Iterators.take(zip(panoc_iter, panocplus_iter), 10) + @test isapprox(panoc_state.z, panocplus_state.z) end end diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index c8a9aa1..311094a 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -124,10 +124,10 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP (fixed step)" begin + @testset "PANOCplus (fixed step)" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = @inferred solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @@ -135,10 +135,10 @@ using ProximalAlgorithms: @test x0 == x0_backup end - @testset "NOLIP (adaptive step)" begin + @testset "PANOCplus (adaptive step)" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = @inferred solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl index 44be73d..8fc179a 100644 --- a/test/problems/test_lasso_small_h_split.jl +++ b/test/problems/test_lasso_small_h_split.jl @@ -142,24 +142,24 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index c1a3700..6de93a2 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ 
b/test/problems/test_lasso_small_strongly_convex.jl @@ -78,8 +78,8 @@ using ProximalAlgorithms @test x0 == x0_backup end - @testset "NOLIP" begin - solver = ProximalAlgorithms.NOLIP(tol = TOL) + @testset "PANOCplus" begin + solver = ProximalAlgorithms.PANOCplus(tol = TOL) y, it = solver(x0, f = f, g = h, Lf = Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl index 55c0f2e..5569adc 100644 --- a/test/problems/test_lasso_small_v_split.jl +++ b/test/problems/test_lasso_small_v_split.jl @@ -135,24 +135,24 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @test x0 == x0_backup - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index 4f7085a..13ac2b5 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -28,10 +28,10 @@ using Test @test x0 == x0_backup end - @testset "NOLIP" begin + @testset "PANOCplus" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP() + solver = ProximalAlgorithms.PANOCplus() x, it = solver(x0, f = f, g = g) z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) @test norm(x - z, Inf) / gamma <= solver.tol @@ -92,10 +92,10 @@ end @test x0 == x0_backup end - @testset "NOLIP" begin + @testset "PANOCplus" begin x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(tol = TOL) + solver = ProximalAlgorithms.PANOCplus(tol = TOL) x, it = solver(x0, f = f, g = g) z = min.(upp, max.(low, x .- gamma .* (Q * x + q))) @test norm(x - z, Inf) / gamma <= solver.tol diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index afd2d0d..976f6d3 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -68,11 +68,11 @@ @test it < 50 @test x0 == x0_backup - # NOLIP/Adaptive + # PANOCplus/Adaptive x0 = zeros(T, n) x0_backup = copy(x0) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index 27ffbbd..46f2b9f 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -120,21 +120,21 @@ end - @testset "NOLIP" begin + @testset "PANOCplus" begin - # NOLIP/Nonadaptive + # PANOCplus/Nonadaptive x0 = zeros(T, n) - solver = ProximalAlgorithms.NOLIP(tol = TOL, verbose = true) + solver = ProximalAlgorithms.PANOCplus(tol = TOL, verbose = true) x, it = solver(x0, f = f, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 - ## NOLIP/Adaptive + ## PANOCplus/Adaptive x0 = zeros(T, n) - solver = ProximalAlgorithms.NOLIP(adaptive = true, tol = TOL, verbose 
= true) + solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, verbose = true) x, it = solver(x0, f = f, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL From 765f12df615d02d1a4368bf694aedc3c4ba77d3c Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 5 Jan 2022 17:19:12 +0100 Subject: [PATCH 8/8] changed filename NOLIP to PANOCplus --- README.md | 2 +- src/ProximalAlgorithms.jl | 2 +- src/algorithms/{nolip.jl => panocplus.jl} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/algorithms/{nolip.jl => panocplus.jl} (100%) diff --git a/README.md b/README.md index 581cd46..7b40464 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Vũ-Condat primal-dual algorithm[^chambolle_2011][^vu_2013][^condat_2013] | [`Vu Davis-Yin splitting[^davis_2017] | [`DavisYin`](src/algorithms/davis_yin.jl) Asymmetric forward-backward-adjoint splitting[^latafat_2017] | [`AFBA`](src/algorithms/primal_dual.jl) PANOC (L-BFGS)[^stella_2017] | [`PANOC`](src/algorithms/panoc.jl) -PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/nolip.jl) +PANOC+ (L-BFGS)[^demarchi_2021] | [`PANOCplus`](src/algorithms/panocplus.jl) ZeroFPR (L-BFGS)[^themelis_2018] | [`ZeroFPR`](src/algorithms/zerofpr.jl) Douglas-Rachford line-search (L-BFGS)[^themelis_2020] | [`DRLS`](src/algorithms/drls.jl) diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index a017396..03647ec 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -33,6 +33,6 @@ include("algorithms/primal_dual.jl") include("algorithms/davis_yin.jl") include("algorithms/li_lin.jl") include("algorithms/fista.jl") -include("algorithms/nolip.jl") +include("algorithms/panocplus.jl") end # module diff --git a/src/algorithms/nolip.jl b/src/algorithms/panocplus.jl similarity index 100% rename from src/algorithms/nolip.jl rename to src/algorithms/panocplus.jl
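
A minimal usage sketch of the PANOCplus solver introduced by this series, assembled from the calls exercised in the tests above (ProximalAlgorithms.PANOCplus with the f, g, Lf, adaptive, and tol keywords); the problem data, sizes, and tolerance below are illustrative placeholders, not values taken from the test suite:

    using LinearAlgebra
    using ProximalOperators
    using ProximalAlgorithms

    # Small lasso instance: minimize 0.5*||Ax - b||^2 + lam*||x||_1.
    A = [1.0 -2.0 3.0; 2.0 -1.0 0.0; -1.0 0.0 4.0; -1.0 -1.0 -1.0]
    b = [1.0, 2.0, 3.0, 4.0]
    lam = 0.1 * norm(A' * b, Inf)

    f = LeastSquares(A, b)  # smooth term; A is folded into f here, so the solver's A defaults to I
    g = NormL1(lam)         # proximable term

    x0 = zeros(size(A, 2))

    # Fixed stepsize: pass the Lipschitz constant of the gradient of f, here opnorm(A)^2.
    solver = ProximalAlgorithms.PANOCplus(tol = 1e-8)
    x, it = solver(x0, f = f, g = g, Lf = opnorm(A)^2)

    # Adaptive stepsize: omit Lf and let the backtracking line search pick gamma.
    solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = 1e-8)
    x, it = solver(x0, f = f, g = g)

Either call returns the proximal point of the final iterate together with the iteration count, matching `return state_final.z, num_iters` in the solver definition above.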