From 1b127cab48b22ffa99e7e16faa04fa8a7e269980 Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sat, 21 Oct 2017 14:15:11 +0200
Subject: [PATCH 1/6] ENH: add nonlinear version of ADMM, closes #1201

---
 odl/solvers/nonsmooth/admm.py | 225 ++++++++++++++++++++++++++++++++--
 1 file changed, 217 insertions(+), 8 deletions(-)

diff --git a/odl/solvers/nonsmooth/admm.py b/odl/solvers/nonsmooth/admm.py
index 6d1c5343851..44c7ea0863d 100644
--- a/odl/solvers/nonsmooth/admm.py
+++ b/odl/solvers/nonsmooth/admm.py
@@ -1,12 +1,10 @@
 """Alternating Direction method of Multipliers (ADMM) method variants."""
 
 from __future__ import division
-from builtins import range
+from odl.operator import Operator, OpDomainError, power_method_opnorm
 
-from odl.operator import Operator, OpDomainError
 
-
-__all__ = ('admm_linearized',)
+__all__ = ('admm_linearized', 'admm_precon_nonlinear')
 
 
 def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs):
@@ -30,9 +28,9 @@ def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs):
         The functions ``f`` and ``g`` in the problem definition. They
         need to implement the ``proximal`` method.
     L : linear `Operator`
-        The linear operator that is composed with ``g`` in the problem
-        definition. It must fulfill ``L.domain == f.domain`` and
-        ``L.range == g.domain``.
+        Linear operator composed with ``g`` in the problem definition.
+        It must implement ``L.adjoint`` and fulfill ``L.domain == f.domain``
+        and ``L.range == g.domain``.
     tau, sigma : positive float
         Step size parameters for the update of the variables.
     niter : non-negative int
@@ -73,7 +71,9 @@ def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs):
     minimization subproblem for the :math:`x` variable, this variant
     uses a linearization of a quadratic term in the augmented Lagrangian
     of the generic ADMM, in order to make the step expressible with
-    the proximal operator of :math:`f`.
+    the proximal operator of :math:`f`. See
+    `[PB2014] <http://web.stanford.edu/~boyd/papers/prox_algs.html>`_
+    for details.
 
     Another name for this algorithm is *split inexact Uzawa method*.
 
@@ -161,3 +161,212 @@ def admm_linearized_simple(x, f, g, L, tau, sigma, niter, **kwargs):
         u = L(x) + u - z
         if callback is not None:
             callback(x)
+
+
+def admm_precon_nonlinear(x, f, g, L, delta, niter, sigma=None, **kwargs):
+    """Preconditioned nonlinear ADMM.
+
+    ADMM stands for "Alternating Direction Method of Multipliers" and
+    is a popular convex optimization method. This variant solves problems
+    of the form ::
+
+        min_x [ f(x) + g(L(x)) ]
+
+    with convex ``f`` and ``g``, and a (possibly nonlinear) operator ``L``.
+    See `[BKS+2015] <https://doi.org/10.1007/978-3-319-55795-3_10>`_
+    and the Notes for more mathematical details.
+
+    Parameters
+    ----------
+    x : ``L.domain`` element
+        Starting point, updated in-place.
+    f, g : `Functional`
+        The functions ``f`` and ``g`` in the problem definition. They
+        need to implement the ``proximal`` method and satisfy
+        ``f.domain == L.domain`` and ``g.domain == L.range``,
+        respectively.
+    L : `Operator`
+        Possibly nonlinear operator composed with ``g`` in the problem
+        definition. It must implement ``L.derivative(x).adjoint`` and
+        fulfill ``L.domain == f.domain`` and ``L.range == g.domain``.
+    delta : positive float
+        Step size parameter for the update of the constraint variable.
+    sigma : positive float, optional
+        Step size parameter for ``g.proximal``.
+        Default: ``0.5 / delta``.
+    niter : non-negative int
+        Number of iterations.
+
+    Other Parameters
+    ----------------
+    opnorm_maxiter : nonnegative int
+        Maximum number of iterations to be used for the operator norm
+        estimation in each step of the optimization loop.
+        Default: 2
+    opnorm_factor : float between 0 and 1
+        Multiply this factor with the upper bound for the ``tau`` step
+        size determination, see Notes. Smaller values can be used to
+        compensate for a bad operator norm estimate caused by a small
+        ``opnorm_maxiter``.
+        Default: 0.1
+    callback : callable, optional
+        Function called with the current iterate after each iteration.
+
+    Notes
+    -----
+    The preconditioned nonlinear ADMM solves the problem
+
+    .. math::
+        \min_{x} \\left[ f(x) + g\\big(L(x)\\big) \\right].
+
+    It starts with initial values :math:`x^{(0)}` as given,
+    :math:`y^{(0)} = 0` and :math:`\mu^{(0)} = \overline{\mu}^{(0)} = 0`,
+    and applies the following iteration:
+
+    .. math::
+        A^{(k)} &= \partial L(x^{(k)}),
+
+        \\text{choose } \\tau^{(k)} &< \\frac{1}{\delta \|A^{(k)}\|^2}
+
+        x^{(k+1)} &= \mathrm{prox}_{\\tau^{(k)} f} \\left[
+            x^{(k)} - \\tau^{(k)} \\big(A^{(k)}\\big)^*\, \overline{\mu}^{(k)}
+        \\right]
+
+        y^{(k+1)} &= \mathrm{prox}_{\sigma g}\\left[
+            y^{(k)} + \sigma \\Big(
+                \mu^{(k)} + \delta \\big(L(x^{(k+1)} - y^{(k)}\\big)
+            \\Big)
+        \\right]
+
+        \mu^{(k+1)} &= \mu^{(k)} + \delta \\big(L(x^{(k+1)}) - y^{(k+1)}\\big)
+
+        \overline{\mu}^{(k+1)} &= 2\mu^{(k+1)} - \mu^{(k)}
+
+    For (local) convergence it is required that :math:`\sigma < 1 / \delta`.
+
+    References
+    ----------
+    [BKS+2015] Benning, M, Knoll, F, Schönlieb, C-B., and Valkonen, T.
+    *Preconditioned ADMM with Nonlinear Operator Constraint*.
+    System Modeling and Optimization, 2015, pp. 117–126.
+    """
+    if not isinstance(L, Operator):
+        raise TypeError('`op` {!r} is not an `Operator` instance'
+                        ''.format(L))
+
+    if x not in L.domain:
+        raise OpDomainError('`x` {!r} is not in the domain of `op` {!r}'
+                            ''.format(x, L.domain))
+
+    delta, delta_in = float(delta), delta
+    if delta <= 0:
+        raise ValueError('`delta` must be positive, got {}'.format(delta_in))
+
+    niter, niter_in = int(niter), niter
+    if niter < 0 or niter != niter_in:
+        raise ValueError('`niter` must be a non-negative integer, got {}'
+                         ''.format(niter_in))
+
+    if sigma is None:
+        sigma = 0.5 / delta
+    else:
+        sigma, sigma_in = float(sigma), sigma
+        if not 0 < sigma < 1 / delta:
+            raise ValueError(
+                '`sigma` must lie strictly between 0 and 1/delta`, got '
+                'sigma={:.4} and (1/delta)={:.4}'.format(sigma_in, 1 / delta))
+
+    opnorm_maxiter = kwargs.pop('opnorm_maxiter', 2)
+    opnorm_factor = kwargs.pop('opnorm_factor', 0.1)
+    opnorm_factor, opnorm_factor_in = float(opnorm_factor), opnorm_factor
+    if not 0 < opnorm_factor < 1:
+        raise ValueError('`opnorm_factor` must lie strictly between 0 and 1, '
+                         'got {}'.format(opnorm_factor_in))
+
+    callback = kwargs.pop('callback', None)
+    if callback is not None and not callable(callback):
+        raise TypeError('`callback` {} is not callable'.format(callback))
+
+    # Initialize range variables (quantities in [] are zero initially)
+    y = L.range.zero()
+    # mu = [mu_old +] delta * (L(x) [- y])
+    mu = delta * L(x)
+    # mubar = 2 * mu [- mu_old]
+    mubar = 2 * mu
+
+    # Temporary for Lx
+    tmp_ran = L.range.element()
+    # Temporary for L^*(mubar)
+    tmp_dom = L.domain.element()
+
+    # Store constant g proximal since it may involve computation
+    g_prox_sigma = g.proximal(sigma)
+
+    for i in range(niter):
+        # tau^k <- opnorm_factor / (delta * ||dL(x^k)||^2)
+        A = L.derivative(x)
+        xstart = A.domain.one() if i == 0 else x
+        A_norm = power_method_opnorm(A, xstart, maxiter=opnorm_maxiter)
+        tau = opnorm_factor / (delta * A_norm ** 2)
+
+        # x^(k+1) <- prox[tau^k*f](x^k - tau^k * A^*(mubar^k))
+        A.adjoint(mubar, out=tmp_dom)
+        x.lincomb(1, x, -tau, tmp_dom)
+        f.proximal(tau)(x, out=x)
+
+        # y^(k+1) <- prox[sigma*g]((1 - sigma * delta) * y^k +
+        #                          sigma * (mu^k + delta * L(x^(k+1))))
+        L(x, out=tmp_ran)
+        y.lincomb(1 - sigma * delta, y, sigma * delta, tmp_ran)
+        y.lincomb(1, y, sigma, mu)
+        g_prox_sigma(y, out=y)
+
+        # mu^(k+1) <- mu^k + delta * (L(x^(k+1)) - y^(k+1))
+        # tmp_ran still holds L(x^(k+1))
+        # Using mubar as temporary
+        mubar.assign(mu)
+        mu.lincomb(1, mu, delta, tmp_ran)
+        mu.lincomb(1, mu, -delta, y)
+
+        # mubar^(k+1) = 2 * mu^(k+1) - mu^k
+        mubar.lincomb(2, mu, -1, mubar)
+
+        if callback is not None:
+            callback(x)
+
+
+def admm_precon_nonlinear_simple(x, f, g, L, delta, niter, sigma=None,
+                                 **kwargs):
+    """Non-optimized version of ``admm_precon_nonlinear``.
+
+    This function is intended for debugging. It makes a lot of copies and
+    performs no error checking.
+    """
+    if sigma is None:
+        sigma = 0.5 / delta
+
+    opnorm_maxiter = kwargs.pop('opnorm_maxiter', 2)
+    opnorm_factor = kwargs.pop('opnorm_factor', 0.1)
+    callback = kwargs.pop('callback', None)
+
+    # Initialize range variables (quantities in [] are zero initially)
+    y = L.range.zero()
+    # mu = [mu_old +] delta * (L(x) [- y])
+    mu = delta * L(x)
+    # mubar = 2 * mu [- mu_old]
+    mubar = 2 * mu
+
+    for i in range(niter):
+        A = L.derivative(x)
+        xstart = A.domain.one() if i == 0 else x
+        A_norm = power_method_opnorm(A, xstart, maxiter=opnorm_maxiter)
+        tau = opnorm_factor / (delta * A_norm ** 2)
+        x[:] = f.proximal(tau)(x - tau * A.adjoint(mubar))
+        y = g.proximal(sigma)((1 - sigma * delta) * y +
+                              sigma * (mu + delta * L(x)))
+        mu_old = mu
+        mu = mu + delta * (L(x) - y)
+        mubar = 2 * mu - mu_old
+
+        if callback is not None:
+            callback(x)

From d4905b2bceca66af32ac6d504e07a901c143fd1f Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sat, 21 Oct 2017 17:10:28 +0200
Subject: [PATCH 2/6] ENH: add ADMM spectral CT example

---
 examples/solvers/admm_spectral_tomography.py | 169 +++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 examples/solvers/admm_spectral_tomography.py

diff --git a/examples/solvers/admm_spectral_tomography.py b/examples/solvers/admm_spectral_tomography.py
new file mode 100644
index 00000000000..b5f9e961349
--- /dev/null
+++ b/examples/solvers/admm_spectral_tomography.py
@@ -0,0 +1,169 @@
+"""Total variation spectral tomography using preconditioned nonlinear ADMM.
+
+In this example we solve the optimization problem
+
+    min_x  ||A(x) - y||_2^2 + lam * ||grad(x)||_{nuc, 1, 2}
+
+where ``||.||_{nuc, 1, 2}`` is the nuclear L1-L2 norm and ``A`` a 2 x 4
+operator matrix
+
+        (c_1 * exp(-R)  c_2 * exp(-R)                              )
+    A = (                              c_3 * exp(-R)  c_4 * exp(-R))
+
+with constants ``c_i`` and the parallel beam ray transform ``R``. In other
+words, ``A`` acts on an input ``x`` with 4 components as follows:
+
+           (c_1 * exp(-R(x_1)) + c_2 * exp(-R(x_2)))
+    A(x) = (c_3 * exp(-R(x_3)) + c_4 * exp(-R(x_4)))
+
+This reflects a simplified model for 2D spectral CT with 4 (discrete) beam
+energies and 2 detector energy bins.
+
+The problem is rewritten in decoupled form as
+
+    min_x g(L(x))
+
+with a separable sum ``g`` of functionals and the stacked operator ``L``:
+
+    g(z) = ||z_1 - g||_2^2 + lam * ||z_2||_1,
+
+               ( A(x)    )
+    z = L(x) = ( grad(x) ).
+
+See the documentation of the `admm_precon_nonlinear` solver for further
+details.
+Note that this problem is underdetermined as a simple count of input
+channels vs. data channels shows. This can be changed in the code below
+(along with ``souce_spectrum`` and ``energy_factors``).
+"""
+
+import numpy as np
+import odl
+
+# --- Set up the forward operator --- #
+
+# Space of a monochromatic phantom (volume)
+single_energy_space = odl.uniform_discr(
+    min_pt=[-20, -20], max_pt=[20, 20], shape=[100, 100], dtype='float32')
+reco_space = single_energy_space ** 2
+
+# Create a parallel beam geometry with flat detector, using 180 angles
+geometry = odl.tomo.parallel_beam_geometry(single_energy_space, num_angles=180)
+
+# Define number of energy bins on both sides as well as the source spectrum
+num_bins_reco = 4
+num_bins_data = 2
+assert num_bins_reco >= num_bins_data
+assert num_bins_reco % num_bins_data == 0
+group_size = num_bins_reco // num_bins_data
+# Note: the spectrum (i.e. intensity per channel) is additive on the data
+# side, i.e., adding more entries adds to the total signal in the data bins
+source_spectrum = [8e4, 1e5, 8e4, 6e4]
+assert len(source_spectrum) == num_bins_reco
+
+# Create the forward operator
+ray_trafo = odl.tomo.RayTransform(single_energy_space, geometry,
+                                  impl='astra_cpu')
+exp_op = odl.ufunc_ops.exp(ray_trafo.range)
+op_mat = []
+for data_bin in range(num_bins_data):
+    row = [0] * num_bins_reco
+    for j in range(data_bin * group_size, (data_bin + 1) * group_size):
+        row[j] = source_spectrum[j] * exp_op * (-ray_trafo)
+    op_mat.append(row)
+
+fwd_op = odl.ProductSpaceOperator(op_mat)
+
+
+# Small helper to visualize the forward operator
+def show_fwd_op(num_bins_reco, num_bins_data):
+    entry_len = 13
+    dtype = 'U' + str(entry_len)
+    mat = np.empty((num_bins_data, num_bins_reco), dtype=dtype)
+    mat[:] = ' ' * entry_len
+    for i in range(mat.shape[0]):
+        for j in range(i * group_size, (i + 1) * group_size):
+            mat[i, j] = 'c_{} * exp(-R)'.format(j)
+    print(mat)
+
+
+print('Forward operator (R = ray transform, c_i = constants):')
+show_fwd_op(num_bins_reco, num_bins_data)
+
+# --- Generate artificial data --- #
+
+# Create multispectral phantom, very simple: C_energy * single_energy_phantom
+energy_factors = [1e-2, 8e-3, 6e-3, 1e-3]
+assert len(energy_factors) == num_bins_reco
+single_energy_phantom = odl.phantom.shepp_logan(single_energy_space,
+                                                modified=True)
+phantom = [fac * single_energy_phantom for fac in energy_factors]
+
+# Simulate noiseless (expected) projections and photon counts
+phantom = fwd_op.domain.element(phantom)
+meas_intens = fwd_op(phantom)
+counts = odl.phantom.poisson_noise(meas_intens, seed=123)
+
+# --- Formulate problem in log space (more stable) --- #
+
+# The new opertor is F_i = -log(A_i / expected_counts[i]), where
+# the expected counts are the sums of counts over the groups
+expected_bin_counts = []
+for i in range(num_bins_data):
+    expected_bin_counts.append(
+        sum(source_spectrum[i * group_size: (i + 1) * group_size]))
+
+scaling_ops = [odl.ScalingOperator(fwd_op.range[i], 1 / expected_bin_counts[i])
+               for i in range(num_bins_data)]
+scaling = odl.DiagonalOperator(*scaling_ops)
+log_fwd_op = -odl.ufunc_ops.log(fwd_op.range) * scaling * fwd_op
+
+# Transform the simulated counts accordingly
+log_counts = fwd_op.range.element()
+for i in range(num_bins_data):
+    log_counts[i] = -np.log(counts[i] / expected_bin_counts[i])
+
+# --- Set up the inverse problem --- #
+
+# Gradient operator for the TV part (one per reco_space component)
+grad = odl.DiagonalOperator(odl.Gradient(single_energy_space), num_bins_reco)
+
+# Stacking of the two operators
+L = odl.BroadcastOperator(log_fwd_op, grad)
+
+# Data matching functional
+data_fit = odl.solvers.L2NormSquared(log_fwd_op.range).translated(log_counts)
+# Regularization functional, we use the Nuclear L1-L2 norm to couple the
+# energy channels
+reg_func = 0.08 * odl.solvers.NuclearNorm(grad.range)
+g = odl.solvers.SeparableSum(data_fit, reg_func)
+
+# Enforce a value range for the reco to avoid underflow of exp(-x)
+f = odl.solvers.IndicatorBox(L.domain, 0, 1e-2)
+
+# --- Select parameters and solve using ADMM --- #
+
+niter = 200  # Number of iterations
+delta = 1.0  # Step size for the constraint update
+
+# Optionally pass a callback to the solver to display intermediate results
+callback = (odl.solvers.CallbackPrintIteration(step=5) &
+            odl.solvers.CallbackShow(step=5))
+
+# Choose a starting point, the FBP reconstruction from the first bin
+fbp_op = odl.tomo.fbp_op(ray_trafo, filter_type='Hann', frequency_scaling=0.9)
+fbp = fbp_op(log_counts[0])
+x = []
+for _ in range(num_bins_reco):
+    x.append(fbp.copy())
+x = L.domain.element(x)
+
+# Run the algorithm
+odl.solvers.nonsmooth.admm.admm_precon_nonlinear(
+    x, f, g, L, delta, niter, opnorm_factor=0.5, callback=callback)
+
+# Display images
+phantom.show(title='Phantom')
+counts.show(title='Simulated photon counts (Sinogram)')
+fbp.show('Monochromatic FBP reconstruction (bin 0)')
+x.show(title='TV reconstruction', force_show=True)

From 01ec77b4e8b3f19354ff2b9a8f3023018d7678c8 Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sat, 21 Oct 2017 21:45:34 +0200
Subject: [PATCH 3/6] MAINT: add license header to admm.py and fix unicode
 issue

---
 odl/solvers/nonsmooth/admm.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/odl/solvers/nonsmooth/admm.py b/odl/solvers/nonsmooth/admm.py
index 44c7ea0863d..ed21f7acab0 100644
--- a/odl/solvers/nonsmooth/admm.py
+++ b/odl/solvers/nonsmooth/admm.py
@@ -1,3 +1,12 @@
+# coding: utf-8
+# Copyright 2014-2017 The ODL contributors
+#
+# This file is part of ODL.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
 """Alternating Direction method of Multipliers (ADMM) method variants."""
 
 from __future__ import division

From 74c6a0583ef276398fb2b0b9626918b01ee7cb6d Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sat, 21 Oct 2017 21:57:30 +0200
Subject: [PATCH 4/6] TST: add small unit test for nonlinear ADMM solver

---
 odl/test/solvers/nonsmooth/admm_test.py | 30 ++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/odl/test/solvers/nonsmooth/admm_test.py b/odl/test/solvers/nonsmooth/admm_test.py
index f7b37b401b9..5dc29e2ce55 100644
--- a/odl/test/solvers/nonsmooth/admm_test.py
+++ b/odl/test/solvers/nonsmooth/admm_test.py
@@ -10,7 +10,7 @@
 
 from __future__ import division
 import odl
-from odl.solvers import admm_linearized, Callback
+from odl.solvers import admm_linearized, admm_precon_nonlinear, Callback
 
 from odl.util.testutils import all_almost_equal, noise_element
 
@@ -72,5 +72,33 @@ def test_admm_lin_l1():
     assert all_almost_equal(x, data_1, places=2)
 
 
+def test_admm_nonlin_affine_l1():
+    """Verify that the correct value is returned for l1 dist optimization.
+
+    Solves the optimization problem
+
+        min_x ||L(x) - y_1||_1 + 0.5 ||L(x) - y_2||_1
+
+    with ``L(x) = x + 1``, which has optimum value ``x_1 = y_1 - 1`` since the
+    first term dominates.
+    """
+    space = odl.rn(5)
+
+    L = odl.IdentityOperator(space) + space.one()
+
+    x1 = odl.util.testutils.noise_element(space)
+    x2 = odl.util.testutils.noise_element(space)
+    y1 = L(x1)
+    y2 = L(x2)
+
+    f = odl.solvers.L1Norm(space).translated(y1)
+    g = 0.5 * odl.solvers.L1Norm(space).translated(y2)
+
+    x = space.zero()
+    admm_precon_nonlinear(x, f, g, L, delta=1.0, niter=10)
+
+    assert all_almost_equal(x, x, places=2)
+
+
 if __name__ == '__main__':
     odl.util.test_file(__file__)

From 5ed19cc664e11c69d0211e737527ece2549c768d Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sun, 22 Oct 2017 00:33:24 +0200
Subject: [PATCH 5/6] ENH: simplify and optimize some proximal operators

---
 odl/solvers/functional/default_functionals.py |  18 +-
 odl/solvers/nonsmooth/proximal_operators.py   | 199 ++++++++++++------
 .../nonsmooth/proximal_operator_test.py       |   1 -
 3 files changed, 135 insertions(+), 83 deletions(-)

diff --git a/odl/solvers/functional/default_functionals.py b/odl/solvers/functional/default_functionals.py
index 93400e362c9..f1dbdb518f5 100644
--- a/odl/solvers/functional/default_functionals.py
+++ b/odl/solvers/functional/default_functionals.py
@@ -864,20 +864,10 @@ def __init__(self, space, lower=None, upper=None):
 
     def _call(self, x):
         """Apply the functional to the given point."""
-        # Compute the projection of x onto the box, if this is equal to x we
-        # know x is inside the box.
-        tmp = self.domain.element()
-        if self.lower is not None and self.upper is None:
-            x.ufuncs.maximum(self.lower, out=tmp)
-        elif self.lower is None and self.upper is not None:
-            x.ufuncs.minimum(self.upper, out=tmp)
-        elif self.lower is not None and self.upper is not None:
-            x.ufuncs.maximum(self.lower, out=tmp)
-            tmp.ufuncs.minimum(self.upper, out=tmp)
-        else:
-            tmp.assign(x)
-
-        return np.inf if x.dist(tmp) > 0 else 0
+        # Since the proximal projects onto our feasible set we can simply
+        # check if it changes anything
+        proj = self.proximal(1)(x)
+        return np.inf if x.dist(proj) > 0 else 0
 
     @property
     def proximal(self):
diff --git a/odl/solvers/nonsmooth/proximal_operators.py b/odl/solvers/nonsmooth/proximal_operators.py
index 590be38cd50..9bebe7d0daf 100644
--- a/odl/solvers/nonsmooth/proximal_operators.py
+++ b/odl/solvers/nonsmooth/proximal_operators.py
@@ -24,8 +24,9 @@
 from __future__ import print_function, division, absolute_import
 import numpy as np
 
-from odl.operator import (Operator, IdentityOperator, ScalingOperator,
-                          ConstantOperator, DiagonalOperator, PointwiseNorm)
+from odl.operator import (
+    Operator, IdentityOperator, ScalingOperator, ConstantOperator,
+    DiagonalOperator, PointwiseNorm)
 from odl.space import ProductSpace
 from odl.set import LinearSpaceElement
 from odl.util import cache_arguments
@@ -201,7 +202,6 @@ def translation_prox_factory(sigma):
             The proximal operator of ``s * F( . - y)`` where ``s`` is the
             step size
         """
-
         return (ConstantOperator(y) + prox_factory(sigma) *
                 (IdentityOperator(y.space) - ConstantOperator(y)))
 
@@ -478,7 +478,6 @@ def identity_factory(sigma):
             The proximal operator instance of G = 0 which is the
             identity operator
         """
-
         return IdentityOperator(space)
 
     return identity_factory
@@ -571,7 +570,6 @@ def __init__(self, sigma):
 
         def _call(self, x, out):
             """Apply the operator to ``x`` and store the result in ``out``."""
-
             if lower is not None and upper is None:
                 x.ufuncs.maximum(lower, out=out)
             elif lower is None and upper is not None:
@@ -605,7 +603,6 @@ def proximal_nonnegativity(space):
     --------
     proximal_box_constraint
     """
-
     return proximal_box_constraint(space, lower=0)
 
 
@@ -747,7 +744,6 @@ def __init__(self, sigma):
 
         def _call(self, x, out):
             """Apply the operator to ``x`` and stores the result in ``out``."""
-
             dtype = getattr(self.domain, 'dtype', float)
             eps = np.finfo(dtype).resolution * 10
 
@@ -850,10 +846,8 @@ def __init__(self, sigma):
             self.sigma = float(sigma)
 
         def _call(self, x, out):
-            """Apply the operator to ``x`` and stores the result in ``out``"""
-
+            """Apply the operator to ``x`` and store the result in ``out``"""
             # (x - sig*g) / (1 + sig/(2 lam))
-
             sig = self.sigma
             if g is None:
                 out.lincomb(1.0 / (1 + 0.5 * sig / lam), x)
@@ -908,9 +902,34 @@ def proximal_l2_squared(space, lam=1, g=None):
     proximal_l2 : proximal without square
     proximal_convex_conj_l2_squared : proximal for convex conjugate
     """
-    # TODO: optimize
-    prox_cc_l2_squared = proximal_convex_conj_l2_squared(space, lam=lam, g=g)
-    return proximal_convex_conj(prox_cc_l2_squared)
+
+    class ProximalL2Squared(Operator):
+
+        """Proximal operator of the squared l2-norm/dist."""
+
+        def __init__(self, sigma):
+            """Initialize a new instance.
+
+            Parameters
+            ----------
+            sigma : positive float
+                Step size parameter
+            """
+            super(ProximalL2Squared, self).__init__(
+                domain=space, range=space, linear=g is None)
+            self.sigma = float(sigma)
+
+        def _call(self, x, out):
+            """Apply the operator to ``x`` and store the result in ``out``"""
+            # (x + 2*sig*lam*g) / (1 + 2*sig*lam))
+            sig = self.sigma
+            if g is None:
+                out.lincomb(1.0 / (1 + 2 * sig * lam), x)
+            else:
+                out.lincomb(1.0 / (1 + 2 * sig * lam), x,
+                            2 * sig * lam / (1 + 2 * sig * lam), g)
+
+    return ProximalL2Squared
 
 
 def proximal_convex_conj_l1(space, lam=1, g=None, isotropic=False):
@@ -1027,11 +1046,12 @@ def __init__(self, sigma):
 
         def _call(self, x, out):
             """Apply the operator to ``x`` and store the result in ``out``."""
+            # lam * (x - sig * g) / max(lam, |x - sig * g|)
 
-            # lam * (x - sigma * g) / max(lam, |x - sigma * g|)
-
+            # diff = x - sig * g
             if g is not None:
-                diff = x - self.sigma * g
+                diff = self.domain.element()
+                diff.lincomb(1, x, -self.sigma, g)
             else:
                 if x is out:
                     # Handle aliased data properly
@@ -1040,36 +1060,23 @@ def _call(self, x, out):
                     diff = x
 
             if isotropic:
-                # Calculate |x| = pointwise 2-norm of x
-
-                tmp = diff[0] ** 2
-                sq_tmp = x[0].space.element()
-                for x_i in diff[1:]:
-                    x_i.multiply(x_i, out=sq_tmp)
-                    tmp += sq_tmp
-                tmp.ufuncs.sqrt(out=tmp)
-
-                # Pointwise maximum of |x| and lambda
-                tmp.ufuncs.maximum(lam, out=tmp)
-
-                # Global scaling
-                tmp /= lam
+                # denom = max( |x-sig*g|_2, lam ) / lam  (|.|_2 pointwise)
+                pwnorm = PointwiseNorm(self.domain, exponent=2)
+                denom = pwnorm(diff)
+                denom.ufuncs.maximum(lam, out=denom)
+                denom /= lam
 
                 # Pointwise division
-                for out_i, x_i in zip(out, diff):
-                    x_i.divide(tmp, out=out_i)
+                for out_i, diff_i in zip(out, diff):
+                    diff_i.divide(denom, out=out_i)
 
             else:
-                # Calculate |x| = pointwise 2-norm of x
+                # out = max( |x-sig*g|, lam ) / lam
                 diff.ufuncs.absolute(out=out)
-
-                # Pointwise maximum of |x| and lambda
                 out.ufuncs.maximum(lam, out=out)
-
-                # Global scaling
                 out /= lam
 
-                # Pointwise division
+                # out = diff / ...
                 diff.divide(out, out=out)
 
     return ProximalConvexConjL1
@@ -1111,27 +1118,32 @@ def proximal_l1(space, lam=1, g=None, isotropic=False):
         F(x) = \\lambda \|x - g\|_1.
 
     For a step size :math:`\\sigma`, the proximal operator of :math:`\\sigma F`
-    is
+    is the "soft-shrinkage" operator
 
     .. math::
-        \mathrm{prox}_{\\sigma F}(y) = \\begin{cases}
-        y - \\sigma \\lambda
-        & \\text{if } y > g + \\sigma \\lambda, \\\\
-        0
-        & \\text{if } g - \\sigma \\lambda \\leq y \\leq g +
-        \\sigma \\lambda \\\\
-        y + \\sigma \\lambda
-        & \\text{if } y < g - \\sigma \\lambda,
+        \mathrm{prox}_{\\sigma F}(x) =
+        \\begin{cases}
+            g, & \\text{where } |x - g| \\leq \sigma\\lambda, \\\\
+            x - \sigma\\lambda \mathrm{sign}(x - g), & \\text{elsewhere.}
         \\end{cases}
 
+    Here, all operations are to be read pointwise.
+
     An alternative formulation is available for `ProductSpace`'s, where the
-    the ``isotropic`` parameter can be used, giving
+    the ``isotropic`` parameter can be used, i.e.,
 
     .. math::
-        F(x) = \\lambda \| \|x - g\|_2 \|_1
+        F(x) = \\lambda \| |x - g|_2 \|_1
 
-    The proximal can be calculated using the Moreau equality (also known as
-    Moreau decomposition or Moreau identity). See for example [BC2011].
+    with the pointwise Euclidean norm :math:`|\cdot|_2`. For this case, one
+    gets
+
+    .. math::
+        \mathrm{prox}_{\\sigma F}(x) =
+        \\begin{cases}
+            g, & \\text{where } |x - g|_2 \\leq \sigma\\lambda, \\\\
+            x - \sigma\\lambda \\frac{x - g}{|x - g|_2}, & \\text{elsewhere.}
+        \\end{cases}
 
     See Also
     --------
@@ -1142,10 +1154,68 @@ def proximal_l1(space, lam=1, g=None, isotropic=False):
     [BC2011] Bauschke, H H, and Combettes, P L. *Convex analysis and
     monotone operator theory in Hilbert spaces*. Springer, 2011.
     """
-    # TODO: optimize
-    prox_cc_l1 = proximal_convex_conj_l1(space, lam=lam, g=g,
-                                         isotropic=isotropic)
-    return proximal_convex_conj(prox_cc_l1)
+    lam = float(lam)
+
+    if g is not None and g not in space:
+        raise TypeError('{!r} is not an element of {!r}'.format(g, space))
+
+    class ProximalL1(Operator):
+
+        """Proximal operator of the l1-norm/distance."""
+
+        def __init__(self, sigma):
+            """Initialize a new instance.
+
+            Parameters
+            ----------
+            sigma : positive float
+                Step size parameter
+            """
+            super(ProximalL1, self).__init__(
+                domain=space, range=space, linear=False)
+            self.sigma = float(sigma)
+
+        def _call(self, x, out):
+            """Apply the operator to ``x`` and stores the result in ``out``."""
+            # diff = x - g
+            if g is not None:
+                diff = x - g
+            else:
+                if x is out:
+                    # Handle aliased data properly
+                    diff = x.copy()
+                else:
+                    diff = x
+
+            if isotropic:
+                # We write the operator as
+                # x - (x - g) / max(|x - g|_2 / sig*lam, 1)
+                pwnorm = PointwiseNorm(self.domain, exponent=2)
+                denom = pwnorm(diff)
+                denom /= self.sigma * lam
+                denom.ufuncs.maximum(1, out=denom)
+
+                # out = (x - g) / denom
+                for out_i, diff_i in zip(out, diff):
+                    diff_i.divide(denom, out=out_i)
+
+                # out = x - ...
+                out.lincomb(1, x, -1, out)
+
+            else:
+                # We write the operator as
+                # x - (x - g) / max(|x - g| / sig*lam, 1)
+                denom = diff.ufuncs.absolute()
+                denom /= self.sigma * lam
+                denom.ufuncs.maximum(1, out=denom)
+
+                # out = (x - g) / denom
+                diff.ufuncs.divide(denom, out=out)
+
+                # out = x - ...
+                out.lincomb(1, x, -1, out)
+
+    return ProximalL1
 
 
 def proximal_convex_conj_kl(space, lam=1, g=None):
@@ -1251,33 +1321,26 @@ def __init__(self, sigma):
 
         def _call(self, x, out):
             """Apply the operator to ``x`` and stores the result in ``out``."""
+            # (x + lam - sqrt((x - lam)^2 + 4*lam*sig*g)) / 2
 
-            # 1 / 2 (lam_X + x - sqrt((x - lam_X) ^ 2 + 4; lam sigma g)
-
-            # out = x - lam_X
+            # out = (x - lam)^2
             out.assign(x)
             out -= lam
-
-            # (out)^2
             out.ufuncs.square(out=out)
 
-            # out = out + 4 lam sigma g
+            # out = ... + 4*lam*sigma*g
             # If g is None, it is taken as the one element
             if g is None:
                 out += 4.0 * lam * self.sigma
             else:
                 out.lincomb(1, out, 4.0 * lam * self.sigma, g)
 
-            # out = sqrt(out)
+            # out = x - sqrt(...) + lam
             out.ufuncs.sqrt(out=out)
-
-            # out = x - out
             out.lincomb(1, x, -1, out)
+            out += lam
 
-            # out = lam_X + out
-            out.lincomb(lam, space.one(), 1, out)
-
-            # out = 1/2 * out
+            # out = 1/2 * ...
             out /= 2
 
     return ProximalConvexConjKL
diff --git a/odl/test/solvers/nonsmooth/proximal_operator_test.py b/odl/test/solvers/nonsmooth/proximal_operator_test.py
index 3feb0fdca05..a3db607d67c 100644
--- a/odl/test/solvers/nonsmooth/proximal_operator_test.py
+++ b/odl/test/solvers/nonsmooth/proximal_operator_test.py
@@ -10,7 +10,6 @@
 
 from __future__ import division
 import numpy as np
-import pytest
 import scipy.special
 
 import odl

From 4bf95b11f2913bc1108d23f8b54f62946c8c7730 Mon Sep 17 00:00:00 2001
From: Holger Kohr <h.kohr@cwi.nl>
Date: Sun, 22 Oct 2017 00:34:07 +0200
Subject: [PATCH 6/6] TST: add ADMM tests optimized vs. simple

---
 odl/test/solvers/nonsmooth/admm_test.py | 34 +++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/odl/test/solvers/nonsmooth/admm_test.py b/odl/test/solvers/nonsmooth/admm_test.py
index 5dc29e2ce55..dbab6f71921 100644
--- a/odl/test/solvers/nonsmooth/admm_test.py
+++ b/odl/test/solvers/nonsmooth/admm_test.py
@@ -11,6 +11,8 @@
 from __future__ import division
 import odl
 from odl.solvers import admm_linearized, admm_precon_nonlinear, Callback
+from odl.solvers.nonsmooth.admm import (
+    admm_linearized_simple, admm_precon_nonlinear_simple)
 
 from odl.util.testutils import all_almost_equal, noise_element
 
@@ -72,6 +74,22 @@ def test_admm_lin_l1():
     assert all_almost_equal(x, data_1, places=2)
 
 
+def test_admm_lin_vs_simple():
+    """Check ``admm_linearized`` against the simple implementation."""
+    space = odl.rn(5)
+    L = odl.ScalingOperator(space, 2)
+    y = L(odl.util.testutils.noise_element(space))
+    f = odl.solvers.L2NormSquared(space).translated(y)
+    g = 0.5 * odl.solvers.L1Norm(space)
+
+    x_simple = space.zero()
+    admm_linearized_simple(x_simple, f, g, L, tau=1.0, sigma=2.0, niter=10)
+    x_optim = space.zero()
+    admm_linearized(x_optim, f, g, L, tau=1.0, sigma=2.0, niter=10)
+
+    assert all_almost_equal(x_optim, x_simple)
+
+
 def test_admm_nonlin_affine_l1():
     """Verify that the correct value is returned for l1 dist optimization.
 
@@ -100,5 +118,21 @@ def test_admm_nonlin_affine_l1():
     assert all_almost_equal(x, x, places=2)
 
 
+def test_admm_nonlin_vs_simple():
+    """Check ``admm_precon_nonlinear`` against the simple implementation."""
+    space = odl.rn(5)
+    L = odl.ufunc_ops.sin(space) * odl.ScalingOperator(space, 2)
+    y = L(odl.util.testutils.noise_element(space))
+    f = odl.solvers.L2NormSquared(space).translated(y)
+    g = 0.5 * odl.solvers.L1Norm(space)
+
+    x_simple = space.zero()
+    admm_precon_nonlinear_simple(x_simple, f, g, L, delta=1.0, niter=2)
+    x_optim = space.zero()
+    admm_precon_nonlinear(x_optim, f, g, L, delta=1.0, niter=2)
+
+    assert all_almost_equal(x_optim, x_simple)
+
+
 if __name__ == '__main__':
     odl.util.test_file(__file__)