From 1b127cab48b22ffa99e7e16faa04fa8a7e269980 Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sat, 21 Oct 2017 14:15:11 +0200 Subject: [PATCH 1/6] ENH: add nonlinear version of ADMM, closes #1201 --- odl/solvers/nonsmooth/admm.py | 225 ++++++++++++++++++++++++++++++++-- 1 file changed, 217 insertions(+), 8 deletions(-) diff --git a/odl/solvers/nonsmooth/admm.py b/odl/solvers/nonsmooth/admm.py index 6d1c5343851..44c7ea0863d 100644 --- a/odl/solvers/nonsmooth/admm.py +++ b/odl/solvers/nonsmooth/admm.py @@ -1,12 +1,10 @@ """Alternating Direction method of Multipliers (ADMM) method variants.""" from __future__ import division -from builtins import range +from odl.operator import Operator, OpDomainError, power_method_opnorm -from odl.operator import Operator, OpDomainError - -__all__ = ('admm_linearized',) +__all__ = ('admm_linearized', 'admm_precon_nonlinear') def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs): @@ -30,9 +28,9 @@ def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs): The functions ``f`` and ``g`` in the problem definition. They need to implement the ``proximal`` method. L : linear `Operator` - The linear operator that is composed with ``g`` in the problem - definition. It must fulfill ``L.domain == f.domain`` and - ``L.range == g.domain``. + Linear operator composed with ``g`` in the problem definition. + It must implement ``L.adjoint`` and fulfill ``L.domain == f.domain`` + and ``L.range == g.domain``. tau, sigma : positive float Step size parameters for the update of the variables. niter : non-negative int @@ -73,7 +71,9 @@ def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs): minimization subproblem for the :math:`x` variable, this variant uses a linearization of a quadratic term in the augmented Lagrangian of the generic ADMM, in order to make the step expressible with - the proximal operator of :math:`f`. + the proximal operator of :math:`f`. See + `[PB2014] `_ + for details. Another name for this algorithm is *split inexact Uzawa method*. @@ -161,3 +161,212 @@ def admm_linearized_simple(x, f, g, L, tau, sigma, niter, **kwargs): u = L(x) + u - z if callback is not None: callback(x) + + +def admm_precon_nonlinear(x, f, g, L, delta, niter, sigma=None, **kwargs): + """Preconditioned nonlinear ADMM. + + ADMM stands for "Alternating Direction Method of Multipliers" and + is a popular convex optimization method. This variant solves problems + of the form :: + + min_x [ f(x) + g(L(x)) ] + + with convex ``f`` and ``g``, and a (possibly nonlinear) operator ``L``. + See `[BKS+2015] `_ + and the Notes for more mathematical details. + + Parameters + ---------- + x : ``L.domain`` element + Starting point, updated in-place. + f, g : `Functional` + The functions ``f`` and ``g`` in the problem definition. They + need to implement the ``proximal`` method and satisfy + ``f.domain == L.domain`` and ``g.domain == L.range``, + respectively. + L : `Operator` + Possibly nonlinear operator composed with ``g`` in the problem + definition. It must implement ``L.derivative(x).adjoint`` and + fulfill ``L.domain == f.domain`` and ``L.range == g.domain``. + delta : positive float + Step size parameter for the update of the constraint variable. + sigma : positive float, optional + Step size parameter for ``g.proximal``. + Default: ``0.5 / delta``. + niter : non-negative int + Number of iterations. + + Other Parameters + ---------------- + opnorm_maxiter : nonnegative int + Maximum number of iterations to be used for the operator norm + estimation in each step of the optimization loop. + Default: 2 + opnorm_factor : float between 0 and 1 + Multiply this factor with the upper bound for the ``tau`` step + size determination, see Notes. Smaller values can be used to + compensate for a bad operator norm estimate caused by a small + ``opnorm_maxiter``. + Default: 0.1 + callback : callable, optional + Function called with the current iterate after each iteration. + + Notes + ----- + The preconditioned nonlinear ADMM solves the problem + + .. math:: + \min_{x} \\left[ f(x) + g\\big(L(x)\\big) \\right]. + + It starts with initial values :math:`x^{(0)}` as given, + :math:`y^{(0)} = 0` and :math:`\mu^{(0)} = \overline{\mu}^{(0)} = 0`, + and applies the following iteration: + + .. math:: + A^{(k)} &= \partial L(x^{(k)}), + + \\text{choose } \\tau^{(k)} &< \\frac{1}{\delta \|A^{(k)}\|^2} + + x^{(k+1)} &= \mathrm{prox}_{\\tau^{(k)} f} \\left[ + x^{(k)} - \\tau^{(k)} \\big(A^{(k)}\\big)^*\, \overline{\mu}^{(k)} + \\right] + + y^{(k+1)} &= \mathrm{prox}_{\sigma g}\\left[ + y^{(k)} + \sigma \\Big( + \mu^{(k)} + \delta \\big(L(x^{(k+1)} - y^{(k)}\\big) + \\Big) + \\right] + + \mu^{(k+1)} &= \mu^{(k)} + \delta \\big(L(x^{(k+1)}) - y^{(k+1)}\\big) + + \overline{\mu}^{(k+1)} &= 2\mu^{(k+1)} - \mu^{(k)} + + For (local) convergence it is required that :math:`\sigma < 1 / \delta`. + + References + ---------- + [BKS+2015] Benning, M, Knoll, F, Schönlieb, C-B., and Valkonen, T. + *Preconditioned ADMM with Nonlinear Operator Constraint*. + System Modeling and Optimization, 2015, pp. 117–126. + """ + if not isinstance(L, Operator): + raise TypeError('`op` {!r} is not an `Operator` instance' + ''.format(L)) + + if x not in L.domain: + raise OpDomainError('`x` {!r} is not in the domain of `op` {!r}' + ''.format(x, L.domain)) + + delta, delta_in = float(delta), delta + if delta <= 0: + raise ValueError('`delta` must be positive, got {}'.format(delta_in)) + + niter, niter_in = int(niter), niter + if niter < 0 or niter != niter_in: + raise ValueError('`niter` must be a non-negative integer, got {}' + ''.format(niter_in)) + + if sigma is None: + sigma = 0.5 / delta + else: + sigma, sigma_in = float(sigma), sigma + if not 0 < sigma < 1 / delta: + raise ValueError( + '`sigma` must lie strictly between 0 and 1/delta`, got ' + 'sigma={:.4} and (1/delta)={:.4}'.format(sigma_in, 1 / delta)) + + opnorm_maxiter = kwargs.pop('opnorm_maxiter', 2) + opnorm_factor = kwargs.pop('opnorm_factor', 0.1) + opnorm_factor, opnorm_factor_in = float(opnorm_factor), opnorm_factor + if not 0 < opnorm_factor < 1: + raise ValueError('`opnorm_factor` must lie strictly between 0 and 1, ' + 'got {}'.format(opnorm_factor_in)) + + callback = kwargs.pop('callback', None) + if callback is not None and not callable(callback): + raise TypeError('`callback` {} is not callable'.format(callback)) + + # Initialize range variables (quantities in [] are zero initially) + y = L.range.zero() + # mu = [mu_old +] delta * (L(x) [- y]) + mu = delta * L(x) + # mubar = 2 * mu [- mu_old] + mubar = 2 * mu + + # Temporary for Lx + tmp_ran = L.range.element() + # Temporary for L^*(mubar) + tmp_dom = L.domain.element() + + # Store constant g proximal since it may involve computation + g_prox_sigma = g.proximal(sigma) + + for i in range(niter): + # tau^k <- opnorm_factor / (delta * ||dL(x^k)||^2) + A = L.derivative(x) + xstart = A.domain.one() if i == 0 else x + A_norm = power_method_opnorm(A, xstart, maxiter=opnorm_maxiter) + tau = opnorm_factor / (delta * A_norm ** 2) + + # x^(k+1) <- prox[tau^k*f](x^k - tau^k * A^*(mubar^k)) + A.adjoint(mubar, out=tmp_dom) + x.lincomb(1, x, -tau, tmp_dom) + f.proximal(tau)(x, out=x) + + # y^(k+1) <- prox[sigma*g]((1 - sigma * delta) * y^k + + # sigma * (mu^k + delta * L(x^(k+1)))) + L(x, out=tmp_ran) + y.lincomb(1 - sigma * delta, y, sigma * delta, tmp_ran) + y.lincomb(1, y, sigma, mu) + g_prox_sigma(y, out=y) + + # mu^(k+1) <- mu^k + delta * (L(x^(k+1)) - y^(k+1)) + # tmp_ran still holds L(x^(k+1)) + # Using mubar as temporary + mubar.assign(mu) + mu.lincomb(1, mu, delta, tmp_ran) + mu.lincomb(1, mu, -delta, y) + + # mubar^(k+1) = 2 * mu^(k+1) - mu^k + mubar.lincomb(2, mu, -1, mubar) + + if callback is not None: + callback(x) + + +def admm_precon_nonlinear_simple(x, f, g, L, delta, niter, sigma=None, + **kwargs): + """Non-optimized version of ``admm_precon_nonlinear``. + + This function is intended for debugging. It makes a lot of copies and + performs no error checking. + """ + if sigma is None: + sigma = 0.5 / delta + + opnorm_maxiter = kwargs.pop('opnorm_maxiter', 2) + opnorm_factor = kwargs.pop('opnorm_factor', 0.1) + callback = kwargs.pop('callback', None) + + # Initialize range variables (quantities in [] are zero initially) + y = L.range.zero() + # mu = [mu_old +] delta * (L(x) [- y]) + mu = delta * L(x) + # mubar = 2 * mu [- mu_old] + mubar = 2 * mu + + for i in range(niter): + A = L.derivative(x) + xstart = A.domain.one() if i == 0 else x + A_norm = power_method_opnorm(A, xstart, maxiter=opnorm_maxiter) + tau = opnorm_factor / (delta * A_norm ** 2) + x[:] = f.proximal(tau)(x - tau * A.adjoint(mubar)) + y = g.proximal(sigma)((1 - sigma * delta) * y + + sigma * (mu + delta * L(x))) + mu_old = mu + mu = mu + delta * (L(x) - y) + mubar = 2 * mu - mu_old + + if callback is not None: + callback(x) From d4905b2bceca66af32ac6d504e07a901c143fd1f Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sat, 21 Oct 2017 17:10:28 +0200 Subject: [PATCH 2/6] ENH: add ADMM spectral CT example --- examples/solvers/admm_spectral_tomography.py | 169 +++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 examples/solvers/admm_spectral_tomography.py diff --git a/examples/solvers/admm_spectral_tomography.py b/examples/solvers/admm_spectral_tomography.py new file mode 100644 index 00000000000..b5f9e961349 --- /dev/null +++ b/examples/solvers/admm_spectral_tomography.py @@ -0,0 +1,169 @@ +"""Total variation spectral tomography using preconditioned nonlinear ADMM. + +In this example we solve the optimization problem + + min_x ||A(x) - y||_2^2 + lam * ||grad(x)||_{nuc, 1, 2} + +where ``||.||_{nuc, 1, 2}`` is the nuclear L1-L2 norm and ``A`` a 2 x 4 +operator matrix + + (c_1 * exp(-R) c_2 * exp(-R) ) + A = ( c_3 * exp(-R) c_4 * exp(-R)) + +with constants ``c_i`` and the parallel beam ray transform ``R``. In other +words, ``A`` acts on an input ``x`` with 4 components as follows: + + (c_1 * exp(-R(x_1)) + c_2 * exp(-R(x_2))) + A(x) = (c_3 * exp(-R(x_3)) + c_4 * exp(-R(x_4))) + +This reflects a simplified model for 2D spectral CT with 4 (discrete) beam +energies and 2 detector energy bins. + +The problem is rewritten in decoupled form as + + min_x g(L(x)) + +with a separable sum ``g`` of functionals and the stacked operator ``L``: + + g(z) = ||z_1 - g||_2^2 + lam * ||z_2||_1, + + ( A(x) ) + z = L(x) = ( grad(x) ). + +See the documentation of the `admm_precon_nonlinear` solver for further +details. +Note that this problem is underdetermined as a simple count of input +channels vs. data channels shows. This can be changed in the code below +(along with ``souce_spectrum`` and ``energy_factors``). +""" + +import numpy as np +import odl + +# --- Set up the forward operator --- # + +# Space of a monochromatic phantom (volume) +single_energy_space = odl.uniform_discr( + min_pt=[-20, -20], max_pt=[20, 20], shape=[100, 100], dtype='float32') +reco_space = single_energy_space ** 2 + +# Create a parallel beam geometry with flat detector, using 180 angles +geometry = odl.tomo.parallel_beam_geometry(single_energy_space, num_angles=180) + +# Define number of energy bins on both sides as well as the source spectrum +num_bins_reco = 4 +num_bins_data = 2 +assert num_bins_reco >= num_bins_data +assert num_bins_reco % num_bins_data == 0 +group_size = num_bins_reco // num_bins_data +# Note: the spectrum (i.e. intensity per channel) is additive on the data +# side, i.e., adding more entries adds to the total signal in the data bins +source_spectrum = [8e4, 1e5, 8e4, 6e4] +assert len(source_spectrum) == num_bins_reco + +# Create the forward operator +ray_trafo = odl.tomo.RayTransform(single_energy_space, geometry, + impl='astra_cpu') +exp_op = odl.ufunc_ops.exp(ray_trafo.range) +op_mat = [] +for data_bin in range(num_bins_data): + row = [0] * num_bins_reco + for j in range(data_bin * group_size, (data_bin + 1) * group_size): + row[j] = source_spectrum[j] * exp_op * (-ray_trafo) + op_mat.append(row) + +fwd_op = odl.ProductSpaceOperator(op_mat) + + +# Small helper to visualize the forward operator +def show_fwd_op(num_bins_reco, num_bins_data): + entry_len = 13 + dtype = 'U' + str(entry_len) + mat = np.empty((num_bins_data, num_bins_reco), dtype=dtype) + mat[:] = ' ' * entry_len + for i in range(mat.shape[0]): + for j in range(i * group_size, (i + 1) * group_size): + mat[i, j] = 'c_{} * exp(-R)'.format(j) + print(mat) + + +print('Forward operator (R = ray transform, c_i = constants):') +show_fwd_op(num_bins_reco, num_bins_data) + +# --- Generate artificial data --- # + +# Create multispectral phantom, very simple: C_energy * single_energy_phantom +energy_factors = [1e-2, 8e-3, 6e-3, 1e-3] +assert len(energy_factors) == num_bins_reco +single_energy_phantom = odl.phantom.shepp_logan(single_energy_space, + modified=True) +phantom = [fac * single_energy_phantom for fac in energy_factors] + +# Simulate noiseless (expected) projections and photon counts +phantom = fwd_op.domain.element(phantom) +meas_intens = fwd_op(phantom) +counts = odl.phantom.poisson_noise(meas_intens, seed=123) + +# --- Formulate problem in log space (more stable) --- # + +# The new opertor is F_i = -log(A_i / expected_counts[i]), where +# the expected counts are the sums of counts over the groups +expected_bin_counts = [] +for i in range(num_bins_data): + expected_bin_counts.append( + sum(source_spectrum[i * group_size: (i + 1) * group_size])) + +scaling_ops = [odl.ScalingOperator(fwd_op.range[i], 1 / expected_bin_counts[i]) + for i in range(num_bins_data)] +scaling = odl.DiagonalOperator(*scaling_ops) +log_fwd_op = -odl.ufunc_ops.log(fwd_op.range) * scaling * fwd_op + +# Transform the simulated counts accordingly +log_counts = fwd_op.range.element() +for i in range(num_bins_data): + log_counts[i] = -np.log(counts[i] / expected_bin_counts[i]) + +# --- Set up the inverse problem --- # + +# Gradient operator for the TV part (one per reco_space component) +grad = odl.DiagonalOperator(odl.Gradient(single_energy_space), num_bins_reco) + +# Stacking of the two operators +L = odl.BroadcastOperator(log_fwd_op, grad) + +# Data matching functional +data_fit = odl.solvers.L2NormSquared(log_fwd_op.range).translated(log_counts) +# Regularization functional, we use the Nuclear L1-L2 norm to couple the +# energy channels +reg_func = 0.08 * odl.solvers.NuclearNorm(grad.range) +g = odl.solvers.SeparableSum(data_fit, reg_func) + +# Enforce a value range for the reco to avoid underflow of exp(-x) +f = odl.solvers.IndicatorBox(L.domain, 0, 1e-2) + +# --- Select parameters and solve using ADMM --- # + +niter = 200 # Number of iterations +delta = 1.0 # Step size for the constraint update + +# Optionally pass a callback to the solver to display intermediate results +callback = (odl.solvers.CallbackPrintIteration(step=5) & + odl.solvers.CallbackShow(step=5)) + +# Choose a starting point, the FBP reconstruction from the first bin +fbp_op = odl.tomo.fbp_op(ray_trafo, filter_type='Hann', frequency_scaling=0.9) +fbp = fbp_op(log_counts[0]) +x = [] +for _ in range(num_bins_reco): + x.append(fbp.copy()) +x = L.domain.element(x) + +# Run the algorithm +odl.solvers.nonsmooth.admm.admm_precon_nonlinear( + x, f, g, L, delta, niter, opnorm_factor=0.5, callback=callback) + +# Display images +phantom.show(title='Phantom') +counts.show(title='Simulated photon counts (Sinogram)') +fbp.show('Monochromatic FBP reconstruction (bin 0)') +x.show(title='TV reconstruction', force_show=True) From 01ec77b4e8b3f19354ff2b9a8f3023018d7678c8 Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sat, 21 Oct 2017 21:45:34 +0200 Subject: [PATCH 3/6] MAINT: add license header to admm.py and fix unicode issue --- odl/solvers/nonsmooth/admm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/odl/solvers/nonsmooth/admm.py b/odl/solvers/nonsmooth/admm.py index 44c7ea0863d..ed21f7acab0 100644 --- a/odl/solvers/nonsmooth/admm.py +++ b/odl/solvers/nonsmooth/admm.py @@ -1,3 +1,12 @@ +# coding: utf-8 +# Copyright 2014-2017 The ODL contributors +# +# This file is part of ODL. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + """Alternating Direction method of Multipliers (ADMM) method variants.""" from __future__ import division From 74c6a0583ef276398fb2b0b9626918b01ee7cb6d Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sat, 21 Oct 2017 21:57:30 +0200 Subject: [PATCH 4/6] TST: add small unit test for nonlinear ADMM solver --- odl/test/solvers/nonsmooth/admm_test.py | 30 ++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/odl/test/solvers/nonsmooth/admm_test.py b/odl/test/solvers/nonsmooth/admm_test.py index f7b37b401b9..5dc29e2ce55 100644 --- a/odl/test/solvers/nonsmooth/admm_test.py +++ b/odl/test/solvers/nonsmooth/admm_test.py @@ -10,7 +10,7 @@ from __future__ import division import odl -from odl.solvers import admm_linearized, Callback +from odl.solvers import admm_linearized, admm_precon_nonlinear, Callback from odl.util.testutils import all_almost_equal, noise_element @@ -72,5 +72,33 @@ def test_admm_lin_l1(): assert all_almost_equal(x, data_1, places=2) +def test_admm_nonlin_affine_l1(): + """Verify that the correct value is returned for l1 dist optimization. + + Solves the optimization problem + + min_x ||L(x) - y_1||_1 + 0.5 ||L(x) - y_2||_1 + + with ``L(x) = x + 1``, which has optimum value ``x_1 = y_1 - 1`` since the + first term dominates. + """ + space = odl.rn(5) + + L = odl.IdentityOperator(space) + space.one() + + x1 = odl.util.testutils.noise_element(space) + x2 = odl.util.testutils.noise_element(space) + y1 = L(x1) + y2 = L(x2) + + f = odl.solvers.L1Norm(space).translated(y1) + g = 0.5 * odl.solvers.L1Norm(space).translated(y2) + + x = space.zero() + admm_precon_nonlinear(x, f, g, L, delta=1.0, niter=10) + + assert all_almost_equal(x, x, places=2) + + if __name__ == '__main__': odl.util.test_file(__file__) From 5ed19cc664e11c69d0211e737527ece2549c768d Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sun, 22 Oct 2017 00:33:24 +0200 Subject: [PATCH 5/6] ENH: simplify and optimize some proximal operators --- odl/solvers/functional/default_functionals.py | 18 +- odl/solvers/nonsmooth/proximal_operators.py | 199 ++++++++++++------ .../nonsmooth/proximal_operator_test.py | 1 - 3 files changed, 135 insertions(+), 83 deletions(-) diff --git a/odl/solvers/functional/default_functionals.py b/odl/solvers/functional/default_functionals.py index 93400e362c9..f1dbdb518f5 100644 --- a/odl/solvers/functional/default_functionals.py +++ b/odl/solvers/functional/default_functionals.py @@ -864,20 +864,10 @@ def __init__(self, space, lower=None, upper=None): def _call(self, x): """Apply the functional to the given point.""" - # Compute the projection of x onto the box, if this is equal to x we - # know x is inside the box. - tmp = self.domain.element() - if self.lower is not None and self.upper is None: - x.ufuncs.maximum(self.lower, out=tmp) - elif self.lower is None and self.upper is not None: - x.ufuncs.minimum(self.upper, out=tmp) - elif self.lower is not None and self.upper is not None: - x.ufuncs.maximum(self.lower, out=tmp) - tmp.ufuncs.minimum(self.upper, out=tmp) - else: - tmp.assign(x) - - return np.inf if x.dist(tmp) > 0 else 0 + # Since the proximal projects onto our feasible set we can simply + # check if it changes anything + proj = self.proximal(1)(x) + return np.inf if x.dist(proj) > 0 else 0 @property def proximal(self): diff --git a/odl/solvers/nonsmooth/proximal_operators.py b/odl/solvers/nonsmooth/proximal_operators.py index 590be38cd50..9bebe7d0daf 100644 --- a/odl/solvers/nonsmooth/proximal_operators.py +++ b/odl/solvers/nonsmooth/proximal_operators.py @@ -24,8 +24,9 @@ from __future__ import print_function, division, absolute_import import numpy as np -from odl.operator import (Operator, IdentityOperator, ScalingOperator, - ConstantOperator, DiagonalOperator, PointwiseNorm) +from odl.operator import ( + Operator, IdentityOperator, ScalingOperator, ConstantOperator, + DiagonalOperator, PointwiseNorm) from odl.space import ProductSpace from odl.set import LinearSpaceElement from odl.util import cache_arguments @@ -201,7 +202,6 @@ def translation_prox_factory(sigma): The proximal operator of ``s * F( . - y)`` where ``s`` is the step size """ - return (ConstantOperator(y) + prox_factory(sigma) * (IdentityOperator(y.space) - ConstantOperator(y))) @@ -478,7 +478,6 @@ def identity_factory(sigma): The proximal operator instance of G = 0 which is the identity operator """ - return IdentityOperator(space) return identity_factory @@ -571,7 +570,6 @@ def __init__(self, sigma): def _call(self, x, out): """Apply the operator to ``x`` and store the result in ``out``.""" - if lower is not None and upper is None: x.ufuncs.maximum(lower, out=out) elif lower is None and upper is not None: @@ -605,7 +603,6 @@ def proximal_nonnegativity(space): -------- proximal_box_constraint """ - return proximal_box_constraint(space, lower=0) @@ -747,7 +744,6 @@ def __init__(self, sigma): def _call(self, x, out): """Apply the operator to ``x`` and stores the result in ``out``.""" - dtype = getattr(self.domain, 'dtype', float) eps = np.finfo(dtype).resolution * 10 @@ -850,10 +846,8 @@ def __init__(self, sigma): self.sigma = float(sigma) def _call(self, x, out): - """Apply the operator to ``x`` and stores the result in ``out``""" - + """Apply the operator to ``x`` and store the result in ``out``""" # (x - sig*g) / (1 + sig/(2 lam)) - sig = self.sigma if g is None: out.lincomb(1.0 / (1 + 0.5 * sig / lam), x) @@ -908,9 +902,34 @@ def proximal_l2_squared(space, lam=1, g=None): proximal_l2 : proximal without square proximal_convex_conj_l2_squared : proximal for convex conjugate """ - # TODO: optimize - prox_cc_l2_squared = proximal_convex_conj_l2_squared(space, lam=lam, g=g) - return proximal_convex_conj(prox_cc_l2_squared) + + class ProximalL2Squared(Operator): + + """Proximal operator of the squared l2-norm/dist.""" + + def __init__(self, sigma): + """Initialize a new instance. + + Parameters + ---------- + sigma : positive float + Step size parameter + """ + super(ProximalL2Squared, self).__init__( + domain=space, range=space, linear=g is None) + self.sigma = float(sigma) + + def _call(self, x, out): + """Apply the operator to ``x`` and store the result in ``out``""" + # (x + 2*sig*lam*g) / (1 + 2*sig*lam)) + sig = self.sigma + if g is None: + out.lincomb(1.0 / (1 + 2 * sig * lam), x) + else: + out.lincomb(1.0 / (1 + 2 * sig * lam), x, + 2 * sig * lam / (1 + 2 * sig * lam), g) + + return ProximalL2Squared def proximal_convex_conj_l1(space, lam=1, g=None, isotropic=False): @@ -1027,11 +1046,12 @@ def __init__(self, sigma): def _call(self, x, out): """Apply the operator to ``x`` and store the result in ``out``.""" + # lam * (x - sig * g) / max(lam, |x - sig * g|) - # lam * (x - sigma * g) / max(lam, |x - sigma * g|) - + # diff = x - sig * g if g is not None: - diff = x - self.sigma * g + diff = self.domain.element() + diff.lincomb(1, x, -self.sigma, g) else: if x is out: # Handle aliased data properly @@ -1040,36 +1060,23 @@ def _call(self, x, out): diff = x if isotropic: - # Calculate |x| = pointwise 2-norm of x - - tmp = diff[0] ** 2 - sq_tmp = x[0].space.element() - for x_i in diff[1:]: - x_i.multiply(x_i, out=sq_tmp) - tmp += sq_tmp - tmp.ufuncs.sqrt(out=tmp) - - # Pointwise maximum of |x| and lambda - tmp.ufuncs.maximum(lam, out=tmp) - - # Global scaling - tmp /= lam + # denom = max( |x-sig*g|_2, lam ) / lam (|.|_2 pointwise) + pwnorm = PointwiseNorm(self.domain, exponent=2) + denom = pwnorm(diff) + denom.ufuncs.maximum(lam, out=denom) + denom /= lam # Pointwise division - for out_i, x_i in zip(out, diff): - x_i.divide(tmp, out=out_i) + for out_i, diff_i in zip(out, diff): + diff_i.divide(denom, out=out_i) else: - # Calculate |x| = pointwise 2-norm of x + # out = max( |x-sig*g|, lam ) / lam diff.ufuncs.absolute(out=out) - - # Pointwise maximum of |x| and lambda out.ufuncs.maximum(lam, out=out) - - # Global scaling out /= lam - # Pointwise division + # out = diff / ... diff.divide(out, out=out) return ProximalConvexConjL1 @@ -1111,27 +1118,32 @@ def proximal_l1(space, lam=1, g=None, isotropic=False): F(x) = \\lambda \|x - g\|_1. For a step size :math:`\\sigma`, the proximal operator of :math:`\\sigma F` - is + is the "soft-shrinkage" operator .. math:: - \mathrm{prox}_{\\sigma F}(y) = \\begin{cases} - y - \\sigma \\lambda - & \\text{if } y > g + \\sigma \\lambda, \\\\ - 0 - & \\text{if } g - \\sigma \\lambda \\leq y \\leq g + - \\sigma \\lambda \\\\ - y + \\sigma \\lambda - & \\text{if } y < g - \\sigma \\lambda, + \mathrm{prox}_{\\sigma F}(x) = + \\begin{cases} + g, & \\text{where } |x - g| \\leq \sigma\\lambda, \\\\ + x - \sigma\\lambda \mathrm{sign}(x - g), & \\text{elsewhere.} \\end{cases} + Here, all operations are to be read pointwise. + An alternative formulation is available for `ProductSpace`'s, where the - the ``isotropic`` parameter can be used, giving + the ``isotropic`` parameter can be used, i.e., .. math:: - F(x) = \\lambda \| \|x - g\|_2 \|_1 + F(x) = \\lambda \| |x - g|_2 \|_1 - The proximal can be calculated using the Moreau equality (also known as - Moreau decomposition or Moreau identity). See for example [BC2011]. + with the pointwise Euclidean norm :math:`|\cdot|_2`. For this case, one + gets + + .. math:: + \mathrm{prox}_{\\sigma F}(x) = + \\begin{cases} + g, & \\text{where } |x - g|_2 \\leq \sigma\\lambda, \\\\ + x - \sigma\\lambda \\frac{x - g}{|x - g|_2}, & \\text{elsewhere.} + \\end{cases} See Also -------- @@ -1142,10 +1154,68 @@ def proximal_l1(space, lam=1, g=None, isotropic=False): [BC2011] Bauschke, H H, and Combettes, P L. *Convex analysis and monotone operator theory in Hilbert spaces*. Springer, 2011. """ - # TODO: optimize - prox_cc_l1 = proximal_convex_conj_l1(space, lam=lam, g=g, - isotropic=isotropic) - return proximal_convex_conj(prox_cc_l1) + lam = float(lam) + + if g is not None and g not in space: + raise TypeError('{!r} is not an element of {!r}'.format(g, space)) + + class ProximalL1(Operator): + + """Proximal operator of the l1-norm/distance.""" + + def __init__(self, sigma): + """Initialize a new instance. + + Parameters + ---------- + sigma : positive float + Step size parameter + """ + super(ProximalL1, self).__init__( + domain=space, range=space, linear=False) + self.sigma = float(sigma) + + def _call(self, x, out): + """Apply the operator to ``x`` and stores the result in ``out``.""" + # diff = x - g + if g is not None: + diff = x - g + else: + if x is out: + # Handle aliased data properly + diff = x.copy() + else: + diff = x + + if isotropic: + # We write the operator as + # x - (x - g) / max(|x - g|_2 / sig*lam, 1) + pwnorm = PointwiseNorm(self.domain, exponent=2) + denom = pwnorm(diff) + denom /= self.sigma * lam + denom.ufuncs.maximum(1, out=denom) + + # out = (x - g) / denom + for out_i, diff_i in zip(out, diff): + diff_i.divide(denom, out=out_i) + + # out = x - ... + out.lincomb(1, x, -1, out) + + else: + # We write the operator as + # x - (x - g) / max(|x - g| / sig*lam, 1) + denom = diff.ufuncs.absolute() + denom /= self.sigma * lam + denom.ufuncs.maximum(1, out=denom) + + # out = (x - g) / denom + diff.ufuncs.divide(denom, out=out) + + # out = x - ... + out.lincomb(1, x, -1, out) + + return ProximalL1 def proximal_convex_conj_kl(space, lam=1, g=None): @@ -1251,33 +1321,26 @@ def __init__(self, sigma): def _call(self, x, out): """Apply the operator to ``x`` and stores the result in ``out``.""" + # (x + lam - sqrt((x - lam)^2 + 4*lam*sig*g)) / 2 - # 1 / 2 (lam_X + x - sqrt((x - lam_X) ^ 2 + 4; lam sigma g) - - # out = x - lam_X + # out = (x - lam)^2 out.assign(x) out -= lam - - # (out)^2 out.ufuncs.square(out=out) - # out = out + 4 lam sigma g + # out = ... + 4*lam*sigma*g # If g is None, it is taken as the one element if g is None: out += 4.0 * lam * self.sigma else: out.lincomb(1, out, 4.0 * lam * self.sigma, g) - # out = sqrt(out) + # out = x - sqrt(...) + lam out.ufuncs.sqrt(out=out) - - # out = x - out out.lincomb(1, x, -1, out) + out += lam - # out = lam_X + out - out.lincomb(lam, space.one(), 1, out) - - # out = 1/2 * out + # out = 1/2 * ... out /= 2 return ProximalConvexConjKL diff --git a/odl/test/solvers/nonsmooth/proximal_operator_test.py b/odl/test/solvers/nonsmooth/proximal_operator_test.py index 3feb0fdca05..a3db607d67c 100644 --- a/odl/test/solvers/nonsmooth/proximal_operator_test.py +++ b/odl/test/solvers/nonsmooth/proximal_operator_test.py @@ -10,7 +10,6 @@ from __future__ import division import numpy as np -import pytest import scipy.special import odl From 4bf95b11f2913bc1108d23f8b54f62946c8c7730 Mon Sep 17 00:00:00 2001 From: Holger Kohr Date: Sun, 22 Oct 2017 00:34:07 +0200 Subject: [PATCH 6/6] TST: add ADMM tests optimized vs. simple --- odl/test/solvers/nonsmooth/admm_test.py | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/odl/test/solvers/nonsmooth/admm_test.py b/odl/test/solvers/nonsmooth/admm_test.py index 5dc29e2ce55..dbab6f71921 100644 --- a/odl/test/solvers/nonsmooth/admm_test.py +++ b/odl/test/solvers/nonsmooth/admm_test.py @@ -11,6 +11,8 @@ from __future__ import division import odl from odl.solvers import admm_linearized, admm_precon_nonlinear, Callback +from odl.solvers.nonsmooth.admm import ( + admm_linearized_simple, admm_precon_nonlinear_simple) from odl.util.testutils import all_almost_equal, noise_element @@ -72,6 +74,22 @@ def test_admm_lin_l1(): assert all_almost_equal(x, data_1, places=2) +def test_admm_lin_vs_simple(): + """Check ``admm_linearized`` against the simple implementation.""" + space = odl.rn(5) + L = odl.ScalingOperator(space, 2) + y = L(odl.util.testutils.noise_element(space)) + f = odl.solvers.L2NormSquared(space).translated(y) + g = 0.5 * odl.solvers.L1Norm(space) + + x_simple = space.zero() + admm_linearized_simple(x_simple, f, g, L, tau=1.0, sigma=2.0, niter=10) + x_optim = space.zero() + admm_linearized(x_optim, f, g, L, tau=1.0, sigma=2.0, niter=10) + + assert all_almost_equal(x_optim, x_simple) + + def test_admm_nonlin_affine_l1(): """Verify that the correct value is returned for l1 dist optimization. @@ -100,5 +118,21 @@ def test_admm_nonlin_affine_l1(): assert all_almost_equal(x, x, places=2) +def test_admm_nonlin_vs_simple(): + """Check ``admm_precon_nonlinear`` against the simple implementation.""" + space = odl.rn(5) + L = odl.ufunc_ops.sin(space) * odl.ScalingOperator(space, 2) + y = L(odl.util.testutils.noise_element(space)) + f = odl.solvers.L2NormSquared(space).translated(y) + g = 0.5 * odl.solvers.L1Norm(space) + + x_simple = space.zero() + admm_precon_nonlinear_simple(x_simple, f, g, L, delta=1.0, niter=2) + x_optim = space.zero() + admm_precon_nonlinear(x_optim, f, g, L, delta=1.0, niter=2) + + assert all_almost_equal(x_optim, x_simple) + + if __name__ == '__main__': odl.util.test_file(__file__)