#  Copyright 2019 United Kingdom Research and Innovation
#  Copyright 2019 The University of Manchester
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
# Authors:
# CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt
from cil.optimisation.algorithms import Algorithm
from cil.optimisation.functions import ZeroFunction
import numpy
import logging
from numbers import Number
log = logging.getLogger(__name__)


class ISTA(Algorithm):
    r"""Iterative Shrinkage-Thresholding Algorithm, see :cite:`BeckTeboulle_b`, :cite:`BeckTeboulle_a`.
    Iterative Shrinkage-Thresholding Algorithm (ISTA)
    .. math:: x^{k+1} = \mathrm{prox}_{\alpha^{k} g}(x^{k} - \alpha^{k}\nabla f(x^{k}))
    is used to solve
    .. math:: \min_{x} f(x) + g(x)
    where :math:`f` is differentiable, :math:`g` has a *simple* proximal operator and :math:`\alpha^{k}`
    is the :code:`step_size` per iteration.
    Note
    ----
    For a constant step size, i.e., :math:`a^{k}=a` for :math:`k\geq1`, convergence of ISTA
    is guaranteed if
    .. math:: \alpha\in(0, \frac{2}{L}),
    where :math:`L` is the Lipschitz constant of :math:`f`, see :cite:`CombettesValerie`.
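
    For example, for :math:`f(x) = \frac{1}{2}\|Ax - b\|^{2}_{2}` the gradient
    :math:`\nabla f(x) = A^{T}(Ax - b)` is Lipschitz with constant :math:`L = \|A\|^{2}_{2}`,
    the squared operator norm of :math:`A`.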

    Parameters
    ----------
    initial : DataContainer
              Initial guess of ISTA.
    f : Function or `None`
        Differentiable function. If `None` is passed, the algorithm will use the ZeroFunction.
    g : Function or `None`
        Convex function with *simple* proximal operator. If `None` is passed, the algorithm will use the ZeroFunction.
    step_size : positive :obj:`float`, default = None
                Step size for the gradient step of ISTA.
                The default :code:`step_size` is :math:`\frac{0.99\cdot 2}{L}`, or 1 if `f=None`.
    kwargs: Keyword arguments
        Arguments from the base class :class:`.Algorithm`.

    Note
    -----
    If the function `g` is set to `None` or to the `ZeroFunction` then the ISTA algorithm is equivalent to Gradient Descent.

    If the function `f` is set to `None` or to the `ZeroFunction` then the ISTA algorithm is equivalent to a Proximal Point Algorithm.

    Examples
    --------

    .. math:: \underset{x}{\mathrm{argmin}}\frac{1}{2}\|Ax - b\|^{2}_{2}

    >>> f = LeastSquares(A, b=b, c=0.5)
    >>> g = ZeroFunction()
    >>> ig = A.domain_geometry()
    >>> ista = ISTA(initial=ig.allocate(), f=f, g=g, max_iteration=10)
    >>> ista.run()
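
    An explicit step size may be supplied instead of the default; a sketch,
    assuming the Lipschitz constant ``f.L`` has been computed:

    >>> ista = ISTA(initial=ig.allocate(), f=f, g=g, step_size=1.0/f.L, max_iteration=10)
    >>> ista.run()
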
    See also
    --------
    :class:`.FISTA`
    :class:`.GD`
    """
    def _provable_convergence_condition(self):
        return self.step_size <= 0.99*2.0/self.f.L
    @property
    def step_size(self):
        return self._step_size
    # Set default step size
    def set_step_size(self, step_size):
        """ Set default step size.
        """
        if step_size is None:
            if isinstance(self.f, ZeroFunction):
                self._step_size = 1
            elif isinstance(self.f.L, Number):
                self._step_size = 0.99*2.0/self.f.L
            else:
                raise ValueError("Function f is not differentiable")
        else:
            self._step_size = step_size 
    def __init__(self, initial, f, g, step_size=None, **kwargs):
        super(ISTA, self).__init__(**kwargs)
        self._step_size = None
        self.set_up(initial=initial, f=f, g=g, step_size=step_size, **kwargs) 
    def set_up(self, initial, f, g, step_size, **kwargs):
        """Set up of the algorithm"""
        log.info("%s setting up", self.__class__.__name__)
        # set up ISTA
        self.initial = initial
        self.x_old = initial.copy()
        self.x = initial.copy()
        if f is None:
            f = ZeroFunction()
        self.f = f
        if g is None:
            g = ZeroFunction()
        self.g = g
        if isinstance(f, ZeroFunction) and isinstance(g, ZeroFunction):
            raise ValueError('You set both f and g to be the ZeroFunction and thus the iterative method will not update and will remain fixed at the initial value.')
        # set step_size
        self.set_step_size(step_size=step_size)
        self.configured = True
        log.info("%s configured", self.__class__.__name__) 
    def update(self):
        r"""Performs a single iteration of ISTA
        .. math:: x_{k+1} = \mathrm{prox}_{\alpha g}(x_{k} - \alpha\nabla f(x_{k}))
        """
        # gradient step: x = nabla f(x_old)
        self.f.gradient(self.x_old, out=self.x)
        # x_old <- x_old - step_size * x  (sapyb computes a*self + b*y)
        self.x_old.sapyb(1., self.x, -self.step_size, out=self.x_old)
        # proximal step: x = prox_{step_size * g}(x_old)
        self.g.proximal(self.x_old, self.step_size, out=self.x)
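
        # Taken together, the three in-place calls above compute
        #   x = g.proximal(x_old - step_size * f.gradient(x_old), step_size)
        # without allocating temporaries (a sketch of the equivalent
        # out-of-place form).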
    def _update_previous_solution(self):
        """ Swaps the references to current and previous solution based on the :func:`~Algorithm.update_previous_solution` of the base class :class:`Algorithm`.
        """
        tmp = self.x_old
        self.x_old = self.x
        self.x = tmp
    def get_output(self):
        " Returns the current solution. "
        return self.x_old 
    def update_objective(self):
        """ Updates the objective
        .. math:: f(x) + g(x)
        """
        self.loss.append( self.f(self.x_old) + self.g(self.x_old) )  


class FISTA(ISTA):
    r"""Fast Iterative Shrinkage-Thresholding Algorithm, see :cite:`BeckTeboulle_b`, :cite:`BeckTeboulle_a`.
    Fast Iterative Shrinkage-Thresholding Algorithm (FISTA)
    .. math::
        \begin{cases}
            y_{k} = x_{k} - \alpha\nabla f(x_{k})  \\
            x_{k+1} = \mathrm{prox}_{\alpha g}(y_{k})\\
            t_{k+1} = \frac{1+\sqrt{1+ 4t_{k}^{2}}}{2}\\
            y_{k+1} = x_{k} + \frac{t_{k}-1}{t_{k-1}}(x_{k} - x_{k-1})
        \end{cases}
    is used to solve
    .. math:: \min_{x} f(x) + g(x)
    where :math:`f` is differentiable, :math:`g` has a *simple* proximal operator and :math:`\alpha^{k}`
    is the :code:`step_size` per iteration.

    Parameters
    ----------
    initial : DataContainer
              Starting point of the algorithm.
    f : Function or `None`
        Differentiable function. If `None` is passed, the algorithm will use the ZeroFunction.
    g : Function or `None`
        Convex function with *simple* proximal operator. If `None` is passed, the algorithm will use the ZeroFunction.
    step_size : positive :obj:`float`, default = None
                Step size for the gradient step of FISTA.
                The default :code:`step_size` is :math:`\frac{1}{L}`, or 1 if `f=None`.
    kwargs: Keyword arguments
        Arguments from the base class :class:`.Algorithm`.

    Note
    -----
    If the function `g` is set to `None` or to the `ZeroFunction` then the FISTA algorithm is equivalent to Accelerated Gradient Descent by Nesterov (:cite:`nesterov2003introductory` algorithm 2.2.9).

    If the function `f` is set to `None` or to the `ZeroFunction` then the FISTA algorithm is equivalent to Guler's First Accelerated Proximal Point Method (:cite:`guler1992new` sec 2).

    Examples
    --------

    .. math:: \underset{x}{\mathrm{argmin}}\frac{1}{2}\|Ax - b\|^{2}_{2}

    >>> f = LeastSquares(A, b=b, c=0.5)
    >>> g = ZeroFunction()
    >>> ig = A.domain_geometry()
    >>> fista = FISTA(initial=ig.allocate(), f=f, g=g, max_iteration=10)
    >>> fista.run()
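
    Whether the configured step size satisfies the provable convergence bound can
    be checked with :code:`is_provably_convergent`; a sketch, assuming the Lipschitz
    constant ``f.L`` has been computed:

    >>> fista = FISTA(initial=ig.allocate(), f=f, g=g, step_size=1.0/f.L, max_iteration=10)
    >>> fista.is_provably_convergent()
    True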

    See also
    --------
    :class:`.ISTA`
    :class:`.GD`
    """
    def set_step_size(self, step_size):
        """Set the default step size
        """
        if step_size is None:
            if isinstance(self.f, ZeroFunction):
                self._step_size = 1
            elif isinstance(self.f.L, Number):
                self._step_size = 1./self.f.L
            else:
                raise ValueError("Function f is not differentiable")
        else:
            self._step_size = step_size 
    def _provable_convergence_condition(self):
        return self.step_size <= 1./self.f.L
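        # Note: FISTA's provable bound, step_size <= 1/L, is stricter than
        # ISTA's step_size <= 0.99 * 2/L.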
    def __init__(self, initial, f, g, step_size=None, **kwargs):
        self.y = initial.copy()
        self.t = 1
        super(FISTA, self).__init__(initial=initial, f=f, g=g, step_size=step_size, **kwargs) 
    def update(self):
        r"""Performs a single iteration of FISTA
        .. math::
            \begin{cases}
                x_{k+1} = \mathrm{prox}_{\alpha g}(y_{k} - \alpha\nabla f(y_{k}))\\
                t_{k+1} = \frac{1+\sqrt{1+ 4t_{k}^{2}}}{2}\\
                y_{k+1} = x_{k} + \frac{t_{k}-1}{t_{k-1}}(x_{k} - x_{k-1})
            \end{cases}
        """
        self.t_old = self.t
        # gradient step at the extrapolated point: x = nabla f(y)
        self.f.gradient(self.y, out=self.x)
        # y <- y - step_size * x  (sapyb computes a*self + b*y)
        self.y.sapyb(1., self.x, -self.step_size, out=self.y)
        # proximal step: x = prox_{step_size * g}(y)
        self.g.proximal(self.y, self.step_size, out=self.x)
        # momentum parameter: t = (1 + sqrt(1 + 4*t_old^2)) / 2
        self.t = 0.5*(1 + numpy.sqrt(1 + 4*(self.t_old**2)))
        # extrapolation: y = x + ((t_old - 1)/t) * (x - x_old)
        self.x.subtract(self.x_old, out=self.y)
        self.y.sapyb((self.t_old-1)/self.t, self.x, 1.0, out=self.y)


if __name__ == "__main__":
    from cil.optimisation.functions import L2NormSquared
    from cil.optimisation.algorithms import GD
    from cil.framework import ImageGeometry

    f = L2NormSquared()
    g = L2NormSquared()
    ig = ImageGeometry(3, 4, 4)
    initial = ig.allocate()

    # Deliberately huge step sizes violate the provable convergence bounds.
    fista = FISTA(initial, f, g, step_size=1443432)
    print(fista.is_provably_convergent())   # expected: False
    gd = GD(initial=initial, objective=f, step_size=1023123)
    print(gd.is_provably_convergent())      # expected: False
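
    # By contrast, a step size of 1/f.L satisfies FISTA's provable bound
    # (a sketch; f.L is the Lipschitz constant of the gradient of f):
    fista_ok = FISTA(initial, f, g, step_size=1.0/f.L)
    print(fista_ok.is_provably_convergent())   # expected: True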