Source code for cil.optimisation.algorithms.FISTA

#  Copyright 2019 United Kingdom Research and Innovation
#  Copyright 2019 The University of Manchester
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
# Authors:
# CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt

from cil.optimisation.algorithms import Algorithm
from cil.optimisation.functions import ZeroFunction
from cil.optimisation.utilities import ConstantStepSize, StepSizeRule
import numpy
import logging
from numbers import Real, Number
import warnings

log = logging.getLogger(__name__)


[docs] class ISTA(Algorithm): r"""Iterative Shrinkage-Thresholding Algorithm, see :cite:`BeckTeboulle_b`, :cite:`BeckTeboulle_a`. Iterative Shrinkage-Thresholding Algorithm (ISTA) .. math:: x^{k+1} = \mathrm{prox}_{\alpha^{k} g}(x^{k} - \alpha^{k}\nabla f(x^{k})) is used to solve .. math:: \min_{x} f(x) + g(x) where :math:`f` is differentiable, :math:`g` has a *simple* proximal operator and :math:`\alpha^{k}` is the :code:`step_size` per iteration. Note ---- For a constant step size, i.e., :math:`a^{k}=a` for :math:`k\geq1`, convergence of ISTA is guaranteed if .. math:: \alpha\in(0, \frac{2}{L}), where :math:`L` is the Lipschitz constant of :math:`f`, see :cite:`CombettesValerie`. Parameters ---------- initial : DataContainer Initial guess of ISTA. f : Function Differentiable function. If `None` is passed, the algorithm will use the ZeroFunction. g : Function or `None` Convex function with *simple* proximal operator. If `None` is passed, the algorithm will use the ZeroFunction. step_size : positive :obj:`float` or child class of :meth:`cil.optimisation.utilities.StepSizeRule`', default = None Step size for the gradient step of ISTA. If a float is passed, this is used as a constant step size. If a child class of :meth:`cil.optimisation.utilities.StepSizeRule`' is passed then it's method `get_step_size` is called for each update. The default :code:`step_size` is a constant :math:`\frac{0.99*2}{L}` or 1 if `f=None`. preconditioner: class with a `apply` method or a function that takes an initialised CIL function as an argument and modifies a provided `gradient`. This could be a custom `preconditioner` or one provided in :meth:`~cil.optimisation.utilities.preconditoner`. If None is passed then `self.gradient_update` will remain unmodified. kwargs: Keyword arguments Arguments from the base class :class:`.Algorithm`. Note ----- If the function `g` is set to `None` or to the `ZeroFunction` then the ISTA algorithm is equivalent to Gradient Descent. If the function `f` is set to `None` or to the `ZeroFunction` then the ISTA algorithm is equivalent to a Proximal Point Algorithm. Examples -------- .. math:: \underset{x}{\mathrm{argmin}}\|A x - b\|^{2}_{2} >>> f = LeastSquares(A, b=b, c=0.5) >>> g = ZeroFunction() >>> ig = Aop.domain >>> ista = ISTA(initial = ig.allocate(), f = f, g = g, max_iteration=10) >>> ista.run() See also -------- :class:`.FISTA` :class:`.GD` """ def _provable_convergence_condition(self): if self.preconditioner is not None: raise NotImplementedError( "Can't check convergence criterion if a preconditioner is used ") if isinstance(self.step_size_rule, ConstantStepSize): return self.step_size_rule.step_size <= 0.99*2.0/self.f.L else: raise TypeError( "Can't check convergence criterion for non-constant step size") @property def step_size(self): if isinstance(self.step_size_rule, ConstantStepSize): return self.step_size_rule.step_size else: warnings.warn( "Note the step-size is set by a step-size rule and could change wit each iteration") return self.step_size_rule.get_step_size() # Set default step size def _calculate_default_step_size(self): """ Calculates the default step size if a step size rule or a step size is not provided. """ return 0.99*2.0/self.f.L
[docs] def __init__(self, initial, f, g, step_size=None, preconditioner=None, **kwargs): super(ISTA, self).__init__(**kwargs) self._step_size = step_size self.set_up(initial=initial, f=f, g=g, step_size=step_size, preconditioner=preconditioner, **kwargs)
[docs] def set_up(self, initial, f, g, step_size, preconditioner, **kwargs): """Set up of the algorithm""" log.info("%s setting up", self.__class__.__name__) # set up ISTA self.initial = initial self.x_old = initial.copy() self.x = initial.copy() self.gradient_update = initial.copy() if f is None: f = ZeroFunction() self.f = f if g is None: g = ZeroFunction() self.g = g if isinstance(f, ZeroFunction) and isinstance(g, ZeroFunction): raise ValueError( 'You set both f and g to be the ZeroFunction and thus the iterative method will not update and will remain fixed at the initial value.') # set step_size if step_size is None: self.step_size_rule = ConstantStepSize( self._calculate_default_step_size()) elif isinstance(step_size, Real): self.step_size_rule = ConstantStepSize(step_size) elif isinstance(step_size, StepSizeRule): self.step_size_rule = step_size self.preconditioner = preconditioner self.configured = True log.info("%s configured", self.__class__.__name__)
[docs] def update(self): r"""Performs a single iteration of ISTA .. math:: x_{k+1} = \mathrm{prox}_{\alpha g}(x_{k} - \alpha\nabla f(x_{k})) """ # gradient step self.f.gradient(self.x_old, out=self.gradient_update) if self.preconditioner is not None: self.preconditioner.apply( self, self.gradient_update, out=self.gradient_update) try: step_size = self.step_size_rule.get_step_size(self) except NameError: raise NameError(msg='`step_size` must be `None`, a real float or a child class of :meth:`cil.optimisation.utilities.StepSizeRule`') self.x_old.sapyb(1., self.gradient_update, -step_size, out=self.x_old) # proximal step self.g.proximal(self.x_old, step_size, out=self.x)
def _update_previous_solution(self): """ Swaps the references to current and previous solution based on the :func:`~Algorithm.update_previous_solution` of the base class :class:`Algorithm`. """ tmp = self.x_old self.x_old = self.x self.x = tmp
[docs] def get_output(self): " Returns the current solution. " return self.x_old
[docs] def update_objective(self): """ Updates the objective .. math:: f(x) + g(x) """ self.loss.append(self.calculate_objective_function_at_point(self.x_old))
[docs] def calculate_objective_function_at_point(self, x): """ Calculates the objective at a given point x .. math:: f(x) + g(x) Parameters ---------- x : DataContainer """ return self.f(x) + self.g(x)
[docs] class FISTA(ISTA): r"""Fast Iterative Shrinkage-Thresholding Algorithm, see :cite:`BeckTeboulle_b`, :cite:`BeckTeboulle_a`. Fast Iterative Shrinkage-Thresholding Algorithm (FISTA) .. math:: \begin{cases} y_{k} = x_{k} - \alpha\nabla f(x_{k}) \\ x_{k+1} = \mathrm{prox}_{\alpha g}(y_{k})\\ t_{k+1} = \frac{1+\sqrt{1+ 4t_{k}^{2}}}{2}\\ y_{k+1} = x_{k} + \frac{t_{k}-1}{t_{k-1}}(x_{k} - x_{k-1}) \end{cases} is used to solve .. math:: \min_{x} f(x) + g(x) where :math:`f` is differentiable, :math:`g` has a *simple* proximal operator and :math:`\alpha^{k}` is the :code:`step_size` per iteration. Parameters ---------- initial : DataContainer Starting point of the algorithm f : Function Differentiable function. If `None` is passed, the algorithm will use the ZeroFunction. g : Function or `None` Convex function with *simple* proximal operator. If `None` is passed, the algorithm will use the ZeroFunction. step_size : positive :obj:`float` or child class of :meth:`cil.optimisation.utilities.StepSizeRule`', default = None Step size for the gradient step of ISTA. If a float is passed, this is used as a constant step size. If a child class of :meth:`cil.optimisation.utilities.StepSizeRule`' is passed then it's method `get_step_size` is called for each update. The default :code:`step_size` is a constant :math:`\frac{1}{L}` or 1 if `f=None`. preconditioner: class with a `apply` method or a function that takes an initialised CIL function as an argument and modifies a provided `gradient`. This could be a custom `preconditioner` or one provided in :meth:`~cil.optimisation.utilities.preconditoner`. If None is passed then `self.gradient_update` will remain unmodified. kwargs: Keyword arguments Arguments from the base class :class:`.Algorithm`. Note ----- If the function `g` is set to `None` or to the `ZeroFunction` then the FISTA algorithm is equivalent to Accelerated Gradient Descent by Nesterov (:cite:`nesterov2003introductory` algorithm 2.2.9). If the function `f` is set to `None` or to the `ZeroFunction` then the FISTA algorithm is equivalent to Guler's First Accelerated Proximal Point Method (:cite:`guler1992new` sec 2). Examples -------- .. math:: \underset{x}{\mathrm{argmin}}\|A x - b\|^{2}_{2} >>> f = LeastSquares(A, b=b, c=0.5) >>> g = ZeroFunction() >>> ig = Aop.domain >>> fista = FISTA(initial = ig.allocate(), f = f, g = g, max_iteration=10) >>> fista.run() See also -------- :class:`.FISTA` :class:`.GD` """ def _calculate_default_step_size(self): """Calculate the default step size if a step size rule or step size is not provided """ return 1./self.f.L def _provable_convergence_condition(self): if self.preconditioner is not None: raise NotImplementedError( "Can't check convergence criterion if a preconditioner is used ") if isinstance(self.step_size_rule, ConstantStepSize): return self.step_size_rule.step_size <= 1./self.f.L else: raise TypeError( "Can't check convergence criterion for non-constant step size")
[docs] def __init__(self, initial, f, g, step_size=None, preconditioner=None, **kwargs): self.y = initial.copy() self.t = 1 super(FISTA, self).__init__(initial=initial, f=f, g=g, step_size=step_size, preconditioner=preconditioner, **kwargs)
[docs] def update(self): r"""Performs a single iteration of FISTA .. math:: \begin{cases} x_{k+1} = \mathrm{prox}_{\alpha g}(y_{k} - \alpha\nabla f(y_{k}))\\ t_{k+1} = \frac{1+\sqrt{1+ 4t_{k}^{2}}}{2}\\ y_{k+1} = x_{k} + \frac{t_{k}-1}{t_{k-1}}(x_{k} - x_{k-1}) \end{cases} """ self.t_old = self.t self.f.gradient(self.y, out=self.gradient_update) if self.preconditioner is not None: self.preconditioner.apply( self, self.gradient_update, out=self.gradient_update) step_size = self.step_size_rule.get_step_size(self) self.y.sapyb(1., self.gradient_update, -step_size, out=self.y) self.g.proximal(self.y, step_size, out=self.x) self.t = 0.5*(1 + numpy.sqrt(1 + 4*(self.t_old**2))) self.x.subtract(self.x_old, out=self.y) self.y.sapyb(((self.t_old-1)/self.t), self.x, 1.0, out=self.y)
if __name__ == "__main__": from cil.optimisation.functions import L2NormSquared from cil.optimisation.algorithms import GD from cil.framework import ImageGeometry f = L2NormSquared() g = L2NormSquared() ig = ImageGeometry(3, 4, 4) initial = ig.allocate() fista = FISTA(initial, f, g, step_size=1443432) print(fista.is_provably_convergent()) gd = GD(initial=initial, objective=f, step_size=1023123) print(gd.is_provably_convergent())