Source code for cil.optimisation.algorithms.GD

#  Copyright 2019 United Kingdom Research and Innovation
#  Copyright 2019 The University of Manchester
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
# Authors:
# CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt

import numpy
from cil.optimisation.algorithms import Algorithm
import logging

log = logging.getLogger(__name__)


[docs]class GD(Algorithm): """Gradient Descent algorithm""" def __init__(self, initial=None, objective_function=None, step_size=None, alpha=1e6, beta=0.5, rtol=1e-5, atol=1e-8, **kwargs): '''GD algorithm creator initialisation can be done at creation time if all proper variables are passed or later with set_up :param initial: initial guess :param objective_function: objective function to be minimised :param step_size: step size for gradient descent iteration :param alpha: optional parameter to start the backtracking algorithm, default 1e6 :param beta: optional parameter defining the reduction of step, default 0.5. It's value can be in (0,1) :param rtol: optional parameter defining the relative tolerance comparing the current objective function to 0, default 1e-5, see numpy.isclose :param atol: optional parameter defining the absolute tolerance comparing the current objective function to 0, default 1e-8, see numpy.isclose ''' super().__init__(**kwargs) self.alpha = alpha self.beta = beta self.rtol = rtol self.atol = atol if initial is not None and objective_function is not None: self.set_up(initial=initial, objective_function=objective_function, step_size=step_size)
[docs] def set_up(self, initial, objective_function, step_size): '''initialisation of the algorithm :param initial: initial guess :param objective_function: objective function to be minimised :param step_size: step size''' log.info("%s setting up", self.__class__.__name__) self.x = initial.copy() self.objective_function = objective_function if step_size is None: self.k = 0 self.update_step_size = True self.x_armijo = initial.copy() # self.rate = self.armijo_rule() * 2 # print (self.rate) else: self.step_size = step_size self.update_step_size = False self.update_objective() self.x_update = initial.copy() self.configured = True log.info("%s configured", self.__class__.__name__)
[docs] def update(self): '''Single iteration''' self.objective_function.gradient(self.x, out=self.x_update) if self.update_step_size: # the next update and solution are calculated within the armijo_rule self.step_size = self.armijo_rule() else: self.x.sapyb(1.0, self.x_update, -self.step_size, out=self.x)
[docs] def update_objective(self): self.loss.append(self.objective_function(self.x))
[docs] def armijo_rule(self): '''Applies the Armijo rule to calculate the step size (step_size) https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080 The Armijo rule runs a while loop to find the appropriate step_size by starting from a very large number (alpha). The step_size is found by dividing alpha by 2 in an iterative way until a certain criterion is met. To avoid infinite loops, we add a maximum number of times the while loop is run. This rule would allow to reach a minimum step_size of 10^-alpha. if alpha = numpy.power(10,gamma) delta = 3 step_size = numpy.power(10, -delta) with armijo rule we can get to step_size from initial alpha by repeating the while loop k times where alpha / 2^k = step_size 10^gamma / 2^k = 10^-delta 2^k = 10^(gamma+delta) k = gamma+delta / log10(2) \\approx 3.3 * (gamma+delta) if we would take by default delta = gamma kmax = numpy.ceil ( 2 * gamma / numpy.log10(2) ) kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2)) ''' f_x = self.objective_function(self.x) if not hasattr(self, 'x_update'): self.x_update = self.objective_function.gradient(self.x) while self.k < self.kmax: # self.x - alpha * self.x_update self.x_update.multiply(self.alpha, out=self.x_armijo) self.x.subtract(self.x_armijo, out=self.x_armijo) f_x_a = self.objective_function(self.x_armijo) sqnorm = self.x_update.squared_norm() if f_x_a - f_x <= - ( self.alpha/2. ) * sqnorm: self.x.fill(self.x_armijo) break self.k += 1. # we don't want to update kmax self._alpha *= self.beta if self.k == self.kmax: raise ValueError('Could not find a proper step_size in {} loops. Consider increasing alpha.'.format(self.kmax)) return self.alpha
@property def alpha(self): return self._alpha @alpha.setter def alpha(self, alpha): self._alpha = alpha self.kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2)) self.k = 0
[docs] def should_stop(self): return super().should_stop() or \ numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol, atol=self.atol, equal_nan=False)