# Source code for cil.framework.partitioner

#  Copyright 2018 United Kingdom Research and Innovation
#  Copyright 2018 The University of Manchester
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
# Authors:
# CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt
# Joshua DM Hellier (University of Manchester) [refactorer]
import math

import numpy

from .block import BlockGeometry



# [docs]
class Partitioner(object):
    '''Interface for AcquisitionData to be able to partition itself in a number of batches.

    This class, by multiple inheritance with AcquisitionData, allows the user to partition the data,
    by using the method ``partition``.
    The partitioning will generate a ``BlockDataContainer`` with appropriate ``AcquisitionData``.

    The methods of this class read ``self.geometry``, ``self.dimension_labels`` and ``self.array``,
    which are expected to be provided by the class it is mixed into.

    '''
    # modes of partitioning
    SEQUENTIAL = 'sequential'
    STAGGERED = 'staggered'
    RANDOM_PERMUTATION = 'random_permutation'

    def _partition_indices(self, num_batches, indices, stagger=False):
        """Partition a list of indices into num_batches of indices.

        Every index is assigned to exactly one batch. When the number of indices ``N`` is
        not a multiple of ``num_batches`` (``M``), the first ``N % M`` batches hold
        ``N // M + 1`` indices and the remaining batches hold ``N // M`` indices.

        Parameters
        ----------
        num_batches : int
            The number of batches to partition the indices into.
        indices : list of int, int
            The indices to partition. If passed a list, this list will be partitioned in ``num_batches``
            partitions. If passed an int the indices will be generated automatically using ``range(indices)``.
        stagger : bool, default False
            If True, the indices will be staggered across the batches.

        Returns
        --------
        list of list of int
            A list of batches of indices.

        Raises
        ------
        ValueError
            If ``num_batches`` is smaller than 1 or larger than the number of indices.
        """
        # An integer (including numpy integer scalars) is a count: expand it to 0..count-1.
        if isinstance(indices, (int, numpy.integer)):
            indices = list(range(indices))

        num_indices = len(indices)

        # sanity checks
        if num_batches < 1:
            raise ValueError('The number of batches must be a positive integer.')
        if num_indices < num_batches:
            raise ValueError(
                'The number of batches must be less than or equal to the number of indices.'
            )

        if stagger:
            # batch i takes every num_batches-th index starting from position i
            return [indices[i::num_batches] for i in range(num_batches)]

        # Sequential split into contiguous, non-overlapping slices.
        # A running start offset is used so that each batch begins where the
        # previous one ended, regardless of how many batches get the extra index.
        base_size, num_larger = divmod(num_indices, num_batches)
        batches = []
        start = 0
        for batch_number in range(num_batches):
            size = base_size + 1 if batch_number < num_larger else base_size
            batches.append(indices[start:start + size])
            start += size

        return batches

    def _construct_BlockGeometry_from_indices(self, indices):
        '''Build a BlockGeometry with one geometry per batch of angle indices.

        Each geometry is a copy of ``self.geometry`` whose angle data is replaced by the
        entries of ``self.geometry.angles`` selected by the corresponding batch.

        Parameters
        ----------
          indices : list of lists of indices
              One list of angle indices per batch.

        Returns
        -------
            BlockGeometry
        '''
        ags = []
        for batch in indices:
            ag = self.geometry.copy()
            ag.config.angles.angle_data = numpy.take(self.geometry.angles, batch, axis=0)
            ags.append(ag)
        return BlockGeometry(*ags)

    def partition(self, num_batches, mode, seed=None):
        '''Partition the data into ``num_batches`` batches using the specified ``mode``.

        The modes are

        1. ``sequential`` - The data will be partitioned into ``num_batches`` batches of sequential indices.

        2. ``staggered`` - The data will be partitioned into ``num_batches`` batches of sequential indices, with stride equal to ``num_batches``.

        3. ``random_permutation`` - The data will be partitioned into ``num_batches`` batches of random indices.

        Parameters
        ----------
        num_batches : int
            The number of batches to partition the data into.
        mode : str
            The mode to use for partitioning. Must be one of ``sequential``, ``staggered`` or ``random_permutation``.
        seed : int, optional
            The seed to use for the random permutation. If not specified, the random number
            generator will not be seeded.


        Returns
        -------
        BlockDataContainer
            Block of `AcquisitionData` objects containing the data requested in each batch

        Raises
        ------
        NotImplementedError
            If the data has no 'angle' dimension.
        ValueError
            If ``mode`` is not one of the supported modes.

        Note
        ----
        This only works on datasets with an 'angle' dimension, and is not currently implemented for Cone3D_Flex geometry.

        Example
        -------

        Partitioning a list of ints [0, 1, 2, 3, 4, 5, 6, 7, 8] into 4 batches will return:

        1. [[0, 1, 2], [3, 4], [5, 6], [7, 8]] with ``sequential``
        2. [[0, 4, 8], [1, 5], [2, 6], [3, 7]] with ``staggered``
        3. [[8, 2, 6], [7, 1], [0, 4], [3, 5]] with ``random_permutation`` and seed 1

        '''
        if 'angle' not in self.dimension_labels:
            raise NotImplementedError(f"Currently, Partitioner only partitions on the `angle` dimension \
                and therefore can only be used on datasets with an 'angle' dimension. \
                Dimensions provided: {self.dimension_labels}")

        if mode == Partitioner.SEQUENTIAL:
            return self._partition_deterministic(num_batches, stagger=False)
        if mode == Partitioner.STAGGERED:
            return self._partition_deterministic(num_batches, stagger=True)
        if mode == Partitioner.RANDOM_PERMUTATION:
            return self._partition_random_permutation(num_batches, seed=seed)

        raise ValueError('Unknown partition mode {}'.format(mode))

    def _partition_deterministic(self, num_batches, stagger=False, indices=None):
        '''Partition the data into ``num_batches`` batches.

        Parameters
        ----------
        num_batches : int
            The number of batches to partition the data into.
        stagger : bool, optional
            If ``True``, the batches will be staggered. Default is ``False``.
        indices : list of int, optional
            The indices to partition. If not specified, the indices will be generated from the number of projections.

        Returns
        -------
        The container allocated by the BlockGeometry built for the batches, with batch ``i``
        filled with the data selected along the 'angle' axis by the ``i``-th batch of indices.
        '''
        if indices is None:
            indices = self.geometry.num_projections
        partition_indices = self._partition_indices(num_batches, indices, stagger)
        blk_geo = self._construct_BlockGeometry_from_indices(partition_indices)

        # copy data
        out = blk_geo.allocate(None)
        axis = self.dimension_labels.index('angle')

        for i, batch in enumerate(partition_indices):
            # squeeze drops length-1 axes from the selected data before filling
            selected = numpy.take(self.array, batch, axis=axis)
            out[i].fill(numpy.squeeze(selected))

        return out

    def _partition_random_permutation(self, num_batches, seed=None):
        '''Partition the data into ``num_batches`` batches using a random permutation.

        The projection indices are shuffled and the shuffled list is then split
        sequentially into ``num_batches`` batches.

        Parameters
        ----------
        num_batches : int
            The number of batches to partition the data into.
        seed : int, optional
            The seed to use for the random permutation. If not specified, the random number generator
            will not be seeded.

        Note
        ----
        Passing a ``seed`` re-seeds numpy's global random number generator
        (``numpy.random.seed``), which affects any other code drawing from it.

        '''
        if seed is not None:
            numpy.random.seed(seed)

        indices = numpy.arange(self.geometry.num_projections)
        numpy.random.shuffle(indices)

        indices = list(indices)

        return self._partition_deterministic(num_batches, stagger=False, indices=indices)
# Table of Contents
#
# Source code for cil.framework.partitioner