skimage.feature._fisher_vector 源代码

"""
fisher_vector.py - Implementation of the Fisher vector encoding algorithm

This module contains the source code for Fisher vector computation. The
computation is separated into two distinct steps, which are called separately
by the user, namely:

learn_gmm: Used to estimate the GMM for all vectors/descriptors computed for
           all examples in the dataset (e.g. estimated using all the SIFT
           vectors computed for all images in the dataset, or at least a subset
           of this).

fisher_vector: Used to compute the Fisher vector representation for a
               single set of descriptors/vector (e.g. the SIFT
               descriptors for a single image in your dataset, or
               perhaps a test image).

Reference: Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007

Origin Author: Dan Oneata (Author of the original implementation for the Fisher
vector computation using scikit-learn and NumPy. Subsequently ported to
scikit-image (here) by other authors.)
"""

import numpy as np


class FisherVectorException(Exception):
    pass


class DescriptorException(FisherVectorException):
    pass



[文档]
def learn_gmm(descriptors, *, n_modes=32, gm_args=None):
    """Estimate a Gaussian mixture model (GMM) given a set of descriptors and
    number of modes (i.e. Gaussians). This function is essentially a wrapper
    around the scikit-learn implementation of GMM, namely the
    :class:`sklearn.mixture.GaussianMixture` class.

    Due to the nature of the Fisher vector, the only enforced parameter of the
    underlying scikit-learn class is the covariance_type, which must be 'diag'.

    There is no simple way to know what value to use for `n_modes` a-priori.
    Typically, the value is usually one of ``{16, 32, 64, 128}``. One may train
    a few GMMs and choose the one that maximises the log probability of the
    GMM, or choose `n_modes` such that the downstream classifier trained on
    the resultant Fisher vectors has maximal performance.

    Parameters
    ----------
    descriptors : np.ndarray (N, M) or list [(N1, M), (N2, M), ...]
        List of NumPy arrays, or a single NumPy array, of the descriptors
        used to estimate the GMM. The reason a list of NumPy arrays is
        permissible is because often when using a Fisher vector encoding,
        descriptors/vectors are computed separately for each sample/image in
        the dataset, such as SIFT vectors for each image. If a list if passed
        in, then each element must be a NumPy array in which the number of
        rows may differ (e.g. different number of SIFT vector for each image),
        but the number of columns for each must be the same (i.e. the
        dimensionality must be the same).
    n_modes : int
        The number of modes/Gaussians to estimate during the GMM estimate.
    gm_args : dict
        Keyword arguments that can be passed into the underlying scikit-learn
        :class:`sklearn.mixture.GaussianMixture` class.

    Returns
    -------
    gmm : :class:`sklearn.mixture.GaussianMixture`
        The estimated GMM object, which contains the necessary parameters
        needed to compute the Fisher vector.

    References
    ----------
    .. [1] https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import fisher_vector
    >>> rng = np.random.Generator(np.random.PCG64())
    >>> sift_for_images = [rng.standard_normal((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    """

    try:
        from sklearn.mixture import GaussianMixture
    except ImportError:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        )

    if not isinstance(descriptors, (list, np.ndarray)):
        raise DescriptorException(
            'Please ensure descriptors are either a NumPy array, '
            'or a list of NumPy arrays.'
        )

    d_mat_1 = descriptors[0]
    if isinstance(descriptors, list) and not isinstance(d_mat_1, np.ndarray):
        raise DescriptorException(
            'Please ensure descriptors are a list of NumPy arrays.'
        )

    if isinstance(descriptors, list):
        expected_shape = descriptors[0].shape
        ranks = [len(e.shape) == len(expected_shape) for e in descriptors]
        if not all(ranks):
            raise DescriptorException(
                'Please ensure all elements of your descriptor list ' 'are of rank 2.'
            )
        dims = [e.shape[1] == descriptors[0].shape[1] for e in descriptors]
        if not all(dims):
            raise DescriptorException(
                'Please ensure all descriptors are of the same dimensionality.'
            )

    if not isinstance(n_modes, int) or n_modes <= 0:
        raise FisherVectorException('Please ensure n_modes is a positive integer.')

    if gm_args:
        has_cov_type = 'covariance_type' in gm_args
        cov_type_not_diag = gm_args['covariance_type'] != 'diag'
        if has_cov_type and cov_type_not_diag:
            raise FisherVectorException('Covariance type must be "diag".')

    if isinstance(descriptors, list):
        descriptors = np.vstack(descriptors)

    if gm_args:
        has_cov_type = 'covariance_type' in gm_args
        if has_cov_type:
            gmm = GaussianMixture(n_components=n_modes, **gm_args)
        else:
            gmm = GaussianMixture(
                n_components=n_modes, covariance_type='diag', **gm_args
            )
    else:
        gmm = GaussianMixture(n_components=n_modes, covariance_type='diag')

    gmm.fit(descriptors)

    return gmm




[文档]
def fisher_vector(descriptors, gmm, *, improved=False, alpha=0.5):
    """Compute the Fisher vector given some descriptors/vectors,
    and an associated estimated GMM.

    Parameters
    ----------
    descriptors : np.ndarray, shape=(n_descriptors, descriptor_length)
        NumPy array of the descriptors for which the Fisher vector
        representation is to be computed.
    gmm : :class:`sklearn.mixture.GaussianMixture`
        An estimated GMM object, which contains the necessary parameters needed
        to compute the Fisher vector.
    improved : bool, default=False
        Flag denoting whether to compute improved Fisher vectors or not.
        Improved Fisher vectors are L2 and power normalized. Power
        normalization is simply f(z) = sign(z) pow(abs(z), alpha) for some
        0 <= alpha <= 1.
    alpha : float, default=0.5
        The parameter for the power normalization step. Ignored if
        improved=False.

    Returns
    -------
    fisher_vector : np.ndarray
        The computation Fisher vector, which is given by a concatenation of the
        gradients of a GMM with respect to its parameters (mixture weights,
        means, and covariance matrices). For D-dimensional input descriptors or
        vectors, and a K-mode GMM, the Fisher vector dimensionality will be
        2KD + K. Thus, its dimensionality is invariant to the number of
        descriptors/vectors.

    References
    ----------
    .. [1] Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007
    .. [2] Perronnin, F. and Sanchez, J. and Mensink T. Improving the Fisher
           Kernel for Large-Scale Image Classification, ECCV, 2010

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import fisher_vector, learn_gmm
    >>> sift_for_images = [np.random.random((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    >>> test_image_descriptors = np.random.random((25, 128))
    >>> # Compute the Fisher vector
    >>> fv = fisher_vector(test_image_descriptors, gmm)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        )

    if not isinstance(descriptors, np.ndarray):
        raise DescriptorException('Please ensure descriptors is a NumPy array.')

    if not isinstance(gmm, GaussianMixture):
        raise FisherVectorException(
            'Please ensure gmm is a sklearn.mixture.GaussianMixture object.'
        )

    if improved and not isinstance(alpha, float):
        raise FisherVectorException(
            'Please ensure that the alpha parameter is a float.'
        )

    num_descriptors = len(descriptors)

    mixture_weights = gmm.weights_
    means = gmm.means_
    covariances = gmm.covariances_

    posterior_probabilities = gmm.predict_proba(descriptors)

    # Statistics necessary to compute GMM gradients wrt its parameters
    pp_sum = posterior_probabilities.mean(axis=0, keepdims=True).T
    pp_x = posterior_probabilities.T.dot(descriptors) / num_descriptors
    pp_x_2 = posterior_probabilities.T.dot(np.power(descriptors, 2)) / num_descriptors

    # Compute GMM gradients wrt its parameters
    d_pi = pp_sum.squeeze() - mixture_weights

    d_mu = pp_x - pp_sum * means

    d_sigma_t1 = pp_sum * np.power(means, 2)
    d_sigma_t2 = pp_sum * covariances
    d_sigma_t3 = 2 * pp_x * means
    d_sigma = -pp_x_2 - d_sigma_t1 + d_sigma_t2 + d_sigma_t3

    # Apply analytical diagonal normalization
    sqrt_mixture_weights = np.sqrt(mixture_weights)
    d_pi /= sqrt_mixture_weights
    d_mu /= sqrt_mixture_weights[:, np.newaxis] * np.sqrt(covariances)
    d_sigma /= np.sqrt(2) * sqrt_mixture_weights[:, np.newaxis] * covariances

    # Concatenate GMM gradients to form Fisher vector representation
    fisher_vector = np.hstack((d_pi, d_mu.ravel(), d_sigma.ravel()))

    if improved:
        fisher_vector = np.sign(fisher_vector) * np.power(np.abs(fisher_vector), alpha)
        fisher_vector = fisher_vector / np.linalg.norm(fisher_vector)

    return fisher_vector