"""
fisher_vector.py - Implementation of the Fisher vector encoding algorithm
This module contains the source code for Fisher vector computation. The
computation is separated into two distinct steps, which are called separately
by the user, namely:
learn_gmm: Used to estimate the GMM for all vectors/descriptors computed for
all examples in the dataset (e.g. estimated using all the SIFT
vectors computed for all images in the dataset, or at least a subset
of this).
fisher_vector: Used to compute the Fisher vector representation for a
single set of descriptors/vectors (e.g. the SIFT
descriptors for a single image in your dataset, or
perhaps a test image).
Reference: Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
for Image Categorization, IEEE Conference on Computer Vision and
Pattern Recognition, 2007
Original author: Dan Oneata (author of the original implementation for the Fisher
vector computation using scikit-learn and NumPy. Subsequently ported to
scikit-image (here) by other authors.)
"""
import numpy as np
class FisherVectorException(Exception):
    """Base class for errors raised by the Fisher vector functionality."""
class DescriptorException(FisherVectorException):
    """Error raised when the supplied descriptors are invalid."""
# [docs]
def learn_gmm(descriptors, *, n_modes=32, gm_args=None):
    """Estimate a Gaussian mixture model (GMM) given a set of descriptors and
    number of modes (i.e. Gaussians).

    This function is essentially a wrapper around the scikit-learn
    implementation of GMM, namely the
    :class:`sklearn.mixture.GaussianMixture` class.

    Due to the nature of the Fisher vector, the only enforced parameter of the
    underlying scikit-learn class is the covariance_type, which must be 'diag'.

    There is no simple way to know what value to use for `n_modes` a-priori.
    Typically, the value is usually one of ``{16, 32, 64, 128}``. One may train
    a few GMMs and choose the one that maximises the log probability of the
    GMM, or choose `n_modes` such that the downstream classifier trained on
    the resultant Fisher vectors has maximal performance.

    Parameters
    ----------
    descriptors : np.ndarray (N, M) or list [(N1, M), (N2, M), ...]
        List of NumPy arrays, or a single NumPy array, of the descriptors
        used to estimate the GMM. The reason a list of NumPy arrays is
        permissible is because often when using a Fisher vector encoding,
        descriptors/vectors are computed separately for each sample/image in
        the dataset, such as SIFT vectors for each image. If a list is passed
        in, then each element must be a NumPy array in which the number of
        rows may differ (e.g. different number of SIFT vector for each image),
        but the number of columns for each must be the same (i.e. the
        dimensionality must be the same).
    n_modes : int
        The number of modes/Gaussians to estimate during the GMM estimate.
    gm_args : dict, optional
        Keyword arguments that can be passed into the underlying scikit-learn
        :class:`sklearn.mixture.GaussianMixture` class. If it contains
        ``'covariance_type'``, the value must be ``'diag'``. The dictionary
        is not modified.

    Returns
    -------
    gmm : :class:`sklearn.mixture.GaussianMixture`
        The estimated GMM object, which contains the necessary parameters
        needed to compute the Fisher vector.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If `descriptors` is neither a NumPy array nor a list, is empty, or is
        a list whose elements are not all rank-2 NumPy arrays with the same
        number of columns.
    FisherVectorException
        If `n_modes` is not a positive integer, or if `gm_args` requests a
        covariance type other than ``'diag'``.

    References
    ----------
    .. [1] https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import learn_gmm
    >>> rng = np.random.Generator(np.random.PCG64())
    >>> sift_for_images = [rng.standard_normal((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError as err:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        ) from err

    if not isinstance(descriptors, (list, np.ndarray)):
        raise DescriptorException(
            'Please ensure descriptors are either a NumPy array, '
            'or a list of NumPy arrays.'
        )

    # Reject empty input explicitly instead of failing with a bare IndexError.
    if isinstance(descriptors, np.ndarray):
        is_empty = descriptors.ndim == 0 or len(descriptors) == 0
    else:
        is_empty = len(descriptors) == 0
    if is_empty:
        raise DescriptorException('Please ensure descriptors are not empty.')

    if isinstance(descriptors, list):
        # Validate every element, not only the first one.
        if not all(isinstance(e, np.ndarray) for e in descriptors):
            raise DescriptorException(
                'Please ensure descriptors are a list of NumPy arrays.'
            )

        # Each element must really be 2-D; comparing ranks against the first
        # element would let a list of 1-D arrays through.
        if not all(e.ndim == 2 for e in descriptors):
            raise DescriptorException(
                'Please ensure all elements of your descriptor list '
                'are of rank 2.'
            )

        n_features = descriptors[0].shape[1]
        if not all(e.shape[1] == n_features for e in descriptors):
            raise DescriptorException(
                'Please ensure all descriptors are of the same dimensionality.'
            )

    if not isinstance(n_modes, (int, np.integer)) or n_modes <= 0:
        raise FisherVectorException('Please ensure n_modes is a positive integer.')

    # Work on a copy so the caller's dictionary is never mutated, and look the
    # key up with .get() so that gm_args without 'covariance_type' is valid.
    gm_kwargs = dict(gm_args) if gm_args else {}
    if gm_kwargs.get('covariance_type', 'diag') != 'diag':
        raise FisherVectorException('Covariance type must be "diag".')
    gm_kwargs['covariance_type'] = 'diag'

    if isinstance(descriptors, list):
        # Pool the per-sample descriptors into one (sum(Ni), M) matrix.
        descriptors = np.vstack(descriptors)

    gmm = GaussianMixture(n_components=n_modes, **gm_kwargs)
    gmm.fit(descriptors)

    return gmm
# [docs]
def fisher_vector(descriptors, gmm, *, improved=False, alpha=0.5):
    """Compute the Fisher vector given some descriptors/vectors,
    and an associated estimated GMM.

    Parameters
    ----------
    descriptors : np.ndarray, shape=(n_descriptors, descriptor_length)
        NumPy array of the descriptors for which the Fisher vector
        representation is to be computed.
    gmm : :class:`sklearn.mixture.GaussianMixture`
        An estimated GMM object, which contains the necessary parameters needed
        to compute the Fisher vector. Its covariance type must be 'diag'.
    improved : bool, default=False
        Flag denoting whether to compute improved Fisher vectors or not.
        Improved Fisher vectors are L2 and power normalized. Power
        normalization is simply f(z) = sign(z) pow(abs(z), alpha) for some
        0 <= alpha <= 1.
    alpha : float, default=0.5
        The parameter for the power normalization step. Ignored if
        improved=False. Any real number (Python or NumPy integer or float) is
        accepted.

    Returns
    -------
    fisher_vector : np.ndarray
        The computed Fisher vector, which is given by a concatenation of the
        gradients of a GMM with respect to its parameters (mixture weights,
        means, and covariance matrices). For D-dimensional input descriptors or
        vectors, and a K-mode GMM, the Fisher vector dimensionality will be
        2KD + K. Thus, its dimensionality is invariant to the number of
        descriptors/vectors.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If `descriptors` is not a NumPy array.
    FisherVectorException
        If `gmm` is not a :class:`sklearn.mixture.GaussianMixture`, if its
        covariance type is not 'diag', or if `improved` is set and `alpha` is
        not a real number.

    References
    ----------
    .. [1] Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007
    .. [2] Perronnin, F. and Sanchez, J. and Mensink T. Improving the Fisher
           Kernel for Large-Scale Image Classification, ECCV, 2010

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import fisher_vector, learn_gmm
    >>> sift_for_images = [np.random.random((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    >>> test_image_descriptors = np.random.random((25, 128))
    >>> # Compute the Fisher vector
    >>> fv = fisher_vector(test_image_descriptors, gmm)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError as err:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        ) from err

    if not isinstance(descriptors, np.ndarray):
        raise DescriptorException('Please ensure descriptors is a NumPy array.')

    if not isinstance(gmm, GaussianMixture):
        raise FisherVectorException(
            'Please ensure gmm is a sklearn.mixture.GaussianMixture object.'
        )

    # The gradient formulas below treat gmm.covariances_ as a (K, D) array of
    # per-dimension variances, which is only what scikit-learn stores for
    # 'diag'; other covariance types would broadcast incorrectly.
    if gmm.covariance_type != 'diag':
        raise FisherVectorException('Covariance type must be "diag".')

    if improved:
        # Accept any real scalar (so the documented endpoints alpha=0 and
        # alpha=1 work as plain ints), but keep rejecting booleans.
        alpha_is_real = isinstance(
            alpha, (int, float, np.integer, np.floating)
        ) and not isinstance(alpha, bool)
        if not alpha_is_real:
            raise FisherVectorException(
                'Please ensure that the alpha parameter is a float.'
            )

    num_descriptors = len(descriptors)

    mixture_weights = gmm.weights_
    means = gmm.means_
    covariances = gmm.covariances_

    posterior_probabilities = gmm.predict_proba(descriptors)

    # Statistics necessary to compute GMM gradients wrt its parameters:
    # per-mode averages of the posteriors and of the posterior-weighted
    # first and second moments of the descriptors.
    pp_sum = posterior_probabilities.mean(axis=0, keepdims=True).T
    pp_x = posterior_probabilities.T.dot(descriptors) / num_descriptors
    pp_x_2 = posterior_probabilities.T.dot(np.power(descriptors, 2)) / num_descriptors

    # Compute GMM gradients wrt its parameters
    d_pi = pp_sum.squeeze() - mixture_weights

    d_mu = pp_x - pp_sum * means

    d_sigma_t1 = pp_sum * np.power(means, 2)
    d_sigma_t2 = pp_sum * covariances
    d_sigma_t3 = 2 * pp_x * means
    d_sigma = -pp_x_2 - d_sigma_t1 + d_sigma_t2 + d_sigma_t3

    # Apply analytical diagonal normalization
    sqrt_mixture_weights = np.sqrt(mixture_weights)
    d_pi /= sqrt_mixture_weights
    d_mu /= sqrt_mixture_weights[:, np.newaxis] * np.sqrt(covariances)
    d_sigma /= np.sqrt(2) * sqrt_mixture_weights[:, np.newaxis] * covariances

    # Concatenate GMM gradients to form Fisher vector representation.
    # The local is named `fv` so it does not shadow this function's name.
    fv = np.hstack((d_pi, d_mu.ravel(), d_sigma.ravel()))

    if improved:
        # Power normalization followed by L2 normalization.
        fv = np.sign(fv) * np.power(np.abs(fv), alpha)
        fv = fv / np.linalg.norm(fv)

    return fv