sktime.alignment.dtaidist 源代码

"""Interface module to dtaidist package.

Exposes basic interface, excluding multivariate case.
"""

__author__ = ["fkiraly"]

import numpy as np
import pandas as pd

from sktime.alignment.base import BaseAligner


[文档]class AlignerDtwDtai(BaseAligner): """Aligner interface for dtaidistance time warping alignment. Direct interface to ``dtaidistance.dtw_ndim.warping_path`` and ``dtaidistance.dtw_ndim.warping_path_fast``. This distance is covers multivariate data and arbitrary scalar distances as components. Parameters ---------- use_c: bool, optional, default=False Whether to use the faster C variant: ``True`` for C, ``False`` for Python. ``True`` requires a C compiled installation of ``dtaidistance``. * If False, uses ``dtaidistance.dtw_ndim.distance_matrix``. * If True, uses ``dtaidistance.dtw_ndim.distance_matrix_fast``. window : integer, optional, default=infinite Sakoe Chiba window width, from diagonal to boundary. Only allow for maximal shifts from the two diagonals smaller than this number. The maximally allowed warping, thus difference between indices i in series 1 and j in series 2, is thus |i-j| < 2*window + |len(s1) - len(s2)|. It includes the diagonal, meaning that Euclidean distance is obtained by setting ``window=1.`` If the two series are of equal length, this means that the band appearing on the cumulative cost matrix is of width 2*window-1. In other definitions of DTW this number may be referred to as the window instead. max_dist: float, optional, default=infinite Stop if the returned values will be larger than this value. max_step: float, optional, default=infinite Do not allow steps larger than this value. If the difference between two values in the two series is larger than this, thus if |s1[i]-s2[j]| > max_step, replace that value with infinity. max_length_diff: int, optional, default=infinite Return infinity if difference of length of two series is larger than this value. penalty: float, optional, default=0 Penalty to add if compression or expansion is applied psi: integer or 4-tuple of integers or none, optional, default=none Psi relaxation parameter (ignore start and end of matching). If psi is a single integer, it is used for both start and end relaxations for both series in a pair of series. If psi is a 4-tuple, it is used as the psi-relaxation for (begin series1, end series1, begin series2, end series2). Useful for cyclical series. inner_dist: str, or sktime BasePairwiseTransformer, default="squared euclidean" Distance between two points in the time series. * If str, must be one of 'squared euclidean' (default), 'euclidean'. * if estimator, must follow sktime BasePairwiseTransformer API. For a range of distances from scipy, see ``ScipyDist``. References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. """ _tags = { # packaging info # -------------- "authors": ["wannesm", "probberechts", "fkiraly"], # wannesm, probberechts credit for interfaced code "python_dependencies": ["dtaidistance"], # estimator type # -------------- "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": True, # does compute/return overall distance? "capability:distance-matrix": True, # does compute/return distance matrix? "capability:unequal_length": True, # can align sequences of unequal length? "alignment_type": "full", } def __init__( self, use_c=False, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, inner_dist="squared euclidean", ): self.window = window self.max_dist = max_dist self.max_step = max_step self.max_length_diff = max_length_diff self.penalty = penalty self.psi = psi self.use_c = use_c self.inner_dist = inner_dist super().__init__() self._dtai_params = self.get_params() def _fit(self, X, Z=None): """Fit alignment given series/sequences to align. core logic Writes to self: alignment : computed alignment from dtw package (nested struct) Parameters ---------- X: list of pd.DataFrame (sequence) of length n - panel of series to align Z: pd.DataFrame with n rows, optional; metadata, row correspond to indices of X """ # soft dependency import of dtw from dtaidistance.dtw import warping_path dtai_params = self._dtai_params # shorthands for 1st and 2nd series # dtaidistances requires 2D np.array (time, variable) s1 = X[0].values s2 = X[1].values path, dist = warping_path( s1, s2, include_distance=True, use_ndim=True, **dtai_params ) self._path = path self._dist = dist return self def _get_alignment(self): """Return alignment for sequences/series passed in fit (iloc indices). Behaviour: returns an alignment for sequences in X passed to fit model should be in fitted state, fitted model parameters read from self Returns ------- pd.DataFrame in alignment format, with columns 'ind'+str(i) for integer i cols contain iloc index of X[i] mapped to alignment coordinate for alignment """ # retrieve alignment path = self._path # convert to required data frame format and return aligndf = pd.DataFrame(path, columns=["ind0", "ind1"]) return aligndf def _get_distance(self): """Return overall distance of alignment. Behaviour: returns overall distance corresponding to alignment not all aligners will return or implement this (optional) Returns ------- distance: float - overall distance between all elements of X passed to fit """ return self._dist def _get_distance_matrix(self): """Return distance matrix of alignment. Behaviour: returns pairwise distance matrix of alignment distances not all aligners will return or implement this (optional) Returns ------- distmat: a (2 x 2) np.array of floats [i,j]-th entry is alignment distance between X[i] and X[j] passed to fit """ # since dtw does only pairwise alignments, this is always a 2x2 matrix distmat = np.zeros((2, 2), dtype="float") distmat[0, 1] = self._dist distmat[1, 0] = self._dist return distmat
[文档] @classmethod def get_test_params(cls, parameter_set="default"): """Test parameters for aligner.""" params0 = {} params1 = {"window": 1, "max_length_diff": 1} params2 = {"penalty": 0.1} return [params0, params1, params2]