"""Martrix Profile classifier.
Pipeline classifier using the Matrix Profile transformer and an estimator.
"""
# Author credit for this module.
__author__ = ["MatthewMiddlehurst"]
# Public names exported by a star-import of this module.
__all__ = ["MatrixProfileClassifier"]
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sktime.base._base import _clone_estimator
from sktime.classification.base import BaseClassifier
from sktime.transformations.panel.matrix_profile import MatrixProfile
class MatrixProfileClassifier(BaseClassifier):
    """Matrix Profile (MP) classifier.

    This classifier simply transforms the input data using the MatrixProfile [1]
    transformer and builds a provided estimator using the transformed data.

    Parameters
    ----------
    subsequence_length : int, default=10
        The subsequence length for the MatrixProfile transformer.
    estimator : sklearn classifier, default=None
        An sklearn estimator to be built using the transformed data. Defaults to a
        1-nearest neighbour classifier.
    n_jobs : int, default=1
        The number of jobs to run in parallel for both ``fit`` and ``predict``.
        ``-1`` means using all processors. Currently available for the classifier
        portion only.
    random_state : int or None, default=None
        Seed for random, integer.

    Attributes
    ----------
    n_classes_ : int
        Number of classes. Extracted from the data.
    classes_ : ndarray of shape (n_classes_)
        Holds the label for each class.

    See Also
    --------
    MatrixProfile

    References
    ----------
    .. [1] Yeh, Chin-Chia Michael, et al. "Time series joins, motifs, discords and
        shapelets: a unifying view that exploits the matrix profile." Data Mining and
        Knowledge Discovery 32.1 (2018): 83-123.
        https://link.springer.com/article/10.1007/s10618-017-0519-9

    Examples
    --------
    >>> from sktime.classification.feature_based import MatrixProfileClassifier
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True) # doctest: +SKIP
    >>> clf = MatrixProfileClassifier() # doctest: +SKIP
    >>> clf.fit(X_train, y_train) # doctest: +SKIP
    MatrixProfileClassifier(...) # doctest: +SKIP
    >>> y_pred = clf.predict(X_test) # doctest: +SKIP
    """

    _tags = {
        # packaging info
        # --------------
        "authors": ["MatthewMiddlehurst"],
        # sklearn 1.3.0 has a bug which causes predict_proba to fail
        # see scikit-learn#26768 and sktime#4778
        "python_dependencies": "scikit-learn!=1.3.0",
        # estimator type
        # --------------
        "capability:multithreading": True,
        "capability:predict_proba": True,
        "classifier_type": "distance",
    }

    def __init__(
        self,
        subsequence_length=10,
        estimator=None,
        n_jobs=1,
        random_state=None,
    ):
        # Constructor arguments are stored unchanged under their own names.
        self.subsequence_length = subsequence_length
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.random_state = random_state

        # Fitted pipeline components; populated in ``_fit``.
        self._transformer = None
        self._estimator = None

        super().__init__()

    def _fit(self, X, y):
        """Fit a pipeline on cases (X,y), where y is the target variable.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        self._transformer = MatrixProfile(m=self.subsequence_length)

        # Work on a clone so the user-supplied estimator is never mutated;
        # fall back to 1-NN when no estimator was given.
        base_estimator = (
            KNeighborsClassifier(n_neighbors=1)
            if self.estimator is None
            else self.estimator
        )
        self._estimator = _clone_estimator(base_estimator, self.random_state)

        # scikit-learn estimators expose ``n_jobs`` with a default value of
        # ``None``, so test for the attribute's presence rather than for a
        # non-None value; otherwise the requested parallelism is silently
        # dropped for estimators left at their default.
        # ``_threads_to_use`` is not defined in this class; presumably it is
        # resolved from ``n_jobs`` by the base class - confirm there.
        if hasattr(self._estimator, "n_jobs"):
            self._estimator.n_jobs = self._threads_to_use

        X_t = self._transformer.fit_transform(X, y)
        self._estimator.fit(X_t, y)
        return self

    def _predict(self, X) -> np.ndarray:
        """Predict class values of n instances in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        return self._estimator.predict(self._transformer.transform(X))

    def _predict_proba(self, X) -> np.ndarray:
        """Predict class probabilities for n instances in X.

        If the wrapped estimator has no callable ``predict_proba``, the hard
        predictions are converted to one-hot rows instead.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        # The transform is needed on both paths, so compute it once up front.
        X_t = self._transformer.transform(X)

        proba_method = getattr(self._estimator, "predict_proba", None)
        if callable(proba_method):
            return proba_method(X_t)

        # Fallback: put probability 1 on the predicted class of each instance.
        dists = np.zeros((X.shape[0], self.n_classes_))
        preds = self._estimator.predict(X_t)
        for i, label in enumerate(preds):
            dists[i, self._class_dictionary[label]] = 1
        return dists

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"`` set.
            For classifiers, a "default" set of parameters should be provided for
            general testing, and a "results_comparison" set for comparing against
            previously recorded results if the general set does not produce suitable
            probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
            instance.
            ``create_test_instance`` uses the first (or only) dictionary in ``params``.
        """
        # The same two parameter sets are returned whatever ``parameter_set`` is.
        return [
            {"subsequence_length": 4},
            {"subsequence_length": 6, "estimator": KNeighborsClassifier(n_neighbors=3)},
        ]