"""Pipeline with a classifier."""
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
import numpy as np
from sktime.base import _HeterogenousMetaEstimator
from sktime.classification.base import BaseClassifier
from sktime.datatypes import convert_to
from sktime.transformations.base import BaseTransformer
from sktime.transformations.compose import TransformerPipeline
from sktime.utils.sklearn import is_sklearn_classifier
__author__ = ["fkiraly"]
__all__ = ["ClassifierPipeline", "SklearnClassifierPipeline"]
class ClassifierPipeline(_HeterogenousMetaEstimator, BaseClassifier):
    """Pipeline of transformers and a classifier.

    The ``ClassifierPipeline`` compositor chains transformers and a single classifier.
    The pipeline is constructed with a list of sktime transformers, plus a classifier,
    i.e., estimators following the BaseTransformer resp BaseClassifier interface.
    The transformer list can be unnamed - a simple list of transformers -
    or string named - a list of pairs of string, estimator.

    For a list of transformers ``trafo1``, ``trafo2``, ..., ``trafoN`` and a classifier
    ``clf``, the pipeline behaves as follows:

    ``fit(X, y)`` - changes state by running ``trafo1.fit_transform`` on ``X``,
        then ``trafo2.fit_transform`` on the output of ``trafo1.fit_transform``, etc
        sequentially, with ``trafo[i]`` receiving the output of ``trafo[i-1]``,
        and then running ``clf.fit`` with ``X`` being the output of ``trafo[N]``,
        and ``y`` identical with the input to ``self.fit``
    ``predict(X)`` - result is of executing ``trafo1.transform``,
        ``trafo2.transform``, etc
        with ``trafo[i].transform`` input = output of ``trafo[i-1].transform``,
        then running ``clf.predict`` on the output of ``trafoN.transform``,
        and returning the output of ``clf.predict``
    ``predict_proba(X)`` - result is of executing ``trafo1.transform``,
        ``trafo2.transform``, etc
        with ``trafo[i].transform`` input = output of ``trafo[i-1].transform``,
        then running ``clf.predict_proba`` on the output of ``trafoN.transform``,
        and returning the output of ``clf.predict_proba``

    ``get_params``, ``set_params`` uses ``sklearn`` compatible nesting interface
        if list is unnamed, names are generated as names of classes
        if names are non-unique, ``f"_{str(i)}"`` is appended to each name string
        where ``i`` is the total count of occurrence of a non-unique string
        inside the list of names leading up to it (inclusive)

    ``ClassifierPipeline`` can also be created by using the magic multiplication
        on any classifier, i.e., if ``my_clf`` inherits from ``BaseClassifier``,
        and ``my_trafo1``, ``my_trafo2`` inherit from ``BaseTransformer``, then,
        for instance, ``my_trafo1 * my_trafo2 * my_clf``
        will result in the same object as obtained from the constructor
        ``ClassifierPipeline(classifier=my_clf, transformers=[my_trafo1, my_trafo2])``
        magic multiplication can also be used with (str, transformer) pairs,
        as long as one element in the chain is a transformer

    Parameters
    ----------
    classifier : sktime classifier, i.e., estimator inheriting from BaseClassifier
        this is a "blueprint" classifier, state does not change when ``fit`` is called
    transformers : list of sktime transformers, or
        list of tuples (str, transformer) of sktime transformers
        these are "blueprint" transformers, states do not change when ``fit`` is called

    Attributes
    ----------
    classifier_ : sktime classifier, clone of classifier in ``classifier``
        this clone is fitted in the pipeline when ``fit`` is called
    transformers_ : ``TransformerPipeline`` constructed from ``transformers``
        this is fitted in the pipeline when ``fit`` is called
        its steps are clones of transformers in ``transformers``,
        always in (str, transformer) format, even if transformers is just a list
        strings not passed in transformers are unique generated strings
        i-th step in ``transformers_`` is clone of i-th in ``transformers``

    Examples
    --------
    >>> from sktime.transformations.panel.pca import PCATransformer
    >>> from sktime.classification.interval_based import TimeSeriesForestClassifier
    >>> from sktime.datasets import load_unit_test
    >>> from sktime.classification.compose import ClassifierPipeline
    >>> X_train, y_train = load_unit_test(split="train")
    >>> X_test, y_test = load_unit_test(split="test")
    >>> pipeline = ClassifierPipeline(
    ...     TimeSeriesForestClassifier(n_estimators=5), [PCATransformer()]
    ... )
    >>> pipeline.fit(X_train, y_train)
    ClassifierPipeline(...)
    >>> y_pred = pipeline.predict(X_test)

    Alternative construction via dunder method:

    >>> pipeline = PCATransformer() * TimeSeriesForestClassifier(n_estimators=5)
    """

    # class-level defaults only - the capability tags are overwritten per instance
    # in __init__, depending on the classifier and transformers passed
    _tags = {
        "authors": ["fkiraly"],
        "X_inner_mtype": "pd-multiindex",  # which type do _fit/_predict accept
        "capability:multivariate": False,
        "capability:unequal_length": False,
        "capability:missing_values": False,
        "capability:train_estimate": False,
        "capability:contractable": False,
        "capability:multithreading": False,
        "capability:predict_proba": True,
    }

    def __init__(self, classifier, transformers):
        """Store the blueprints, build the inner estimators, and derive tags.

        Parameters
        ----------
        classifier : sktime classifier, inheriting from BaseClassifier
        transformers : list of sktime transformers, or
            list of tuples (str, transformer) of sktime transformers
        """
        self.classifier = classifier
        self.classifier_ = classifier.clone()
        self.transformers = transformers
        self.transformers_ = TransformerPipeline(transformers)

        super().__init__()

        # can handle multivariate iff: both classifier and all transformers can
        multivariate = classifier.get_tag("capability:multivariate", False)
        multivariate = multivariate and not self.transformers_.get_tag(
            "univariate-only", True
        )
        # can handle missing values iff: both classifier and all transformers can,
        #   *or* transformer chain removes missing data
        missing = classifier.get_tag("capability:missing_values", False)
        missing = missing and self.transformers_.get_tag("handles-missing-data", False)
        missing = missing or self.transformers_.get_tag(
            "capability:missing_values:removes", False
        )
        # can handle unequal length iff: classifier can and transformers can,
        #   *or* transformer chain renders the series equal length
        unequal = classifier.get_tag("capability:unequal_length")
        unequal = unequal and self.transformers_.get_tag(
            "capability:unequal_length", False
        )
        unequal = unequal or self.transformers_.get_tag(
            "capability:unequal_length:removes", False
        )
        # predict_proba is same as that of classifier
        predict_proba = classifier.get_tag("capability:predict_proba")

        # contractable, train_estimate and multithreading are always False,
        # since not supported by transformers
        tags_to_set = {
            "capability:multivariate": multivariate,
            "capability:missing_values": missing,
            "capability:unequal_length": unequal,
            "capability:contractable": False,
            "capability:train_estimate": False,
            "capability:multithreading": False,
            "capability:predict_proba": predict_proba,
        }
        self.set_tags(**tags_to_set)

    @property
    def _transformers(self):
        """Return the ``_steps`` of the inner transformer pipeline."""
        return self.transformers_._steps

    @_transformers.setter
    def _transformers(self, value):
        """Write ``value`` to the ``_steps`` of the inner transformer pipeline."""
        self.transformers_._steps = value

    def __rmul__(self, other):
        """Magic * method, return concatenated ClassifierPipeline, transformers on left.

        Implemented for ``other`` being a transformer, otherwise returns
        ``NotImplemented``.

        Parameters
        ----------
        other: ``sktime`` transformer, must inherit from BaseTransformer
            otherwise, ``NotImplemented`` is returned

        Returns
        -------
        ClassifierPipeline object, concatenation of ``other`` (first) with ``self``
        (last).
        """
        if not isinstance(other, BaseTransformer):
            return NotImplemented

        # use the transformers dunder to get a TransformerPipeline
        trafo_pipeline = other * self.transformers_
        # then stick the expanded pipeline in a new ClassifierPipeline,
        # built from the unfitted blueprint classifier
        return ClassifierPipeline(
            classifier=self.classifier,
            transformers=trafo_pipeline.steps,
        )

    def _fit(self, X, y):
        """Fit time series classifier to training data.

        core logic

        Parameters
        ----------
        X : Training data of type self.get_tag("X_inner_mtype")
        y : array-like, shape = [n_instances] - the class labels

        Returns
        -------
        self : reference to self.

        State change
        ------------
        creates fitted model (attributes ending in "_")
        """
        Xt = self.transformers_.fit_transform(X=X, y=y)
        self.classifier_.fit(X=Xt, y=y)
        return self

    def _predict(self, X) -> np.ndarray:
        """Predict labels for sequences in X.

        core logic

        Parameters
        ----------
        X : data not used in training, of type self.get_tag("X_inner_mtype")

        Returns
        -------
        y : predictions of labels for X, np.ndarray
        """
        Xt = self.transformers_.transform(X=X)
        return self.classifier_.predict(X=Xt)

    def _predict_proba(self, X) -> np.ndarray:
        """Predict label probabilities for sequences in X.

        Passes ``X`` through ``transformers_.transform`` and returns the output of
        ``classifier_.predict_proba`` on the transformed data.

        Parameters
        ----------
        X : data to predict y with, of type self.get_tag("X_inner_mtype")

        Returns
        -------
        y : predictions of probabilities for class values of X, np.ndarray
        """
        Xt = self.transformers_.transform(X)
        return self.classifier_.predict_proba(Xt)

    def get_params(self, deep=True):
        """Get parameters of estimator in ``transformers``.

        Delegates to ``self._get_params("_transformers", deep=deep)`` and returns
        the result as a new dict.

        Parameters
        ----------
        deep : boolean, optional, default=True
            If True, will return the parameters for this estimator and
            contained sub-objects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        return dict(self._get_params("_transformers", deep=deep))

    def set_params(self, **kwargs):
        """Set the parameters of estimator in ``transformers``.

        Valid parameter keys can be listed with ``get_params()``.

        Keys selected against the classifier's parameters (with prefix
        ``"classifier"``) are passed to ``self.classifier.set_params``;
        keys selected against the ``_transformers`` parameters are passed to
        ``self._set_params("_transformers", ...)``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        TypeError
            if ``classifier`` is passed and is not an sktime ``BaseClassifier``
        """
        if "classifier" in kwargs and not isinstance(
            kwargs["classifier"], BaseClassifier
        ):
            raise TypeError('"classifier" arg must be an sktime classifier')

        trafo_keys = self._get_params("_transformers", deep=True).keys()
        classif_keys = self.classifier.get_params(deep=True).keys()
        trafo_args = self._subset_dict_keys(dict_to_subset=kwargs, keys=trafo_keys)
        classif_args = self._subset_dict_keys(
            dict_to_subset=kwargs, keys=classif_keys, prefix="classifier"
        )

        # classifier parameters are applied first, then the transformer ones
        if classif_args:
            self.classifier.set_params(**classif_args)
        if trafo_args:
            self._set_params("_transformers", **trafo_args)
        return self

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"`` set.
            For classifiers, a "default" set of parameters should be provided for
            general testing, and a "results_comparison" set for comparing against
            previously recorded results if the general set does not produce suitable
            probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
            instance.
            ``create_test_instance`` uses the first (or only) dictionary in ``params``.
        """
        # imports are local, only needed to build the test instances
        from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
        from sktime.classification.dummy import DummyClassifier
        from sktime.transformations.series.exponent import ExponentTransformer

        t1 = ExponentTransformer(power=2)
        t2 = ExponentTransformer(power=0.5)
        c = KNeighborsTimeSeriesClassifier()

        another_c = DummyClassifier()

        # two-step transformer chain, and a single-step chain with another classifier
        params1 = {"transformers": [t1, t2], "classifier": c}
        params2 = {"transformers": [t1], "classifier": another_c}

        return [params1, params2]


class SklearnClassifierPipeline(_HeterogenousMetaEstimator, BaseClassifier):
    """Pipeline of transformers and a classifier.

    The ``SklearnClassifierPipeline`` chains transformers and a single classifier.
    Similar to ``ClassifierPipeline``, but uses a tabular ``sklearn`` classifier.
    The pipeline is constructed with a list of sktime transformers, plus a classifier,
    i.e., transformers following the BaseTransformer interface,
    classifier follows the ``scikit-learn`` classifier interface.
    The transformer list can be unnamed - a simple list of transformers -
    or string named - a list of pairs of string, estimator.

    For a list of transformers ``trafo1``, ``trafo2``, ..., ``trafoN`` and a classifier
    ``clf``, the pipeline behaves as follows:

    ``fit(X, y)`` - changes state by running ``trafo1.fit_transform`` on ``X``,
        then ``trafo2.fit_transform`` on the output of ``trafo1.fit_transform``, etc
        sequentially, with ``trafo[i]`` receiving the output of ``trafo[i-1]``,
        and then running ``clf.fit`` with ``X`` the output of ``trafo[N]`` converted
        to numpy, and ``y`` identical with the input to ``self.fit``.
        ``X`` is converted to ``numpyflat`` mtype if ``X`` is of ``Panel`` scitype;
        ``X`` is converted to ``numpy2D`` mtype if ``X`` is of ``Table`` scitype.
    ``predict(X)`` - result is of executing ``trafo1.transform``,
        ``trafo2.transform``, etc
        with ``trafo[i].transform`` input = output of ``trafo[i-1].transform``,
        then running ``clf.predict`` on the numpy converted output of
        ``trafoN.transform``, and returning the output of ``clf.predict``.
        Output of ``trafoN.transform`` is converted to numpy, as in ``fit``.
    ``predict_proba(X)`` - result is of executing ``trafo1.transform``,
        ``trafo2.transform``, etc
        with ``trafo[i].transform`` input = output of ``trafo[i-1].transform``,
        then running ``clf.predict_proba`` on the output of ``trafoN.transform``,
        and returning the output of ``clf.predict_proba``.
        Output of ``trafoN.transform`` is converted to numpy, as in ``fit``.

    ``get_params``, ``set_params`` uses ``sklearn`` compatible nesting interface
        if list is unnamed, names are generated as names of classes
        if names are non-unique, ``f"_{str(i)}"`` is appended to each name string
        where ``i`` is the total count of occurrence of a non-unique string
        inside the list of names leading up to it (inclusive)

    ``SklearnClassifierPipeline`` can also be created by using the magic
        multiplication between ``sktime`` transformers and ``sklearn`` classifiers,
        i.e., if ``my_clf`` is an ``sklearn`` classifier,
        and ``my_trafo1``, ``my_trafo2`` inherit from ``BaseTransformer``, then,
        for instance, ``my_trafo1 * my_trafo2 * my_clf``
        will result in the same object as obtained from the constructor
        ``SklearnClassifierPipeline(classifier=my_clf,
        transformers=[my_trafo1, my_trafo2])``
        magic multiplication can also be used with (str, transformer) pairs,
        as long as one element in the chain is a transformer

    Parameters
    ----------
    classifier : sklearn classifier, i.e., inheriting from sklearn ClassifierMixin
        this is a "blueprint" classifier, state does not change when ``fit`` is called
    transformers : list of sktime transformers, or
        list of tuples (str, transformer) of sktime transformers
        these are "blueprint" transformers, states do not change when ``fit`` is called

    Attributes
    ----------
    classifier_ : sklearn classifier, clone of classifier in ``classifier``
        this clone is fitted in the pipeline when ``fit`` is called
    transformers_ : ``TransformerPipeline`` constructed from ``transformers``
        this is fitted in the pipeline when ``fit`` is called
        its steps are clones of transformers in ``transformers``,
        always in (str, transformer) format, even if transformers is just a list
        strings not passed in transformers are unique generated strings
        i-th step in ``transformers_`` is clone of i-th in ``transformers``

    Examples
    --------
    >>> from sklearn.neighbors import KNeighborsClassifier
    >>> from sktime.transformations.series.exponent import ExponentTransformer
    >>> from sktime.transformations.series.summarize import SummaryTransformer
    >>> from sktime.datasets import load_unit_test
    >>> from sktime.classification.compose import SklearnClassifierPipeline
    >>> X_train, y_train = load_unit_test(split="train")
    >>> X_test, y_test = load_unit_test(split="test")
    >>> t1 = ExponentTransformer()
    >>> t2 = SummaryTransformer()
    >>> pipeline = SklearnClassifierPipeline(KNeighborsClassifier(), [t1, t2])
    >>> pipeline = pipeline.fit(X_train, y_train)
    >>> y_pred = pipeline.predict(X_test)

    Alternative construction via dunder method:

    >>> pipeline = t1 * t2 * KNeighborsClassifier()
    """

    # class-level defaults only - multivariate, missing_values and unequal_length
    # are overwritten per instance in __init__
    _tags = {
        "X_inner_mtype": "pd-multiindex",  # which type do _fit/_predict accept
        "capability:multivariate": True,
        "capability:unequal_length": True,
        "capability:missing_values": True,
        "capability:train_estimate": False,
        "capability:contractable": False,
        "capability:multithreading": False,
        "capability:predict_proba": True,
    }

    def __init__(self, classifier, transformers):
        """Store the blueprints, build the inner estimators, and derive tags.

        Parameters
        ----------
        classifier : sklearn classifier
        transformers : list of sktime transformers, or
            list of tuples (str, transformer) of sktime transformers
        """
        from sklearn.base import clone

        self.classifier = classifier
        self.classifier_ = clone(classifier)
        self.transformers = transformers
        self.transformers_ = TransformerPipeline(transformers)

        super().__init__()

        # all sktime and sklearn transformers always support multivariate
        multivariate = True
        # can handle missing values iff transformer chain removes missing data
        # sklearn classifiers might be able to handle missing data (but no tag there)
        # so better set the tag liberally
        missing = self.transformers_.get_tag("handles-missing-data", False)
        missing = missing or self.transformers_.get_tag(
            "capability:missing_values:removes", False
        )
        # can handle unequal length iff transformer chain renders series equal length
        # because sklearn classifiers require equal length (number of variables) input
        unequal = self.transformers_.get_tag("capability:unequal_length:removes", False)

        # contractable, train_estimate and multithreading are always False,
        # since not supported by transformers
        tags_to_set = {
            "capability:multivariate": multivariate,
            "capability:missing_values": missing,
            "capability:unequal_length": unequal,
            "capability:contractable": False,
            "capability:train_estimate": False,
            "capability:multithreading": False,
        }
        self.set_tags(**tags_to_set)

    @property
    def _transformers(self):
        """Return the ``_steps`` of the inner transformer pipeline."""
        return self.transformers_._steps

    @_transformers.setter
    def _transformers(self, value):
        """Write ``value`` to the ``_steps`` of the inner transformer pipeline."""
        self.transformers_._steps = value

    def __rmul__(self, other):
        """Magic * method, return concatenated pipeline, transformers on left.

        Implemented for ``other`` being a transformer, otherwise returns
        ``NotImplemented``.

        Parameters
        ----------
        other: ``sktime`` transformer, must inherit from BaseTransformer
            otherwise, ``NotImplemented`` is returned

        Returns
        -------
        SklearnClassifierPipeline object, concatenation of ``other`` (first) with
        ``self`` (last).
        """
        if not isinstance(other, BaseTransformer):
            return NotImplemented

        # use the transformers dunder to get a TransformerPipeline
        trafo_pipeline = other * self.transformers_
        # then stick the expanded pipeline in a new SklearnClassifierPipeline,
        # built from the unfitted blueprint classifier
        return SklearnClassifierPipeline(
            classifier=self.classifier,
            transformers=trafo_pipeline.steps,
        )

    def _convert_X_to_sklearn(self, X):
        """Convert a Table or Panel X to 2D numpy required by sklearn.

        The conversion is chosen from the ``"scitype:transform-output"`` tag of
        ``self.transformers_``.

        Parameters
        ----------
        X : output of ``self.transformers_``, of Table or Panel scitype

        Returns
        -------
        Xt : ``X`` converted to ``numpy2D`` mtype (tag value ``"Primitives"``)
            or to ``numpyflat`` mtype (tag value ``"Series"``)

        Raises
        ------
        TypeError
            if the tag value is neither ``"Primitives"`` nor ``"Series"``
        """
        X_scitype = self.transformers_.get_tag("scitype:transform-output")

        # if X_scitype is Primitives, output is Table, convert to 2D numpy array
        if X_scitype == "Primitives":
            return convert_to(X, to_type="numpy2D", as_scitype="Table")
        # if X_scitype is Series, output is Panel, convert to 2D numpy (numpyflat)
        if X_scitype == "Series":
            return convert_to(X, to_type="numpyflat", as_scitype="Panel")

        raise TypeError(
            f"unexpected X output type in {type(self.classifier).__name__}, "
            f'in tag "scitype:transform-output", found "{X_scitype}", '
            'expected one of "Primitives" or "Series"'
        )

    def _fit(self, X, y):
        """Fit time series classifier to training data.

        core logic

        Parameters
        ----------
        X : Training data of type self.get_tag("X_inner_mtype")
        y : array-like, shape = [n_instances] - the class labels

        Returns
        -------
        self : reference to self.

        State change
        ------------
        creates fitted model (attributes ending in "_")
        """
        Xt = self.transformers_.fit_transform(X=X, y=y)
        Xt_sklearn = self._convert_X_to_sklearn(Xt)
        self.classifier_.fit(Xt_sklearn, y)
        return self

    def _predict(self, X) -> np.ndarray:
        """Predict labels for sequences in X.

        core logic

        Parameters
        ----------
        X : data not used in training, of type self.get_tag("X_inner_mtype")

        Returns
        -------
        y : predictions of labels for X, np.ndarray
        """
        Xt = self.transformers_.transform(X=X)
        Xt_sklearn = self._convert_X_to_sklearn(Xt)
        return self.classifier_.predict(Xt_sklearn)

    def _predict_proba(self, X) -> np.ndarray:
        """Predict label probabilities for sequences in X.

        If ``classifier_`` has a ``predict_proba`` method, ``X`` is passed through
        ``transformers_.transform``, converted to numpy, and the output of
        ``classifier_.predict_proba`` is returned.
        Otherwise, falls back to ``BaseClassifier._predict_proba``, called on the
        untransformed ``X``.

        Parameters
        ----------
        X : data to predict y with, of type self.get_tag("X_inner_mtype")

        Returns
        -------
        y : predictions of probabilities for class values of X, np.ndarray
        """
        if hasattr(self.classifier_, "predict_proba"):
            # transform only in this branch - the fallback below is given the raw X,
            # so transforming up front would be discarded work
            Xt = self.transformers_.transform(X)
            Xt_sklearn = self._convert_X_to_sklearn(Xt)
            return self.classifier_.predict_proba(Xt_sklearn)

        # if sklearn classifier does not have predict_proba
        return BaseClassifier._predict_proba(self, X)

    def get_params(self, deep=True):
        """Get parameters of estimator in ``transformers``.

        Delegates to ``self._get_params("_transformers", deep=deep)`` and returns
        the result as a new dict.

        Parameters
        ----------
        deep : boolean, optional, default=True
            If True, will return the parameters for this estimator and
            contained sub-objects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        return dict(self._get_params("_transformers", deep=deep))

    def set_params(self, **kwargs):
        """Set the parameters of estimator in ``transformers``.

        Valid parameter keys can be listed with ``get_params()``.

        Keys selected against the classifier's parameters (with prefix
        ``"classifier"``) are passed to ``self.classifier.set_params``;
        keys selected against the ``_transformers`` parameters are passed to
        ``self._set_params("_transformers", ...)``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        TypeError
            if ``classifier`` is passed and is not an sklearn classifier
        """
        if "classifier" in kwargs and not is_sklearn_classifier(kwargs["classifier"]):
            raise TypeError('"classifier" arg must be an sklearn classifier')

        trafo_keys = self._get_params("_transformers", deep=True).keys()
        classif_keys = self.classifier.get_params(deep=True).keys()
        trafo_args = self._subset_dict_keys(dict_to_subset=kwargs, keys=trafo_keys)
        classif_args = self._subset_dict_keys(
            dict_to_subset=kwargs, keys=classif_keys, prefix="classifier"
        )

        # classifier parameters are applied first, then the transformer ones
        if classif_args:
            self.classifier.set_params(**classif_args)
        if trafo_args:
            self._set_params("_transformers", **trafo_args)
        return self

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"`` set.
            For classifiers, a "default" set of parameters should be provided for
            general testing, and a "results_comparison" set for comparing against
            previously recorded results if the general set does not produce suitable
            probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
            instance.
            ``create_test_instance`` uses the first (or only) dictionary in ``params``.
        """
        # imports are local, only needed to build the test instances
        from sklearn.neighbors import KNeighborsClassifier

        from sktime.transformations.series.exponent import ExponentTransformer
        from sktime.transformations.series.summarize import SummaryTransformer

        # example with series-to-series transformer before sklearn classifier
        t1 = ExponentTransformer(power=2)
        t2 = ExponentTransformer(power=0.5)
        c = KNeighborsClassifier()
        params1 = {"transformers": [t1, t2], "classifier": c}

        # example with series-to-primitive transformer before sklearn classifier
        t1 = ExponentTransformer(power=2)
        t2 = SummaryTransformer()
        c = KNeighborsClassifier()
        params2 = {"transformers": [t1, t2], "classifier": c}

        # construct without names
        return [params1, params2]