# Source code for sktime.classification.feature_based._fresh_prince

"""FreshPRINCE Classifier.

Pipeline classifier using the full set of TSFresh features and a RotationForest
classifier.
"""

__author__ = ["MatthewMiddlehurst"]
__all__ = ["FreshPRINCE"]

import numpy as np

from sktime.classification.base import BaseClassifier
from sktime.classification.sklearn import RotationForest
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
from sktime.utils.validation.panel import check_X_y


class FreshPRINCE(BaseClassifier):
    """Fresh Pipeline with RotatIoN forest Classifier.

    A two-stage pipeline: the input panel is first converted into a feature
    table by the TSFresh [1]_ transformer (comprehensive feature set by
    default), and a RotationForest estimator is then trained on that table.

    Parameters
    ----------
    default_fc_parameters : str, default="comprehensive"
        Set of TSFresh features to be extracted, options are "minimal",
        "efficient" or "comprehensive".
    n_estimators : int, default=200
        Number of estimators for the RotationForest ensemble.
    save_transformed_data : bool, default=False
        If True, the TSFresh features computed during ``fit`` are stored in
        ``transformed_data_``. The flag is also forwarded to the
        RotationForest. Required for ``_get_train_probs``.
    verbose : int, default=0
        Level of output printed to the console (for information only).
        The TSFresh progress bar is disabled when ``verbose < 1`` and TSFresh
        warnings are shown when ``verbose > 1``.
    n_jobs : int, default=1
        The number of jobs to run in parallel for both ``fit`` and
        ``predict``. ``-1`` means using all processors.
    chunksize : int or None, default=None
        Number of series processed in each parallel TSFresh job, should be
        optimised for efficient parallelisation.
    random_state : int or None, default=None
        Seed for random, integer.

    Attributes
    ----------
    n_classes_ : int
        Number of classes. Extracted from the data.
    classes_ : ndarray of shape (n_classes_)
        Holds the label for each class.
    n_instances_ : int
        First entry of the shape of the data passed to ``fit``.
    n_dims_ : int
        Second entry of the shape of the data passed to ``fit``.
    series_length_ : int
        Third entry of the shape of the data passed to ``fit``.
    transformed_data_ : list or feature table
        Empty list until ``fit`` is called with ``save_transformed_data=True``,
        afterwards the output of the TSFresh transformer on the training data.

    See Also
    --------
    TSFreshFeatureExtractor, TSFreshClassifier, RotationForest

    References
    ----------
    .. [1] Christ, Maximilian, et al. "Time series feature extraction on basis of
        scalable hypothesis tests (tsfresh-a python package)." Neurocomputing 307
        (2018): 72-77.
        https://www.sciencedirect.com/science/article/pii/S0925231218304843

    Examples
    --------
    >>> from sktime.classification.feature_based import FreshPRINCE
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True) # doctest: +SKIP
    >>> clf = FreshPRINCE(
    ...     default_fc_parameters="comprehensive",
    ...     n_estimators=200,
    ...     save_transformed_data=False,
    ...     verbose=0,
    ...     n_jobs=1,
    ... ) # doctest: +SKIP
    >>> clf.fit(X_train, y_train) # doctest: +SKIP
    FreshPRINCE(...)
    >>> y_pred = clf.predict(X_test) # doctest: +SKIP
    """

    _tags = {
        # packaging info
        # --------------
        "authors": ["MatthewMiddlehurst"],
        "python_version": "<3.10",
        "python_dependencies": "tsfresh",
        # estimator type
        # --------------
        "capability:multivariate": True,
        "capability:multithreading": True,
        "capability:train_estimate": True,
        "capability:predict_proba": True,
        "classifier_type": "feature",
    }

    def __init__(
        self,
        default_fc_parameters="comprehensive",
        n_estimators=200,
        save_transformed_data=False,
        verbose=0,
        n_jobs=1,
        chunksize=None,
        random_state=None,
    ):
        # Hyper-parameters are stored untouched under their own names.
        self.default_fc_parameters = default_fc_parameters
        self.n_estimators = n_estimators
        self.save_transformed_data = save_transformed_data
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.chunksize = chunksize
        self.random_state = random_state

        # Placeholders for state that ``_fit`` fills in.
        self.n_instances_ = 0
        self.n_dims_ = 0
        self.series_length_ = 0
        self.transformed_data_ = []

        # The two pipeline stages are only created inside ``_fit``.
        self._rotf = None
        self._tsfresh = None

        super().__init__()

    def _fit(self, X, y):
        """Fit the TSFresh + RotationForest pipeline on cases (X, y).

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Remember the training shape; ``_get_train_probs`` compares against it.
        self.n_instances_, self.n_dims_, self.series_length_ = X.shape

        # Second stage: the classifier trained on the extracted features.
        self._rotf = RotationForest(
            n_estimators=self.n_estimators,
            save_transformed_data=self.save_transformed_data,
            n_jobs=self._threads_to_use,
            random_state=self.random_state,
        )
        # First stage: the feature extractor; verbosity controls its output.
        self._tsfresh = TSFreshFeatureExtractor(
            default_fc_parameters=self.default_fc_parameters,
            n_jobs=self._threads_to_use,
            chunksize=self.chunksize,
            show_warnings=self.verbose > 1,
            disable_progressbar=self.verbose < 1,
        )

        features = self._tsfresh.fit_transform(X, y)
        self._rotf.fit(features, y)

        # Keep the training features only when explicitly requested.
        if self.save_transformed_data:
            self.transformed_data_ = features

        return self

    def _predict(self, X) -> np.ndarray:
        """Predict class values of n instances in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        features = self._tsfresh.transform(X)
        return self._rotf.predict(features)

    def _predict_proba(self, X) -> np.ndarray:
        """Predict class probabilities for n instances in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        features = self._tsfresh.transform(X)
        return self._rotf.predict_proba(features)

    def _get_train_probs(self, X, y) -> np.ndarray:
        """Return train probability estimates from the fitted RotationForest.

        Parameters
        ----------
        X : panel data
            Converted to "numpy3D" if it is not already an ``np.ndarray``; its
            shape must equal the shape of the data seen in ``fit``.
        y : array-like
            Class labels, validated together with ``X``.

        Returns
        -------
        np.ndarray
            Output of ``RotationForest._get_train_probs`` on the features
            stored during ``fit``.

        Raises
        ------
        ValueError
            If the shape of ``X`` differs from the shape recorded in ``fit``,
            or if the estimator was built with ``save_transformed_data=False``.
        """
        from sktime.datatypes import convert_to

        self.check_is_fitted()

        if not isinstance(X, np.ndarray):
            X = convert_to(X, "numpy3D")
        X, y = check_X_y(X, y, coerce_to_numpy=True)

        # X is only used for this sanity check; the stored features are
        # what is actually passed on to the forest.
        n_instances, n_dims, series_length = X.shape
        fitted_shape = (self.n_instances_, self.n_dims_, self.series_length_)
        if (n_instances, n_dims, series_length) != fitted_shape:
            raise ValueError(
                "n_instances, n_dims, series_length mismatch. X should be the "
                "same as the training data used in fit for generating train "
                "probabilities."
            )

        if not self.save_transformed_data:
            raise ValueError("Currently only works with saved transform data from fit.")

        return self._rotf._get_train_probs(self.transformed_data_, y)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"``
            set. For classifiers, a "default" set of parameters should be provided
            for general testing, and a "results_comparison" set for comparing
            against previously recorded results if the general set does not
            produce suitable probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance,
            i.e., ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a
            valid test instance. ``create_test_instance`` uses the first (or only)
            dictionary in ``params``.
        """
        if parameter_set == "results_comparison":
            return {
                "n_estimators": 10,
                "default_fc_parameters": "minimal",
            }

        # Every other name falls back to the small general-purpose set.
        return {
            "n_estimators": 2,
            "default_fc_parameters": "minimal",
            "save_transformed_data": True,
        }