# Source code for sktime.forecasting.arima._statsmodels

# !/usr/bin/env python3 -u
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Interface to ARIMA from statsmodels package."""

__all__ = ["StatsModelsARIMA"]
__author__ = ["arnaujc91"]

from collections.abc import Iterable
from typing import Optional, Union

import numpy as np
import pandas as pd

from sktime.forecasting.base.adapters import _StatsModelsAdapter


class StatsModelsARIMA(_StatsModelsAdapter):
    """(S)ARIMA(X) forecaster, from statsmodels, tsa.arima module.

    Direct interface for ``statsmodels.tsa.arima.model.ARIMA``.

    Users should note that statsmodels contains two separate implementations of
    (S)ARIMA(X), the ARIMA and the SARIMAX class, in different modules:
    ``tsa.arima.model.ARIMA`` and ``tsa.statespace.SARIMAX``.

    These are implementations of the same underlying model, (S)ARIMA(X),
    but with different
    fitting strategies, fitted parameters, and slightly differing behaviour.
    Users should refer to the statsmodels documentation for further details:
    https://www.statsmodels.org/dev/examples/notebooks/generated/statespace_sarimax_faq.html

    Parameters
    ----------
    order : tuple, optional
        The (p,d,q) order of the model for the autoregressive, differences, and
        moving average components. d is always an integer, while p and q may
        either be integers or lists of integers.
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0). D and s are always integers, while P and Q
        may either be integers or lists of positive integers.
    trend : str{'n','c','t','ct'} or iterable, optional
        Parameter controlling the deterministic trend. Can be specified as a
        string where 'c' indicates a constant term, 't' indicates a
        linear trend in time, and 'ct' includes both. Can also be specified as
        an iterable defining a polynomial, as in ``numpy.poly1d``, where
        ``[1,1,0,1]`` would denote :math:`a + bt + ct^3`. Default is 'c' for
        models without integration, and no trend for models with integration.
        Note that all trend terms are included in the model as exogenous
        regressors, which differs from how trends are included in ``SARIMAX``
        models. See the Notes section of the statsmodels ``ARIMA`` documentation
        for a precise definition of the treatment of trend terms.
    enforce_stationarity : bool, optional
        Whether or not to require the autoregressive parameters to correspond
        to a stationary process.
    enforce_invertibility : bool, optional
        Whether or not to require the moving average parameters to correspond
        to an invertible process.
    concentrate_scale : bool, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters by one.
        This is only applicable when considering estimation by numerical
        maximum likelihood.
    trend_offset : int, optional
        The offset at which to start time trend values. Default is 1, so that
        if ``trend='t'`` the trend is equal to 1, 2, ..., nobs. Typically is only
        set when the model created by extending a previous dataset.
    dates : array_like of datetime, optional
        If no index is given by ``endog`` or ``exog``, an array-like object of
        datetime objects can be provided.
    freq : str, optional
        If no index is given by ``endog`` or ``exog``, the frequency of the
        time-series may be specified here as a Pandas offset or offset string.
    missing : str or None, optional
        Available options are 'none', 'drop', and 'raise'. If 'none', no nan
        checking is done. If 'drop', any observations with nans are dropped.
        If 'raise', an error is raised. Default is None, which is passed to
        statsmodels as 'none'.
    validate_specification : bool, optional
        Forwarded unchanged to the ``validate_specification`` argument of
        ``statsmodels.tsa.arima.model.ARIMA``. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization.
        If None, the default is given by Model.start_params.
    transformed : bool, optional
        Whether or not ``start_params`` is already transformed. Default is
        True.
    includes_fixed : bool, optional
        If parameters were previously fixed with the ``fix_params`` method,
        this argument describes whether or not ``start_params`` also includes
        the fixed parameters, in addition to the free parameters. Default
        is False.
    method : str, optional
        The method used for estimating the parameters of the model. Valid
        options include 'statespace', 'innovations_mle', 'hannan_rissanen',
        'burg', 'innovations', and 'yule_walker'. Not all options are
        available for every specification (for example 'yule_walker' can only
        be used with AR(p) models).
    method_kwargs : dict, optional
        Arguments to pass to the fit function for the parameter estimator
        described by the ``method`` argument.
    gls : bool, optional
        Whether or not to use generalized least squares (GLS) to estimate
        regression effects. The default is False if ``method='statespace'``
        and is True otherwise.
    gls_kwargs : dict, optional
        Arguments to pass to the GLS estimation fit method. Only applicable if
        GLS estimation is used (see ``gls`` argument for details).
    cov_type : str, optional
        The ``cov_type`` keyword governs the method for calculating the
        covariance matrix of parameter estimates. Can be one of:

        - 'opg' for the outer product of gradient estimator
        - 'oim' for the observed information matrix estimator, calculated
          using the method of Harvey (1989)
        - 'approx' for the observed information matrix estimator,
          calculated using a numerical approximation of the Hessian matrix.
        - 'robust' for an approximate (quasi-maximum likelihood) covariance
          matrix that may be valid even in the presence of some
          misspecifications. Intermediate calculations use the 'oim'
          method.
        - 'robust_approx' is the same as 'robust' except that the
          intermediate calculations use the 'approx' method.
        - 'none' for no covariance matrix calculation.

        Default is 'opg' unless memory conservation is used to avoid
        computing the loglikelihood values for each observation, in which
        case the default is 'oim'.
    cov_kwds : dict or None, optional
        A dictionary of arguments affecting covariance matrix computation.

        **opg, oim, approx, robust, robust_approx**

        - 'approx_complex_step' : bool, optional - If True, numerical
          approximations are computed using complex-step methods. If False,
          numerical approximations are computed using finite difference
          methods. Default is True.
        - 'approx_centered' : bool, optional - If True, numerical
          approximations computed using finite difference methods use a
          centered approximation. Default is False.
    return_params : bool, optional
        Whether or not to return only the array of maximizing parameters.
        Default is False.
    low_memory : bool, optional
        If set to True, techniques are applied to substantially reduce memory
        usage. If used, some features of the results object will not be
        available (including smoothed results and in-sample prediction),
        although out-of-sample forecasting is possible. Default is False.

    See Also
    --------
    ARIMA
    SARIMAX
    AutoARIMA
    StatsForecastAutoARIMA

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.arima import StatsModelsARIMA
    >>> y = load_airline()
    >>> forecaster = StatsModelsARIMA(order=(0, 0, 12))  # doctest: +SKIP
    >>> forecaster.fit(y)  # doctest: +SKIP
    >>> y_pred = forecaster.predict(fh=[1,2,3])  # doctest: +SKIP
    """  # noqa: E501

    _tags = {
        # packaging info
        # --------------
        "authors": ["chadfulton", "bashtage", "jbrockmendel", "arnaujc91"],
        # chadfulton, bashtage, jbrockmendel for statsmodels implementation
        "maintainers": ["arnaujc91"],
        "ignores-exogeneous-X": False,
        "capability:pred_int": True,
        "capability:pred_int:insample": True,
        "python_dependencies": ["statsmodels"],
    }

    def __init__(
        self,
        order: tuple[int, int, int] = (0, 0, 0),
        seasonal_order: tuple[int, int, int, int] = (0, 0, 0, 0),
        trend: Optional[Union[str, Iterable]] = None,
        enforce_stationarity: bool = True,
        enforce_invertibility: bool = True,
        concentrate_scale: bool = False,
        trend_offset: int = 1,
        dates: Optional[np.ndarray] = None,
        freq: Optional[str] = None,
        missing: Optional[str] = None,
        validate_specification: bool = True,
        start_params: Optional[np.ndarray] = None,
        transformed: bool = True,
        includes_fixed: bool = False,
        method: Optional[str] = None,
        method_kwargs: Optional[dict] = None,
        gls: bool = False,
        gls_kwargs: Optional[dict] = None,
        cov_type: str = "opg",
        cov_kwds: Optional[dict] = None,
        return_params: bool = False,
        low_memory: bool = False,
    ):
        # Model params: forwarded to the statsmodels ARIMA constructor at fit time.
        self.order = order
        self.seasonal_order = seasonal_order
        self.trend = trend
        self.enforce_stationarity = enforce_stationarity
        self.enforce_invertibility = enforce_invertibility
        self.concentrate_scale = concentrate_scale
        self.trend_offset = trend_offset
        self.dates = dates
        self.freq = freq
        self.missing = missing
        self.validate_specification = validate_specification

        # Fit params: forwarded to the statsmodels ARIMA ``fit`` call.
        self.start_params = start_params
        self.transformed = transformed
        self.includes_fixed = includes_fixed
        self.method = method
        self.method_kwargs = method_kwargs
        self.gls = gls
        self.gls_kwargs = gls_kwargs
        self.cov_type = cov_type
        self.cov_kwds = cov_kwds
        self.return_params = return_params
        self.low_memory = low_memory

        super().__init__()

    def _fit_forecaster(self, y, X=None):
        """Build the statsmodels ARIMA model and fit it.

        Stores the unfitted model in ``self._forecaster`` and the object returned
        by its ``fit`` method in ``self._fitted_forecaster``.

        Parameters
        ----------
        y : time series
            Passed to statsmodels as ``endog``.
        X : time series, optional (default=None)
            Passed to statsmodels as ``exog``.
        """
        # imported lazily: statsmodels is a soft dependency, see ``_tags``
        from statsmodels.tsa.arima.model import ARIMA as _ARIMA

        # The constructor default for ``missing`` is None, but the documented
        # options are 'none', 'drop' and 'raise'. Translate None into the
        # documented default so it is what actually reaches statsmodels;
        # ``self.missing`` itself is left untouched.
        missing = "none" if self.missing is None else self.missing

        self._forecaster = _ARIMA(
            endog=y,
            exog=X,
            order=self.order,
            seasonal_order=self.seasonal_order,
            trend=self.trend,
            enforce_stationarity=self.enforce_stationarity,
            enforce_invertibility=self.enforce_invertibility,
            concentrate_scale=self.concentrate_scale,
            trend_offset=self.trend_offset,
            dates=self.dates,
            freq=self.freq,
            missing=missing,
            validate_specification=self.validate_specification,
        )

        # NOTE(review): with ``return_params=True`` statsmodels presumably returns
        # a bare parameter array rather than a results object, which ``summary``
        # and prediction could not use - confirm before relying on that option.
        self._fitted_forecaster = self._forecaster.fit(
            start_params=self.start_params,
            transformed=self.transformed,
            includes_fixed=self.includes_fixed,
            method=self.method,
            method_kwargs=self.method_kwargs,
            gls=self.gls,
            gls_kwargs=self.gls_kwargs,
            cov_type=self.cov_type,
            cov_kwds=self.cov_kwds,
            return_params=self.return_params,
            low_memory=self.low_memory,
        )

    def summary(self):
        """Get a summary of the fitted forecaster.

        This is the same as the implementation in statsmodels:
        https://www.statsmodels.org/dev/examples/notebooks/generated/statespace_structural_harvey_jaeger.html

        Returns
        -------
        The result of calling ``summary()`` on the fitted statsmodels object.
        Requires the forecaster to have been fitted, as it reads
        ``self._fitted_forecaster``.
        """
        return self._fitted_forecaster.summary()

    @staticmethod
    def _extract_conf_int(prediction_results, alpha) -> pd.DataFrame:
        """Construct confidence interval at specified ``alpha`` for each timestep.

        Parameters
        ----------
        prediction_results : PredictionResults
            results class, as returned by ``self._fitted_forecaster.get_prediction``
        alpha : float
            one minus nominal coverage

        Returns
        -------
        pd.DataFrame
            confidence intervals at each timestep

            The dataframe must have at least two columns ``lower`` and ``upper``, and
            the row indices must be integers relative to ``self.cutoff``. Order of
            columns does not matter, and row indices must be a superset of relative
            integer horizon of ``fh``.
        """
        conf_int = prediction_results.conf_int(alpha=alpha)
        # rename the two interval columns to the names named in the contract above
        conf_int.columns = ["lower", "upper"]
        return conf_int

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"`` set.
            There are currently no reserved values for forecasters.

        Returns
        -------
        params : list of dict, default = []
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
            instance.
            ``create_test_instance`` uses the first (or only) dictionary in ``params``
        """
        # ``parameter_set`` is not inspected: the same list is returned for any value
        return [
            {
                "order": (0, 1, 2),
                "trend": "n",
                "enforce_stationarity": False,
                "enforce_invertibility": False,
                "concentrate_scale": True,
                "method": "statespace",
            },
            {
                "order": (1, 1, 2),
                "trend": "t",
                "enforce_stationarity": False,
                "enforce_invertibility": False,
                "method": "statespace",
            },
            {
                "order": (0, 0, 1),
                "trend": "ct",
                "seasonal_order": (1, 0, 1, 2),
                "cov_type": "opg",
                "gls": True,
                "method": "statespace",
            },
            {"cov_type": "robust", "gls": True, "method": "burg"},
        ]