# Source code for feature_engine.transformation.arcsin
# Authors: Tommaso Pellegrino <tommasopellegrino.1995@gmail.com>
# License: BSD 3 clause
from typing import List, Optional, Union
import numpy as np
import pandas as pd
from feature_engine._base_transformers.base_numerical import BaseNumericalTransformer
from feature_engine._check_init_parameters.check_variables import (
_check_variables_input_value,
)
from feature_engine._docstrings.fit_attributes import (
_feature_names_in_docstring,
_n_features_in_docstring,
_variables_attribute_docstring,
)
from feature_engine._docstrings.init_parameters.all_trasnformers import (
_variables_numerical_docstring,
)
from feature_engine._docstrings.methods import (
_fit_not_learn_docstring,
_fit_transform_docstring,
_inverse_transform_docstring,
)
from feature_engine._docstrings.substitute import Substitution
from feature_engine.tags import _return_tags
@Substitution(
    variables=_variables_numerical_docstring,
    variables_=_variables_attribute_docstring,
    feature_names_in_=_feature_names_in_docstring,
    n_features_in_=_n_features_in_docstring,
    fit=_fit_not_learn_docstring,
    fit_transform=_fit_transform_docstring,
    inverse_transform=_inverse_transform_docstring,
)
class ArcsinTransformer(BaseNumericalTransformer):
    """
    The ArcsinTransformer() applies the arcsin transformation to numerical variables.

    The arcsin transformation, also called arcsin square root transformation, or
    angular transformation, takes the form of arcsin(sqrt(x)) where x is a real number
    between 0 and 1.

    The arcsin square root transformation helps in dealing with probabilities,
    percents, and proportions. It aims to stabilize the variance of the variable and
    return more evenly distributed (Gaussian looking) values.

    The ArcsinTransformer() only works with numerical variables whose values are
    between 0 and 1. If a variable contains values outside of this range, the
    transformer will raise an error.

    A list of variables can be passed as an argument. Alternatively, the transformer
    will automatically select and transform all numerical variables.

    More details in the :ref:`User Guide <arcsin>`.

    Parameters
    ----------
    {variables}

    Attributes
    ----------
    {variables_}

    {feature_names_in_}

    {n_features_in_}

    Methods
    -------
    {fit}

    {fit_transform}

    {inverse_transform}

    transform:
        Apply the arcsin transformation.

    Examples
    --------

    >>> import numpy as np
    >>> import pandas as pd
    >>> from feature_engine.transformation import ArcsinTransformer
    >>> np.random.seed(42)
    >>> X = pd.DataFrame(dict(x = np.random.beta(1, 1, size = 100)))
    >>> ast = ArcsinTransformer()
    >>> ast.fit(X)
    >>> X = ast.transform(X)
    >>> X.head()
              x
    0  0.785437
    1  0.253389
    2  0.144664
    3  0.783236
    4  0.650777
    """

    def __init__(
        self, variables: Union[None, int, str, List[Union[str, int]]] = None
    ) -> None:
        # Validation of the user-supplied selection is delegated to the shared
        # helper; its return value is what gets stored on the instance.
        self.variables = _check_variables_input_value(variables)

    def _has_values_outside_unit_interval(self, X: pd.DataFrame) -> bool:
        """Return True if any selected variable holds a value < 0 or > 1."""
        selected = X[self.variables_]
        # First .any() reduces per column, second reduces across columns.
        return bool(((selected < 0) | (selected > 1)).any().any())

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
        """
        This transformer does not learn parameters.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features].
            The training input samples. Can be the entire dataframe, not just the
            variables to transform.

        y: pandas Series, default=None
            It is not needed in this transformer. You can pass y or None.

        Returns
        -------
        self: ArcsinTransformer
            The transformer itself.

        Raises
        ------
        ValueError
            If any of the variables to transform contains values outside [0, 1].
        """
        # check input dataframe
        X = super().fit(X)

        # check if the variables are in the correct range
        if self._has_values_outside_unit_interval(X):
            raise ValueError(
                "Some variables contain values outside the possible range 0-1. "
                "Can't apply the arcsin transformation. "
            )

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Apply the arcsin transformation.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Returns
        -------
        X_new: pandas dataframe
            The dataframe with the transformed variables.

        Raises
        ------
        ValueError
            If any of the variables to transform contains values outside [0, 1].
        """
        # check input dataframe and if class was fitted
        X = self._check_transform_input_and_state(X)

        # check if the variables are in the correct range
        if self._has_values_outside_unit_interval(X):
            raise ValueError(
                "Some variables contain values outside the possible range 0-1. "
                "Can't apply the arcsin transformation."
            )

        # transform
        # Column assignment (rather than ``.loc[:, cols] = ...``) replaces the
        # columns instead of writing into the existing ones, so integer columns
        # holding only 0/1 are not subject to an in-place float upcast.
        X[self.variables_] = np.arcsin(np.sqrt(X[self.variables_]))

        return X

    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Convert the data back to the original representation.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Returns
        -------
        X_tr: pandas dataframe
            The dataframe with the transformed variables.
        """
        # check input dataframe and if class was fitted, exactly as transform()
        # does, instead of operating on the caller's object unchecked.
        # NOTE(review): presumably the helper also returns a copy of X - confirm.
        X = self._check_transform_input_and_state(X)

        # inverse_transform: sin(arcsin(sqrt(x))) ** 2 == x
        X[self.variables_] = np.sin(X[self.variables_]) ** 2

        return X

    def _more_tags(self):
        tags_dict = _return_tags()
        tags_dict["variables"] = "numerical"

        # ======= these tests fail because the transformers throw an error when
        # the values are less than 0 or greater than 1. Nothing to do with the
        # test itself but mostly with the data created and used in the test
        msg = (
            "transformers raise errors when data is outside [0, 1] range, thus this "
            "check fails"
        )
        expected_failures = (
            "check_estimators_dtypes",
            "check_estimators_fit_returns_self",
            "check_pipeline_consistency",
            "check_estimators_overwrite_params",
            "check_estimators_pickle",
            "check_transformer_general",
            "check_methods_subset_invariance",
            "check_fit2d_1sample",
            "check_fit2d_1feature",
            "check_dict_unchanged",
            "check_dont_overwrite_parameters",
            "check_fit_check_is_fitted",
            "check_n_features_in",
        )
        for check_name in expected_failures:
            tags_dict["_xfail_checks"][check_name] = msg

        return tags_dict