feature_engine.selection.drop_features 源代码
from typing import List, Union
import pandas as pd
from feature_engine.dataframe_checks import check_X
from feature_engine.selection.base_selector import BaseSelector
from feature_engine.tags import _return_tags
from feature_engine.variable_handling import check_all_variables
[文档]class DropFeatures(BaseSelector):
"""
DropFeatures() drops a list of variables indicated by the user from the dataframe.
More details in the :ref:`User Guide <drop_features>`.
Parameters
----------
features_to_drop: str or list
Variable(s) to be dropped from the dataframe
Attributes
----------
features_to_drop_:
The features that will be dropped.
feature_names_in_:
List with the names of features seen during `fit`.
n_features_in_:
The number of features in the train set used in fit.
Methods
-------
fit:
This transformer does not learn any parameter.
fit_transform:
Fit to data, then transform it.
get_feature_names_out:
Get output feature names for transformation.
get_support:
Get a mask, or integer index, of the features selected.
get_params:
Get parameters for this estimator.
set_params:
Set the parameters of this estimator.
transform:
Drops indicated features.
Examples
--------
>>> import pandas as pd
>>> from feature_engine.selection import DropFeatures
>>> X = pd.DataFrame(dict(x1 = [1,2,3,4],
>>> x2 = ["a", "a", "b", "c"],
>>> x3 = [True, False, False, True]))
>>> df = DropFeatures(features_to_drop=["x2"])
>>> df.fit_transform(X)
x1 x3
0 1 True
1 2 False
2 3 False
3 4 True
"""
def __init__(self, features_to_drop: List[Union[str, int]]):
if not isinstance(features_to_drop, (str, list)) or len(features_to_drop) == 0:
raise ValueError(
f"features_to_drop should be a list with the name of the variables "
f"you wish to drop from the dataframe. Got {features_to_drop} instead."
)
self.features_to_drop = features_to_drop
[文档] def fit(self, X: pd.DataFrame, y: pd.Series = None):
"""
This transformer does not learn any parameter.
Parameters
----------
X : pandas dataframe of shape = [n_samples, n_features]
The input dataframe
y : pandas Series, default = None
y is not needed for this transformer. You can pass y or None.
"""
# check input dataframe
X = check_X(X)
self.features_to_drop_ = check_all_variables(X, variables=self.features_to_drop)
# check user is not removing all columns in the dataframe
if len(self.features_to_drop_) == len(X.columns):
raise ValueError(
"The resulting dataframe will have no columns after dropping all "
"existing variables"
)
# save input features
self._get_feature_names_in(X)
return self
def _more_tags(self):
tags_dict = _return_tags()
tags_dict["allow_nan"] = True
# add additional test that fails
tags_dict["_xfail_checks"][
"check_parameters_default_constructible"
] = "transformer has 1 mandatory parameter"
tags_dict["_xfail_checks"][
"check_fit2d_1feature"
] = "the transformer raises an error when removing the only column, ok to fail"
return tags_dict