plot_decision_regions: 可视化分类器的决策区域

一个用于绘制分类器在1或2维中决策区域的函数。

> from mlxtend.plotting import plot_decision_regions

参考文献

示例 1 - 二维中的决策区域

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target

# 训练分类器
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)


# 绘制决策区域
plot_decision_regions(X, y, clf=svm, legend=2)

# 添加轴注释
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()

png

示例 2 - 一维中的决策区域

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# Load some example data
iris = datasets.load_iris()
# Column 2 of the iris feature matrix is the petal length (the 2D example
# labels columns [0, 2] as sepal length / petal length).
X = iris.data[:, 2]
# Add a trailing axis so the single feature is a column, not a flat vector
X = X[:, None]
y = iris.target

# Train the classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)

# Plot the decision regions
plot_decision_regions(X, y, clf=svm, legend=2)

# Add axis annotations; the plotted feature is petal length, not sepal length
plt.xlabel('petal length [cm]')
plt.title('SVM on Iris')

plt.show()

png

示例 3 - 决策区域网格

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import datasets
import numpy as np

# 初始化分类器
clf1 = LogisticRegression(random_state=1,
                          solver='newton-cg',
                          multi_class='multinomial')
clf2 = RandomForestClassifier(random_state=1, n_estimators=100)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, [0,2]]
y = iris.target

import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
import matplotlib.gridspec as gridspec
import itertools
gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Pair each classifier with its title and a (row, col) cell of the 2x2 grid
for clf, lab, (row, col) in zip([clf1, clf2, clf3, clf4],
                                labels,
                                itertools.product([0, 1], repeat=2)):

    clf.fit(X, y)
    ax = plt.subplot(gs[row, col])
    # plot_decision_regions returns the Axes it drew on, so pass the subplot
    # explicitly and keep `fig` bound to the Figure instead of rebinding it.
    plot_decision_regions(X=X, y=y, clf=clf, legend=2, ax=ax)
    ax.set_title(lab)

plt.show()

png

示例 4 - 突出显示测试数据点

from mlxtend.plotting import plot_decision_regions
from mlxtend.preprocessing import shuffle_arrays_unison
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC


# 加载一些示例数据
iris = datasets.load_iris()
X, y = iris.data[:, [0,2]], iris.target
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

X_train, y_train = X[:100], y[:100]
X_test, y_test = X[100:], y[100:]

# 训练分类器
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# 绘制决策区域
plot_decision_regions(X, y, clf=svm, legend=2, 
                      X_highlight=X_test)

# 添加轴注释
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()

png

示例 5 - 评估分类器在非线性问题上的表现

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# 初始化分类器
clf1 = LogisticRegression(random_state=1, solver='lbfgs')
clf2 = RandomForestClassifier(n_estimators=100, 
                              random_state=1)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')

# 加载绘图工具
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import itertools
from mlxtend.plotting import plot_decision_regions
import numpy as np

异或

# XOR-labelled data: a sample is class 1 when exactly one of its two
# coordinates is positive. (The previously created xx/yy meshgrid was never
# used and has been dropped.)
rng = np.random.RandomState(0)
X = rng.randn(300, 2)
y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0),
             dtype=int)

gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Pair each classifier with its title and a (row, col) cell of the 2x2 grid
for clf, lab, (row, col) in zip([clf1, clf2, clf3, clf4],
                                labels,
                                itertools.product([0, 1], repeat=2)):

    clf.fit(X, y)
    ax = plt.subplot(gs[row, col])
    # plot_decision_regions returns the Axes it drew on, so pass the subplot
    # explicitly and keep `fig` bound to the Figure instead of rebinding it.
    plot_decision_regions(X=X, y=y, clf=clf, legend=2, ax=ax)
    ax.set_title(lab)

plt.show()

png

半月形

from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, random_state=123)

gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Pair each classifier with its title and a (row, col) cell of the 2x2 grid
for clf, lab, (row, col) in zip([clf1, clf2, clf3, clf4],
                                labels,
                                itertools.product([0, 1], repeat=2)):

    clf.fit(X, y)
    ax = plt.subplot(gs[row, col])
    # plot_decision_regions returns the Axes it drew on, so pass the subplot
    # explicitly and keep `fig` bound to the Figure instead of rebinding it.
    plot_decision_regions(X=X, y=y, clf=clf, legend=2, ax=ax)
    ax.set_title(lab)

plt.show()

png

同心圆

from sklearn.datasets import make_circles
X, y = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2)

gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Pair each classifier with its title and a (row, col) cell of the 2x2 grid
for clf, lab, (row, col) in zip([clf1, clf2, clf3, clf4],
                                labels,
                                itertools.product([0, 1], repeat=2)):

    clf.fit(X, y)
    ax = plt.subplot(gs[row, col])
    # plot_decision_regions returns the Axes it drew on, so pass the subplot
    # explicitly and keep `fig` bound to the Figure instead of rebinding it.
    plot_decision_regions(X=X, y=y, clf=clf, legend=2, ax=ax)
    ax.set_title(lab)

plt.show()

png

示例 6 - 使用子图处理现有的轴对象

import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn import datasets
import numpy as np

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, 2]
X = X[:, None]
y = iris.target

# 初始化并拟合分类器
clf1 = LogisticRegression(random_state=1,
                          solver='lbfgs',
                          multi_class='multinomial')
clf2 = GaussianNB()
clf1.fit(X, y)
clf2.fit(X, y)

fig, axes = plt.subplots(1, 2, figsize=(10, 3))

# Draw each classifier on its own pre-created Axes. The return value of
# plot_decision_regions is an Axes, so it is not assigned to `fig`, which
# stays bound to the Figure created above.
plot_decision_regions(X=X, y=y, clf=clf1, ax=axes[0], legend=2)
plot_decision_regions(X=X, y=y, clf=clf2, ax=axes[1], legend=1)

plt.show()

png

示例 7 - 带有多个训练特征的决策区域

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
X, y = datasets.make_blobs(n_samples=600, n_features=3,
                           centers=[[2, 2, -2],[-2, -2, 2]],
                           cluster_std=[2, 2], random_state=2)

# 训练分类器
svm = SVC(gamma='auto')
svm.fit(X, y)

# 绘制决策区域
fig, ax = plt.subplots()
# 特征3的决策区域 = 1.5
value = 1.5
# 绘制特征3在1.5±0.75范围内的训练样本
width = 0.75
plot_decision_regions(X, y, clf=svm,
                      filler_feature_values={2: value},
                      filler_feature_ranges={2: width},
                      legend=2, ax=ax)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Feature 3 = {}'.format(value))

# 添加轴注释
fig.suptitle('SVM on make_blobs')
plt.show()

png

示例 8 - 决策区域切片网格

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
X, y = datasets.make_blobs(n_samples=500, n_features=3, centers=[[2, 2, -2],[-2, -2, 2]],
                           cluster_std=[2, 2], random_state=2)

# 训练分类器
svm = SVC(gamma='auto')
svm.fit(X, y)

# 绘制决策区域
fig, axarr = plt.subplots(2, 2, figsize=(10,8), sharex=True, sharey=True)
values = [-4.0, -1.0, 1.0, 4.0]
width = 0.75
for value, ax in zip(values, axarr.flat):
    plot_decision_regions(X, y, clf=svm,
                          filler_feature_values={2: value},
                          filler_feature_ranges={2: width},
                          legend=2, ax=ax)
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('Feature 3 = {}'.format(value))

# 添加轴注释
fig.suptitle('SVM on make_blobs')
plt.show()

png

示例 9 - 自定义绘图风格

from mlxtend.plotting import plot_decision_regions
from mlxtend.preprocessing import shuffle_arrays_unison
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
X_train, y_train = X[:100], y[:100]
X_test, y_test = X[100:], y[100:]

# 训练分类器
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# 指定要传递给底层绘图函数的关键字参数
scatter_kwargs = {'s': 120, 'edgecolor': None, 'alpha': 0.7}
contourf_kwargs = {'alpha': 0.2}
scatter_highlight_kwargs = {'s': 120, 'label': 'Test data', 'alpha': 0.7}
# 绘制决策区域
plot_decision_regions(X, y, clf=svm, legend=2,
                      X_highlight=X_test,
                      scatter_kwargs=scatter_kwargs,
                      contourf_kwargs=contourf_kwargs,
                      scatter_highlight_kwargs=scatter_highlight_kwargs)

# 添加轴注释
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()

png

示例 10 - 提供自定义图例标签

要提供自定义图例标签，可以先取得 plot_decision_regions 函数返回的 Axes 对象，再获取图例的句柄和标签，然后通过 ax.legend 传入自定义标签。

ax = plot_decision_regions(X, y, clf=svm, legend=0)

handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, 
          ['类别 0', '类别 1', '类别 2'], 
           framealpha=0.3, scatterpoints=1)

下面是一个示例。

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target

# 训练分类器
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)


# 绘制决策区域
ax = plot_decision_regions(X, y, clf=svm, legend=0)

# 添加坐标轴注释
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')

handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, 
          ['class square', 'class triangle', 'class circle'], 
           framealpha=0.3, scatterpoints=1)

plt.show()

png

示例 11 - 具有不同缩放因子的绘图

from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

# 加载一些示例数据
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target

# 训练分类器
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)

SVC(C=0.5, kernel='linear')

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

默认缩放因子

plot_decision_regions(X, y, clf=svm, zoom_factor=1.)
plt.show()

png

缩小

plot_decision_regions(X, y, clf=svm, zoom_factor=0.1)
plt.show()

png

放大

请注意，在放大时（选择 zoom_factor > 1.0），绘制的图形仍然会确保所有数据点都显示在图中。

plot_decision_regions(X, y, clf=svm, zoom_factor=2.0)
plt.show()

png

裁剪坐标轴

如果要进一步放大（这意味着部分训练样本将不会显示），可以直接裁剪坐标轴，如下所示：

plot_decision_regions(X, y, clf=svm, zoom_factor=2.0)
plt.xlim(5, 6)
plt.ylim(2, 5)
plt.show()

png

示例 12 - 使用期望独热编码输出的分类器（Keras）

大多数仿效 scikit-learn 估计器 API 的分类对象应与 plot_decision_regions 函数兼容。然而，如果分类模型（例如，典型的 Keras 模型）输出的是独热编码预测，则需要使用一个额外的技巧。即，对于独热编码输出，我们需要将 Keras 模型包装在一个类中，该类将这些独热编码变量转换为整数。这样的包装类可以像下面这样简单：

class Onehot2Int:
    """Adapter that turns a model's one-hot style predictions into integers.

    The wrapped ``model`` only needs a ``predict`` method whose result can be
    reduced with ``np.argmax`` along axis 1.
    """

    def __init__(self, model):
        # Keep a reference to the wrapped model; it is queried on every predict
        self.model = model

    def predict(self, X):
        """Return, for each row of the wrapped prediction, the argmax index."""
        class_scores = self.model.predict(X)
        labels = np.argmax(class_scores, axis=1)
        return labels

下面的示例演示了如何将 Onehot2Int 类与输出独热编码标签的 Keras 模型一起使用：

import keras
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy as np
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.plotting import plot_decision_regions
from keras.utils import to_categorical

X, y = iris_data()
X = X[:, [2, 3]]

X = standardize(X)

# One-hot encode the class labels
y_onehot = to_categorical(y)

# Build the model
np.random.seed(123)
model = Sequential()
model.add(Dense(8, input_shape=(2,), activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(4, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# Configure the model and start training.
# `learning_rate` replaces the deprecated `lr` argument of Adam.
model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.005), metrics=['accuracy'])
history = model.fit(X, y_onehot, epochs=10, batch_size=5, verbose=1, validation_split=0.1)

Epoch 1/10
 1/27 [>.............................] - ETA: 3s - loss: 1.2769 - accuracy: 0.4000

/Users/sebastianraschka/miniforge3/envs/mlxtend/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/adam.py:117: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super().__init__(name, **kwargs)
2023-03-28 17:48:13.901264: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


27/27 [==============================] - 0s 3ms/step - loss: 0.9526 - accuracy: 0.4222 - val_loss: 1.2656 - val_accuracy: 0.0000e+00
Epoch 2/10
27/27 [==============================] - 0s 834us/step - loss: 0.7062 - accuracy: 0.6741 - val_loss: 1.0939 - val_accuracy: 0.0000e+00
Epoch 3/10
27/27 [==============================] - 0s 808us/step - loss: 0.6461 - accuracy: 0.7111 - val_loss: 1.0705 - val_accuracy: 0.0667
Epoch 4/10
27/27 [==============================] - 0s 767us/step - loss: 0.6145 - accuracy: 0.7185 - val_loss: 1.0518 - val_accuracy: 0.0000e+00
Epoch 5/10
27/27 [==============================] - 0s 746us/step - loss: 0.5877 - accuracy: 0.7185 - val_loss: 1.0470 - val_accuracy: 0.0000e+00
Epoch 6/10
27/27 [==============================] - 0s 740us/step - loss: 0.5496 - accuracy: 0.7333 - val_loss: 1.0275 - val_accuracy: 0.0000e+00
Epoch 7/10
27/27 [==============================] - 0s 734us/step - loss: 0.4985 - accuracy: 0.7333 - val_loss: 1.0131 - val_accuracy: 0.0000e+00
Epoch 8/10
27/27 [==============================] - 0s 739us/step - loss: 0.4365 - accuracy: 0.7333 - val_loss: 0.9634 - val_accuracy: 0.0000e+00
Epoch 9/10
27/27 [==============================] - 0s 729us/step - loss: 0.3875 - accuracy: 0.7333 - val_loss: 0.9442 - val_accuracy: 0.0000e+00
Epoch 10/10
27/27 [==============================] - 0s 764us/step - loss: 0.3402 - accuracy: 0.7407 - val_loss: 0.8565 - val_accuracy: 0.0000e+00

# 包装Keras模型
model_no_ohe = Onehot2Int(model)

# 绘制决策边界
plot_decision_regions(X, y, clf=model_no_ohe)
plt.show()

9600/9600 [==============================] - 3s 289us/step

png

API

plot_decision_regions(X, y, clf, feature_index=None, filler_feature_values=None, filler_feature_ranges=None, ax=None, X_highlight=None, zoom_factor=1.0, legend=1, hide_spines=True, markers='s^oxv<>', colors='#1f77b4,#ff7f0e,#3ca02c,#d62728,#9467bd,#8c564b,#e377c2,#7f7f7f,#bcbd22,#17becf', scatter_kwargs=None, contourf_kwargs=None, contour_kwargs=None, scatter_highlight_kwargs=None, n_jobs=None)

Plot decision regions of a classifier.

Please note that this function assumes that class labels are
labeled consecutively, e.g., 0, 1, 2, 3, 4, and 5. If you have class
labels with integer labels > 4, you may want to provide additional colors
and/or markers as `colors` and `markers` arguments.
See https://matplotlib.org/examples/color/named_colors.html for more
information.

Parameters

X : array-like, shape = [n_samples, n_features]

Feature Matrix.
y : array-like, shape = [n_samples]

True class labels.
clf : Classifier object.

Must have a .predict method.
feature_index : array-like (default: (0,) for 1D, (0, 1) otherwise)

Feature indices to use for plotting. The first index in feature_index will be on the x-axis, the second index will be on the y-axis.
filler_feature_values : dict (default: None)

Only needed for number features > 2. Dictionary of feature index-value pairs for the features not being plotted.
filler_feature_ranges : dict (default: None)

Only needed for number features > 2. Dictionary of feature index-value pairs for the features not being plotted. Will use the ranges provided to select training samples for plotting.
ax : matplotlib.axes.Axes (default: None)

An existing matplotlib Axes. Creates one if ax=None.
X_highlight : array-like, shape = [n_samples, n_features] (default: None)

An array with data points that are used to highlight samples in X.
zoom_factor : float (default: 1.0)

Controls the scale of the x- and y-axis of the decision plot.
hide_spines : bool (default: True)

Hide axis spines if True.
legend : int (default: 1)

Integer to specify the legend location. No legend if legend is 0.
markers : str (default: 's^oxv<>')

Scatterplot markers.
colors : str (default: '#1f77b4,#ff7f0e,#3ca02c,#d62728,#9467bd,#8c564b,#e377c2,#7f7f7f,#bcbd22,#17becf')

Comma separated list of colors.
scatter_kwargs : dict (default: None)

Keyword arguments for underlying matplotlib scatter function.
contourf_kwargs : dict (default: None)

Keyword arguments for underlying matplotlib contourf function.
contour_kwargs : dict (default: None)

Keyword arguments for underlying matplotlib contour function (which draws the lines between decision regions).
scatter_highlight_kwargs : dict (default: None)

Keyword arguments for underlying matplotlib scatter function.
n_jobs : int or None, optional (default=None)

The number of CPUs to use to do the computation using Python's multiprocessing library. None means 1. -1 means using all processors. New in v0.22.0.

Returns

ax : matplotlib.axes.Axes object

Examples

For usage examples, please see https://rasbt.github.io/mlxtend/user_guide/plotting/plot_decision_regions/