代码示例 / 时间序列 / 时间序列分类与 Transformer 模型

时间序列分类与 Transformer 模型

作者: Theodoros Ntakouris
创建日期: 2021/06/25
最后修改日期: 2021/08/05
描述: 此笔记本演示如何使用 Transformer 模型进行时间序列分类。

在 Colab 中查看 GitHub 源代码


介绍

这是来自 Attention Is All You Need 的 Transformer 架构, 应用于时间序列而非自然语言。

此示例需要 TensorFlow 2.4 或更高版本。


加载数据集

我们将使用与 从头开始进行时间序列分类 示例相同的数据集和预处理。

import numpy as np
import keras
from keras import layers


def readucr(filename):
    data = np.loadtxt(filename, delimiter="\t")
    y = data[:, 0]
    x = data[:, 1:]
    return x, y.astype(int)


root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv")
x_test, y_test = readucr(root_url + "FordA_TEST.tsv")

x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

n_classes = len(np.unique(y_train))

idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

y_train[y_train == -1] = 0
y_test[y_test == -1] = 0

构建模型

我们的模型处理形状为 (批次大小, 序列长度, 特征) 的张量, 其中 序列长度 是时间步数,而 特征 是每个输入的时间序列。

您可以用这个替换您的分类 RNN 层:输入完全兼容!

我们包含残差连接、层归一化和 dropout。 结果层可以堆叠多次。

投影层通过 keras.layers.Conv1D 实现。

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # 注意力和归一化
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs

    # 前馈部分
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res

我们模型的主要部分现在已经完成。我们可以堆叠多个 transformer_encoder 块,并且我们也可以继续添加最终的多层感知机分类头。除了一系列的 Dense 层外,我们需要将 TransformerEncoder 部分的输出张量减少到当前批次中每个数据点的特征向量。实现这一点的常见方法是使用池化层。在这个示例中,GlobalAveragePooling1D 层就足够了。

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

训练与评估

input_shape = x_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=150,
    batch_size=64,
    callbacks=callbacks,
)

model.evaluate(x_test, y_test, verbose=1)
模型: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓
┃ 层 (类型)         输出形状       参数 #  连接到         ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩
│ input_layer         │ (None, 500, 1)    │       0 │ -                    │
│ (输入层)        │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ multi_head_attenti… │ (None, 500, 1)    │   7,169 │ input_layer[0][0],   │
│ (多头注意力 │                   │         │ input_layer[0][0]    │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_1 (丢弃) │ (None, 500, 1)    │       0 │ multi_head_attentio… │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalization │ (None, 500, 1)    │       2 │ dropout_1[0][0]      │
│ (层归一化 │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add (相加)           │ (None, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ input_layer[0][0]    │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d (一维卷积)     │ (None, 500, 4)    │       8 │ add[0][0]            │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_2 (Dropout) │ (None, 500, 4)    │       0 │ conv1d[0][0]         │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_1 (Conv1D)   │ (None, 500, 1)    │       5 │ dropout_2[0][0]      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (None, 500, 1)    │       2 │ conv1d_1[0][0]       │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_1 (Add)         │ (None, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add[0][0]            │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ multi_head_attenti… │ (None, 500, 1)    │   7,169 │ add_1[0][0],         │
│ (MultiHeadAttentio… │                   │         │ add_1[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_4 (Dropout) │ (None, 500, 1)    │       0 │ multi_head_attentio… │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (None, 500, 1)    │       2 │ dropout_4[0][0]      │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_2 (添加)         │ (, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_1[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_2 (卷积1D)   │ (, 500, 4)    │       8 │ add_2[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_5 (丢弃) │ (, 500, 4)    │       0 │ conv1d_2[0][0]       │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_3 (卷积1D)   │ (, 500, 1)    │       5 │ dropout_5[0][0]      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (, 500, 1)    │       2 │ conv1d_3[0][0]       │
│ (层归一化 │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_3 (添加)         │ (, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_2[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ multi_head_attenti… │ (, 500, 1)    │   7,169 │ add_3[0][0],         │
│ (多头注意力 │                   │         │ add_3[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_7 (Dropout) │ (, 500, 1)    │       0 │ multi_head_attentio… │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (, 500, 1)    │       2 │ dropout_7[0][0]      │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_4 (Add)         │ (, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_3[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_4 (Conv1D)   │ (, 500, 4)    │       8 │ add_4[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_8 (Dropout) │ (, 500, 4)    │       0 │ conv1d_4[0][0]       │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_5 (Conv1D)   │ (, 500, 1)    │       5 │ dropout_8[0][0]      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (, 500, 1)    │       2 │ conv1d_5[0][0]       │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_5 (Add)         │ (, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_4[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ multi_head_attenti… │ (None, 500, 1)    │   7,169 │ add_5[0][0],         │
│ (MultiHeadAttentio… │                   │         │ add_5[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_10          │ (None, 500, 1)    │       0 │ multi_head_attentio… │
│ (Dropout)           │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (None, 500, 1)    │       2 │ dropout_10[0][0]     │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_6 (Add)         │ (None, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_5[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_6 (Conv1D)   │ (None, 500, 4)    │       8 │ add_6[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_11          │ (None, 500, 4)    │       0 │ conv1d_6[0][0]       │
│ (Dropout)           │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ conv1d_7 (Conv1D)   │ (None, 500, 1)    │       5 │ dropout_11[0][0]     │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ layer_normalizatio… │ (None, 500, 1)    │       2 │ conv1d_7[0][0]       │
│ (LayerNormalizatio… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ add_7 (Add)         │ (None, 500, 1)    │       0 │ layer_normalization… │
│                     │                   │         │ add_6[0][0]          │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ global_average_poo… │ (None, 500)       │       0 │ add_7[0][0]          │
│ (GlobalAveragePool… │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dense (Dense)       │ (None, 128)       │  64,128 │ global_average_pool… │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dropout_12          │ (None, 128)       │       0 │ dense[0][0]          │
│ (Dropout)           │                   │         │                      │
├─────────────────────┼───────────────────┼─────────┼──────────────────────┤
│ dense_1 (Dense)     │ (None, 2)         │     258 │ dropout_12[0][0]     │
└─────────────────────┴───────────────────┴─────────┴──────────────────────┘
 总参数: 93,130 (363.79 KB)
 可训练参数: 93,130 (363.79 KB)
 不可训练的参数: 0 (0.00 B)
Epoch 1/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 17s 183ms/step - loss: 1.0039 - sparse_categorical_accuracy: 0.5180 - val_loss: 0.7024 - val_sparse_categorical_accuracy: 0.5908
Epoch 2/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.8639 - sparse_categorical_accuracy: 0.5625 - val_loss: 0.6370 - val_sparse_categorical_accuracy: 0.6241
Epoch 3/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.7701 - sparse_categorical_accuracy: 0.6118 - val_loss: 0.6042 - val_sparse_categorical_accuracy: 0.6602
Epoch 4/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.7522 - sparse_categorical_accuracy: 0.6167 - val_loss: 0.5794 - val_sparse_categorical_accuracy: 0.6782
Epoch 5/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.6845 - sparse_categorical_accuracy: 0.6606 - val_loss: 0.5609 - val_sparse_categorical_accuracy: 0.6893
Epoch 6/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.6760 - sparse_categorical_accuracy: 0.6653 - val_loss: 0.5520 - val_sparse_categorical_accuracy: 0.7046
Epoch 7/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.6589 - sparse_categorical_accuracy: 0.6558 - val_loss: 0.5390 - val_sparse_categorical_accuracy: 0.7129
Epoch 8/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.6416 - sparse_categorical_accuracy: 0.6675 - val_loss: 0.5299 - val_sparse_categorical_accuracy: 0.7171
Epoch 9/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.6270 - sparse_categorical_accuracy: 0.6861 - val_loss: 0.5202 - val_sparse_categorical_accuracy: 0.7295
Epoch 10/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.5995 - sparse_categorical_accuracy: 0.6969 - val_loss: 0.5135 - val_sparse_categorical_accuracy: 0.7323
Epoch 11/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5846 - sparse_categorical_accuracy: 0.6927 - val_loss: 0.5084 - val_sparse_categorical_accuracy: 0.7420
Epoch 12/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5837 - sparse_categorical_accuracy: 0.7163 - val_loss: 0.5042 - val_sparse_categorical_accuracy: 0.7420
Epoch 13/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5407 - sparse_categorical_accuracy: 0.7323 - val_loss: 0.4984 - val_sparse_categorical_accuracy: 0.7462
Epoch 14/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5302 - sparse_categorical_accuracy: 0.7446 - val_loss: 0.4958 - val_sparse_categorical_accuracy: 0.7462
Epoch 15/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5041 - sparse_categorical_accuracy: 0.7459 - val_loss: 0.4905 - val_sparse_categorical_accuracy: 0.7503
Epoch 16/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5122 - sparse_categorical_accuracy: 0.7506 - val_loss: 0.4842 - val_sparse_categorical_accuracy: 0.7642
Epoch 17/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.5042 - sparse_categorical_accuracy: 0.7565 - val_loss: 0.4824 - val_sparse_categorical_accuracy: 0.7656
Epoch 18/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4965 - sparse_categorical_accuracy: 0.7709 - val_loss: 0.4794 - val_sparse_categorical_accuracy: 0.7587
Epoch 19/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4860 - sparse_categorical_accuracy: 0.7649 - val_loss: 0.4733 - val_sparse_categorical_accuracy: 0.7614
Epoch 20/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.4797 - sparse_categorical_accuracy: 0.7716 - val_loss: 0.4700 - val_sparse_categorical_accuracy: 0.7642
Epoch 21/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.4946 - sparse_categorical_accuracy: 0.7638 - val_loss: 0.4668 - val_sparse_categorical_accuracy: 0.7670
Epoch 22/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4443 - sparse_categorical_accuracy: 0.7949 - val_loss: 0.4640 - val_sparse_categorical_accuracy: 0.7670
Epoch 23/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4495 - sparse_categorical_accuracy: 0.7897 - val_loss: 0.4597 - val_sparse_categorical_accuracy: 0.7739
Epoch 24/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4284 - sparse_categorical_accuracy: 0.8085 - val_loss: 0.4572 - val_sparse_categorical_accuracy: 0.7739
Epoch 25/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4353 - sparse_categorical_accuracy: 0.8060 - val_loss: 0.4548 - val_sparse_categorical_accuracy: 0.7795
Epoch 26/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4332 - sparse_categorical_accuracy: 0.8024 - val_loss: 0.4531 - val_sparse_categorical_accuracy: 0.7781
Epoch 27/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4399 - sparse_categorical_accuracy: 0.7992 - val_loss: 0.4462 - val_sparse_categorical_accuracy: 0.7864
Epoch 28/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4143 - sparse_categorical_accuracy: 0.8098 - val_loss: 0.4433 - val_sparse_categorical_accuracy: 0.7850
Epoch 29/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3950 - sparse_categorical_accuracy: 0.8373 - val_loss: 0.4421 - val_sparse_categorical_accuracy: 0.7850
Epoch 30/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.4050 - sparse_categorical_accuracy: 0.8186 - val_loss: 0.4392 - val_sparse_categorical_accuracy: 0.7878
Epoch 31/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.4152 - sparse_categorical_accuracy: 0.8162 - val_loss: 0.4361 - val_sparse_categorical_accuracy: 0.7947
Epoch 32/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3870 - sparse_categorical_accuracy: 0.8290 - val_loss: 0.4335 - val_sparse_categorical_accuracy: 0.7961
Epoch 33/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3966 - sparse_categorical_accuracy: 0.8239 - val_loss: 0.4295 - val_sparse_categorical_accuracy: 0.7961
Epoch 34/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3797 - sparse_categorical_accuracy: 0.8320 - val_loss: 0.4252 - val_sparse_categorical_accuracy: 0.8031
Epoch 35/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3798 - sparse_categorical_accuracy: 0.8336 - val_loss: 0.4222 - val_sparse_categorical_accuracy: 0.8003
Epoch 36/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3652 - sparse_categorical_accuracy: 0.8437 - val_loss: 0.4217 - val_sparse_categorical_accuracy: 0.8044
Epoch 37/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3590 - sparse_categorical_accuracy: 0.8394 - val_loss: 0.4203 - val_sparse_categorical_accuracy: 0.8072
Epoch 38/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3457 - sparse_categorical_accuracy: 0.8562 - val_loss: 0.4182 - val_sparse_categorical_accuracy: 0.8100
Epoch 39/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3668 - sparse_categorical_accuracy: 0.8379 - val_loss: 0.4147 - val_sparse_categorical_accuracy: 0.8072
Epoch 40/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3382 - sparse_categorical_accuracy: 0.8612 - val_loss: 0.4116 - val_sparse_categorical_accuracy: 0.8128
Epoch 41/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.3454 - sparse_categorical_accuracy: 0.8525 - val_loss: 0.4076 - val_sparse_categorical_accuracy: 0.8155
Epoch 42/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.3359 - sparse_categorical_accuracy: 0.8672 - val_loss: 0.4075 - val_sparse_categorical_accuracy: 0.8100
Epoch 43/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3420 - sparse_categorical_accuracy: 0.8538 - val_loss: 0.4033 - val_sparse_categorical_accuracy: 0.8197
Epoch 44/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3325 - sparse_categorical_accuracy: 0.8642 - val_loss: 0.4010 - val_sparse_categorical_accuracy: 0.8197
Epoch 45/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3201 - sparse_categorical_accuracy: 0.8715 - val_loss: 0.3993 - val_sparse_categorical_accuracy: 0.8211
Epoch 46/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.3342 - sparse_categorical_accuracy: 0.8597 - val_loss: 0.3966 - val_sparse_categorical_accuracy: 0.8294
Epoch 47/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.3171 - sparse_categorical_accuracy: 0.8714 - val_loss: 0.3955 - val_sparse_categorical_accuracy: 0.8280
Epoch 48/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.3213 - sparse_categorical_accuracy: 0.8698 - val_loss: 0.3919 - val_sparse_categorical_accuracy: 0.8294
Epoch 49/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.3063 - sparse_categorical_accuracy: 0.8822 - val_loss: 0.3907 - val_sparse_categorical_accuracy: 0.8322
Epoch 50/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2966 - sparse_categorical_accuracy: 0.8826 - val_loss: 0.3888 - val_sparse_categorical_accuracy: 0.8322
Epoch 51/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2946 - sparse_categorical_accuracy: 0.8844 - val_loss: 0.3885 - val_sparse_categorical_accuracy: 0.8308
Epoch 52/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2930 - sparse_categorical_accuracy: 0.8948 - val_loss: 0.3865 - val_sparse_categorical_accuracy: 0.8322
Epoch 53/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2715 - sparse_categorical_accuracy: 0.9141 - val_loss: 0.3835 - val_sparse_categorical_accuracy: 0.8280
Epoch 54/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2960 - sparse_categorical_accuracy: 0.8848 - val_loss: 0.3806 - val_sparse_categorical_accuracy: 0.8252
Epoch 55/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2813 - sparse_categorical_accuracy: 0.8989 - val_loss: 0.3808 - val_sparse_categorical_accuracy: 0.8239
Epoch 56/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2708 - sparse_categorical_accuracy: 0.9076 - val_loss: 0.3784 - val_sparse_categorical_accuracy: 0.8363
Epoch 57/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2895 - sparse_categorical_accuracy: 0.8882 - val_loss: 0.3786 - val_sparse_categorical_accuracy: 0.8336
Epoch 58/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2905 - sparse_categorical_accuracy: 0.8810 - val_loss: 0.3780 - val_sparse_categorical_accuracy: 0.8363
Epoch 59/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2732 - sparse_categorical_accuracy: 0.9023 - val_loss: 0.3738 - val_sparse_categorical_accuracy: 0.8419
Epoch 60/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2698 - sparse_categorical_accuracy: 0.8962 - val_loss: 0.3733 - val_sparse_categorical_accuracy: 0.8308
Epoch 61/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2741 - sparse_categorical_accuracy: 0.9025 - val_loss: 0.3724 - val_sparse_categorical_accuracy: 0.8391
Epoch 62/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 128ms/step - loss: 0.2713 - sparse_categorical_accuracy: 0.8973 - val_loss: 0.3698 - val_sparse_categorical_accuracy: 0.8308
Epoch 63/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2682 - sparse_categorical_accuracy: 0.9004 - val_loss: 0.3681 - val_sparse_categorical_accuracy: 0.8363
Epoch 64/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2673 - sparse_categorical_accuracy: 0.9006 - val_loss: 0.3692 - val_sparse_categorical_accuracy: 0.8377
Epoch 65/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2585 - sparse_categorical_accuracy: 0.9056 - val_loss: 0.3684 - val_sparse_categorical_accuracy: 0.8322
Epoch 66/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2696 - sparse_categorical_accuracy: 0.8958 - val_loss: 0.3654 - val_sparse_categorical_accuracy: 0.8336
Epoch 67/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2489 - sparse_categorical_accuracy: 0.9182 - val_loss: 0.3630 - val_sparse_categorical_accuracy: 0.8405
Epoch 68/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2475 - sparse_categorical_accuracy: 0.9121 - val_loss: 0.3626 - val_sparse_categorical_accuracy: 0.8433
Epoch 69/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2398 - sparse_categorical_accuracy: 0.9195 - val_loss: 0.3607 - val_sparse_categorical_accuracy: 0.8433
Epoch 70/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2379 - sparse_categorical_accuracy: 0.9138 - val_loss: 0.3598 - val_sparse_categorical_accuracy: 0.8474
Epoch 71/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2343 - sparse_categorical_accuracy: 0.9162 - val_loss: 0.3568 - val_sparse_categorical_accuracy: 0.8447
Epoch 72/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2497 - sparse_categorical_accuracy: 0.9104 - val_loss: 0.3554 - val_sparse_categorical_accuracy: 0.8419
Epoch 73/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2399 - sparse_categorical_accuracy: 0.9070 - val_loss: 0.3552 - val_sparse_categorical_accuracy: 0.8433
Epoch 74/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2300 - sparse_categorical_accuracy: 0.9190 - val_loss: 0.3572 - val_sparse_categorical_accuracy: 0.8419
Epoch 75/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2370 - sparse_categorical_accuracy: 0.9109 - val_loss: 0.3523 - val_sparse_categorical_accuracy: 0.8419
Epoch 76/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2324 - sparse_categorical_accuracy: 0.9172 - val_loss: 0.3512 - val_sparse_categorical_accuracy: 0.8391
Epoch 77/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2262 - sparse_categorical_accuracy: 0.9210 - val_loss: 0.3488 - val_sparse_categorical_accuracy: 0.8391
Epoch 78/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2262 - sparse_categorical_accuracy: 0.9175 - val_loss: 0.3495 - val_sparse_categorical_accuracy: 0.8419
Epoch 79/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2226 - sparse_categorical_accuracy: 0.9270 - val_loss: 0.3487 - val_sparse_categorical_accuracy: 0.8433
Epoch 80/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2181 - sparse_categorical_accuracy: 0.9247 - val_loss: 0.3501 - val_sparse_categorical_accuracy: 0.8474
Epoch 81/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2220 - sparse_categorical_accuracy: 0.9181 - val_loss: 0.3479 - val_sparse_categorical_accuracy: 0.8460
Epoch 82/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2114 - sparse_categorical_accuracy: 0.9254 - val_loss: 0.3464 - val_sparse_categorical_accuracy: 0.8460
Epoch 83/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2148 - sparse_categorical_accuracy: 0.9196 - val_loss: 0.3467 - val_sparse_categorical_accuracy: 0.8460
Epoch 84/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.2262 - sparse_categorical_accuracy: 0.9181 - val_loss: 0.3446 - val_sparse_categorical_accuracy: 0.8474
Epoch 85/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2121 - sparse_categorical_accuracy: 0.9205 - val_loss: 0.3452 - val_sparse_categorical_accuracy: 0.8460
Epoch 86/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.2057 - sparse_categorical_accuracy: 0.9238 - val_loss: 0.3460 - val_sparse_categorical_accuracy: 0.8350
Epoch 87/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2081 - sparse_categorical_accuracy: 0.9342 - val_loss: 0.3455 - val_sparse_categorical_accuracy: 0.8488
Epoch 88/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2153 - sparse_categorical_accuracy: 0.9211 - val_loss: 0.3421 - val_sparse_categorical_accuracy: 0.8488
Epoch 89/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1977 - sparse_categorical_accuracy: 0.9366 - val_loss: 0.3413 - val_sparse_categorical_accuracy: 0.8474
Epoch 90/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1928 - sparse_categorical_accuracy: 0.9410 - val_loss: 0.3428 - val_sparse_categorical_accuracy: 0.8405
Epoch 91/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1968 - sparse_categorical_accuracy: 0.9327 - val_loss: 0.3411 - val_sparse_categorical_accuracy: 0.8474
Epoch 92/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1909 - sparse_categorical_accuracy: 0.9308 - val_loss: 0.3404 - val_sparse_categorical_accuracy: 0.8488
Epoch 93/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2067 - sparse_categorical_accuracy: 0.9285 - val_loss: 0.3371 - val_sparse_categorical_accuracy: 0.8488
Epoch 94/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1990 - sparse_categorical_accuracy: 0.9329 - val_loss: 0.3385 - val_sparse_categorical_accuracy: 0.8502
Epoch 95/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1986 - sparse_categorical_accuracy: 0.9267 - val_loss: 0.3368 - val_sparse_categorical_accuracy: 0.8433
Epoch 96/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.2069 - sparse_categorical_accuracy: 0.9235 - val_loss: 0.3346 - val_sparse_categorical_accuracy: 0.8502
Epoch 97/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1971 - sparse_categorical_accuracy: 0.9296 - val_loss: 0.3340 - val_sparse_categorical_accuracy: 0.8544
Epoch 98/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.2042 - sparse_categorical_accuracy: 0.9250 - val_loss: 0.3352 - val_sparse_categorical_accuracy: 0.8419
Epoch 99/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1998 - sparse_categorical_accuracy: 0.9271 - val_loss: 0.3334 - val_sparse_categorical_accuracy: 0.8474
Epoch 100/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1832 - sparse_categorical_accuracy: 0.9406 - val_loss: 0.3317 - val_sparse_categorical_accuracy: 0.8474
Epoch 101/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1917 - sparse_categorical_accuracy: 0.9340 - val_loss: 0.3343 - val_sparse_categorical_accuracy: 0.8433
Epoch 102/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1811 - sparse_categorical_accuracy: 0.9286 - val_loss: 0.3317 - val_sparse_categorical_accuracy: 0.8530
Epoch 103/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1733 - sparse_categorical_accuracy: 0.9396 - val_loss: 0.3340 - val_sparse_categorical_accuracy: 0.8460
Epoch 104/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1661 - sparse_categorical_accuracy: 0.9464 - val_loss: 0.3288 - val_sparse_categorical_accuracy: 0.8488
Epoch 105/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1806 - sparse_categorical_accuracy: 0.9390 - val_loss: 0.3296 - val_sparse_categorical_accuracy: 0.8516
Epoch 106/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1774 - sparse_categorical_accuracy: 0.9401 - val_loss: 0.3291 - val_sparse_categorical_accuracy: 0.8530
Epoch 107/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1689 - sparse_categorical_accuracy: 0.9463 - val_loss: 0.3290 - val_sparse_categorical_accuracy: 0.8488
Epoch 108/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1830 - sparse_categorical_accuracy: 0.9319 - val_loss: 0.3299 - val_sparse_categorical_accuracy: 0.8447
Epoch 109/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1757 - sparse_categorical_accuracy: 0.9304 - val_loss: 0.3315 - val_sparse_categorical_accuracy: 0.8488
Epoch 110/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1810 - sparse_categorical_accuracy: 0.9378 - val_loss: 0.3280 - val_sparse_categorical_accuracy: 0.8502
Epoch 111/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1628 - sparse_categorical_accuracy: 0.9522 - val_loss: 0.3276 - val_sparse_categorical_accuracy: 0.8474
Epoch 112/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1659 - sparse_categorical_accuracy: 0.9484 - val_loss: 0.3285 - val_sparse_categorical_accuracy: 0.8530
Epoch 113/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1814 - sparse_categorical_accuracy: 0.9364 - val_loss: 0.3281 - val_sparse_categorical_accuracy: 0.8474
Epoch 114/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1721 - sparse_categorical_accuracy: 0.9391 - val_loss: 0.3287 - val_sparse_categorical_accuracy: 0.8433
Epoch 115/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 127ms/step - loss: 0.1743 - sparse_categorical_accuracy: 0.9321 - val_loss: 0.3275 - val_sparse_categorical_accuracy: 0.8474
Epoch 116/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1677 - sparse_categorical_accuracy: 0.9415 - val_loss: 0.3297 - val_sparse_categorical_accuracy: 0.8391
Epoch 117/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1657 - sparse_categorical_accuracy: 0.9449 - val_loss: 0.3228 - val_sparse_categorical_accuracy: 0.8419
Epoch 118/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1787 - sparse_categorical_accuracy: 0.9316 - val_loss: 0.3230 - val_sparse_categorical_accuracy: 0.8447
Epoch 119/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1659 - sparse_categorical_accuracy: 0.9408 - val_loss: 0.3233 - val_sparse_categorical_accuracy: 0.8460
Epoch 120/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1615 - sparse_categorical_accuracy: 0.9385 - val_loss: 0.3235 - val_sparse_categorical_accuracy: 0.8460
Epoch 121/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1582 - sparse_categorical_accuracy: 0.9526 - val_loss: 0.3247 - val_sparse_categorical_accuracy: 0.8474
Epoch 122/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1577 - sparse_categorical_accuracy: 0.9497 - val_loss: 0.3263 - val_sparse_categorical_accuracy: 0.8474
Epoch 123/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1593 - sparse_categorical_accuracy: 0.9483 - val_loss: 0.3261 - val_sparse_categorical_accuracy: 0.8433
Epoch 124/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1570 - sparse_categorical_accuracy: 0.9442 - val_loss: 0.3277 - val_sparse_categorical_accuracy: 0.8419
Epoch 125/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1434 - sparse_categorical_accuracy: 0.9460 - val_loss: 0.3257 - val_sparse_categorical_accuracy: 0.8447
Epoch 126/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1589 - sparse_categorical_accuracy: 0.9414 - val_loss: 0.3237 - val_sparse_categorical_accuracy: 0.8447
Epoch 127/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1591 - sparse_categorical_accuracy: 0.9460 - val_loss: 0.3217 - val_sparse_categorical_accuracy: 0.8447
Epoch 128/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1530 - sparse_categorical_accuracy: 0.9450 - val_loss: 0.3203 - val_sparse_categorical_accuracy: 0.8474
Epoch 129/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1464 - sparse_categorical_accuracy: 0.9514 - val_loss: 0.3206 - val_sparse_categorical_accuracy: 0.8474
Epoch 130/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1437 - sparse_categorical_accuracy: 0.9526 - val_loss: 0.3231 - val_sparse_categorical_accuracy: 0.8447
Epoch 131/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1415 - sparse_categorical_accuracy: 0.9510 - val_loss: 0.3226 - val_sparse_categorical_accuracy: 0.8433
Epoch 132/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1539 - sparse_categorical_accuracy: 0.9505 - val_loss: 0.3261 - val_sparse_categorical_accuracy: 0.8405
Epoch 133/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1432 - sparse_categorical_accuracy: 0.9544 - val_loss: 0.3239 - val_sparse_categorical_accuracy: 0.8377
Epoch 134/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1368 - sparse_categorical_accuracy: 0.9567 - val_loss: 0.3200 - val_sparse_categorical_accuracy: 0.8474
Epoch 135/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1319 - sparse_categorical_accuracy: 0.9619 - val_loss: 0.3200 - val_sparse_categorical_accuracy: 0.8433
Epoch 136/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1479 - sparse_categorical_accuracy: 0.9494 - val_loss: 0.3201 - val_sparse_categorical_accuracy: 0.8502
Epoch 137/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1353 - sparse_categorical_accuracy: 0.9573 - val_loss: 0.3208 - val_sparse_categorical_accuracy: 0.8488
Epoch 138/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1349 - sparse_categorical_accuracy: 0.9584 - val_loss: 0.3213 - val_sparse_categorical_accuracy: 0.8474
Epoch 139/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1418 - sparse_categorical_accuracy: 0.9532 - val_loss: 0.3197 - val_sparse_categorical_accuracy: 0.8447
Epoch 140/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1402 - sparse_categorical_accuracy: 0.9534 - val_loss: 0.3204 - val_sparse_categorical_accuracy: 0.8488
Epoch 141/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1344 - sparse_categorical_accuracy: 0.9525 - val_loss: 0.3207 - val_sparse_categorical_accuracy: 0.8474
Epoch 142/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1448 - sparse_categorical_accuracy: 0.9494 - val_loss: 0.3192 - val_sparse_categorical_accuracy: 0.8488
Epoch 143/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1363 - sparse_categorical_accuracy: 0.9552 - val_loss: 0.3219 - val_sparse_categorical_accuracy: 0.8460
Epoch 144/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1380 - sparse_categorical_accuracy: 0.9540 - val_loss: 0.3219 - val_sparse_categorical_accuracy: 0.8474
Epoch 145/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1472 - sparse_categorical_accuracy: 0.9468 - val_loss: 0.3219 - val_sparse_categorical_accuracy: 0.8474
Epoch 146/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1402 - sparse_categorical_accuracy: 0.9622 - val_loss: 0.3217 - val_sparse_categorical_accuracy: 0.8502
Epoch 147/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1236 - sparse_categorical_accuracy: 0.9617 - val_loss: 0.3194 - val_sparse_categorical_accuracy: 0.8460
Epoch 148/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1183 - sparse_categorical_accuracy: 0.9683 - val_loss: 0.3193 - val_sparse_categorical_accuracy: 0.8488
Epoch 149/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 126ms/step - loss: 0.1189 - sparse_categorical_accuracy: 0.9618 - val_loss: 0.3237 - val_sparse_categorical_accuracy: 0.8488
Epoch 150/150
 45/45 ━━━━━━━━━━━━━━━━━━━━ 6s 125ms/step - loss: 0.1495 - sparse_categorical_accuracy: 0.9459 - val_loss: 0.3181 - val_sparse_categorical_accuracy: 0.8460
 42/42 ━━━━━━━━━━━━━━━━━━━━ 3s 44ms/step - loss: 0.3182 - sparse_categorical_accuracy: 0.8617

[0.3543623089790344, 0.843181848526001]

结论

在大约 110-120 个周期 (每个周期在 Colab 上 25 秒),模型达到了 ~0.95 的训练准确率,~84 的验证准确率和 ~85 的测试准确率,未进行超参数调整。当然,参数数量和准确率可以通过超参数搜索和更复杂的学习率调整,或不同的优化器来改进。