Hyperparameter Optimization in Darts¶
There is nothing special about hyperparameter optimization in Darts. The main thing to be aware of is probably the existence of PyTorch Lightning callbacks for early stopping and for pruning experiments, which work with Darts' deep learning based TorchForecastingModels. Below, we show examples of hyperparameter optimization using Optuna and Ray Tune.
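As a quick illustration of what this looks like (a minimal sketch only; the model and parameter values below are placeholders, not recommendations), such callbacks are handed to any TorchForecastingModel through the pl_trainer_kwargs argument:

from pytorch_lightning.callbacks import EarlyStopping
from darts.models import TCNModel

# stop training once the validation loss stops improving
early_stopper = EarlyStopping(monitor="val_loss", min_delta=0.001, patience=3, mode="min")

model = TCNModel(
    input_chunk_length=24,
    output_chunk_length=12,
    pl_trainer_kwargs={"callbacks": [early_stopper]},
)
# a validation series must be passed to fit() for "val_loss" to be logged:
# model.fit(series=train, val_series=val)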
Hyperparameter Optimization with Optuna¶
Optuna is a great option for hyperparameter optimization with Darts. Below, we show a minimal example using PyTorch Lightning callbacks for pruning experiments. For the sake of the example, we train a TCNModel on a single series and optimize its (probably overfitting) hyperparameters by minimizing the prediction error on a validation set. You can also take a look at this notebook for a more complete example.
Note (2023-02-19): Optuna's PyTorchLightningPruningCallback raises an error with pytorch-lightning>=1.8. Until this issue is resolved, consider the workaround proposed here.
import numpy as np
import optuna
import torch
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import EarlyStopping
from sklearn.preprocessing import MaxAbsScaler
from darts.dataprocessing.transformers import Scaler
from darts.datasets import AirPassengersDataset
from darts.metrics import smape
from darts.models import TCNModel
from darts.utils.likelihood_models import GaussianLikelihood
# load data
series = AirPassengersDataset().load().astype(np.float32)
# split in train / validation (note: in practice we would also need a test set)
VAL_LEN = 36
train, val = series[:-VAL_LEN], series[-VAL_LEN:]
# scale
scaler = Scaler(MaxAbsScaler())
train = scaler.fit_transform(train)
val = scaler.transform(val)
# define objective function
def objective(trial):
    # select input and output chunk lengths
    in_len = trial.suggest_int("in_len", 12, 36)
    out_len = trial.suggest_int("out_len", 1, in_len - 1)

    # other hyperparameters
    kernel_size = trial.suggest_int("kernel_size", 2, 5)
    num_filters = trial.suggest_int("num_filters", 1, 5)
    weight_norm = trial.suggest_categorical("weight_norm", [False, True])
    dilation_base = trial.suggest_int("dilation_base", 2, 4)
    dropout = trial.suggest_float("dropout", 0.0, 0.4)
    lr = trial.suggest_float("lr", 5e-5, 1e-3, log=True)
    include_year = trial.suggest_categorical("year", [False, True])

    # throughout training we'll monitor the validation loss for both pruning and early stopping
    pruner = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    early_stopper = EarlyStopping("val_loss", min_delta=0.001, patience=3, verbose=True)
    callbacks = [pruner, early_stopper]

    # detect if a GPU is available
    if torch.cuda.is_available():
        num_workers = 4
    else:
        num_workers = 0

    pl_trainer_kwargs = {
        "accelerator": "auto",
        "callbacks": callbacks,
    }

    # optionally also add the (scaled) year value as a past covariate
    if include_year:
        encoders = {
            "datetime_attribute": {"past": ["year"]},
            "transformer": Scaler(),
        }
    else:
        encoders = None

    # reproducibility
    torch.manual_seed(42)

    # build the TCN model
    model = TCNModel(
        input_chunk_length=in_len,
        output_chunk_length=out_len,
        batch_size=32,
        n_epochs=100,
        nr_epochs_val_period=1,
        kernel_size=kernel_size,
        num_filters=num_filters,
        weight_norm=weight_norm,
        dilation_base=dilation_base,
        dropout=dropout,
        optimizer_kwargs={"lr": lr},
        add_encoders=encoders,
        likelihood=GaussianLikelihood(),
        pl_trainer_kwargs=pl_trainer_kwargs,
        model_name="tcn_model",
        force_reset=True,
        save_checkpoints=True,
    )

    # when validating during training, we can use a slightly longer validation
    # set which also contains the first input_chunk_length time steps
    model_val_set = scaler.transform(series[-(VAL_LEN + in_len) :])

    # train the model
    model.fit(
        series=train,
        val_series=model_val_set,
        num_loader_workers=num_workers,
    )

    # reload best model over course of training
    model = TCNModel.load_from_checkpoint("tcn_model")

    # Evaluate how good it is on the validation set, using sMAPE
    preds = model.predict(series=train, n=VAL_LEN)
    smapes = smape(val, preds, n_jobs=-1, verbose=True)
    smape_val = np.mean(smapes)
    return smape_val if not np.isnan(smape_val) else float("inf")
# for convenience, print some optimization trials information
def print_callback(study, trial):
print(f"Current value: {trial.value}, Current params: {trial.params}")
print(f"Best value: {study.best_value}, Best params: {study.best_trial.params}")
# optimize hyperparameters by minimizing the sMAPE on the validation set
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=100, callbacks=[print_callback])
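After the study has finished, the best hyperparameters are available in study.best_trial.params. As a rough sketch (parameter names follow the objective function above; here we simply retrain a fresh model on the training series rather than reloading a checkpoint), a final model could be built like this:

# rebuild and retrain a model with the best hyperparameters found by the study
best = study.best_trial.params
final_model = TCNModel(
    input_chunk_length=best["in_len"],
    output_chunk_length=best["out_len"],
    kernel_size=best["kernel_size"],
    num_filters=best["num_filters"],
    weight_norm=best["weight_norm"],
    dilation_base=best["dilation_base"],
    dropout=best["dropout"],
    optimizer_kwargs={"lr": best["lr"]},
    likelihood=GaussianLikelihood(),
    n_epochs=100,
)
# note: the optional "year" encoder from the objective is omitted here for brevity
final_model.fit(series=train)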
Hyperparameter Optimization with Ray Tune¶
Ray Tune is another option for hyperparameter optimization with automatic pruning. Here is an example of how to use Ray Tune with an NBEATSModel, together with the Asynchronous Hyperband scheduler.
import pandas as pd
from pytorch_lightning.callbacks import EarlyStopping
from ray import tune
from ray.tune import CLIReporter
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler
from torchmetrics import MeanAbsoluteError, MeanAbsolutePercentageError, MetricCollection
from darts.dataprocessing.transformers import Scaler
from darts.datasets import AirPassengersDataset
from darts.models import NBEATSModel
def train_model(model_args, callbacks, train, val):
    torch_metrics = MetricCollection([MeanAbsolutePercentageError(), MeanAbsoluteError()])

    # Create the model using model_args from Ray Tune
    model = NBEATSModel(
        input_chunk_length=24,
        output_chunk_length=12,
        n_epochs=500,
        torch_metrics=torch_metrics,
        pl_trainer_kwargs={"callbacks": callbacks, "enable_progress_bar": False},
        **model_args,
    )

    model.fit(
        series=train,
        val_series=val,
    )
# Read data:
series = AirPassengersDataset().load()
# Create training and validation sets:
train, val = series.split_after(pd.Timestamp(year=1957, month=12, day=1))
# Normalize the time series (note: we avoid fitting the transformer on the validation set)
transformer = Scaler()
transformer.fit(train)
train = transformer.transform(train)
val = transformer.transform(val)
# Early stop callback
my_stopper = EarlyStopping(
    monitor="val_MeanAbsolutePercentageError",
    patience=5,
    min_delta=0.05,
    mode="min",
)

# set up ray tune callback
tune_callback = TuneReportCallback(
    {
        "loss": "val_loss",
        "MAPE": "val_MeanAbsolutePercentageError",
    },
    on="validation_end",
)

# define the hyperparameter space
config = {
    "batch_size": tune.choice([16, 32, 64, 128]),
    "num_blocks": tune.choice([1, 2, 3, 4, 5]),
    "num_stacks": tune.choice([32, 64, 128]),
    "dropout": tune.uniform(0, 0.2),
}

reporter = CLIReporter(
    parameter_columns=list(config.keys()),
    metric_columns=["loss", "MAPE", "training_iteration"],
)

resources_per_trial = {"cpu": 8, "gpu": 1}

# the number of combinations to try
num_samples = 10

scheduler = ASHAScheduler(max_t=1000, grace_period=3, reduction_factor=2)

train_fn_with_parameters = tune.with_parameters(
    train_model, callbacks=[my_stopper, tune_callback], train=train, val=val,
)

# optimize hyperparameters by minimizing the MAPE on the validation set
analysis = tune.run(
    train_fn_with_parameters,
    resources_per_trial=resources_per_trial,
    # Using a metric instead of loss allows for
    # comparison between different likelihood or loss functions.
    metric="MAPE",  # any value in TuneReportCallback
    mode="min",
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    progress_reporter=reporter,
    name="tune_darts",
)
print("Best hyperparameters found were: ", analysis.best_config)
Hyperparameter Optimization with gridsearch()¶
Every forecasting model in Darts offers a gridsearch() method for basic hyperparameter search. It is limited to very simple cases, with only a few hyperparameters, and it works on a single time series only.
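As a minimal sketch (assuming a recent Darts version, in which gridsearch() is a class method returning the untrained best model, the best parameter dictionary, and the corresponding metric score; the grid values below are placeholders), reusing the scaled train and val series from the Optuna example above:

from darts.metrics import smape
from darts.models import TCNModel

# every parameter combination in this grid is built, trained on `series`,
# and evaluated against `val_series`
parameters = {
    "input_chunk_length": [12, 24],
    "output_chunk_length": [1, 6],
    "kernel_size": [2, 3],
    "num_filters": [3, 5],
    "n_epochs": [50],
}

best_model, best_params, best_score = TCNModel.gridsearch(
    parameters=parameters,
    series=train,
    val_series=val,
    metric=smape,
)
print(best_params, best_score)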