import warnings
import logging
MLFlow
# Silence Python warnings and keep the statsforecast / mlflow loggers at
# ERROR level so the notebook output stays readable.
warnings.simplefilter('ignore')
logging.getLogger('statsforecast').setLevel(logging.ERROR)
logging.getLogger("mlflow").setLevel(logging.ERROR)
使用 MLFlow 运行 Statsforecast。
MLFlow 是一个开源实验跟踪系统,帮助数据科学家管理从实验到生产的模型生命周期。Statsforecast 的 MLFlow 集成可在 MLflavors 库中找到,该库包含对热门机器学习库的 MLFlow 支持。
from statsforecast.utils import generate_series
# Generate 5 synthetic series of 50 observations each, all ending on the same
# date, with one static feature column.
series = generate_series(5, min_length=50, max_length=50, equal_ends=True, n_static_features=1)
series.head()
| | unique_id | ds | y | static_0 |
---|---|---|---|---|
0 | 0 | 2000-01-01 | 12.073897 | 43 |
1 | 0 | 2000-01-02 | 59.734166 | 43 |
2 | 0 | 2000-01-03 | 101.260794 | 43 |
3 | 0 | 2000-01-04 | 143.987430 | 43 |
4 | 0 | 2000-01-05 | 185.320406 | 43 |
对于下一部分,需要 `mlflow` 和 `mlflavors`。使用以下命令进行安装:
pip install mlflow mlflavors
模型日志记录
import pandas as pd
import mlflow
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
import mlflavors
import requests
# Configuration shared by the training, loading and prediction cells.
ARTIFACT_PATH = "model"  # artifact sub-path the model is logged under
DATA_PATH = "./data"
HORIZON = 7  # number of steps to forecast
LEVEL = [90]  # prediction-interval confidence levels
# Train an AutoARIMA model inside an MLflow run, log its evaluation metrics
# and log the fitted StatsForecast object with the mlflavors flavor.
with mlflow.start_run() as run:
    series = generate_series(5, min_length=50, max_length=50, equal_ends=True, n_static_features=1)

    # Per series: first 43 observations for training, last 7 for testing.
    train_df = series.groupby('unique_id').head(43)
    test_df = series.groupby('unique_id').tail(7)
    X_test = test_df.drop(columns=["y"])
    y_test = test_df[["y"]]

    models = [AutoARIMA(season_length=7)]

    sf = StatsForecast(df=train_df, models=models, freq="D", n_jobs=-1)

    sf.fit()

    # Evaluate model
    y_pred = sf.predict(h=HORIZON, X_df=X_test, level=LEVEL)["AutoARIMA"]

    metrics = {
        "mae": mean_absolute_error(y_test, y_pred),
        "mape": mean_absolute_percentage_error(y_test, y_pred),
    }

    print(f"Metrics: \n{metrics}")

    # Log metrics
    mlflow.log_metrics(metrics)

    # Log model using pickle serialization (default).
    mlflavors.statsforecast.log_model(
        statsforecast_model=sf,
        artifact_path=ARTIFACT_PATH,
        serialization_format="pickle",
    )
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)

print(f"\nMLflow run id:\n{run.info.run_id}")
Metrics:
{'mae': 6.712853959225143, 'mape': 0.11719246764336884}
2023/10/20 23:45:36 WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: /var/folders/w2/91_v34nx0xs2npnl3zsl9tmm0000gn/T/tmpt4686vpu/model/model.pkl, flavor: statsforecast), fall back to return ['statsforecast==1.6.0']. Set logging level to DEBUG to see the full traceback.
MLflow run id:
0319bbd664424fcd88d6c532e3ecac77
查看实验
要查看新创建的实验和记录的工件,请打开 MLflow UI:
mlflow ui
加载 Statsforecast 模型
可以使用 `mlflavors.statsforecast.load_model` 函数从 MLFlow 注册表加载 statsforecast 模型,并用于生成预测。
# Load the logged model back through the mlflavors statsforecast flavor and
# forecast with the same horizon, exogenous frame and interval levels.
loaded_model = mlflavors.statsforecast.load_model(model_uri=model_uri)
results = loaded_model.predict(h=HORIZON, X_df=X_test, level=LEVEL)
results.head()
ds | AutoARIMA | AutoARIMA-lo-90 | AutoARIMA-hi-90 | |
---|---|---|---|---|
unique_id | ||||
0 | 2000-02-13 | 55.894432 | 44.343880 | 67.444984 |
0 | 2000-02-14 | 97.818054 | 86.267502 | 109.368607 |
0 | 2000-02-15 | 146.745422 | 135.194870 | 158.295975 |
0 | 2000-02-16 | 188.888336 | 177.337784 | 200.438904 |
0 | 2000-02-17 | 231.493637 | 219.943085 | 243.044189 |
使用 pyfunc 加载模型
Pyfunc 是 MLFlow 模型的另一种接口,提供了加载和保存模型的工具。此代码在进行预测时与上述内容等效。
loaded_pyfunc = mlflavors.statsforecast.pyfunc.load_model(model_uri=model_uri)

# Convert the test data to a 2D numpy array so it can be passed to the pyfunc
# predict call inside a single-row pandas DataFrame of configuration arguments.
X_test_array = X_test.to_numpy()

# Create the configuration DataFrame
predict_conf = pd.DataFrame(
    [
        {
            "X": X_test_array,
            "X_cols": X_test.columns,
            "X_dtypes": list(X_test.dtypes),
            "h": HORIZON,
            "level": LEVEL,
        }
    ]
)

pyfunc_result = loaded_pyfunc.predict(predict_conf)
pyfunc_result.head()
ds | AutoARIMA | AutoARIMA-lo-90 | AutoARIMA-hi-90 | |
---|---|---|---|---|
unique_id | ||||
0 | 2000-02-13 | 55.894432 | 44.343880 | 67.444984 |
0 | 2000-02-14 | 97.818054 | 86.267502 | 109.368607 |
0 | 2000-02-15 | 146.745422 | 135.194870 | 158.295975 |
0 | 2000-02-16 | 188.888336 | 177.337784 | 200.438904 |
0 | 2000-02-17 | 231.493637 | 219.943085 | 243.044189 |
模型服务
本节展示了如何将 `pyfunc` 类型的模型服务到本地 REST API 端点,并随后请求已服务模型的预测。要服务模型,请运行以下命令,并将其中的 `<run_id>` 替换为执行训练代码时打印的运行 ID。
mlflow models serve -m runs:/<run_id>/model --env-manager local --host 127.0.0.1
运行此命令后,可以运行以下代码发送请求。
# Forecast settings for the request sent to the served model.
HORIZON = 7
LEVEL = [90, 95]
# Define local host and endpoint URL
host = "127.0.0.1"
url = f"http://{host}:5000/invocations"
# Convert datetimes to strings for JSON serialization
X_test_pyfunc = X_test.copy()
X_test_pyfunc["ds"] = X_test_pyfunc["ds"].dt.strftime(date_format="%Y-%m-%d")

# Convert to a list for JSON serialization
X_test_list = X_test_pyfunc.to_numpy().tolist()

# Convert the column index to a list of strings for JSON serialization
X_cols = list(X_test.columns)

# Convert dtypes to strings for JSON serialization
X_dtypes = [str(dtype) for dtype in list(X_test.dtypes)]

predict_conf = pd.DataFrame(
    [
        {
            "X": X_test_list,
            "X_cols": X_cols,
            "X_dtypes": X_dtypes,
            "h": HORIZON,
            "level": LEVEL,
        }
    ]
)

# Create a dictionary from the pandas DataFrame in the split orientation
json_data = {"dataframe_split": predict_conf.to_dict(orient="split")}

# Score model
response = requests.post(url, json=json_data)
pd.DataFrame(response.json()['predictions']).head()
| | ds | AutoARIMA | AutoARIMA-lo-95 | AutoARIMA-lo-90 | AutoARIMA-hi-90 | AutoARIMA-hi-95 |
---|---|---|---|---|---|---|
0 | 2000-02-13T00:00:00 | 55.894432 | 42.131100 | 44.343880 | 67.444984 | 69.657768 |
1 | 2000-02-14T00:00:00 | 97.818054 | 84.054718 | 86.267502 | 109.368607 | 111.581390 |
2 | 2000-02-15T00:00:00 | 146.745422 | 132.982086 | 135.194870 | 158.295975 | 160.508759 |
3 | 2000-02-16T00:00:00 | 188.888336 | 175.125015 | 177.337784 | 200.438904 | 202.651672 |
4 | 2000-02-17T00:00:00 | 231.493637 | 217.730301 | 219.943085 | 243.044189 | 245.256973 |
Give us a ⭐ on Github