! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
Comet.ml
from __future__ import annotations
import tempfile
from fastai.basics import *
from fastai.learner import Callback
from nbdev.showdoc import *
与 Comet.ml 的集成。
注册
- 创建账户: comet.ml/signup。
- 将API密钥导出为环境变量(更多帮助见这里)。在终端中运行:
export COMET_API_KEY='YOUR_LONG_API_TOKEN'
或者将其包含在./comet.config
文件中(推荐)。更多帮助请见这里。
安装
- 你需要安装 comet_ml。在终端中运行:
pip install comet_ml
或者(使用 conda 的替代安装)。在终端中运行:
conda install -c anaconda -c conda-forge -c comet_ml comet_ml
如何使用?
关键是要在创建 Learner()
之前创建回调 CometCallback
,如下所示:
from fastai.callback.comet import CometCallback
comet_ml_callback = CometCallback('项目名称') # 指定项目
learn = Learner(dls, model,
cbs=comet_ml_callback
)
learn.fit_one_cycle(1)
import comet_ml
class CometCallback(Callback):
    "Log losses, metrics, model weights, model architecture summary to Comet"
    # Run after the Recorder so recorder.log is populated when after_epoch fires.
    order = Recorder.order + 1

    def __init__(self, project_name, log_model_weights=True):
        # project_name: Comet.ml project the experiment is created under.
        # log_model_weights: if True, upload checkpoints saved by SaveModelCallback.
        # NOTE: the original assigned an undefined `keep_experiment_running`,
        # raising NameError on construction; that dead assignment is removed.
        self.log_model_weights = log_model_weights
        self.project_name = project_name
        self.experiment = None

    def before_fit(self):
        "Start a Comet experiment and log static run properties."
        try:
            self.experiment = comet_ml.Experiment(project_name=self.project_name)
        except ValueError:
            print("No active experiment")

        # Best-effort logging of run-level parameters; never abort training.
        try:
            self.experiment.log_parameter("n_epoch", str(self.learn.n_epoch))
            self.experiment.log_parameter("model_class", str(type(self.learn.model)))
        except Exception:
            print("Did not log all properties.")

        # Upload the model's repr() as a text asset (model_summary.txt).
        try:
            with tempfile.NamedTemporaryFile(mode="w") as f:
                with open(f.name, "w") as g:
                    g.write(repr(self.learn.model))
                self.experiment.log_asset(f.name, "model_summary.txt")
        except Exception:
            print("Did not log model summary. Check if your model is PyTorch model.")

        # Weight logging requires SaveModelCallback (exposed as learn.save_model).
        if self.log_model_weights and not hasattr(self.learn, "save_model"):
            print(
                "Unable to log model to Comet.\n",
            )

    def after_batch(self):
        "Log per-batch losses and optimizer hyper-parameters."
        if self.learn.training:
            self.experiment.log_metric("batch__smooth_loss", self.learn.smooth_loss)
            self.experiment.log_metric("batch__loss", self.learn.loss)
            self.experiment.log_metric("batch__train_iter", self.learn.train_iter)
            for i, h in enumerate(self.learn.opt.hypers):
                for k, v in h.items():
                    self.experiment.log_metric(f"batch__opt.hypers.{k}", v)

    def after_epoch(self):
        "Log recorder metrics and, optionally, the saved model checkpoint."
        # recorder.metric_names / recorder.log are parallel sequences.
        for n, v in zip(self.learn.recorder.metric_names, self.learn.recorder.log):
            if n not in ["epoch", "time"]:
                self.experiment.log_metric(f"epoch__{n}", v)
            if n == "time":
                # Wall time is a string, not a numeric metric.
                self.experiment.log_text(f"epoch__{n}", str(v))

        # Upload the checkpoint written by SaveModelCallback for this epoch.
        if self.log_model_weights and hasattr(self.learn, "save_model"):
            if self.learn.save_model.every_epoch:
                _file = join_path_file(
                    f"{self.learn.save_model.fname}_{self.learn.save_model.epoch}",
                    self.learn.path / self.learn.model_dir,
                    ext=".pth",
                )
            else:
                _file = join_path_file(
                    self.learn.save_model.fname,
                    self.learn.path / self.learn.model_dir,
                    ext=".pth",
                )
            self.experiment.log_asset(_file)

    def after_fit(self):
        "End the Comet experiment, tolerating a missing/failed experiment."
        try:
            self.experiment.end()
        except Exception:
            print("No Comet experiment to stop.")
# Render the auto-generated API documentation for CometCallback (nbdev show_doc).
show_doc(CometCallback)
CometCallback
CometCallback (project_name, log_model_weights=True)
Log losses, metrics, model weights, model architecture summary to Comet