! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
Comet.ml
from __future__ import annotations
import tempfile
from fastai.basics import *
from fastai.learner import Callback
from nbdev.showdoc import *
与 Comet.ml 的集成。
注册
- 创建账户: comet.ml/signup。
- 将API密钥导出为环境变量(更多帮助见这里)。在终端中运行:
export COMET_API_KEY='YOUR_LONG_API_TOKEN'
或者将其包含在./comet.config
文件中(推荐)。更多帮助请见这里。
安装
- 你需要安装 comet_ml。在终端中运行:
pip install comet_ml
或者(使用 conda 的替代安装)。在终端中运行:
conda install -c anaconda -c conda-forge -c comet_ml comet_ml
如何使用?
关键是要在创建 Learner()
之前创建回调 CometCallback
,如下所示:
from fastai.callback.comet import CometCallback
comet_ml_callback = CometCallback('项目名称') # 指定项目
learn = Learner(dls, model,
cbs=comet_ml_callback
)
learn.fit_one_cycle(1)
import comet_ml
class CometCallback(Callback):
    "Log losses, metrics, model weights, model architecture summary to Comet"
    # Run after the Recorder so recorder.log is populated when after_epoch fires.
    order = Recorder.order + 1

    def __init__(self, project_name, log_model_weights=True):
        # project_name: Comet.ml project the experiment is created under.
        # log_model_weights: if True, upload checkpoints saved by SaveModelCallback.
        # NOTE: the original assigned an undefined `keep_experiment_running`,
        # raising NameError on construction; that dead assignment is removed.
        self.log_model_weights = log_model_weights
        self.project_name = project_name
        self.experiment = None

    def before_fit(self):
        "Start a Comet experiment and log static run properties."
        try:
            self.experiment = comet_ml.Experiment(project_name=self.project_name)
        except ValueError:
            print("No active experiment")

        # Best-effort logging of run-level parameters; never abort training.
        try:
            self.experiment.log_parameter("n_epoch", str(self.learn.n_epoch))
            self.experiment.log_parameter("model_class", str(type(self.learn.model)))
        except Exception:
            print("Did not log all properties.")

        # Upload the model's repr() as a text asset (model_summary.txt).
        try:
            with tempfile.NamedTemporaryFile(mode="w") as f:
                with open(f.name, "w") as g:
                    g.write(repr(self.learn.model))
                self.experiment.log_asset(f.name, "model_summary.txt")
        except Exception:
            print("Did not log model summary. Check if your model is PyTorch model.")

        # Weight logging requires SaveModelCallback (exposed as learn.save_model).
        if self.log_model_weights and not hasattr(self.learn, "save_model"):
            print(
                "Unable to log model to Comet.\n",
            )

    def after_batch(self):
        "Log per-batch losses and optimizer hyper-parameters."
        if self.learn.training:
            self.experiment.log_metric("batch__smooth_loss", self.learn.smooth_loss)
            self.experiment.log_metric("batch__loss", self.learn.loss)
            self.experiment.log_metric("batch__train_iter", self.learn.train_iter)
            for i, h in enumerate(self.learn.opt.hypers):
                for k, v in h.items():
                    self.experiment.log_metric(f"batch__opt.hypers.{k}", v)

    def after_epoch(self):
        "Log recorder metrics and, optionally, the saved model checkpoint."
        # recorder.metric_names / recorder.log are parallel sequences.
        for n, v in zip(self.learn.recorder.metric_names, self.learn.recorder.log):
            if n not in ["epoch", "time"]:
                self.experiment.log_metric(f"epoch__{n}", v)
            if n == "time":
                # Wall time is a string, not a numeric metric.
                self.experiment.log_text(f"epoch__{n}", str(v))

        # Upload the checkpoint written by SaveModelCallback for this epoch.
        if self.log_model_weights and hasattr(self.learn, "save_model"):
            if self.learn.save_model.every_epoch:
                _file = join_path_file(
                    f"{self.learn.save_model.fname}_{self.learn.save_model.epoch}",
                    self.learn.path / self.learn.model_dir,
                    ext=".pth",
                )
            else:
                _file = join_path_file(
                    self.learn.save_model.fname,
                    self.learn.path / self.learn.model_dir,
                    ext=".pth",
                )
            self.experiment.log_asset(_file)

    def after_fit(self):
        "End the Comet experiment, tolerating a missing/failed experiment."
        try:
            self.experiment.end()
        except Exception:
            print("No Comet experiment to stop.")
# Render the auto-generated API documentation for CometCallback (nbdev show_doc).
show_doc(CometCallback)
CometCallback
CometCallback (project_name, log_model_weights=True)
Log losses, metrics, model weights, model architecture summary to Comet