from fastai.basics import *
from nbdev.showdoc import *
from fastai.test_utils import *
Callbacks that make decisions depending on the behavior of monitored metrics/losses
TerminateOnNaNCallback -
class TerminateOnNaNCallback(Callback):
    "A `Callback` that terminates training if loss is NaN."
    order=-9
    def after_batch(self):
        "Test if `last_loss` is NaN and interrupts training."
        if torch.isinf(self.loss) or torch.isnan(self.loss): raise CancelFitException
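A quick way to see the callback in action (a minimal sketch using `synth_learner` from `fastai.test_utils`; the deliberately huge learning rate is only there to make the loss diverge):

learn = synth_learner()
# an absurdly high learning rate quickly drives the loss to NaN/inf,
# so the callback raises CancelFitException and training stops early
learn.fit(10, lr=100, cbs=TerminateOnNaNCallback())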
assert len(learn.recorder.losses) < 10*len(learn.dls.train)
for l in learn.recorder.losses:
    assert not torch.isinf(l) and not torch.isnan(l)
TrackerCallback -
class TrackerCallback(Callback):
    "A `Callback` that keeps track of the best value in `monitor`."
    order,remove_on_fetch,_only_train_loop = 60,True,True
    def __init__(self,
        monitor='valid_loss', # value (usually loss or metric) being monitored.
        comp=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
        min_delta=0., # minimum delta between the last monitor value and the best monitor value.
        reset_on_fit=True # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
    ):
        if comp is None: comp = np.less if 'loss' in monitor or 'error' in monitor else np.greater
        if comp == np.less: min_delta *= -1
        self.monitor,self.comp,self.min_delta,self.reset_on_fit,self.best = monitor,comp,min_delta,reset_on_fit,None

    def before_fit(self):
        "Prepare the monitored value"
        self.run = not hasattr(self, "lr_finder") and not hasattr(self, "gather_preds")
        if self.reset_on_fit or self.best is None: self.best = float('inf') if self.comp == np.less else -float('inf')
        assert self.monitor in self.recorder.metric_names[1:]
        self.idx = list(self.recorder.metric_names[1:]).index(self.monitor)

    def after_epoch(self):
        "Compare the last value to the best up to now"
        val = self.recorder.values[-1][self.idx]
        if self.comp(val - self.min_delta, self.best): self.best,self.new_best = val,True
        else: self.new_best = False

    def after_fit(self): self.run=True
When implementing a `Callback` whose behavior depends on the best value of a metric or loss, subclass this `Callback` and use its `best` (best value so far) and `new_best` (there was a new best value this epoch) attributes. If you want to keep the value of `best` across subsequent calls to `fit` (e.g., `Learner.fit_one_cycle`), set `reset_on_fit` to `False`.
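For instance, a minimal sketch of such a subclass (the class name and message are illustrative, not part of the library) might look like this:

class PrintBestCallback(TrackerCallback):
    "Illustrative subclass: report each time the monitored value reaches a new best."
    order = TrackerCallback.order+1
    def after_epoch(self):
        super().after_epoch()          # updates self.best and self.new_best
        if self.new_best: print(f'New best {self.monitor}: {self.best}')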
class SaveModelCallback(TrackerCallback):
    "A `TrackerCallback` that saves the model's best during training and loads it at the end."
    order = TrackerCallback.order+1
    def __init__(self,
        monitor='valid_loss', # value (usually loss or metric) being monitored.
        comp=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
        min_delta=0., # minimum delta between the last monitor value and the best monitor value.
        fname='model', # model name to be used when saving model.
        every_epoch=False, # if true, save model after every epoch; else save only when model is better than existing best.
        at_end=False, # if true, save model when training ends; else load best model if there is only one saved model.
        with_opt=False, # if true, save optimizer state (if any available) when saving model.
        reset_on_fit=True # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
    ):
        super().__init__(monitor=monitor, comp=comp, min_delta=min_delta, reset_on_fit=reset_on_fit)
        assert not (every_epoch and at_end), "every_epoch and at_end cannot both be set to True"
        # keep track of file path for loggers
        self.last_saved_path = None
        store_attr('fname,every_epoch,at_end,with_opt')

    def _save(self, name): self.last_saved_path = self.learn.save(name, with_opt=self.with_opt)

    def after_epoch(self):
        "Compare the value monitored to its best score and save if best."
        if self.every_epoch:
            if (self.epoch%self.every_epoch) == 0: self._save(f'{self.fname}_{self.epoch}')
        else: #every improvement
            super().after_epoch()
            if self.new_best:
                print(f'Better model found at epoch {self.epoch} with {self.monitor} value: {self.best}.')
                self._save(f'{self.fname}')

    def after_fit(self, **kwargs):
        "Load the best model."
        if self.at_end: self._save(f'{self.fname}')
        elif not self.every_epoch: self.learn.load(f'{self.fname}', with_opt=self.with_opt)
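The output below comes from runs along these lines (a sketch; the `synth_learner` setup, epoch counts, and temporary path are assumptions for illustration):

learn = synth_learner(n_trn=2, path=Path.cwd()/'tmp')
learn.fit(n_epoch=2, cbs=SaveModelCallback())
# the best model is saved to tmp/models/model.pth and reloaded in after_fit
assert (Path.cwd()/'tmp/models/model.pth').exists()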
Better model found at epoch 0 with valid_loss value: 12.539285659790039.
Better model found at epoch 1 with valid_loss value: 12.123456001281738.
epoch  train_loss  valid_loss  time
0      5.197007    5.579152    00:00
1      5.154862    5.445522    00:00
Better model found at epoch 0 with valid_loss value: 5.5791521072387695.
Better model found at epoch 1 with valid_loss value: 5.445522308349609.
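The next run (again a sketch; the setup is an assumption) passes `every_epoch=True`, so a checkpoint is written after each epoch regardless of improvement, which is why no "Better model found" messages accompany the following table:

learn = synth_learner(n_trn=2, path=Path.cwd()/'tmp')
learn.fit(n_epoch=2, cbs=SaveModelCallback(every_epoch=True))
# one file per epoch: tmp/models/model_0.pth and tmp/models/model_1.pth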
epoch  train_loss  valid_loss  time
0      4.982775    5.264440    00:00
1      4.887252    5.038480    00:00
epoch  train_loss  valid_loss  time
0      4.578584    4.781651    00:00
1      4.454868    4.507101    00:00
2      4.322047    4.232390    00:00
3      4.186467    3.957614    00:00
Reduce the learning rate on plateaus
class ReduceLROnPlateau(TrackerCallback):
    "A `TrackerCallback` that reduces learning rate when a metric has stopped improving."
    order=TrackerCallback.order+2
    def __init__(self,
        monitor='valid_loss', # value (usually loss or metric) being monitored.
        comp=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
        min_delta=0., # minimum delta between the last monitor value and the best monitor value.
        patience=1, # number of epochs to wait when training has not improved model.
        factor=10., # divisor used to reduce the learning rate when lowering it.
        min_lr=0, # minimum learning rate allowed; learning rate cannot be reduced below this minimum.
        reset_on_fit=True # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
    ):
        super().__init__(monitor=monitor, comp=comp, min_delta=min_delta, reset_on_fit=reset_on_fit)
        self.patience,self.factor,self.min_lr = patience,factor,min_lr

    def before_fit(self): self.wait = 0; super().before_fit()

    def after_epoch(self):
        "Compare the value monitored to its best score and reduce LR by `factor` if no improvement."
        super().after_epoch()
        if self.new_best: self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                old_lr = self.opt.hypers[-1]['lr']
                for h in self.opt.hypers: h['lr'] = max(h['lr'] / self.factor, self.min_lr)
                self.wait = 0
                if self.opt.hypers[-1]["lr"] < old_lr:
                    print(f'Epoch {self.epoch}: reducing lr to {self.opt.hypers[-1]["lr"]}')
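A minimal usage sketch (the learner, epoch count, starting learning rate, and `patience` are assumptions): once `valid_loss` has not improved for `patience` epochs, the learning rate is divided by `factor`, and a message is printed whenever it actually decreases.

learn = synth_learner(n_trn=2)
# with such a tiny lr the loss barely moves, so the plateau logic kicks in
learn.fit(n_epoch=4, lr=1e-7,
          cbs=ReduceLROnPlateau(monitor='valid_loss', patience=1, factor=10.))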