! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab
Model hooks
from __future__ import annotations
from fastai.basics import *
from nbdev.showdoc import *
import math
Callback and helper functions to add hooks in models
from fastai.test_utils import *
What are hooks?
Hooks are functions you can attach to a particular layer in your model that will be executed during the forward pass (forward hooks) or the backward pass (backward hooks). We first introduce the concept of hooks here, but if you want to quickly implement one, you can skip ahead to HookCallback (and read the ActivationStats example below).
A forward hook is a function that takes three arguments: the layer it's applied to, the input of that layer, and the output of that layer.
tst_model = nn.Linear(5,3)
def example_forward_hook(m,i,o): print(m,i,o)

x = torch.randn(4,5)
hook = tst_model.register_forward_hook(example_forward_hook)
y = tst_model(x)
hook.remove()
Linear(in_features=5, out_features=3, bias=True) (tensor([[ 0.0117, -0.1157, -1.0055, 0.6962, 0.2329],
[-0.6184, 0.3594, 2.2896, -0.8758, 0.2538],
[-0.5746, -1.3166, -0.3460, 2.0481, 0.9366],
[ 0.6335, 0.7545, -2.2502, 0.2476, 0.0433]]),) tensor([[-0.4212, 0.4666, 0.3218],
[-0.2607, 0.3498, -0.4724],
[-0.0859, 0.9967, 0.6624],
[-0.6484, 0.2241, 0.2266]], grad_fn=<AddmmBackward0>)
A backward hook is a function that takes three arguments: the layer it's applied to, the gradients of the loss with respect to the input, and the gradients with respect to the output.
def example_backward_hook(m,gi,go): print(m,gi,go)
hook = tst_model.register_backward_hook(example_backward_hook)

x = torch.randn(4,5)
y = tst_model(x)
loss = y.pow(2).mean()
loss.backward()
hook.remove()
Linear(in_features=5, out_features=3, bias=True) (tensor([-0.3353, 0.0533, 0.1919]), None, tensor([[ 0.1835, 0.2476, -0.1970],
[ 0.2397, 0.0214, -0.1315],
[-0.0041, -0.2241, 0.0439],
[ 0.2395, 0.1597, -0.1938],
[ 0.3009, 0.1428, -0.1170]])) (tensor([[-0.0592, 0.0075, 0.0259],
[-0.1145, -0.0367, 0.0386],
[-0.1182, -0.0369, 0.1108],
[-0.0435, 0.1193, 0.0165]]),)
/home/jhoward/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1033: UserWarning: Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.
warnings.warn("Using a non-full backward hook when the forward contains multiple autograd Nodes "
Hooks can change the input/output of a layer, or the gradients, and print values or shapes. If you want to store something related to those inputs/outputs, it's best to have your hook associated with a class, so that it can put it in the state of an instance of that class.
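For instance, here is a minimal, hypothetical sketch of that idea in plain PyTorch (the StoreOutput class below is purely illustrative; fastai's Hook class defined next is the full version of this pattern):

# Toy sketch only: store a layer's output on an instance attribute.
class StoreOutput:
    "Illustrative hook class that stashes the layer's output in `self.stored`"
    def __init__(self, m): self.stored,self.handle = None,m.register_forward_hook(self.hook_fn)
    def hook_fn(self, m, i, o): self.stored = o.detach()
    def remove(self): self.handle.remove()

lin = nn.Linear(5,3)
sh = StoreOutput(lin)
_ = lin(torch.randn(4,5))
test_eq(sh.stored.shape, torch.Size([4,3]))
sh.remove()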
Hook -
@docs
class Hook():
    "Create a hook on `m` with `hook_func`."
    def __init__(self, m, hook_func, is_forward=True, detach=True, cpu=False, gather=False):
        store_attr('hook_func,detach,cpu,gather')
        f = m.register_forward_hook if is_forward else m.register_backward_hook
        self.hook = f(self.hook_fn)
        self.stored,self.removed = None,False

    def hook_fn(self, module, input, output):
        "Applies `hook_func` to `module`, `input`, `output`."
        if self.detach:
            input,output = to_detach(input, cpu=self.cpu, gather=self.gather),to_detach(output, cpu=self.cpu, gather=self.gather)
        self.stored = self.hook_func(module, input, output)

    def remove(self):
        "Remove the hook from the model."
        if not self.removed:
            self.hook.remove()
            self.removed=True

    def __enter__(self, *args): return self
    def __exit__(self, *args): self.remove()

    _docs = dict(__enter__="Register the hook",
                 __exit__="Remove the hook")
This will be called during the forward pass if is_forward=True, otherwise during the backward pass, and will optionally detach, gather and put on the cpu the input/output of the model (or the gradients) before passing them to hook_func. The result of hook_func is stored in the stored attribute of the Hook.
tst_model = nn.Linear(5,3)
hook = Hook(tst_model, lambda m,i,o: o)
y = tst_model(x)
test_eq(hook.stored, y)
show_doc(Hook.hook_fn)
Hook.hook_fn [source]
Hook.hook_fn(module, input, output)
Applies hook_func to module, input, output.
show_doc(Hook.remove)
It's important to properly remove your hooks from your model when you're done with them, to avoid them being called again the next time your model is applied to some inputs, and to free the memory associated with their state.
tst_model = nn.Linear(5,10)
x = torch.randn(4,5)
y = tst_model(x)
hook = Hook(tst_model, example_forward_hook)
test_stdout(lambda: tst_model(x), f"{tst_model} ({x},) {y.detach()}")
hook.remove()
test_stdout(lambda: tst_model(x), "")
Context Manager
Since it's very important to remove your Hook even if your code is interrupted by a bug, Hook can be used as a context manager.
show_doc(Hook.__enter__)
show_doc(Hook.__exit__)
tst_model = nn.Linear(5,10)
x = torch.randn(4,5)
y = tst_model(x)
with Hook(tst_model, example_forward_hook) as h:
    test_stdout(lambda: tst_model(x), f"{tst_model} ({x},) {y.detach()}")
test_stdout(lambda: tst_model(x), "")
def _hook_inner(m,i,o): return o if isinstance(o,Tensor) or is_listy(o) else list(o)

def hook_output(module, detach=True, cpu=False, grad=False):
    "Return a `Hook` that stores activations of `module` in `self.stored`"
    return Hook(module, _hook_inner, detach=detach, cpu=cpu, is_forward=not grad)
The activations stored are the gradients if grad=True, otherwise the output of module. If detach=True they are detached from their history, and if cpu=True they are put on the CPU.
tst_model = nn.Linear(5,10)
x = torch.randn(4,5)
with hook_output(tst_model) as h:
    y = tst_model(x)
    test_eq(y, h.stored)
    assert not h.stored.requires_grad
with hook_output(tst_model, grad=True) as h:
    y = tst_model(x)
    loss = y.pow(2).mean()
    loss.backward()
test_close(2*y / y.numel(), h.stored[0])
#|cuda
with hook_output(tst_model, cpu=True) as h:
    y = tst_model.cuda()(x.cuda())
test_eq(h.stored.device, torch.device('cpu'))
Hooks -
@docs
class Hooks():
    "Create several hooks on the modules in `ms` with `hook_func`."
    def __init__(self, ms, hook_func, is_forward=True, detach=True, cpu=False):
        self.hooks = [Hook(m, hook_func, is_forward, detach, cpu) for m in ms]

    def __getitem__(self,i): return self.hooks[i]
    def __len__(self):       return len(self.hooks)
    def __iter__(self):      return iter(self.hooks)
    @property
    def stored(self): return L(o.stored for o in self)

    def remove(self):
        "Remove the hooks from the model."
        for h in self.hooks: h.remove()

    def __enter__(self, *args): return self
    def __exit__ (self, *args): self.remove()

    _docs = dict(stored = "The states saved in each hook.",
                 __enter__="Register the hooks",
                 __exit__="Remove the hooks")
layers = [nn.Linear(5,10), nn.ReLU(), nn.Linear(10,3)]
tst_model = nn.Sequential(*layers)
hooks = Hooks(tst_model, lambda m,i,o: o)
y = tst_model(x)
test_eq(hooks.stored[0], layers[0](x))
test_eq(hooks.stored[1], F.relu(layers[0](x)))
test_eq(hooks.stored[2], y)
hooks.remove()
show_doc(Hooks.stored, name='Hooks.stored')
Hooks.stored [source]
The states saved in each hook.
show_doc(Hooks.remove)
Context Manager
Like Hook, you can use Hooks as a context manager.
show_doc(Hooks.__enter__)
show_doc(Hooks.__exit__)
layers = [nn.Linear(5,10), nn.ReLU(), nn.Linear(10,3)]
tst_model = nn.Sequential(*layers)
with Hooks(layers, lambda m,i,o: o) as h:
    y = tst_model(x)
    test_eq(h.stored[0], layers[0](x))
    test_eq(h.stored[1], F.relu(layers[0](x)))
    test_eq(h.stored[2], y)
def hook_outputs(modules, detach=True, cpu=False, grad=False):
    "Return `Hooks` that store activations of all `modules` in `self.stored`"
    return Hooks(modules, _hook_inner, detach=detach, cpu=cpu, is_forward=not grad)
The activations stored are the gradients if grad=True, otherwise the outputs of modules. If detach=True they are detached from their history, and if cpu=True they are put on the CPU.
layers = [nn.Linear(5,10), nn.ReLU(), nn.Linear(10,3)]
tst_model = nn.Sequential(*layers)
x = torch.randn(4,5)
with hook_outputs(layers) as h:
    y = tst_model(x)
    test_eq(h.stored[0], layers[0](x))
    test_eq(h.stored[1], F.relu(layers[0](x)))
    test_eq(h.stored[2], y)
    for s in h.stored: assert not s.requires_grad
with hook_outputs(layers, grad=True) as h:
    y = tst_model(x)
    loss = y.pow(2).mean()
    loss.backward()
g = 2*y / y.numel()
test_close(g, h.stored[2][0])
g = g @ layers[2].weight.data
test_close(g, h.stored[1][0])
g = g * (layers[0](x) > 0).float()
test_close(g, h.stored[0][0])
#|cuda
with hook_outputs(tst_model, cpu=True) as h:
    y = tst_model.cuda()(x.cuda())
for s in h.stored: test_eq(s.device, torch.device('cpu'))
def dummy_eval(m, size=(64,64)):
    "Evaluate `m` on a dummy input of a certain `size`"
    ch_in = in_channels(m)
    x = one_param(m).new(1, ch_in, *size).requires_grad_(False).uniform_(-1.,1.)
    with torch.no_grad(): return m.eval()(x)
def model_sizes(m, size=(64,64)):
    "Pass a dummy input through the model `m` to get the various sizes of activations."
    with hook_outputs(m) as hooks:
        _ = dummy_eval(m, size=size)
        return [o.stored.shape for o in hooks]
m = nn.Sequential(ConvLayer(3, 16), ConvLayer(16, 32, stride=2), ConvLayer(32, 32))
test_eq(model_sizes(m), [[1, 16, 64, 64], [1, 32, 32, 32], [1, 32, 32, 32]])
def num_features_model(m):
    "Return the number of output features for `m`."
    sz,ch_in = 32,in_channels(m)
    while True:
        # Try a few sizes in case the model requires a big input size.
        try:
            return model_sizes(m, (sz,sz))[-1][1]
        except Exception as e:
            sz *= 2
            if sz > 2048: raise e
m = nn.Sequential(nn.Conv2d(5,4,3), nn.Conv2d(4,3,3))
test_eq(num_features_model(m), 3)
m = nn.Sequential(ConvLayer(3, 16), ConvLayer(16, 32, stride=2), ConvLayer(32, 32))
test_eq(num_features_model(m), 32)
HookCallback -
To make hooks easier to use, we wrap them in a callback where you only have to implement a hook function (plus any element you might need).
def has_params(m):
    "Check if `m` has at least one parameter"
    return len(list(m.parameters())) > 0
assert has_params(nn.Linear(3,4))
assert has_params(nn.LSTM(4,5,2))
assert not has_params(nn.ReLU())
@funcs_kwargs
class HookCallback(Callback):
    "`Callback` that can be used to register hooks on `modules`"
    _methods = ["hook"]
    hook = noops
    def __init__(self, modules=None, every=None, remove_end=True, is_forward=True, detach=True, cpu=True, include_paramless=False, **kwargs):
        store_attr('modules,every,remove_end,is_forward,detach,cpu,include_paramless')
        assert not kwargs

    def before_fit(self):
        "Register the `Hooks` on `self.modules`."
        if self.modules is None: self.modules = [m for m in flatten_model(self.model) if self.include_paramless or has_params(m)]
        if self.every is None: self._register()

    def before_batch(self):
        if self.every is None: return
        if self.training and self.train_iter%self.every==0: self._register()

    def after_batch(self):
        if self.every is None: return
        if self.training and self.train_iter%self.every==0: self._remove()

    def after_fit(self):
        "Remove the `Hooks`."
        if self.remove_end: self._remove()

    def _register(self): self.hooks = Hooks(self.modules, self.hook, self.is_forward, self.detach, self.cpu)
    def _remove(self):
        if getattr(self, 'hooks', None): self.hooks.remove()
    def __del__(self): self._remove()
You can either subclass and implement a hook function (along with any event you want), or pass a hook function when initializing. Such a function needs to take three arguments: a layer, an input and an output (for a backward hook, the input is the gradients with respect to the inputs and the output is the gradients with respect to the output), and it can either modify them or update the state according to them.

If not provided, modules will default to the layers of self.model that have a weight attribute. (To also include the layers of self.model that do not have a weight attribute, such as ReLU, Flatten, etc., pass include_paramless=True.) Depending on remove_end, the hooks will be properly removed at the end of training (or in case of an error). is_forward, detach and cpu are passed to Hooks.

The function called at each forward (or backward) pass is self.hook, and it must be implemented when subclassing this callback.
class TstCallback(HookCallback):
    def hook(self, m, i, o): return o
    def after_batch(self): test_eq(self.hooks.stored[0], self.pred)

learn = synth_learner(n_trn=5, cbs = TstCallback())
learn.fit(1)
[0, 7.570430278778076, 7.6170854568481445, '00:00']
/home/jhoward/git/fastai/fastai/callback/core.py:67: UserWarning: You are shadowing an attribute (modules) that exists in the learner. Use `self.learn.modules` to avoid this
warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
class TstCallback(HookCallback):
    def __init__(self, modules=None, remove_end=True, detach=True, cpu=False):
        super().__init__(modules, None, remove_end, False, detach, cpu)
    def hook(self, m, i, o): return o
    def after_batch(self):
        if self.training:
            test_eq(self.hooks.stored[0][0], 2*(self.pred-self.y)/self.pred.shape[0])

learn = synth_learner(n_trn=5, cbs = TstCallback())
learn.fit(1)
[0, 15.194129943847656, 15.124653816223145, '00:00']
show_doc(HookCallback.before_fit)
Model summary
def total_params(m):
    "Give the number of parameters of a module and if it's trainable or not"
    params = sum([p.numel() for p in m.parameters()])
    trains = [p.requires_grad for p in m.parameters()]
    return params, (False if len(trains)==0 else trains[0])
test_eq(total_params(nn.Linear(10,32)), (32*10+32,True))
test_eq(total_params(nn.Linear(10,32, bias=False)), (32*10,True))
test_eq(total_params(nn.BatchNorm2d(20)), (20*2, True))
test_eq(total_params(nn.BatchNorm2d(20, affine=False)), (0,False))
test_eq(total_params(nn.Conv2d(16, 32, 3)), (16*32*3*3 + 32, True))
test_eq(total_params(nn.Conv2d(16, 32, 3, bias=False)), (16*32*3*3, True))
# The first ih layer is 20--10, all the others are 10--10; *4 for the four gates.
test_eq(total_params(nn.LSTM(20, 10, 2)), (4 * (20*10 + 10) + 3 * 4 * (10*10 + 10), True))
def layer_info(learn, *xb):
    "Return layer infos of `model` on `xb` (only support batch first inputs)"
    def _track(m, i, o):
        params, trainable, shape = '', '', ''
        same = any((isinstance(x[0], torch.Tensor) and x[0].shape[1:] == x[1].shape for x in zip(i, o)))
        shape = apply(lambda x: x.shape, o)
        if hasattr(m, 'weight'): # non-activation layer
            params, trainable = total_params(m)
        return (type(m).__name__, params, trainable, shape, same)

    with Hooks(flatten_model(learn.model), _track) as h:
        batch = apply(lambda o:o[:1], xb)
        train_only_cbs = [cb for cb in learn.cbs if hasattr(cb, '_only_train_loop')]
        with learn.removed_cbs(train_only_cbs), learn.no_logging(), learn as l:
            r = l.get_preds(dl=[batch], inner=True, reorder=False)
        return h.stored
The output of _track is expected to be a tuple of: the module name, the number of parameters, whether it is trainable, the output shape, and whether the size stayed the same (same). Three groups of layers can show up:
- non-activation layers (Linear, Conv, etc.)
- activation layers
- pooling layers
Depending on the group, only part of this information is actually returned; the rest is set to ''. For non-activation layers, everything is returned. Activation layers only return their name, the output shape and same. Pooling layers return their name, the new shape and same.
def _m(): return nn.Sequential(nn.Linear(1,50), nn.ReLU(), nn.BatchNorm1d(50), nn.Linear(50, 1))

sample_input = torch.randn((16, 1))
test_eq(layer_info(synth_learner(model=_m()), sample_input), [
    ('Linear', 100, True, [1, 50], False),
    ('ReLU', '', '', [1,50], True),
    ('BatchNorm1d', 100, True, [1, 50], True),
    ('Linear', 51, True, [1, 1], False)
])
# Test with a Flatten layer
def _tst_m(): return nn.Sequential(
    nn.Conv2d(1, 2, kernel_size=3, padding=1, stride=2),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(8,50),
    nn.ReLU(),
    nn.BatchNorm1d(50),
    nn.Linear(50, 1)
)

sample_input = torch.randn((1,1,4,4))
test_eq(layer_info(synth_learner(model=_tst_m()), sample_input), [
    ('Conv2d', 20, True, [1, 2, 2, 2], False),
    ('ReLU', '', '', [1, 2, 2, 2], True),
    ('Flatten', '', '', [1, 8], False),
    ('Linear', 450, True, [1, 50], False),
    ('ReLU', '', '', [1,50], True),
    ('BatchNorm1d', 100, True, [1, 50], True),
    ('Linear', 51, True, [1, 1], False)
])
# Test a model with multiple inputs
class _2InpModel(Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(nn.Linear(2,50), nn.ReLU(), nn.BatchNorm1d(50), nn.Linear(50, 1))
    def forward(self, *inps):
        outputs = torch.cat(inps, dim=-1)
        return self.seq(outputs)

sample_inputs = (torch.randn(16, 1), torch.randn(16, 1))
learn = synth_learner(model=_2InpModel())
learn.dls.n_inp = 2
test_eq(layer_info(learn, *sample_inputs), [
    ('Linear', 150, True, [1, 50], False),
    ('ReLU', '', '', [1,50], True),
    ('BatchNorm1d', 100, True, [1, 50], True),
    ('Linear', 51, True, [1, 1], False)
])
def _get_shapes(o, bs):
    inp = o[first(o)] if (isinstance(o, dict)) else o
    return ' x '.join([str(bs)] + [str(t) for t in inp[1:]])

def _print_shapes(o, bs):
    if isinstance(o, torch.Size): return _get_shapes(o, bs)
    elif isinstance(o, tuple): return _get_shapes(o[0], bs)
    else: return str([_print_shapes(x, bs) for x in o])
def module_summary(learn, *xb):
    "Print a summary of `model` using `xb`"
    #Individual parameters wrapped in ParameterModule aren't called through the hooks in `layer_info`,
    # thus are not counted inside the summary
    #TODO: find a way to have them counted in param number somehow
    infos = layer_info(learn, *xb)
    n,bs = 76,find_bs(xb)
    inp_sz = _print_shapes(apply(lambda x:x.shape, xb), bs)
    res = f"{type(learn.model).__name__} (Input shape: {inp_sz})\n"
    res += "=" * n + "\n"
    res += f"{'Layer (type)':<20} {'Output Shape':<20} {'Param #':<10} {'Trainable':<10}\n"
    res += "=" * n
    ps,trn_ps,j = 0,0,0
    infos = [o for o in infos if o is not None] #see comment in previous cell
    prev_sz = None
    for typ,np,trn,sz,chnged in infos:
        if sz is None: continue
        if j == 0:
            res += f'\n{"":<20} {_print_shapes(sz, bs)[:19]:<20}' # to avoid a double line at the top
        if not chnged and not prev_sz == sz and j > 0: res += "\n" + "_" * n + "\n" + f'{"":<20} {_print_shapes(sz, bs)[:19]:<20}'
        j = 1
        res += f"\n{typ:<20} {'':<20} {np:<10} {str(trn):<10}"
        if np != '':
            ps += np
            if trn: trn_ps += np
        prev_sz = sz
    res += "\n" + "_" * n + "\n"
    res += f"\nTotal params: {ps:,}\n"
    res += f"Total trainable params: {trn_ps:,}\n"
    res += f"Total non-trainable params: {ps - trn_ps:,}\n\n"
    return PrettyString(res)
@patch
def summary(self:Learner):
    "Print a summary of the model, optimizer and loss function."
    xb = self.dls.train.one_batch()[:getattr(self.dls.train, "n_inp", 1)]
    res = module_summary(self, *xb)
    res += f"Optimizer used: {self.opt_func}\nLoss function: {self.loss_func}\n\n"
    if self.opt is not None:
        res += f"Model " + ("unfrozen\n\n" if self.opt.frozen_idx==0 else f"frozen up to parameter group #{self.opt.frozen_idx}\n\n")
    res += "Callbacks:\n" + '\n'.join(f"  - {cb}" for cb in self.cbs.sorted('order'))
    return PrettyString(res)
learn = synth_learner(model=_m())
learn.summary()
Sequential (Input shape: 16 x 1)
============================================================================
Layer (type) Output Shape Param # Trainable
============================================================================
16 x 50
Linear 100 True
ReLU
BatchNorm1d 100 True
____________________________________________________________________________
16 x 1
Linear 51 True
____________________________________________________________________________
Total params: 251
Total trainable params: 251
Total non-trainable params: 0
Optimizer used: functools.partial(<function SGD>, mom=0.9)
Loss function: FlattenedLoss of MSELoss()
Callbacks:
- TrainEvalCallback
- Recorder
#|cuda
learn = synth_learner(model=_m(), cuda=True)
learn.summary()
Sequential (Input shape: 16 x 1)
============================================================================
Layer (type) Output Shape Param # Trainable
============================================================================
16 x 50
Linear 100 True
ReLU
BatchNorm1d 100 True
____________________________________________________________________________
16 x 1
Linear 51 True
____________________________________________________________________________
Total params: 251
Total trainable params: 251
Total non-trainable params: 0
Optimizer used: functools.partial(<function SGD>, mom=0.9)
Loss function: FlattenedLoss of MSELoss()
Callbacks:
- TrainEvalCallback
- Recorder
# Test a model with multiple outputs
class _NOutModel(Module):
    def __init__(self): self.lin = nn.Linear(5, 6)
    def forward(self, x1):
        x = torch.randn((10, 5))
        return x,self.lin(x)

learn = synth_learner(model = _NOutModel())
# output shapes should be (50, 16, 256), (1, 16, 256)
learn.summary()
_NOutModel (Input shape: 16 x 1)
============================================================================
Layer (type) Output Shape Param # Trainable
============================================================================
16 x 6
Linear 36 True
____________________________________________________________________________
Total params: 36
Total trainable params: 36
Total non-trainable params: 0
Optimizer used: functools.partial(<function SGD>, mom=0.9)
Loss function: FlattenedLoss of MSELoss()
Callbacks:
- TrainEvalCallback
- Recorder
# Test the case (as in the book) where learn.dls.train_ds is a list rather than fastai.data.core.Datasets
train_x = torch.rand((100, 4))
train_y = torch.rand((100, 1))

valid_x = torch.rand((100, 4))
valid_y = torch.rand((100,1))

dset = list(zip(train_x,train_y))
valid_dset = list(zip(valid_x,valid_y))

dl = DataLoader(dset, batch_size=16)
val_dl = DataLoader(valid_dset, batch_size=16)

dls = DataLoaders(dl, val_dl)

simple_net = nn.Sequential(
    nn.Linear(4, 2),
    nn.ReLU(),
    nn.Linear(2,1)
)
learn = Learner(dls, simple_net, loss_func=F.l1_loss)
learn.summary()
Sequential (Input shape: 16 x 4)
============================================================================
Layer (type) Output Shape Param # Trainable
============================================================================
16 x 2
Linear 10 True
ReLU
____________________________________________________________________________
16 x 1
Linear 3 True
____________________________________________________________________________
Total params: 13
Total trainable params: 13
Total non-trainable params: 0
Optimizer used: <function Adam>
Loss function: <function l1_loss>
Callbacks:
- TrainEvalCallback
- Recorder
Activation graphs
@delegates()
class ActivationStats(HookCallback):
    "Callback that record the mean and std of activations."
    order = -20
    def __init__(self, with_hist=False, **kwargs):
        super().__init__(**kwargs)
        self.with_hist = with_hist

    def before_fit(self):
        "Initialize stats."
        super().before_fit()
        self.stats = L()

    def hook(self, m, i, o):
        if isinstance(o, tuple): return self.hook_multi_ouput(o)
        o = o.float()
        res = {'mean': o.mean().item(), 'std': o.std().item(),
               'near_zero': (o<=0.05).long().sum().item()/o.numel()}
        if self.with_hist: res['hist'] = o.histc(40,0,10)
        return res

    def hook_multi_ouput(self,o_tuple):
        "For outputs of RNN which are [nested] tuples of tensors"
        res = []
        for o in self._flatten_tuple(o_tuple):
            if not(isinstance(o, Tensor)): continue
            res.append(self.hook(None, None, o))
        return res

    def _flatten_tuple(self, o_tuple):
        "Recursively flatten a [nested] tuple"
        res = []
        for it in o_tuple:
            if isinstance(it, tuple): res += self._flatten_tuple(it)
            else: res += [it]
        return tuple(res)

    def after_batch(self):
        "Take the stored results and puts it in `self.stats`"
        if self.training and (self.every is None or self.train_iter%self.every == 0): self.stats.append(self.hooks.stored)
        super().after_batch()

    def layer_stats(self, idx):
        lstats = self.stats.itemgot(idx)
        return L(lstats.itemgot(o) for o in ('mean','std','near_zero'))

    def hist(self, idx):
        res = self.stats.itemgot(idx).itemgot('hist')
        return torch.stack(tuple(res)).t().float().log1p()

    def color_dim(self, idx, figsize=(10,5), ax=None):
        "The 'colorful dimension' plot"
        res = self.hist(idx)
        if ax is None: ax = subplots(figsize=figsize)[1][0]
        ax.imshow(res, origin='lower')
        ax.axis('off')

    def plot_layer_stats(self, idx):
        _,axs = subplots(1, 3, figsize=(12,3))
        for o,ax,title in zip(self.layer_stats(idx),axs,('mean','std','% near zero')):
            ax.plot(o)
            ax.set_title(title)
learn = synth_learner(n_trn=5, cbs = ActivationStats(every=4))
learn.fit(1)
[0, 9.915902137756348, 10.236139297485352, '00:00']
learn.activation_stats.stats
(#2) [[{'mean': 1.0413528680801392, 'std': 0.4082348346710205, 'near_zero': 0.0}],[{'mean': 0.7963836193084717, 'std': 0.3677118122577667, 'near_zero': 0.0}]]
The first entry contains the means of the model's outputs for each recorded batch of the training set, and the second entry their standard deviations.
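ActivationStats also defines plot_layer_stats and color_dim (see the class above), which you can use to visualize these statistics. A minimal usage sketch follows (color_dim relies on the stored histograms, so it assumes the callback was created with with_hist=True):

# Sketch only: plot the recorded statistics for the first hooked module.
learn = synth_learner(n_trn=5, cbs=ActivationStats(with_hist=True))
learn.fit(1)
learn.activation_stats.plot_layer_stats(0)  # mean / std / % near zero across batches
learn.activation_stats.color_dim(0)         # the 'colorful dimension' histogram plot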
def test_activation_stats_include_paramless(include_paramless=False):
    "create a learner, fit, then check number of layers"
    modl = nn.Sequential(nn.Linear(1,50), nn.ReLU(), nn.BatchNorm1d(50), nn.Linear(50, 1), nn.Flatten())

    learn = synth_learner(model=modl, cbs=ActivationStats(every=4, include_paramless=include_paramless))
    learn.fit(1)

    expected_stats_len = 3
    if include_paramless: expected_stats_len = 5 # include the ReLU and Flatten layers
    test_eq(expected_stats_len, len(learn.activation_stats.modules))

test_activation_stats_include_paramless(include_paramless=True)
test_activation_stats_include_paramless(include_paramless=False)
[0, 11.84472370147705, 7.684460639953613, '00:00']
[0, 10.660934448242188, 6.482079029083252, '00:00']
def test_every(n_tr, every):
    "create a learner, fit, then check number of stats collected"
    learn = synth_learner(n_trn=n_tr, cbs=ActivationStats(every=every))
    learn.fit(1)
    expected_stats_len = math.ceil(n_tr / every)
    test_eq(expected_stats_len, len(learn.activation_stats.stats))

for n_tr in [11, 12, 13]:
    test_every(n_tr, 4)
    test_every(n_tr, 1)
[0, 6.150048732757568, 4.771674156188965, '00:00']
[0, 17.470989227294922, 17.58202362060547, '00:00']
[0, 10.142230987548828, 9.362530708312988, '00:00']
[0, 3.4879150390625, 3.3121471405029297, '00:00']
[0, 14.660429000854492, 17.298110961914062, '00:00']
[0, 22.280864715576172, 18.45922088623047, '00:00']
class TstCallback(HookCallback):
    def hook(self, m, i, o): return o
    def before_fit(self):
        super().before_fit()
        self.means,self.stds = [],[]

    def after_batch(self):
        if self.training:
            self.means.append(self.hooks.stored[0].mean().item())
            self.stds.append(self.hooks.stored[0].std().item())

learn = synth_learner(n_trn=5, cbs = [TstCallback(), ActivationStats()])
learn.fit(1)
test_eq(learn.activation_stats.stats.itemgot(0).itemgot("mean"), learn.tst.means)
test_eq(learn.activation_stats.stats.itemgot(0).itemgot("std"), learn.tst.stds)
[0, 9.024697303771973, 6.801002025604248, '00:00']
Export -
from nbdev import nbdev_export
nbdev_export()
Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 01a_losses.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 10b_tutorial.albumentations.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 18a_callback.training.ipynb.
Converted 18b_callback.preds.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.imagenette.ipynb.
Converted 23_tutorial.vision.ipynb.
Converted 24_tutorial.image_sequence.ipynb.
Converted 24_tutorial.siamese.ipynb.
Converted 24_vision.gan.ipynb.
Converted 30_text.core.ipynb.
Converted 31_text.data.ipynb.
Converted 32_text.models.awdlstm.ipynb.
Converted 33_text.models.core.ipynb.
Converted 34_callback.rnn.ipynb.
Converted 35_tutorial.wikitext.ipynb.
Converted 37_text.learner.ipynb.
Converted 38_tutorial.text.ipynb.
Converted 39_tutorial.transformers.ipynb.
Converted 40_tabular.core.ipynb.
Converted 41_tabular.data.ipynb.
Converted 42_tabular.model.ipynb.
Converted 43_tabular.learner.ipynb.
Converted 44_tutorial.tabular.ipynb.
Converted 45_collab.ipynb.
Converted 46_tutorial.collab.ipynb.
Converted 50_tutorial.datablock.ipynb.
Converted 60_medical.imaging.ipynb.
Converted 61_tutorial.medical_imaging.ipynb.
Converted 65_medical.text.ipynb.
Converted 70_callback.wandb.ipynb.
Converted 71_callback.tensorboard.ipynb.
Converted 72_callback.neptune.ipynb.
Converted 73_callback.captum.ipynb.
Converted 74_callback.azureml.ipynb.
Converted 97_test_utils.ipynb.
Converted 99_pytorch_doc.ipynb.
Converted dev-setup.ipynb.
Converted app_examples.ipynb.
Converted camvid.ipynb.
Converted migrating_catalyst.ipynb.
Converted migrating_ignite.ipynb.
Converted migrating_lightning.ipynb.
Converted migrating_pytorch.ipynb.
Converted migrating_pytorch_verbose.ipynb.
Converted ulmfit.ipynb.
Converted index.ipynb.
Converted quick_start.ipynb.
Converted tutorial.ipynb.